~alcinnz/rhapsode

a59899ddb2ccbe23b8848fa2695fea074c325c44 — Adrian Cochrane 5 years ago 55478a6
Parse HTML, draft code to apply it.
2 files changed, 66 insertions(+), 11 deletions(-)

M rhapsode.cabal
M src/Main.hs
M rhapsode.cabal => rhapsode.cabal +2 -1
@@ 63,7 63,8 @@ executable rhapsode
  build-depends:       base >=4.9 && <4.10,
        http-client, http-client-tls, bytestring,
        html-conduit, xml-conduit, text, containers,
        network-uri
        network-uri,
        stylish-haskell >= 0.2.0, css-syntax
  
  -- Directories containing source files.
  hs-source-dirs:      src

M src/Main.hs => src/Main.hs +64 -10
@@ 7,11 7,17 @@ import qualified Network.HTTP.Client as HTTP
import           Network.HTTP.Client.Internal
import qualified Network.HTTP.Client.TLS as TLS
import           Network.URI
import qualified Data.ByteString.Lazy.Char8 as C8

import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Data.Text (unpack)
import           Data.Text as Txt (pack, unpack, Text(..), append)
import qualified Data.Map as M
import qualified Data.ByteString.Lazy.Char8 as C8

import qualified Data.CSS.Syntax.StyleSheet as CSS
import qualified Data.CSS.Style as Style
import qualified Data.CSS.Syntax.Tokens as CSSTok
import qualified Data.List as L

import DefaultCSS



@@ 24,11 30,21 @@ main = do
    response <- HTTP.httpLbs request manager
    let html = XML.documentRoot $ HTML.parseLBS $ HTTP.responseBody response
    style <- retreiveStyles html manager request
    putStrLn $ unlines style
    C8.putStrLn $ HTTP.responseBody response


cssPriorityAgent = 1
cssPriorityUser = 2
cssPriorityAuthor = 3

retreiveStyles html manager base = do
    css <- externalStyles html manager base
    return $ userAgentCSS : css ++ internalStyles html
    return $ authorStyle (css ++ internalStyles html)
  where
    emptyStyle :: Style.QueryableStyleSheet Style.TrivialPropertyParser
    emptyStyle = Style.queryableStyleSheet {Style.priority = cssPriorityAgent}
    agentStyle = CSS.parse emptyStyle $ Txt.pack userAgentCSS
    authorStyle = foldl CSS.parse $ agentStyle {Style.priority = cssPriorityAuthor}

externalStyles html manager base = go $ linkedStyles html
    where -- TODO parallelise loads


@@ 36,14 52,14 @@ externalStyles html manager base = go $ linkedStyles html
            request <- setUriRelative base link
            response <- HTTP.httpLbs request manager
            rest <- go links
            return (C8.unpack (HTTP.responseBody response) : rest)
            return (Txt.pack (C8.unpack $ HTTP.responseBody response) : rest)
        go [] = return []

linkedStyles (XML.Element "link" attrs _)
    | Just link <- "href" `M.lookup` attrs,
        Just "stylesheet" <- "rel" `M.lookup` attrs,
        testMedia attrs,
        Just uri <- parseURIReference $ unpack link = [uri]
        Just uri <- parseURIReference $ Txt.unpack link = [uri]
linkedStyles (XML.Element _ _ children) =
    concat [linkedStyles el | XML.NodeElement el <- children]



@@ 55,11 71,49 @@ internalStyles (XML.Element _ _ children) =
testMedia attrs = media == Nothing || media == Just "speech"
    where media = "media" `M.lookup` attrs


strContent (XML.NodeContent text : rest) = unpack text ++ strContent rest
strContent :: [XML.Node] -> Txt.Text
strContent (XML.NodeContent text : rest) = text `Txt.append` strContent rest
-- We do want to read in comments for CSS, just not for display.
strContent (XML.NodeComment text : rest) = unpack text ++ strContent rest
strContent (XML.NodeComment text : rest) = text `Txt.append` strContent rest
strContent (XML.NodeElement (XML.Element _ _ children):rest) =
    strContent children ++ strContent rest
    strContent children `Txt.append` strContent rest
strContent (_:rest) = strContent rest
strContent [] = ""


stylizeEl = stylizeEl' Nothing Style.temp Nothing
stylizeEl' parent parentStyle previous stylesheet el@(XML.Element _ _ children) =
    XML.Element {
        XML.elementName = XML.Name "style" Nothing Nothing,
        XML.elementAttributes = innerStyle,
        XML.elementNodes = stylizeNodes (Just stylishEl) style Nothing stylesheet children
    } where
        style@(MPP innerStyle) = Style.cascade stylesheet stylishEl overrides parentStyle
        stylishEl = elToStylish el parent previous
        overrides = [] -- TODO parse style attribute
stylizeNodes up upStyle prev styles (XML.NodeContent txt:nodes) =
    XML.NodeContent txt : stylizeNodes up upStyle prev styles nodes
stylizeNodes up upStyle prev styles (XML.NodeElement el:nodes) =
    XML.NodeElement (stylizeEl' up upStyle prev styles el) :
        stylizeNodes up upStyle stylishEl styles nodes
    where stylishEl = Just $ elToStylish el up prev
stylizeNodes _ _ _ _ [] = []

elToStylish (XML.Element (XML.Name name _ _) attrs _) parent previous =
    Style.ElementNode {
        Style.name = name,
        Style.attributes = L.sort [
            Style.Attribute (XML.nameLocalName name) (Txt.unpack value)
            | (name, value) <- M.toList attrs
        ],
        Style.parent = parent,
        Style.previous = previous
    }

data MapPropertyParser = MPP (M.Map XML.Name Text)
instance Style.PropertyParser MapPropertyParser where
    temp = MPP M.empty
    longhand _ (MPP self) name value = Just $ MPP $ M.insert xmlName xmlValue self
        where
            xmlName = XML.Name name Nothing Nothing
            xmlValue = CSSTok.serialize value