~alcinnz/haskell-stylist

ref: 7061161d3db1783671a9d594ac4974d11c182bdc haskell-stylist/stylish-html-conduit/src/Data/HTML2CSS.hs -rw-r--r-- 3.8 KiB
7061161d — Adrian Cochrane Parse HTML 'style' attributes. 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{-# LANGUAGE OverloadedStrings #-}
module Data.HTML2CSS(
        externalStyles, internalStyles,
        cssPriorityAgent, cssPriorityUser, cssPriorityAuthor,
        traverseStyles, traverseStyles', elToStylish
    ) where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as Txt

import qualified Text.XML as XML
import Data.CSS.Syntax.StyleSheet
import Data.CSS.Style
import Data.CSS.Syntax.Tokens (tokenize)

import Network.URI

---- Constants
cssPriorityAgent styles = styles {priority = 1}
cssPriorityUser styles = styles {priority = 2}
cssPriorityAuthor styles = styles {priority = 3}

---- Parsing
externalStyles :: PropertyParser s => QueryableStyleSheet s -> (M.Map XML.Name Txt.Text -> Bool) ->
        XML.Element -> (URI -> IO Txt.Text) -> IO (QueryableStyleSheet s)
externalStyles stylesheet testMedia html loadURL = do
    css <- externalStyles' testMedia html loadURL
    return $ foldl parse (cssPriorityAuthor stylesheet) css
externalStyles' testMedia html loadURL = go $ linkedStyles' testMedia html
    where -- TODO parallelise loads
        go (link:links) = do
            response <- loadURL $ link
            rest <- go links
            return $ response : rest
        go [] = return []

linkedStyles' testMedia (XML.Element (XML.Name "link" _ _) attrs _)
    | Just link <- "href" `M.lookup` attrs,
        Just "stylesheet" <- "rel" `M.lookup` attrs,
        testMedia attrs,
        Just uri <- parseURIReference $ Txt.unpack link = [uri]
linkedStyles' testMedia (XML.Element _ _ children) =
    concat [linkedStyles' testMedia el | XML.NodeElement el <- children]

internalStyles testMedia stylesheet html =
    foldl parse (cssPriorityAuthor stylesheet) $ internalStyles' testMedia html
internalStyles' testMedia (XML.Element (XML.Name "style"_ _) attrs children)
    | testMedia attrs = [strContent children]
internalStyles' testMedia (XML.Element _ _ children) =
    concat [internalStyles' testMedia el | XML.NodeElement el <- children]


strContent :: [XML.Node] -> Txt.Text
strContent (XML.NodeContent text : rest) = text `Txt.append` strContent rest
-- We do want to read in comments for CSS, just not for display.
strContent (XML.NodeComment text : rest) = text `Txt.append` strContent rest
strContent (XML.NodeElement (XML.Element _ _ children):rest) =
    strContent children `Txt.append` strContent rest
strContent (_:rest) = strContent rest
strContent [] = ""

---- Styling
traverseStyles :: PropertyParser s => (s -> [o] -> o) -> (s -> Txt.Text -> o) ->
        QueryableStyleSheet s -> XML.Element -> o
traverseStyles = traverseStyles' Nothing temp Nothing
traverseStyles' parent parentStyle previous builder textBuilder stylesheet el@(
        XML.Element _ attrs children
    ) = builder style $ traverseChildren Nothing children
    where
        stylishEl = elToStylish el parent previous
        maybeEl = Just stylishEl
        style = cascade stylesheet stylishEl overrides parentStyle
        overrides | Just styleAttr <- "style" `M.lookup` attrs =
                fst $ parseProperties' $ tokenize styleAttr
            | otherwise = []

        traverseChildren prev (XML.NodeContent txt:nodes) =
            textBuilder style txt : traverseChildren prev nodes
        traverseChildren prev (XML.NodeElement el:nodes) =
            traverseStyles' maybeEl style prev builder textBuilder stylesheet el :
                traverseChildren (Just $ elToStylish el maybeEl prev) nodes
        traverseChildren prev (_:nodes) = traverseChildren prev nodes
        traverseChildren _ [] = []

elToStylish (XML.Element (XML.Name name _ _) attrs _) parent previous =
    ElementNode {
        name = name,
        attributes = L.sort [
            Attribute (XML.nameLocalName name) (Txt.unpack value)
            | (name, value) <- M.toList attrs
        ],
        parent = parent,
        previous = previous
    }