~alcinnz/haskell-stylist

ref: 7061161d3db1783671a9d594ac4974d11c182bdc haskell-stylist/stylish-html-conduit/src/Data/HTML2CSS.hs -rw-r--r-- 3.8 KiB
7061161d — Adrian Cochrane Parse HTML 'style' attributes. 5 years ago
                                                                                
3630f677 Adrian Cochrane
03ba02a4 Adrian Cochrane
3630f677 Adrian Cochrane
03ba02a4 Adrian Cochrane
3630f677 Adrian Cochrane
7061161d Adrian Cochrane
3630f677 Adrian Cochrane
0dc35ad2 Adrian Cochrane
3630f677 Adrian Cochrane
0dc35ad2 Adrian Cochrane
3630f677 Adrian Cochrane
0dc35ad2 Adrian Cochrane
3630f677 Adrian Cochrane
932c3586 Adrian Cochrane
3630f677 Adrian Cochrane
7061161d Adrian Cochrane
3630f677 Adrian Cochrane
7061161d Adrian Cochrane
3630f677 Adrian Cochrane
932c3586 Adrian Cochrane
3630f677 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
{-# LANGUAGE OverloadedStrings #-}
module Data.HTML2CSS(
        externalStyles, internalStyles,
        cssPriorityAgent, cssPriorityUser, cssPriorityAuthor,
        traverseStyles, traverseStyles', elToStylish
    ) where

import qualified Data.List as L
import qualified Data.Map as M
import qualified Data.Text as Txt

import qualified Text.XML as XML
import Data.CSS.Syntax.StyleSheet
import Data.CSS.Style
import Data.CSS.Syntax.Tokens (tokenize)

import Network.URI

---- Constants
cssPriorityAgent styles = styles {priority = 1}
cssPriorityUser styles = styles {priority = 2}
cssPriorityAuthor styles = styles {priority = 3}

---- Parsing
externalStyles :: PropertyParser s => QueryableStyleSheet s -> (M.Map XML.Name Txt.Text -> Bool) ->
        XML.Element -> (URI -> IO Txt.Text) -> IO (QueryableStyleSheet s)
externalStyles stylesheet testMedia html loadURL = do
    css <- externalStyles' testMedia html loadURL
    return $ foldl parse (cssPriorityAuthor stylesheet) css
externalStyles' testMedia html loadURL = go $ linkedStyles' testMedia html
    where -- TODO parallelise loads
        go (link:links) = do
            response <- loadURL $ link
            rest <- go links
            return $ response : rest
        go [] = return []

linkedStyles' testMedia (XML.Element (XML.Name "link" _ _) attrs _)
    | Just link <- "href" `M.lookup` attrs,
        Just "stylesheet" <- "rel" `M.lookup` attrs,
        testMedia attrs,
        Just uri <- parseURIReference $ Txt.unpack link = [uri]
linkedStyles' testMedia (XML.Element _ _ children) =
    concat [linkedStyles' testMedia el | XML.NodeElement el <- children]

internalStyles testMedia stylesheet html =
    foldl parse (cssPriorityAuthor stylesheet) $ internalStyles' testMedia html
internalStyles' testMedia (XML.Element (XML.Name "style"_ _) attrs children)
    | testMedia attrs = [strContent children]
internalStyles' testMedia (XML.Element _ _ children) =
    concat [internalStyles' testMedia el | XML.NodeElement el <- children]


strContent :: [XML.Node] -> Txt.Text
strContent (XML.NodeContent text : rest) = text `Txt.append` strContent rest
-- We do want to read in comments for CSS, just not for display.
strContent (XML.NodeComment text : rest) = text `Txt.append` strContent rest
strContent (XML.NodeElement (XML.Element _ _ children):rest) =
    strContent children `Txt.append` strContent rest
strContent (_:rest) = strContent rest
strContent [] = ""

---- Styling
traverseStyles :: PropertyParser s => (s -> [o] -> o) -> (s -> Txt.Text -> o) ->
        QueryableStyleSheet s -> XML.Element -> o
traverseStyles = traverseStyles' Nothing temp Nothing
traverseStyles' parent parentStyle previous builder textBuilder stylesheet el@(
        XML.Element _ attrs children
    ) = builder style $ traverseChildren Nothing children
    where
        stylishEl = elToStylish el parent previous
        maybeEl = Just stylishEl
        style = cascade stylesheet stylishEl overrides parentStyle
        overrides | Just styleAttr <- "style" `M.lookup` attrs =
                fst $ parseProperties' $ tokenize styleAttr
            | otherwise = []

        traverseChildren prev (XML.NodeContent txt:nodes) =
            textBuilder style txt : traverseChildren prev nodes
        traverseChildren prev (XML.NodeElement el:nodes) =
            traverseStyles' maybeEl style prev builder textBuilder stylesheet el :
                traverseChildren (Just $ elToStylish el maybeEl prev) nodes
        traverseChildren prev (_:nodes) = traverseChildren prev nodes
        traverseChildren _ [] = []

elToStylish (XML.Element (XML.Name name _ _) attrs _) parent previous =
    ElementNode {
        name = name,
        attributes = L.sort [
            Attribute (XML.nameLocalName name) (Txt.unpack value)
            | (name, value) <- M.toList attrs
        ],
        parent = parent,
        previous = previous
    }