~alcinnz/amphiarao

ref: 1067e7dade93a5f4193da768bf2e3d6a771c65db amphiarao/src/Internal/Load.hs -rw-r--r-- 12.6 KiB
1067e7da — Adrian Cochrane Integrate Haskell Stylist for CSS debugging. 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-}
module Internal.Load(load, load', back, next, parseAbsoluteURI, clickEl, isClickableEl) where

import Internal

import Control.Concurrent.MVar
import System.Timeout (timeout)
import Control.Monad.IO.Class

import Data.Aeson
import Data.Text (Text, pack, unpack)
import qualified Data.Text as Txt
import Data.Text.Lazy (fromStrict)
import GHC.Generics

import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.ByteString.Lazy as B (toStrict)
import qualified Data.HashMap.Strict as HM
import Data.List (delete)

import Network.URI as URI
import Network.URI.Fetch as URI
import Network.URI.Charset (convertCharset, charsets)
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import qualified Text.XML.Cursor as XC
import qualified Text.XML.Cursor ((>=>))
import qualified Data.Map as M

import Data.HTML2CSS (html2css)
import Data.CSS.Preprocessor.Conditions as CSS
import Data.CSS.Preprocessor.PsuedoClasses as CSS
import Data.CSS.Style (queryableStyleSheet, TrivialPropertyParser)
import Data.CSS.Syntax.Tokens as CSS

import Internal.Forms

mime = words "text/html text/xml application/xml application/xhtml+xml text/plain"

load' :: Internal.Session -> URI -> IO ()
load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do
    resp@(redirected, _, _) <- URI.fetchURL' (loader session') mime uri
    let doc = parseDocument resp
    css' <- CSS.loadImports (loadText $ loader session') false false
        (html2css doc uri :: ConditionalStyles TrivialPropertyParser) []
    let css'' = CSS.inner $ resolve false false (htmlPsuedoFilter queryableStyleSheet) css'
    return $ session' {
        currentURL = redirected, document = doc, css = css'',
        knownEls = HM.empty, id2els = HM.fromList $ indexedIDs doc
      }
  where
    loadText manager url = do
        response <- fetchURL manager ["text/css"] url
        let charsets' = map unpack charsets
        return $ case response of
            ("text/css", Left text) -> text
            ("text/css", Right bytes) -> applyCSScharset charsets' $ B.toStrict bytes
            (_, _) -> ""
    false = const $ CSS.B False

submit' :: Internal.Session -> (URI, Txt.Text, Txt.Text) -> IO ()
submit' session (uri, query, method) = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do
    resp@(redirected, _, _) <- submitURL (loader session') mime uri method $ Txt.unpack query
    let doc = parseDocument resp
    return $ session' { currentURL = redirected, document = doc,
        knownEls = HM.empty, id2els = HM.fromList $ indexedIDs doc }

maybeTimeout :: Session' -> URI -> IO Session' -> IO Session'
---maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session =
---    -- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds.
---    fromMaybe session <$> timeout (delay * 1000) act
maybeTimeout _ _ act = act

---

load :: Internal.Session -> URI -> IO ()
load session uri = do
    modifyMVar_ session $ return . inner
    load' session uri
  where
    inner session'@Session {backStack = backStack', currentURL = currentURL' } =
        session' { backStack = currentURL' : backStack' }

submit :: Internal.Session -> (URI, Txt.Text, Txt.Text) -> IO ()
submit session form = do
    modifyMVar_ session $ return . inner
    submit' session form
  where
    inner session'@Session {backStack = backStack', currentURL = currentURL' } =
        session' { backStack = currentURL' : backStack' }

back :: Internal.Session -> IO ()
back session = do
    uri <- modifyMVar session $ return . inner
    load' session uri
  where
    inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } =
        (session' { backStack = bs, nextStack = n:ns }, b)

next :: Internal.Session -> IO ()
next session = do
    uri <- modifyMVar session $ return . inner
    load' session uri
  where
    inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } =
        (session' { backStack = b:bs, nextStack = ns }, n)

---

parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp)
parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text
parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes
parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp)
    where mime' = takeWhile (/= ';') mime
parseDocument (_, _, Left text)
    | Right doc <- XML.parseText XML.def $ fromStrict text = doc
    | otherwise = pageForText text
parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc
parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8.
parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)")

pageForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))

infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.fromList [
                ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""

parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
    el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
    el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
    el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines

parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
            XML.elementAttributes = M.insert "href" url M.empty
        } : parseGemini' lines
    | otherwise = (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
    where
        go (('`':.'`':.'`':._):lines) = parseGemini' lines
        go (_:lines) = go lines
        go [] = []
parseGemini' ("```":lines) = go [] lines
    where
        go texts (('`':.'`':.'`':._):lines) =
            el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (line:lines) = go (texts ++ [line]) lines
        go texts [] = []

parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []

----

clickEl :: Internal.Session -> XC.Cursor -> IO ()
clickEl session el | XML.NodeElement el' <- XC.node el = clickEl' session el' el
    | otherwise = return ()

clickEl' session (XML.Element (XML.Name name _ _) attrs _) cursor
    -- LINKS
    -- There's more nuances to links in Rhapsode, but that covered most of them.
    | Just href <- "href" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack href = do
        base <- withMVar session (return . Internal.currentURL)
        load session $ URI.relativeTo uri base
    | Just src <- "src" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack src = do
        base <- withMVar session (return . Internal.currentURL)
        load session $ URI.relativeTo uri base
    -- FORMS
    | name `elem` ["button", "input"], Just "submit" <- "type" `M.lookup` attrs' = do
        case ("name" `M.lookup` attrs', "value" `M.lookup` attrs') of
            (Just name, Just value) -> modifyForm ((:) value) name cursor session
            _ -> return ()
        form <- formAction cursor session
        submit session form
    | name `elem` ["button", "input"], Just "reset" <- "type" `M.lookup` attrs' =
      modifyMVar_ session $ \session' -> do
        let form = findForm cursor $ id2els session'
        let blanked = prefillForm form $ XC.fromDocument $ document session'
        return $ case form of
            XML.Element (XML.Name "form" _ _) _ _ ->
                session' { forms = M.insert form blanked $ forms session' }
            _ -> session'
    | name == "input", Just "radio" <- "type" `M.lookup` attrs',
            Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' =
        modifyForm (const [value]) name cursor session
    | name == "input", Just "checkbox" <- "type" `M.lookup` attrs',
            Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' =
        modifyForm (toggleOption value) name cursor session
    | name == "option", Just value <- "value" `M.lookup` attrs',
            (datalist:_) <- (XC.checkName (== "datalist") XC.>=> XC.ancestor) cursor,
            XML.NodeElement (XML.Element "datalist" attrs2 _) <- XC.node datalist,
            Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs2 = do
        root <- withMVar session (\session' -> return . XC.fromDocument $ document session')
        let refsList (XML.Element _ attrs3 _) = M.lookup "list" (M.mapKeys XML.nameLocalName attrs3) == Just id
        let input = XC.checkElement refsList XC.>=> XC.checkName (== "input") XC.>=> XC.descendant
        let (input', attrs4') = case input root of {
            (input':_) | XML.NodeElement (XML.Element "input" attrs4 _) <- XC.node input' ->
                (input', M.mapKeys XML.nameLocalName attrs4);
            _ -> (root, M.empty)
        }
        case "name" `M.lookup` attrs4' of
            Just name -> modifyForm (const [value]) name input' session
            Nothing -> return ()
    | name == "option", (select:_) <- XC.parent cursor,
            XML.NodeElement select'@(XML.Element "select" attrs2 _) <- XC.node select,
            let attrs2' = M.mapKeys XML.nameLocalName attrs2,
            Just name <- "name" `M.lookup` attrs2, Just value <- "value" `M.lookup` attrs' =
        if "multiple" `M.member` attrs2'
        then modifyForm (toggleOption value) name cursor session
        else modifyForm (const [value]) name cursor session
    | name == "label", Just for <- "for" `M.lookup` attrs' = do
        input <- withMVar session (return . HM.lookup for . id2els)
        case input of { Just input' -> clickEl session input'; Nothing -> return () }
    | otherwise = return ()
  where
    attrs' = M.mapKeys XML.nameLocalName attrs
    toggleOption value old | value `elem` old = delete value old
        | otherwise = value:old

-- Keep inline with clickEl
isClickableEl :: XC.Cursor -> Bool
isClickableEl el | XML.NodeElement el' <- XC.node el = isClickableEl' el'
    | otherwise = False
isClickableEl' (XML.Element name attrs _)
    | Just href <- "href" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack href = True
    | Just src <- "src" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack src = True
    | name `elem` ["input", "label", "button", "option"] = True
    | otherwise = False
  where attrs' = M.mapKeys XML.nameLocalName attrs

indexedIDs doc = mapMaybe extractId $ XC.orSelf XC.descendant $ XC.fromDocument doc
  where
    extractId cursor | XML.NodeElement el@(XML.Element _ attrs _) <- XC.node cursor,
        Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs = Just (id, cursor)
      | otherwise = Nothing

--------
---- CSS charset sniffing
--------
applyCSScharset (charset:charsets) bytes
        | cssCharset (tokenize text) == pack charset = text
        | otherwise = applyCSScharset charsets bytes
    where
        text = convertCharset charset bytes
applyCSScharset _ bytes = convertCharset "utf-8" bytes
cssCharset toks | (AtKeyword "charset":toks') <- skipCSSspace toks,
        (CSS.String charset:_) <- skipCSSspace toks' = charset
    | otherwise = ""
skipCSSspace (Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks