~alcinnz/amphiarao

ref: b8351f834a84aaae9a523cb90c57a78bbc05e39e amphiarao/src/Internal/Load.hs -rw-r--r-- 5.7 KiB
b8351f83 — Adrian Cochrane Support searching for links by label, both in JSON & HTML interfaces 3 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-}
module Internal.Load(load, load', back, next, parseAbsoluteURI) where

import Internal

import Control.Concurrent.MVar
import System.Timeout (timeout)
import Control.Monad.IO.Class

import Data.Aeson
import Data.Text (Text, pack)
import qualified Data.Text as Txt
import Data.Text.Lazy (fromStrict)
import GHC.Generics

import Data.Maybe (fromMaybe)
import qualified Data.ByteString.Lazy as B (toStrict)
import qualified Data.HashMap.Strict as HM

import Network.URI as URI
import Network.URI.Fetch as URI
import Network.URI.Charset (convertCharset)
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import qualified Data.Map as M

mime = words "text/html text/xml application/xml application/xhtml+xml text/plain"

load' :: Internal.Session -> URI -> IO ()
load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do
    resp@(redirected, _, _) <- fetchURL' (loader session') mime uri
    return $ session' { currentURL = redirected, document = parseDocument resp, knownEls = HM.empty }

maybeTimeout :: Session' -> URI -> IO Session' -> IO Session'
maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session =
    -- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds.
    fromMaybe session <$> timeout (delay * 1000) act
maybeTimeout _ _ act = act

---

load :: Internal.Session -> URI -> IO ()
load session uri = do
    modifyMVar_ session $ return . inner
    load' session uri
  where
    inner session'@Session {backStack = backStack', currentURL = currentURL' } =
        session' { backStack = currentURL' : backStack' }

back :: Internal.Session -> IO ()
back session = do
    uri <- modifyMVar session $ return . inner
    load' session uri
  where
    inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } =
        (session' { backStack = bs, nextStack = n:ns }, b)

next :: Internal.Session -> IO ()
next session = do
    uri <- modifyMVar session $ return . inner
    load' session uri
  where
    inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } =
        (session' { backStack = b:bs, nextStack = ns }, n)

---

parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp)
parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text
parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes
parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp)
    where mime' = takeWhile (/= ';') mime
parseDocument (_, _, Left text)
    | Right doc <- XML.parseText XML.def $ fromStrict text = doc
    | otherwise = pageForText text
parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc
parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8.
parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)")

pageForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))

infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.fromList [
                ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""

parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
    el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
    el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
    el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines

parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
            XML.elementAttributes = M.insert "href" url M.empty
        } : parseGemini' lines
    | otherwise = (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
    where
        go (('`':.'`':.'`':._):lines) = parseGemini' lines
        go (_:lines) = go lines
        go [] = []
parseGemini' ("```":lines) = go [] lines
    where
        go texts (('`':.'`':.'`':._):lines) =
            el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (line:lines) = go (texts ++ [line]) lines
        go texts [] = []

parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []