~alcinnz/rhapsode

ref: ae78008cd512e65b97a7b47d4d7d50d1d48a7ca8 rhapsode/src/Input.hs -rw-r--r-- 5.2 KiB
ae78008c — Adrian Cochrane Useragent CSS: Properly count table cells, adjust image labelling. 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Input(fetchDocument, docForText) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import           Network.URI.Charset
import qualified Data.Map as M

import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import Data.FileEmbed

-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

fetchDocument http mime uri = fetchURL' http mime uri >>= parseDocument http
parseDocument sess (uri, "html/x-error\t", resp) = parseDocument sess (uri, "text/html", resp)
parseDocument _ (_, "text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ (_, "text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ (_, "text/gemini", Left text) = return $ parseGemini text
parseDocument _ (_, "text/gemini", Right bytes) = return $ parseGemini $ utf8' bytes

parseDocument _ (_, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = return doc
    | otherwise = return $ docForText text
parseDocument _ (_, _, Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
parseDocument _ (_, 't':'e':'x':'t':'/':_, Right bytes) =
    -- charset wasn't specified, so assume utf-8.
    return $ docForText $ utf8' bytes
parseDocument sess resp@(_, mime, _) = do
    dir <- getCurrentDirectory -- TODO find Downloads directory.
    ret <- saveDownload nullURI {
        uriScheme = "file:",
        uriAuthority = Just (URIAuth "" "" "")
    } dir resp >>= dispatchByMIME sess mime
    return $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))

infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Txt.Text -> XML.Document
parseGemini txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.empty,
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
    el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
    el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
    el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines

parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
            XML.elementAttributes = M.insert "href" url M.empty
        } : parseGemini' lines
    | otherwise = (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = go [line] lines
    where
        go texts ("```":lines) = el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (('`':.'`':.'`':.line):lines) =
            el "pre" (Txt.unlines texts) : el "p" line : parseGemini' lines
        go texts (line:lines) = go (texts ++ [line]) lines
        go texts [] = []
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []

--------
---- C API
--------
foreign export ccall c_newSession :: IO (StablePtr Session)
foreign export ccall c_freeSession :: StablePtr Session -> IO ()

c_newSession = do
    sess <- newSession
    newStablePtr $ sess {aboutPages = map lazify $(embedDir "about")}
  where lazify (a, b) = (a, B.fromStrict b)
c_freeSession = freeStablePtr


foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)

c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
    doc <- fetchDocument session (words mimes) uri'
    newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}