~alcinnz/rhapsode

ref: a3dc72fecc0c1ea5705e723c2bfe06d53d57c695 rhapsode/src/Input.hs -rw-r--r-- 3.0 KiB
a3dc72fe — Adrian Cochrane Refactor Rhapsode to use new HURL APIs and more leniantly handle MIMEtypes. 4 years ago
                                                                                
c900dfa4 Adrian Cochrane
a3dc72fe Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
a3dc72fe Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
a3dc72fe Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
2e1b1a76 Adrian Cochrane
c900dfa4 Adrian Cochrane
a3dc72fe Adrian Cochrane
06d8ec36 Adrian Cochrane
4777781c Adrian Cochrane
a3dc72fe Adrian Cochrane
06d8ec36 Adrian Cochrane
a3dc72fe Adrian Cochrane
c900dfa4 Adrian Cochrane
4777781c Adrian Cochrane
0a65b2c1 Adrian Cochrane
4777781c Adrian Cochrane
a3dc72fe Adrian Cochrane
4777781c Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
module Input(fetchDocument, docForText) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import           Network.URI.Charset
import qualified Data.Map as M

import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import Data.FileEmbed

-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String

fetchDocument http mime uri = fetchURL' http mime uri >>= parseDocument http
parseDocument sess (uri, "html/x-error\t", resp) = parseDocument sess (uri, "text/html", resp)
parseDocument _ (_, "text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ (_, "text/html", Right bytes) = return $ HTML.parseLBS bytes

parseDocument _ (_, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = return doc
    | otherwise = return $ docForText text
parseDocument _ (_, _, Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
parseDocument _ (_, 't':'e':'x':'t':'/':_, Right bytes) =
    -- charset wasn't specified, so assume utf-8.
    return $ docForText $ convertCharset "utf-8" $ B.toStrict bytes
parseDocument sess resp@(_, mime, _) = do
    dir <- getCurrentDirectory -- TODO find Downloads directory.
    ret <- saveDownload nullURI {
        uriScheme = "file:",
        uriAuthority = Just (URIAuth "" "" "")
    } dir resp >>= dispatchByMIME sess mime
    return $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

--------
---- C API
--------
foreign export ccall c_newSession :: IO (StablePtr Session)
foreign export ccall c_freeSession :: StablePtr Session -> IO ()

c_newSession = do
    sess <- newSession
    newStablePtr $ sess {aboutPages = map lazify $(embedDir "about")}
  where lazify (a, b) = (a, B.fromStrict b)
c_freeSession = freeStablePtr


foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)

c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
    doc <- fetchDocument session (words mimes) uri'
    newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}