~alcinnz/rhapsode

ref: fc38beb15f3e11de1a26b9b066d9b40c2ecba6af rhapsode/src/Input.hs -rw-r--r-- 4.2 KiB
fc38beb1 — Adrian Cochrane Add new ISSUE 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText, writeDownloadToFile) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import qualified Data.Map as M

import System.IO
import System.IO.Temp
import Data.Default.Class

-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String

-- FIXME segfaults when exposed to C.
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
--    let u = relativeTo url refererURL
--    let charsets' = map Txt.unpack charsets
--    resp <- fetchURL session mime u
--    (a, b) <- case resp of
--        ("text/css", bytes) -> let
--                cssParser = CSS.parseForURL (conditionalStyles u "document") u
--            in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
--        _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
--    return (u, a, b)

--applyCSScharset (charset:charsets) (Right bytes)
--        | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
--        | otherwise = applyCSScharset charsets $ Right bytes
--    where
--        text = convertCharset charset $ B.toStrict bytes
--applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
--applyCSScharset _ (Left text) = text
--cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
--        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
--    | otherwise = ""
--skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
--skipCSSspace toks = toks

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
    localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
    result <- dispatchByMIME session mime localURI
    case result of
        Just text -> return $ docForText $ Txt.pack text
        Nothing -> parseDocument session ("application/xhtml+xml", download)

writeDownloadToFile (Left text) file handle = do
    hPutStr handle $ Txt.unpack text
    return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
    B.hPut handle bytes
    return $ URI "file:" Nothing file "" ""

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

--------
---- C API
--------
foreign export ccall c_newSession :: IO (StablePtr Session)
foreign export ccall c_freeSession :: StablePtr Session -> IO ()

c_newSession = newSession >>= newStablePtr
c_freeSession = freeStablePtr


foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)

c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
    doc <- fetchDocument session (words mimes) uri'
    newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}