~alcinnz/rhapsode

ref: c07c86488309ce17d3ee024e4fe00cd555919a66 rhapsode/src/Input.hs -rw-r--r-- 4.2 KiB
c07c8648 — Adrian Cochrane Allow disabling of URL rewriting for SSML output. 4 years ago
                                                                                
c900dfa4 Adrian Cochrane
0a65b2c1 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
2e1b1a76 Adrian Cochrane
c900dfa4 Adrian Cochrane
06d8ec36 Adrian Cochrane
4777781c Adrian Cochrane
8670cb04 Adrian Cochrane
5914c323 Adrian Cochrane
06d8ec36 Adrian Cochrane
8670cb04 Adrian Cochrane
06d8ec36 Adrian Cochrane
2e1b1a76 Adrian Cochrane
c900dfa4 Adrian Cochrane
4777781c Adrian Cochrane
0a65b2c1 Adrian Cochrane
4777781c Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText, writeDownloadToFile) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import qualified Data.Map as M

import System.IO
import System.IO.Temp
import Data.Default.Class

-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String

-- FIXME segfaults when exposed to C.
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
--    let u = relativeTo url refererURL
--    let charsets' = map Txt.unpack charsets
--    resp <- fetchURL session mime u
--    (a, b) <- case resp of
--        ("text/css", bytes) -> let
--                cssParser = CSS.parseForURL (conditionalStyles u "document") u
--            in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
--        _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
--    return (u, a, b)

--applyCSScharset (charset:charsets) (Right bytes)
--        | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
--        | otherwise = applyCSScharset charsets $ Right bytes
--    where
--        text = convertCharset charset $ B.toStrict bytes
--applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
--applyCSScharset _ (Left text) = text
--cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
--        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
--    | otherwise = ""
--skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
--skipCSSspace toks = toks

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
    localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
    result <- dispatchByMIME session mime localURI
    case result of
        Just text -> return $ docForText $ Txt.pack text
        Nothing -> parseDocument session ("application/xhtml+xml", download)

writeDownloadToFile (Left text) file handle = do
    hPutStr handle $ Txt.unpack text
    return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
    B.hPut handle bytes
    return $ URI "file:" Nothing file "" ""

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

--------
---- C API
--------
foreign export ccall c_newSession :: IO (StablePtr Session)
foreign export ccall c_freeSession :: StablePtr Session -> IO ()

c_newSession = newSession >>= newStablePtr
c_freeSession = freeStablePtr


foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)

c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
    doc <- fetchDocument session (words mimes) uri'
    newStablePtr $ Page {url = uri', html = doc, css = html2css doc uri'}