~alcinnz/rhapsode

ref: 8670cb04f09e22133c7b26dd01b9426f021062a3 rhapsode/src/Input.hs -rw-r--r-- 3.3 KiB
8670cb04 — Adrian Cochrane Remove dead imports. 4 years ago
                                                                                
c900dfa4 Adrian Cochrane
5914c323 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
c900dfa4 Adrian Cochrane
e41dd031 Adrian Cochrane
2e1b1a76 Adrian Cochrane
c900dfa4 Adrian Cochrane
06d8ec36 Adrian Cochrane
8670cb04 Adrian Cochrane
5914c323 Adrian Cochrane
06d8ec36 Adrian Cochrane
8670cb04 Adrian Cochrane
06d8ec36 Adrian Cochrane
2e1b1a76 Adrian Cochrane
c900dfa4 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import qualified Data.Map as M

import System.IO
import System.IO.Temp
import Data.Default.Class

-- FIXME segfaults when exposed to C.
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
--    let u = relativeTo url refererURL
--    let charsets' = map Txt.unpack charsets
--    resp <- fetchURL session mime u
--    (a, b) <- case resp of
--        ("text/css", bytes) -> let
--                cssParser = CSS.parseForURL (conditionalStyles u "document") u
--            in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
--        _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
--    return (u, a, b)

--applyCSScharset (charset:charsets) (Right bytes)
--        | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
--        | otherwise = applyCSScharset charsets $ Right bytes
--    where
--        text = convertCharset charset $ B.toStrict bytes
--applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
--applyCSScharset _ (Left text) = text
--cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
--        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
--    | otherwise = ""
--skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
--skipCSSspace toks = toks

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
  | otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
    localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
    result <- dispatchByMIME session mime localURI
    case result of
        Just text -> return $ docForText $ Txt.pack text
        Nothing -> parseDocument session ("application/xhtml+xml", download)

writeDownloadToFile (Left text) file handle = do
    hPutStr handle $ Txt.unpack text
    return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
    B.hPut handle bytes
    return $ URI "file:" Nothing file "" ""

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }