{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Network.URI
import Network.URI.Fetch
import qualified Data.Map as M
import System.IO
import System.IO.Temp
import Data.Default.Class
-- FIXME segfaults when exposed to C.
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
-- let u = relativeTo url refererURL
-- let charsets' = map Txt.unpack charsets
-- resp <- fetchURL session mime u
-- (a, b) <- case resp of
-- ("text/css", bytes) -> let
-- cssParser = CSS.parseForURL (conditionalStyles u "document") u
-- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
-- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
-- return (u, a, b)
--applyCSScharset (charset:charsets) (Right bytes)
-- | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
-- | otherwise = applyCSScharset charsets $ Right bytes
-- where
-- text = convertCharset charset $ B.toStrict bytes
--applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
--applyCSScharset _ (Left text) = text
--cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
-- (CSSTok.String charset:_) <- skipCSSspace toks' = charset
-- | otherwise = ""
--skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
--skipCSSspace toks = toks
fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
result <- dispatchByMIME session mime localURI
case result of
Just text -> return $ docForText $ Txt.pack text
Nothing -> parseDocument session ("application/xhtml+xml", download)
writeDownloadToFile (Left text) file handle = do
hPutStr handle $ Txt.unpack text
return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
B.hPut handle bytes
return $ URI "file:" Nothing file "" ""
docForText txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}