{-# LANGUAGE OverloadedStrings #-} module Input(fetchDocument, docForText) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt import Data.Text.Encoding import qualified Data.ByteString.Lazy as B import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI import Network.URI.Fetch import qualified Data.Map as M import System.IO import System.IO.Temp import Data.Default.Class -- FIXME segfaults when exposed to C. --fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do -- let u = relativeTo url refererURL -- let charsets' = map Txt.unpack charsets -- resp <- fetchURL session mime u -- (a, b) <- case resp of -- ("text/css", bytes) -> let -- cssParser = CSS.parseForURL (conditionalStyles u "document") u -- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes) -- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL) -- return (u, a, b) --applyCSScharset (charset:charsets) (Right bytes) -- | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text -- | otherwise = applyCSScharset charsets $ Right bytes -- where -- text = convertCharset charset $ B.toStrict bytes --applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes --applyCSScharset _ (Left text) = text --cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, -- (CSSTok.String charset:_) <- skipCSSspace toks' = charset -- | otherwise = "" --skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks --skipCSSspace toks = toks fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes parseDocument _ ("text/plain", Left text) = return $ docForText text parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument session (mime, download) = do localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download result <- dispatchByMIME session mime localURI case result of Just text -> return $ docForText $ Txt.pack text Nothing -> parseDocument session ("application/xhtml+xml", download) writeDownloadToFile (Left text) file handle = do hPutStr handle $ Txt.unpack text return $ URI "file:" Nothing file "" "" writeDownloadToFile (Right bytes) file handle = do B.hPut handle bytes return $ URI "file:" Nothing file "" "" docForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] }