{-# LANGUAGE OverloadedStrings #-} module Input(fetchDocument, docForText) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt import Data.Text.Encoding import qualified Data.ByteString.Lazy as B import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI import Network.URI.Fetch import qualified Data.Map as M import Control.Concurrent.Async import System.IO import System.Environment import System.Directory import System.IO.Temp import Data.List import Data.Default.Class import Data.Maybe (fromMaybe) import System.FilePath import qualified Data.CSS.Syntax.StyleSheet as CSS import Data.CSS.Preprocessor.Conditions import qualified Data.CSS.Syntax.Tokens as CSSTok import qualified Data.HTML2CSS as H2C import Network.URI.Charset --fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do -- let u = relativeTo url refererURL -- let charsets' = map Txt.unpack charsets -- resp <- fetchURL session mime u -- (a, b) <- case resp of -- ("text/css", bytes) -> let -- cssParser = CSS.parseForURL (conditionalStyles u "document") u -- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes) -- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL) -- return (u, a, b) applyCSScharset (charset:charsets) (Right bytes) | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text | otherwise = applyCSScharset charsets $ Right bytes where text = convertCharset charset $ B.toStrict bytes applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes applyCSScharset _ (Left text) = text cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, (CSSTok.String charset:_) <- skipCSSspace toks' = charset | otherwise = "" skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks skipCSSspace toks = toks fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes parseDocument _ ("text/plain", Left text) = return $ docForText text parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument session (mime, download) = do localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download result <- dispatchByMIME session mime localURI case result of Just text -> return $ docForText $ Txt.pack text Nothing -> parseDocument session ("application/xhtml+xml", download) writeDownloadToFile (Left text) file handle = do hPutStr handle $ Txt.unpack text return $ URI "file:" Nothing file "" "" writeDownloadToFile (Right bytes) file handle = do B.hPut handle bytes return $ URI "file:" Nothing file "" "" docForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] }