{-# LANGUAGE OverloadedStrings #-}
module Input(fetchDocument, docForText) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Network.URI
import Network.URI.Fetch
import qualified Data.Map as M
import Control.Concurrent.Async
import System.IO
import System.Environment
import System.Directory
import System.IO.Temp
import Data.List
import Data.Default.Class
import Data.Maybe (fromMaybe)
import System.FilePath
import qualified Data.CSS.Syntax.StyleSheet as CSS
import Data.CSS.Preprocessor.Conditions
import qualified Data.CSS.Syntax.Tokens as CSSTok
import qualified Data.HTML2CSS as H2C
import Network.URI.Charset
--fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
-- let u = relativeTo url refererURL
-- let charsets' = map Txt.unpack charsets
-- resp <- fetchURL session mime u
-- (a, b) <- case resp of
-- ("text/css", bytes) -> let
-- cssParser = CSS.parseForURL (conditionalStyles u "document") u
-- in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
-- _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
-- return (u, a, b)
applyCSScharset (charset:charsets) (Right bytes)
| cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
| otherwise = applyCSScharset charsets $ Right bytes
where
text = convertCharset charset $ B.toStrict bytes
applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
applyCSScharset _ (Left text) = text
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
(CSSTok.String charset:_) <- skipCSSspace toks' = charset
| otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks
fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
result <- dispatchByMIME session mime localURI
case result of
Just text -> return $ docForText $ Txt.pack text
Nothing -> parseDocument session ("application/xhtml+xml", download)
writeDownloadToFile (Left text) file handle = do
hPutStr handle $ Txt.unpack text
return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
B.hPut handle bytes
return $ URI "file:" Nothing file "" ""
docForText txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}