{-# LANGUAGE OverloadedStrings #-}
module Input(parseArgs, ProgramCtl(..), fetchURLs, fetchDocument, docForText) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text.Encoding
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Network.URI
import Network.URI.Fetch
import qualified Data.Map as M
import Control.Concurrent.Async
import System.IO
import System.Environment
import System.Directory
import System.IO.Temp
import Data.List
import Data.Default.Class
import Data.Maybe (fromMaybe)
import System.FilePath
import qualified Data.CSS.Syntax.StyleSheet as CSS
import Data.CSS.Preprocessor.Conditions
import qualified Data.CSS.Syntax.Tokens as CSSTok
import qualified Data.HTML2CSS as H2C
import Network.URI.Charset
--- Commandline arguments
data ProgramCtl = ProgramCtl {
docs :: [(URI, XML.Document)],
outSSML :: Maybe Handle,
outLinks :: Maybe Handle
}
parseArgs :: Session -> IO ProgramCtl
parseArgs http = do
args <- getArgs
let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args
cwd <- getCurrentDirectory
let base = URI {uriScheme = "file:", uriPath = cwd,
uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
docs <- forConcurrently inputs' $ evalInput http
outSSML <- parseSSMLout outputs
outLinks <- parseLinksOut outputs
return $ ProgramCtl {
docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
outSSML = outSSML,
outLinks = outLinks
}
preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
(flag, Just url) : preparseArgs args
preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs [flag@('+':_)] = [(flag, Nothing)]
preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
preparseArgs (val:args) | Just url <- parseURIReference val =
("-h", Just url) : preparseArgs args
preparseArgs [] = []
preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)
parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+s", Nothing):_) = return $ Just stdout
parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout
parseSSMLout (_:args) = parseSSMLout args
parseSSMLout [] = return Nothing
parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+l", Nothing):_) = return $ Just stdout
parseLinksOut (("+links", Nothing):_) = return $ Just stdout
parseLinksOut (_:args) = parseLinksOut args
parseLinksOut [] = return Nothing
evalInput http ("-h", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-html", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)
fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
let u = relativeTo url refererURL
let charsets' = map Txt.unpack charsets
resp <- fetchURL session mime u
(a, b) <- case resp of
("text/css", bytes) -> let
cssParser = CSS.parseForURL (conditionalStyles u "document") u
in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
_ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)
return (u, a, b)
applyCSScharset (charset:charsets) (Right bytes)
| cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
| otherwise = applyCSScharset charsets $ Right bytes
where
text = convertCharset charset $ B.toStrict bytes
applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
applyCSScharset _ (Left text) = text
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
(CSSTok.String charset:_) <- skipCSSspace toks' = charset
| otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks
fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument _ ("text/plain", Left text) = return $ docForText text
parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
| otherwise = return $ docForText "Unreadable webpage!"
parseDocument session (mime, download) = do
localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download
result <- dispatchByMIME session mime localURI
case result of
Just text -> return $ docForText $ Txt.pack text
Nothing -> parseDocument session ("application/xhtml+xml", download)
writeDownloadToFile (Left text) file handle = do
hPutStr handle $ Txt.unpack text
return $ URI "file:" Nothing file "" ""
writeDownloadToFile (Right bytes) file handle = do
B.hPut handle bytes
return $ URI "file:" Nothing file "" ""
docForText txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}