{-# LANGUAGE OverloadedStrings #-} module Input(parseArgs, ProgramCtl(..), fetchURLs) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt import Data.Text.Encoding import qualified Data.ByteString.Lazy as B import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI import Network.URI.Fetch import qualified Data.Map as M import Control.Concurrent.Async import System.IO import System.Environment import System.Directory import System.IO.Temp import Data.List import Data.Default.Class import Data.Maybe (fromMaybe) import System.FilePath import qualified Data.CSS.Syntax.StyleSheet as CSS import Data.CSS.Preprocessor.Conditions import qualified Data.CSS.Syntax.Tokens as CSSTok import qualified Data.HTML2CSS as H2C import Network.URI.Charset --- Commandline arguments data ProgramCtl = ProgramCtl { docs :: [(URI, XML.Document)], outSSML :: Maybe Handle, outLinks :: Maybe Handle } parseArgs :: Session -> IO ProgramCtl parseArgs http = do args <- getArgs let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args cwd <- getCurrentDirectory let base = URI {uriScheme = "file:", uriPath = cwd, uriAuthority = Nothing, uriQuery = "", uriFragment = ""} let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs] docs <- forConcurrently inputs' $ evalInput http outSSML <- parseSSMLout outputs outLinks <- parseLinksOut outputs return $ ProgramCtl { docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs], outSSML = outSSML, outLinks = outLinks } preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val = (flag, Just url) : preparseArgs args preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args preparseArgs [flag@('+':_)] = [(flag, Nothing)] preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args preparseArgs (val:args) | Just url <- parseURIReference val = ("-h", Just url) : preparseArgs args preparseArgs [] = [] preparseArgs (arg:_) = error ("Unsupported argument " ++ arg) parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just parseSSMLout (("+s", Nothing):_) = return $ Just stdout parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout parseSSMLout (_:args) = parseSSMLout args parseSSMLout [] = return Nothing parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just parseLinksOut (("+l", Nothing):_) = return $ Just stdout parseLinksOut (("+links", Nothing):_) = return $ Just stdout parseLinksOut (_:args) = parseLinksOut args parseLinksOut [] = return Nothing evalInput http ("-h", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url evalInput http ("-html", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag) fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do let u = relativeTo url refererURL let charsets' = map Txt.unpack charsets resp <- fetchURL session mime u (a, b) <- case resp of ("text/css", bytes) -> let cssParser = CSS.parseForURL (conditionalStyles u "document") u in return (refererDoc, cssParser $ applyCSScharset charsets' bytes) _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL) return (u, a, b) applyCSScharset (charset:charsets) (Right bytes) | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text | otherwise = applyCSScharset charsets $ Right bytes where text = convertCharset charset $ B.toStrict bytes applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes applyCSScharset _ (Left text) = text cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, (CSSTok.String charset:_) <- skipCSSspace toks' = charset | otherwise = "" skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks skipCSSspace toks = toks fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes parseDocument _ ("text/plain", Left text) = return $ docForText text parseDocument _ ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes parseDocument _ ("application/xhtml+xml", Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument _ ("application/xhtml+xml", Right bytes) | Right doc <- XML.parseLBS def bytes = return doc | otherwise = return $ docForText "Unreadable webpage!" parseDocument session (mime, download) = do localURI <- withSystemTempFile "rhapsode-download" $ writeDownloadToFile download result <- dispatchByMIME session mime localURI case result of Just text -> return $ docForText $ Txt.pack text Nothing -> parseDocument session ("application/xhtml+xml", download) writeDownloadToFile (Left text) file handle = do hPutStr handle $ Txt.unpack text return $ URI "file:" Nothing file "" "" writeDownloadToFile (Right bytes) file handle = do B.hPut handle bytes return $ URI "file:" Nothing file "" "" docForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] }