@@ 60,11 60,12 @@ executable rhapsode
-- other-extensions:
-- Other library packages from which modules are imported.
- build-depends: base >=4.9 && <4.10, directory,
- http-client, http-client-tls, bytestring,
- html-conduit, xml-conduit, text, containers,
+ build-depends: base >=4.9 && <=4.12, directory,
+ http-client, http-client-tls, bytestring, http-types,
+ html-conduit, xml-conduit, text, containers, data-default-class,
network-uri,
- stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific
+ stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific,
+ async
-- Directories containing source files.
hs-source-dirs: src
@@ 0,0 1,128 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Input(parseArgs, ProgramCtl(..), fetchURL, convertCharset, charsets) where
+
+import qualified Network.HTTP.Client as HTTP
+import Network.HTTP.Types
+import qualified Data.Text as Txt
+import Data.Text.Lazy (fromStrict)
+import Data.Text (Text)
+import Data.Text.Encoding
+import qualified Data.Text.IO as TxtIO
+import qualified Data.ByteString.Lazy as B
+import Data.ByteString.Lazy (ByteString)
+import qualified Text.HTML.DOM as HTML
+import qualified Text.XML as XML
+import Network.URI
+import qualified Data.Map as M
+
+import Control.Concurrent.Async
+
+import System.IO
+import System.Environment
+import System.Directory
+import Data.List
+import Data.Default.Class
+
+--- Commandline arguments
+data ProgramCtl = ProgramCtl {
+ docs :: [(URI, XML.Document)],
+ outSSML :: Handle
+}
+parseArgs :: HTTP.Manager -> IO ProgramCtl
+parseArgs http = do
+ args <- getArgs
+ let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args
+ cwd <- getCurrentDirectory
+ let base = URI {uriScheme = "file:", uriPath = cwd,
+ uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
+ let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
+ docs <- forConcurrently inputs' $ evalInput http
+ outSSML <- parseSSMLout outputs
+ return $ ProgramCtl {
+ docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
+ outSSML = outSSML
+ }
+
+preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
+ (flag, Just url) : preparseArgs args
+preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
+preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
+preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
+preparseArgs (val:args) | Just url <- parseURIReference val =
+ ("-h", Just url) : preparseArgs args
+preparseArgs [] = []
+preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)
+
+parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode
+parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode
+parseSSMLout (("+s", Nothing):_) = return stdout
+parseSSMLout (("+ssml", Nothing):_) = return stdout
+parseSSMLout (_:args) = parseSSMLout args
+parseSSMLout [] = return stdout
+
+evalInput http ("-h", url) = fetchDocument http "text/html" url
+evalInput http ("-html", url) = fetchDocument http "text/html" url
+evalInput http ("-x", url) = fetchDocument http "text/xml" url
+evalInput http ("-xml", url) = fetchDocument http "text/xml" url
+evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)
+
+fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument
+parseDocument ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
+parseDocument ("text/html", Right bytes) = return $ HTML.parseLBS bytes
+parseDocument ("text/plain", Left text) = return $ docForText text
+parseDocument ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
+parseDocument (_, Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
+parseDocument (_, Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
+parseDocument (mime, _) = return $ docForText $ Txt.concat ["Unsupported MIMEtype ", Txt.pack mime]
+
+docForText txt = XML.Document {
+ XML.documentPrologue = XML.Prologue [] Nothing [],
+ XML.documentRoot = XML.Element {
+ XML.elementName = "pre",
+ XML.elementAttributes = M.empty,
+ XML.elementNodes = [XML.NodeContent txt]
+ },
+ XML.documentEpilogue = []
+ }
+
+--------
+
+fetchURL :: HTTP.Manager -> String -> URI -> IO (String, Either Text ByteString)
+fetchURL http defaultMIME uri | uriScheme uri `elem` ["http:", "https:"] = do
+ request <- HTTP.requestFromURI uri
+ response <- HTTP.httpLbs request http
+ return $ case (
+ HTTP.responseBody response,
+ [val | ("content-type", val) <- HTTP.responseHeaders response]
+ ) of
+ ("", _) -> ("text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
+ (response, (mimetype:_)) -> let mime = Txt.toLower $ decodeUtf8 mimetype
+ in resolveCharset (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" $ mime) response
+ (response, []) -> (defaultMIME, Right response)
+
+fetchURL _ defaultMIME uri@URI {uriScheme = "file:"} = do
+ response <- B.readFile $ uriPath uri
+ return (defaultMIME, Right response)
+
+fetchURL _ _ uri = return ("text/plain", Left $ Txt.concat ["Unsupported link type ", Txt.pack $ uriScheme uri])
+
+resolveCharset :: [String] -> ByteString -> (String, Either Text ByteString)
+resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response =
+ (mime, Left $ convertCharset charset $ B.toStrict response)
+resolveCharset (mime:_:params) response = resolveCharset (mime:params) response
+resolveCharset [mime] response = (mime, Right $ response)
+resolveCharset [] response = ("text/plain", Left "Filetype unspecified")
+
+convertCharset "iso-8859-1" = decodeLatin1
+convertCharset "latin1" = decodeLatin1
+convertCharset "us-ascii" = decodeUtf8
+convertCharset "utf-8" = decodeUtf8
+convertCharset "utf-16be" = decodeUtf16BE
+convertCharset "utf-16le" = decodeUtf16LE
+convertCharset "utf-16" = decodeUtf16LE
+convertCharset "utf-32be" = decodeUtf32BE
+convertCharset "utf-32le" = decodeUtf32LE
+convertCharset "utf-32" = decodeUtf32LE
+convertCharset _ = \_ -> "Unsupported text encoding!"
+charsets :: [Text]
+charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]
@@ 9,6 9,7 @@ import Network.HTTP.Client.Internal
import qualified Network.HTTP.Client.TLS as TLS
import Network.URI
import qualified Data.ByteString.Lazy.Char8 as C8
+import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
@@ 26,23 27,24 @@ import qualified Data.Map as M
import Data.Scientific (toRealFloat)
import Data.Maybe (fromJust, fromMaybe)
import System.Directory as Dir
+import Control.Monad
import DefaultCSS
import StyleTree
import SSML
+import Input
+import Links
main :: IO ()
main = do
- url:_ <- getArgs
- -- TODO support more URI schemes, and do nonblocking networking. This could be it's own module.
- request <- HTTP.parseRequest url
- manager <- HTTP.newManager TLS.tlsManagerSettings
- response <- HTTP.httpLbs request manager
- let html = XML.documentRoot $ HTML.parseLBS $ HTTP.responseBody response
- let aboutBlank = fromJust $ parseURI "about:blank"
- style <- retreiveStyles (fromMaybe aboutBlank $ parseURI url) html manager $ fromJust $ parseURI url
- let transcript = stylize style html
- C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
+ http <- HTTP.newManager TLS.tlsManagerSettings
+ ProgramCtl docs outSSML <- parseArgs http
+ forM docs $ \(uri, doc) -> do
+ let html = XML.documentRoot doc
+ style <- retreiveStyles uri doc http uri
+ let transcript = stylize style html
+ C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
+ return ()
renderElLBS el = XML.renderLBS XML.def $ XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
@@ 59,16 61,27 @@ retreiveStyles uri html manager base = do
authorStyle :: CSSCond.ConditionalStyles StyleTree
authorStyle = H2C.html2css html base
- loadURL url = do -- TODO parallelise.
- request <- requestFromURI url
- response <- HTTP.httpLbs request manager
- return $ Txt.pack $ C8.unpack $ HTTP.responseBody response
+ loadURL url = do
+ response <- fetchURL manager "text/css" url
+ return $ case response of
+ ("text/css", Left text) -> text
+ ("text/css", Right bytes) -> applyCSScharset charsets $ B.toStrict bytes
+ (_, _) -> ""
lowerVars "speech" = CSSCond.B True
lowerVars "-rhapsode" = CSSCond.B True
lowerVars _ = CSSCond.B False
lowerToks _ = CSSCond.B False
+applyCSScharset (charset:charsets) bytes | cssCharset (CSSTok.tokenize text) == charset = text
+ | otherwise = applyCSScharset charsets bytes
+ where text = convertCharset charset bytes
+cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
+ (CSSTok.String charset:_) <- skipCSSspace toks' = charset
+ | otherwise = ""
+skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
+skipCSSspace toks = toks
+
loadUserStyles styles = do
dir <- Dir.getXdgDirectory Dir.XdgConfig "rhapsode"
exists <- Dir.doesDirectoryExist dir