~alcinnz/rhapsode

c900dfa44910044796d0fa99bc987d4d7c2dbd4e — Adrian Cochrane 5 years ago 1b39f12
Add new URL & commandline arguments input interface.
3 files changed, 160 insertions(+), 18 deletions(-)

M rhapsode.cabal
A src/Input.hs
M src/Main.hs
M rhapsode.cabal => rhapsode.cabal +5 -4
@@ 60,11 60,12 @@ executable rhapsode
  -- other-extensions:    
  
  -- Other library packages from which modules are imported.
  build-depends:       base >=4.9 && <4.10, directory,
        http-client, http-client-tls, bytestring,
        html-conduit, xml-conduit, text, containers,
  build-depends:       base >=4.9 && <=4.12, directory,
        http-client, http-client-tls, bytestring, http-types,
        html-conduit, xml-conduit, text, containers, data-default-class,
        network-uri,
        stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific
        stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific,
        async
  
  -- Directories containing source files.
  hs-source-dirs:      src

A src/Input.hs => src/Input.hs +128 -0
@@ 0,0 1,128 @@
{-# LANGUAGE OverloadedStrings #-}
module Input(parseArgs, ProgramCtl(..), fetchURL, convertCharset, charsets) where

import qualified Network.HTTP.Client as HTTP
import           Network.HTTP.Types
import qualified Data.Text as Txt
import           Data.Text.Lazy (fromStrict)
import           Data.Text (Text)
import           Data.Text.Encoding
import qualified Data.Text.IO as TxtIO
import qualified Data.ByteString.Lazy as B
import           Data.ByteString.Lazy (ByteString)
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import qualified Data.Map as M

import Control.Concurrent.Async

import           System.IO
import System.Environment
import System.Directory
import Data.List
import Data.Default.Class

--- Commandline arguments
data ProgramCtl = ProgramCtl {
    docs :: [(URI, XML.Document)],
    outSSML :: Handle
}
parseArgs :: HTTP.Manager -> IO ProgramCtl
parseArgs http = do
    args <- getArgs
    let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args
    cwd <- getCurrentDirectory
    let base = URI {uriScheme = "file:", uriPath = cwd,
        uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
    let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
    docs <- forConcurrently inputs' $ evalInput http
    outSSML <- parseSSMLout outputs
    return $ ProgramCtl {
        docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
        outSSML = outSSML
    }

preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
    (flag, Just url) : preparseArgs args
preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
preparseArgs (val:args) | Just url <- parseURIReference val =
    ("-h", Just url) : preparseArgs args
preparseArgs [] = []
preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)

parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode
parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode
parseSSMLout (("+s", Nothing):_) = return stdout
parseSSMLout (("+ssml", Nothing):_) = return stdout
parseSSMLout (_:args) = parseSSMLout args
parseSSMLout [] = return stdout

evalInput http ("-h", url) = fetchDocument http "text/html" url
evalInput http ("-html", url) = fetchDocument http "text/html" url
evalInput http ("-x", url) = fetchDocument http "text/xml" url
evalInput http ("-xml", url) = fetchDocument http "text/xml" url
evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument
parseDocument ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument ("text/html", Right bytes) = return $ HTML.parseLBS bytes
parseDocument ("text/plain", Left text) = return $ docForText text
parseDocument ("text/plain", Right bytes) = return $ docForText $ decodeUtf8 $ B.toStrict bytes
parseDocument (_, Left text) | Right doc <- XML.parseText def $ fromStrict text = return doc
parseDocument (_, Right bytes) | Right doc <- XML.parseLBS def bytes = return doc
parseDocument (mime, _) = return $ docForText $ Txt.concat ["Unsupported MIMEtype ", Txt.pack mime]

docForText txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

--------

fetchURL :: HTTP.Manager -> String -> URI -> IO (String, Either Text ByteString)
fetchURL http defaultMIME uri | uriScheme uri `elem` ["http:", "https:"] = do
    request <- HTTP.requestFromURI uri
    response <- HTTP.httpLbs request http
    return $ case (
            HTTP.responseBody response,
            [val | ("content-type", val) <- HTTP.responseHeaders response]
      ) of
        ("", _) -> ("text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
        (response, (mimetype:_)) -> let mime = Txt.toLower $ decodeUtf8 mimetype
            in resolveCharset (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" $ mime) response
        (response, []) -> (defaultMIME, Right response)

fetchURL _ defaultMIME uri@URI {uriScheme = "file:"} = do
    response <- B.readFile $ uriPath uri
    return (defaultMIME, Right response)

fetchURL _ _ uri = return ("text/plain", Left $ Txt.concat ["Unsupported link type ", Txt.pack $ uriScheme uri])

resolveCharset :: [String] -> ByteString -> (String, Either Text ByteString)
resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response =
    (mime, Left $ convertCharset charset $ B.toStrict response)
resolveCharset (mime:_:params) response = resolveCharset (mime:params) response
resolveCharset [mime] response = (mime, Right $ response)
resolveCharset [] response = ("text/plain", Left "Filetype unspecified")

convertCharset "iso-8859-1" = decodeLatin1
convertCharset "latin1" = decodeLatin1
convertCharset "us-ascii" = decodeUtf8
convertCharset "utf-8" = decodeUtf8
convertCharset "utf-16be" = decodeUtf16BE
convertCharset "utf-16le" = decodeUtf16LE
convertCharset "utf-16" = decodeUtf16LE
convertCharset "utf-32be" = decodeUtf32BE
convertCharset "utf-32le" = decodeUtf32LE
convertCharset "utf-32" = decodeUtf32LE
convertCharset _ = \_ -> "Unsupported text encoding!"
charsets :: [Text]
charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]

M src/Main.hs => src/Main.hs +27 -14
@@ 9,6 9,7 @@ import           Network.HTTP.Client.Internal
import qualified Network.HTTP.Client.TLS as TLS
import           Network.URI
import qualified Data.ByteString.Lazy.Char8 as C8
import qualified Data.ByteString.Lazy as B

import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML


@@ 26,23 27,24 @@ import qualified Data.Map as M
import Data.Scientific (toRealFloat)
import Data.Maybe (fromJust, fromMaybe)
import System.Directory as Dir
import Control.Monad

import DefaultCSS
import StyleTree
import SSML
import Input
import Links

main :: IO ()
main = do
    url:_ <- getArgs
    -- TODO support more URI schemes, and do nonblocking networking. This could be it's own module.
    request <- HTTP.parseRequest url
    manager <- HTTP.newManager TLS.tlsManagerSettings
    response <- HTTP.httpLbs request manager
    let html = XML.documentRoot $ HTML.parseLBS $ HTTP.responseBody response
    let aboutBlank = fromJust $ parseURI "about:blank"
    style <- retreiveStyles (fromMaybe aboutBlank $ parseURI url) html manager $ fromJust $ parseURI url
    let transcript = stylize style html
    C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
    http <- HTTP.newManager TLS.tlsManagerSettings
    ProgramCtl docs outSSML <- parseArgs http
    forM docs $ \(uri, doc) -> do
        let html = XML.documentRoot doc
        style <- retreiveStyles uri doc http uri
        let transcript = stylize style html
        C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
    return ()

renderElLBS el = XML.renderLBS XML.def $ XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],


@@ 59,16 61,27 @@ retreiveStyles uri html manager base = do
    authorStyle :: CSSCond.ConditionalStyles StyleTree
    authorStyle = H2C.html2css html base

    loadURL url = do -- TODO parallelise.
        request <- requestFromURI url
        response <- HTTP.httpLbs request manager
        return $ Txt.pack $ C8.unpack $ HTTP.responseBody response
    loadURL url = do
        response <- fetchURL manager "text/css" url
        return $ case response of
            ("text/css", Left text) -> text
            ("text/css", Right bytes) -> applyCSScharset charsets $ B.toStrict bytes
            (_, _) -> ""

    lowerVars "speech" = CSSCond.B True
    lowerVars "-rhapsode" = CSSCond.B True
    lowerVars _ = CSSCond.B False
    lowerToks _ = CSSCond.B False

applyCSScharset (charset:charsets) bytes | cssCharset (CSSTok.tokenize text) == charset = text
        | otherwise = applyCSScharset charsets bytes
    where text = convertCharset charset bytes
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
    | otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks

loadUserStyles styles = do
    dir <- Dir.getXdgDirectory Dir.XdgConfig "rhapsode"
    exists <- Dir.doesDirectoryExist dir