~alcinnz/rhapsode

06d8ec3612e8fd2d8a5a7109c4e79bb238a43bc4 — Adrian Cochrane 4 years ago e90a1a7
Prepare API to expose to C.

This'll be useful for accessing text-to-speech/vice-versa APIs.
2 files changed, 40 insertions(+), 14 deletions(-)

M src/Input.hs
M src/Main.hs
M src/Input.hs => src/Input.hs +30 -1
@@ 1,5 1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module Input(parseArgs, ProgramCtl(..)) where
module Input(parseArgs, ProgramCtl(..), fetchURLs) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt


@@ 21,6 21,12 @@ import Data.Default.Class
import Data.Maybe (fromMaybe)
import System.FilePath

import qualified Data.CSS.Syntax.StyleSheet as CSS
import Data.CSS.Preprocessor.Conditions
import qualified Data.CSS.Syntax.Tokens as CSSTok
import qualified Data.HTML2CSS as H2C
import Network.URI.Charset

--- Commandline arguments
data ProgramCtl = ProgramCtl {
    docs :: [(URI, XML.Document)],


@@ 77,6 83,29 @@ evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/
evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)

fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do
    let u = relativeTo url refererURL
    let charsets' = map Txt.unpack charsets
    resp <- fetchURL session mime u
    case resp of
        ("text/css", bytes) -> let
                cssParser = CSS.parseForURL (conditionalStyles u "document") u
            in return (refererDoc, cssParser $ applyCSScharset charsets' bytes)
        _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL)

applyCSScharset (charset:charsets) (Right bytes)
        | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
        | otherwise = applyCSScharset charsets $ Right bytes
    where
        text = convertCharset charset $ B.toStrict bytes
applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes
applyCSScharset _ (Left text) = text
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
    | otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http
parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text
parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes

M src/Main.hs => src/Main.hs +10 -13
@@ 39,23 39,22 @@ main = do
    ProgramCtl docs outSSML outLinks <- parseArgs http
    forM docs $ \(uri, doc) -> do
        let html = XML.documentRoot doc
        style <- retreiveStyles uri doc http uri
        style <- retreiveStyles http $ H2C.html2css doc uri

        case (outLinks, outSSML) of
            (Nothing, Nothing) -> renderDoc stdout style html
            (Nothing, Nothing) -> C8.putStrLn $ renderDoc style html
            (Just hLinks, Just hSSML) -> do
                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
                renderDoc hSSML style html
                C8.hPutStrLn hSSML $ renderDoc style html
                return ()
            (Just hLinks, Nothing) -> do
                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
                return ()
            (Nothing, Just hSSML) -> renderDoc hSSML style html
            (Nothing, Just hSSML) -> C8.hPutStrLn hSSML $ renderDoc style html
    return ()

renderDoc outSSML style html = do
    let transcript = stylize style html
    let ssml = styleToSSML $ applyCounters transcript
    C8.hPutStrLn outSSML $ renderElLBS $ ssml
renderDoc style html =
    renderElLBS $ styleToSSML $ applyCounters $ stylize style html

renderElLBS el = XML.renderLBS XML.def $ XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],


@@ 63,15 62,13 @@ renderElLBS el = XML.renderLBS XML.def $ XML.Document {
        XML.documentEpilogue = []
    }

retreiveStyles uri html manager base = do
retreiveStyles :: Session -> CSSCond.ConditionalStyles StyleTree -> IO (Style.QueryableStyleSheet (Style.VarParser StyleTree))
retreiveStyles manager authorStyle = do
    let agentStyle = H2C.cssPriorityAgent authorStyle `CSS.parse` Txt.pack userAgentCSS
    userStyle <- loadUserStyles agentStyle
    importedStyle <- CSSCond.loadImports loadURL lowerVars lowerToks userStyle []
    return $ CSSCond.resolve lowerVars lowerToks Style.queryableStyleSheet importedStyle
  where
    agentStyle = H2C.cssPriorityAgent authorStyle `CSS.parse` Txt.pack userAgentCSS
    authorStyle :: CSSCond.ConditionalStyles StyleTree
    authorStyle = H2C.html2css html base

    loadURL url = do
        response <- fetchURL manager ["text/css"] url
        let charsets' = map unpack charsets