From 06d8ec3612e8fd2d8a5a7109c4e79bb238a43bc4 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Thu, 2 Apr 2020 19:21:37 +1300 Subject: [PATCH] Prepare API to expose to C. This'll be useful for accessing text-to-speech/vice-versa APIs. --- src/Input.hs | 31 ++++++++++++++++++++++++++++++- src/Main.hs | 23 ++++++++++------------- 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/src/Input.hs b/src/Input.hs index 6245dbd..0ed1a58 100644 --- a/src/Input.hs +++ b/src/Input.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} -module Input(parseArgs, ProgramCtl(..)) where +module Input(parseArgs, ProgramCtl(..), fetchURLs) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt @@ -21,6 +21,12 @@ import Data.Default.Class import Data.Maybe (fromMaybe) import System.FilePath +import qualified Data.CSS.Syntax.StyleSheet as CSS +import Data.CSS.Preprocessor.Conditions +import qualified Data.CSS.Syntax.Tokens as CSSTok +import qualified Data.HTML2CSS as H2C +import Network.URI.Charset + --- Commandline arguments data ProgramCtl = ProgramCtl { docs :: [(URI, XML.Document)], @@ -77,6 +83,29 @@ evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/ evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag) +fetchURLs session refererDoc refererURL srcs = forConcurrently srcs $ \(mime, url) -> do + let u = relativeTo url refererURL + let charsets' = map Txt.unpack charsets + resp <- fetchURL session mime u + case resp of + ("text/css", bytes) -> let + cssParser = CSS.parseForURL (conditionalStyles u "document") u + in return (refererDoc, cssParser $ applyCSScharset charsets' bytes) + _ -> parseDocument session resp >>= \doc -> return (doc, H2C.html2css doc refererURL) + +applyCSScharset (charset:charsets) (Right bytes) + | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text + | otherwise = applyCSScharset charsets $ Right bytes + where + text = convertCharset charset $ B.toStrict bytes +applyCSScharset _ (Right bytes) = convertCharset "utf-8" $ B.toStrict bytes +applyCSScharset _ (Left text) = text +cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, + (CSSTok.String charset:_) <- skipCSSspace toks' = charset + | otherwise = "" +skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks +skipCSSspace toks = toks + fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument http parseDocument _ ("text/html", Left text) = return $ HTML.parseLT $ fromStrict text parseDocument _ ("text/html", Right bytes) = return $ HTML.parseLBS bytes diff --git a/src/Main.hs b/src/Main.hs index f214d8a..684dd2b 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -39,23 +39,22 @@ main = do ProgramCtl docs outSSML outLinks <- parseArgs http forM docs $ \(uri, doc) -> do let html = XML.documentRoot doc - style <- retreiveStyles uri doc http uri + style <- retreiveStyles http $ H2C.html2css doc uri case (outLinks, outSSML) of - (Nothing, Nothing) -> renderDoc stdout style html + (Nothing, Nothing) -> C8.putStrLn $ renderDoc style html (Just hLinks, Just hSSML) -> do forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText) - renderDoc hSSML style html + C8.hPutStrLn hSSML $ renderDoc style html + return () (Just hLinks, Nothing) -> do forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText) return () - (Nothing, Just hSSML) -> renderDoc hSSML style html + (Nothing, Just hSSML) -> C8.hPutStrLn hSSML $ renderDoc style html return () -renderDoc outSSML style html = do - let transcript = stylize style html - let ssml = styleToSSML $ applyCounters transcript - C8.hPutStrLn outSSML $ renderElLBS $ ssml +renderDoc style html = + renderElLBS $ styleToSSML $ applyCounters $ stylize style html renderElLBS el = XML.renderLBS XML.def $ XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], @@ -63,15 +62,13 @@ renderElLBS el = XML.renderLBS XML.def $ XML.Document { XML.documentEpilogue = [] } -retreiveStyles uri html manager base = do +retreiveStyles :: Session -> CSSCond.ConditionalStyles StyleTree -> IO (Style.QueryableStyleSheet (Style.VarParser StyleTree)) +retreiveStyles manager authorStyle = do + let agentStyle = H2C.cssPriorityAgent authorStyle `CSS.parse` Txt.pack userAgentCSS userStyle <- loadUserStyles agentStyle importedStyle <- CSSCond.loadImports loadURL lowerVars lowerToks userStyle [] return $ CSSCond.resolve lowerVars lowerToks Style.queryableStyleSheet importedStyle where - agentStyle = H2C.cssPriorityAgent authorStyle `CSS.parse` Txt.pack userAgentCSS - authorStyle :: CSSCond.ConditionalStyles StyleTree - authorStyle = H2C.html2css html base - loadURL url = do response <- fetchURL manager ["text/css"] url let charsets' = map unpack charsets -- 2.30.2