From 36f30081305cdb56e7ff44f1301fe76497a52fe5 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Sat, 11 Jan 2020 20:52:24 +1300 Subject: [PATCH] Find links in the document (for interactions). --- src/Input.hs | 27 +++++++++++++++++------- src/Links.hs | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/Main.hs | 20 +++++++++++++++--- 3 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 src/Links.hs diff --git a/src/Input.hs b/src/Input.hs index e4142a5..5ea0f17 100644 --- a/src/Input.hs +++ b/src/Input.hs @@ -26,7 +26,8 @@ import Data.Default.Class --- Commandline arguments data ProgramCtl = ProgramCtl { docs :: [(URI, XML.Document)], - outSSML :: Handle + outSSML :: Maybe Handle, + outLinks :: Maybe Handle } parseArgs :: HTTP.Manager -> IO ProgramCtl parseArgs http = do @@ -37,28 +38,40 @@ parseArgs http = do uriAuthority = Nothing, uriQuery = "", uriFragment = ""} let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs] docs <- forConcurrently inputs' $ evalInput http + outSSML <- parseSSMLout outputs + outLinks <- parseLinksOut outputs return $ ProgramCtl { docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs], - outSSML = outSSML + outSSML = outSSML, + outLinks = outLinks } preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val = (flag, Just url) : preparseArgs args preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args +preparseArgs [flag@('+':_)] = [(flag, Nothing)] preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args preparseArgs (val:args) | Just url <- parseURIReference val = ("-h", Just url) : preparseArgs args preparseArgs [] = [] preparseArgs (arg:_) = error ("Unsupported argument " ++ arg) -parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode -parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode -parseSSMLout (("+s", Nothing):_) = return stdout -parseSSMLout (("+ssml", Nothing):_) = return stdout +parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just +parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just +parseSSMLout (("+s", Nothing):_) = return $ Just stdout +parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout parseSSMLout (_:args) = parseSSMLout args -parseSSMLout [] = return stdout +parseSSMLout [] = return Nothing + +parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just +parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just +parseLinksOut (("+l", Nothing):_) = return $ Just stdout +parseLinksOut (("+links", Nothing):_) = return $ Just stdout +parseLinksOut (_:args) = parseLinksOut args +parseLinksOut [] = return Nothing + evalInput http ("-h", url) = fetchDocument http "text/html" url evalInput http ("-html", url) = fetchDocument http "text/html" url diff --git a/src/Links.hs b/src/Links.hs new file mode 100644 index 0000000..5ccb284 --- /dev/null +++ b/src/Links.hs @@ -0,0 +1,59 @@ +{-# LANGUAGE OverloadedStrings #-} +module Links(extractLinks, linkToText) where + +import Text.XML +import qualified Data.Map as M +import Network.URI +import Data.Text (Text, unpack, append, pack, replace, strip) +import Data.Maybe + +data Link = Link { + label :: Text, + title :: Text, + href :: URI +} + +linkToText :: Link -> Text +linkToText (Link label' title' href') = + rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href') + +rmWs text = strip $ replace "\t" " " $ replace "\n" " " text + +extractLinks :: Document -> [Link] +extractLinks (Document prologue root misc) = + extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++ + extractEl root ++ extractMisc misc + +extractMisc :: [Miscellaneous] -> [Link] +extractMisc (MiscInstruction (Instruction target dat):misc) + | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc +extractMisc (_:misc) = extractMisc misc +extractMisc [] = [] + +extractEl el@(Element _ _ children) = + extractElAttr el "{http://www.w3.org/1999/xlink}href" ++ + extractElAttr el "href" ++ + extractElAttr el "longdesc" ++ + extractElAttr el "src" ++ + extractNodes children + +extractElAttr (Element _ attrs children) attr + | Just val <- attr `M.lookup` attrs, + Just uri <- parseURIReference $ unpack val = [Link label' title' uri] + | otherwise = [] + where + label' = nodesText children + title' = fromMaybe "" $ M.lookup "title" attrs + +extractNodes (NodeElement el:nodes) = extractEl el ++ extractNodes nodes +extractNodes (NodeInstruction instruct:nodes) = + extractMisc [MiscInstruction instruct] ++ extractNodes nodes +extractNodes (_:nodes) = extractNodes nodes +extractNodes [] = [] + +(+++) = append +nodesText :: [Node] -> Text +nodesText (NodeElement (Element _ attrs children):nodes) = nodesText children +++ nodesText nodes +nodesText (NodeContent text:nodes) = text +++ nodesText nodes +nodesText (_:nodes) = nodesText nodes +nodesText [] = "" diff --git a/src/Main.hs b/src/Main.hs index dc0e6b5..d228e4a 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -28,6 +28,7 @@ import Data.Scientific (toRealFloat) import Data.Maybe (fromJust, fromMaybe) import System.Directory as Dir import Control.Monad +import System.IO (stdout, hPutStrLn) import DefaultCSS import StyleTree @@ -38,14 +39,27 @@ import Links main :: IO () main = do http <- HTTP.newManager TLS.tlsManagerSettings - ProgramCtl docs outSSML <- parseArgs http + ProgramCtl docs outSSML outLinks <- parseArgs http forM docs $ \(uri, doc) -> do let html = XML.documentRoot doc style <- retreiveStyles uri doc http uri - let transcript = stylize style html - C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript + + case (outLinks, outSSML) of + (Nothing, Nothing) -> renderDoc stdout style html + (Just hLinks, Just hSSML) -> do + forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText) + renderDoc hSSML style html + (Just hLinks, Nothing) -> do + forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText) + return () + (Nothing, Just hSSML) -> renderDoc hSSML style html return () +renderDoc outSSML style html = do + let transcript = stylize style html + let ssml = styleToSSML $ applyCounters transcript + C8.hPutStrLn outSSML $ renderElLBS $ ssml + renderElLBS el = XML.renderLBS XML.def $ XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = el, -- 2.30.2