M src/Input.hs => src/Input.hs +20 -7
@@ 26,7 26,8 @@ import Data.Default.Class
--- Commandline arguments
data ProgramCtl = ProgramCtl {
docs :: [(URI, XML.Document)],
- outSSML :: Handle
+ outSSML :: Maybe Handle,
+ outLinks :: Maybe Handle
}
parseArgs :: HTTP.Manager -> IO ProgramCtl
parseArgs http = do
@@ 37,28 38,40 @@ parseArgs http = do
uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
docs <- forConcurrently inputs' $ evalInput http
+
outSSML <- parseSSMLout outputs
+ outLinks <- parseLinksOut outputs
return $ ProgramCtl {
docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
- outSSML = outSSML
+ outSSML = outSSML,
+ outLinks = outLinks
}
preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
(flag, Just url) : preparseArgs args
preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
+preparseArgs [flag@('+':_)] = [(flag, Nothing)]
preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
preparseArgs (val:args) | Just url <- parseURIReference val =
("-h", Just url) : preparseArgs args
preparseArgs [] = []
preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)
-parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode
-parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode
-parseSSMLout (("+s", Nothing):_) = return stdout
-parseSSMLout (("+ssml", Nothing):_) = return stdout
+parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseSSMLout (("+s", Nothing):_) = return $ Just stdout
+parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout
parseSSMLout (_:args) = parseSSMLout args
-parseSSMLout [] = return stdout
+parseSSMLout [] = return Nothing
+
+parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseLinksOut (("+l", Nothing):_) = return $ Just stdout
+parseLinksOut (("+links", Nothing):_) = return $ Just stdout
+parseLinksOut (_:args) = parseLinksOut args
+parseLinksOut [] = return Nothing
+
evalInput http ("-h", url) = fetchDocument http "text/html" url
evalInput http ("-html", url) = fetchDocument http "text/html" url
A src/Links.hs => src/Links.hs +59 -0
@@ 0,0 1,59 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Links(extractLinks, linkToText) where
+
+import Text.XML
+import qualified Data.Map as M
+import Network.URI
+import Data.Text (Text, unpack, append, pack, replace, strip)
+import Data.Maybe
+
+data Link = Link {
+ label :: Text,
+ title :: Text,
+ href :: URI
+}
+
+linkToText :: Link -> Text
+linkToText (Link label' title' href') =
+ rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')
+
+rmWs text = strip $ replace "\t" " " $ replace "\n" " " text
+
+extractLinks :: Document -> [Link]
+extractLinks (Document prologue root misc) =
+ extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
+ extractEl root ++ extractMisc misc
+
+extractMisc :: [Miscellaneous] -> [Link]
+extractMisc (MiscInstruction (Instruction target dat):misc)
+ | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
+extractMisc (_:misc) = extractMisc misc
+extractMisc [] = []
+
+extractEl el@(Element _ _ children) =
+ extractElAttr el "{http://www.w3.org/1999/xlink}href" ++
+ extractElAttr el "href" ++
+ extractElAttr el "longdesc" ++
+ extractElAttr el "src" ++
+ extractNodes children
+
+extractElAttr (Element _ attrs children) attr
+ | Just val <- attr `M.lookup` attrs,
+ Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
+ | otherwise = []
+ where
+ label' = nodesText children
+ title' = fromMaybe "" $ M.lookup "title" attrs
+
+extractNodes (NodeElement el:nodes) = extractEl el ++ extractNodes nodes
+extractNodes (NodeInstruction instruct:nodes) =
+ extractMisc [MiscInstruction instruct] ++ extractNodes nodes
+extractNodes (_:nodes) = extractNodes nodes
+extractNodes [] = []
+
+(+++) = append
+nodesText :: [Node] -> Text
+nodesText (NodeElement (Element _ attrs children):nodes) = nodesText children +++ nodesText nodes
+nodesText (NodeContent text:nodes) = text +++ nodesText nodes
+nodesText (_:nodes) = nodesText nodes
+nodesText [] = ""
M src/Main.hs => src/Main.hs +17 -3
@@ 28,6 28,7 @@ import Data.Scientific (toRealFloat)
import Data.Maybe (fromJust, fromMaybe)
import System.Directory as Dir
import Control.Monad
+import System.IO (stdout, hPutStrLn)
import DefaultCSS
import StyleTree
@@ 38,14 39,27 @@ import Links
main :: IO ()
main = do
http <- HTTP.newManager TLS.tlsManagerSettings
- ProgramCtl docs outSSML <- parseArgs http
+ ProgramCtl docs outSSML outLinks <- parseArgs http
forM docs $ \(uri, doc) -> do
let html = XML.documentRoot doc
style <- retreiveStyles uri doc http uri
- let transcript = stylize style html
- C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
+
+ case (outLinks, outSSML) of
+ (Nothing, Nothing) -> renderDoc stdout style html
+ (Just hLinks, Just hSSML) -> do
+ forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
+ renderDoc hSSML style html
+ (Just hLinks, Nothing) -> do
+ forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
+ return ()
+ (Nothing, Just hSSML) -> renderDoc hSSML style html
return ()
+renderDoc outSSML style html = do
+ let transcript = stylize style html
+ let ssml = styleToSSML $ applyCounters transcript
+ C8.hPutStrLn outSSML $ renderElLBS $ ssml
+
renderElLBS el = XML.renderLBS XML.def $ XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = el,