M src/Input.hs => src/Input.hs +20 -7
@@ 26,7 26,8 @@ import Data.Default.Class
 --- Commandline arguments
 data ProgramCtl = ProgramCtl {
     docs :: [(URI, XML.Document)],
-    outSSML :: Handle
+    outSSML :: Maybe Handle,
+    outLinks :: Maybe Handle
 }
 parseArgs :: HTTP.Manager -> IO ProgramCtl
 parseArgs http = do
@@ 37,28 38,40 @@ parseArgs http = do
         uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
     let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
     docs <- forConcurrently inputs' $ evalInput http
+
     outSSML <- parseSSMLout outputs
+    outLinks <- parseLinksOut outputs
     return $ ProgramCtl {
         docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
-        outSSML = outSSML
+        outSSML = outSSML,
+        outLinks = outLinks
     }
 
 preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
     (flag, Just url) : preparseArgs args
 preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
 preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
+preparseArgs [flag@('+':_)] = [(flag, Nothing)]
 preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
 preparseArgs (val:args) | Just url <- parseURIReference val =
     ("-h", Just url) : preparseArgs args
 preparseArgs [] = []
 preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)
 
-parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode
-parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode
-parseSSMLout (("+s", Nothing):_) = return stdout
-parseSSMLout (("+ssml", Nothing):_) = return stdout
+parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseSSMLout (("+s", Nothing):_) = return $ Just stdout
+parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout
 parseSSMLout (_:args) = parseSSMLout args
-parseSSMLout [] = return stdout
+parseSSMLout [] = return Nothing
+
+parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
+parseLinksOut (("+l", Nothing):_) = return $ Just stdout
+parseLinksOut (("+links", Nothing):_) = return $ Just stdout
+parseLinksOut (_:args) = parseLinksOut args
+parseLinksOut [] = return Nothing
+
 
 evalInput http ("-h", url) = fetchDocument http "text/html" url
 evalInput http ("-html", url) = fetchDocument http "text/html" url
 
A src/Links.hs => src/Links.hs +59 -0
@@ 0,0 1,59 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Links(extractLinks, linkToText) where
+
+import Text.XML
+import qualified Data.Map as M
+import Network.URI
+import Data.Text (Text, unpack, append, pack, replace, strip)
+import Data.Maybe
+
+data Link = Link {
+    label :: Text,
+    title :: Text,
+    href :: URI
+}
+
+linkToText :: Link -> Text
+linkToText (Link label' title' href') =
+    rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')
+
+rmWs text = strip $ replace "\t" " " $ replace "\n" " " text
+
+extractLinks :: Document -> [Link]
+extractLinks (Document prologue root misc) =
+    extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
+    extractEl root ++ extractMisc misc
+
+extractMisc :: [Miscellaneous] -> [Link]
+extractMisc (MiscInstruction (Instruction target dat):misc)
+    | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
+extractMisc (_:misc) = extractMisc misc
+extractMisc [] = []
+
+extractEl el@(Element _ _ children) =
+    extractElAttr el "{http://www.w3.org/1999/xlink}href" ++
+    extractElAttr el "href" ++
+    extractElAttr el "longdesc" ++
+    extractElAttr el "src" ++
+    extractNodes children
+
+extractElAttr (Element _ attrs children) attr
+        | Just val <- attr `M.lookup` attrs,
+            Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
+        | otherwise = []
+    where
+        label' = nodesText children
+        title' = fromMaybe "" $ M.lookup "title" attrs
+
+extractNodes (NodeElement el:nodes) = extractEl el ++ extractNodes nodes
+extractNodes (NodeInstruction instruct:nodes) =
+    extractMisc [MiscInstruction instruct] ++ extractNodes nodes
+extractNodes (_:nodes) = extractNodes nodes
+extractNodes [] = []
+
+(+++) = append
+nodesText :: [Node] -> Text
+nodesText (NodeElement (Element _ attrs children):nodes) = nodesText children +++ nodesText nodes
+nodesText (NodeContent text:nodes) = text +++ nodesText nodes
+nodesText (_:nodes) = nodesText nodes
+nodesText [] = ""
 
M src/Main.hs => src/Main.hs +17 -3
@@ 28,6 28,7 @@ import Data.Scientific (toRealFloat)
 import Data.Maybe (fromJust, fromMaybe)
 import System.Directory as Dir
 import Control.Monad
+import System.IO (stdout, hPutStrLn)
 
 import DefaultCSS
 import StyleTree
@@ 38,14 39,27 @@ import Links
 main :: IO ()
 main = do
     http <- HTTP.newManager TLS.tlsManagerSettings
-    ProgramCtl docs outSSML <- parseArgs http
+    ProgramCtl docs outSSML outLinks <- parseArgs http
     forM docs $ \(uri, doc) -> do
         let html = XML.documentRoot doc
         style <- retreiveStyles uri doc http uri
-        let transcript = stylize style html
-        C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript
+
+        case (outLinks, outSSML) of
+            (Nothing, Nothing) -> renderDoc stdout style html
+            (Just hLinks, Just hSSML) -> do
+                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
+                renderDoc hSSML style html
+            (Just hLinks, Nothing) -> do
+                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
+                return ()
+            (Nothing, Just hSSML) -> renderDoc hSSML style html
     return ()
 
+renderDoc outSSML style html = do
+    let transcript = stylize style html
+    let ssml = styleToSSML $ applyCounters transcript
+    C8.hPutStrLn outSSML $ renderElLBS $ ssml
+
 renderElLBS el = XML.renderLBS XML.def $ XML.Document {
         XML.documentPrologue = XML.Prologue [] Nothing [],
         XML.documentRoot = el,