~alcinnz/rhapsode

36f30081305cdb56e7ff44f1301fe76497a52fe5 — Adrian Cochrane 5 years ago c900dfa
Find links in the document (for interactions).
3 files changed, 96 insertions(+), 10 deletions(-)

M src/Input.hs
A src/Links.hs
M src/Main.hs
M src/Input.hs => src/Input.hs +20 -7
@@ 26,7 26,8 @@ import Data.Default.Class
--- Commandline arguments
data ProgramCtl = ProgramCtl {
    docs :: [(URI, XML.Document)],
    outSSML :: Handle
    outSSML :: Maybe Handle,
    outLinks :: Maybe Handle
}
parseArgs :: HTTP.Manager -> IO ProgramCtl
parseArgs http = do


@@ 37,28 38,40 @@ parseArgs http = do
        uriAuthority = Nothing, uriQuery = "", uriFragment = ""}
    let inputs' = [(f, relativeTo uri base) | (f, Just uri) <- inputs]
    docs <- forConcurrently inputs' $ evalInput http

    outSSML <- parseSSMLout outputs
    outLinks <- parseLinksOut outputs
    return $ ProgramCtl {
        docs = [(uri, doc) | ((_, uri), doc) <- zip inputs' docs],
        outSSML = outSSML
        outSSML = outSSML,
        outLinks = outLinks
    }

preparseArgs (flag@('-':_):val:args) | Just url <- parseURIReference val =
    (flag, Just url) : preparseArgs args
preparseArgs (flag@('+':_):args@(('+':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs (flag@('+':_):args@(('-':_):_)) = (flag, Nothing) : preparseArgs args
preparseArgs [flag@('+':_)] = [(flag, Nothing)]
preparseArgs (flag@('+':_):val:args) = (flag, parseURIReference val) : preparseArgs args
preparseArgs (val:args) | Just url <- parseURIReference val =
    ("-h", Just url) : preparseArgs args
preparseArgs [] = []
preparseArgs (arg:_) = error ("Unsupported argument " ++ arg)

parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) ReadMode
parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) ReadMode
parseSSMLout (("+s", Nothing):_) = return stdout
parseSSMLout (("+ssml", Nothing):_) = return stdout
parseSSMLout (("+s", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+ssml", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseSSMLout (("+s", Nothing):_) = return $ Just stdout
parseSSMLout (("+ssml", Nothing):_) = return $ Just stdout
parseSSMLout (_:args) = parseSSMLout args
parseSSMLout [] = return stdout
parseSSMLout [] = return Nothing

parseLinksOut (("+l", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+links", Just uri):_) = openFile (uriPath uri) WriteMode >>= return . Just
parseLinksOut (("+l", Nothing):_) = return $ Just stdout
parseLinksOut (("+links", Nothing):_) = return $ Just stdout
parseLinksOut (_:args) = parseLinksOut args
parseLinksOut [] = return Nothing


evalInput http ("-h", url) = fetchDocument http "text/html" url
evalInput http ("-html", url) = fetchDocument http "text/html" url

A src/Links.hs => src/Links.hs +59 -0
@@ 0,0 1,59 @@
{-# LANGUAGE OverloadedStrings #-}
module Links(extractLinks, linkToText) where

import Text.XML
import qualified Data.Map as M
import Network.URI
import Data.Text (Text, unpack, append, pack, replace, strip)
import Data.Maybe

data Link = Link {
    label :: Text,
    title :: Text,
    href :: URI
}

linkToText :: Link -> Text
linkToText (Link label' title' href') =
    rmWs label' +++ "\t" +++ rmWs title' +++ "\t" +++ pack (show href')

rmWs text = strip $ replace "\t" " " $ replace "\n" " " text

extractLinks :: Document -> [Link]
extractLinks (Document prologue root misc) =
    extractMisc (prologueBefore prologue) ++ extractMisc (prologueAfter prologue) ++
    extractEl root ++ extractMisc misc

extractMisc :: [Miscellaneous] -> [Link]
extractMisc (MiscInstruction (Instruction target dat):misc)
    | Just uri <- parseURIReference $ unpack target = Link dat "" uri : extractMisc misc
extractMisc (_:misc) = extractMisc misc
extractMisc [] = []

extractEl el@(Element _ _ children) =
    extractElAttr el "{http://www.w3.org/1999/xlink}href" ++
    extractElAttr el "href" ++
    extractElAttr el "longdesc" ++
    extractElAttr el "src" ++
    extractNodes children

extractElAttr (Element _ attrs children) attr
        | Just val <- attr `M.lookup` attrs,
            Just uri <- parseURIReference $ unpack val = [Link label' title' uri]
        | otherwise = []
    where
        label' = nodesText children
        title' = fromMaybe "" $ M.lookup "title" attrs

extractNodes (NodeElement el:nodes) = extractEl el ++ extractNodes nodes
extractNodes (NodeInstruction instruct:nodes) =
    extractMisc [MiscInstruction instruct] ++ extractNodes nodes
extractNodes (_:nodes) = extractNodes nodes
extractNodes [] = []

(+++) = append
nodesText :: [Node] -> Text
nodesText (NodeElement (Element _ attrs children):nodes) = nodesText children +++ nodesText nodes
nodesText (NodeContent text:nodes) = text +++ nodesText nodes
nodesText (_:nodes) = nodesText nodes
nodesText [] = ""

M src/Main.hs => src/Main.hs +17 -3
@@ 28,6 28,7 @@ import Data.Scientific (toRealFloat)
import Data.Maybe (fromJust, fromMaybe)
import System.Directory as Dir
import Control.Monad
import System.IO (stdout, hPutStrLn)

import DefaultCSS
import StyleTree


@@ 38,14 39,27 @@ import Links
main :: IO ()
main = do
    http <- HTTP.newManager TLS.tlsManagerSettings
    ProgramCtl docs outSSML <- parseArgs http
    ProgramCtl docs outSSML outLinks <- parseArgs http
    forM docs $ \(uri, doc) -> do
        let html = XML.documentRoot doc
        style <- retreiveStyles uri doc http uri
        let transcript = stylize style html
        C8.putStrLn $ renderElLBS $ styleToSSML $ applyCounters transcript

        case (outLinks, outSSML) of
            (Nothing, Nothing) -> renderDoc stdout style html
            (Just hLinks, Just hSSML) -> do
                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
                renderDoc hSSML style html
            (Just hLinks, Nothing) -> do
                forM (extractLinks doc) (hPutStrLn hLinks . unpack . linkToText)
                return ()
            (Nothing, Just hSSML) -> renderDoc hSSML style html
    return ()

renderDoc outSSML style html = do
    let transcript = stylize style html
    let ssml = styleToSSML $ applyCounters transcript
    C8.hPutStrLn outSSML $ renderElLBS $ ssml

renderElLBS el = XML.renderLBS XML.def $ XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = el,