~alcinnz/rhapsode

65356894485c86c5bc5c0abe54941fbe2f6132cd — Adrian Cochrane 2 years ago 6be6e9a
Factor out URI Fetching logic into seperate hackage.
7 files changed, 41 insertions(+), 418 deletions(-)

M rhapsode.cabal
M src/CExports.hs
M src/Input.hs
M src/Links.hs
M src/Render.hs
D src/Table.hs
M src/Types.hs
M rhapsode.cabal => rhapsode.cabal +3 -3
@@ 54,7 54,7 @@ library
  exposed-modules: CExports, Input, Links, Render, Types
  
  -- Modules included in this library.
  other-modules:       SSML, SpeechStyle, Table
  other-modules:       SSML, SpeechStyle
  
  -- LANGUAGE extensions used by modules in this package.
  -- other-extensions:    


@@ 63,11 63,11 @@ library
  build-depends:       base >=4.9 && <5, directory >= 1.3.2, bytestring,
        file-embed >= 0.0.9 && < 0.1, time, parallel >= 1,
        -- HTML parsing, SSML output
        html-conduit, xml-conduit, text, containers, data-default-class,
        xml-conduit, text, containers, hurl-xml >= 0.2,
        -- Networking
        network-uri, async, hurl >= 2, filepath, temporary,
        -- CSS
        stylist >= 2.4 && <3, css-syntax, xml-conduit-stylist >= 2.3 && <3, scientific,
        stylist >= 2.4 && <3, stylist-traits, css-syntax, scientific, xml-conduit-stylist >= 3 && <3.1,
        -- Voice2Json input
        process, aeson >= 1.5 && <1.6, unordered-containers
  

M src/CExports.hs => src/CExports.hs +1 -1
@@ 38,7 38,7 @@ import Foreign.Marshal.Array
--pair a b = (a, b)

-- FIXME: Segfaults, was intended for the sake of easy concurrency.
foreign export ccall c_docLinksAndRendering :: StablePtr Session -> StablePtr Page -> Bool -> CString -> IO (CArray CString)
foreign export ccall c_docLinksAndRendering :: StablePtr Session -> StablePtr (Page RhapsodeCSS) -> Bool -> CString -> IO (CArray CString)

c_docLinksAndRendering c_session c_page rewriteUrls c_v2jProfile = do
    c_links <- c_extractLinks c_page c_v2jProfile

M src/Input.hs => src/Input.hs +5 -209
@@ 1,7 1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Input(fetchDocument, pageForText, applyCSScharset, readStrict) where
module Input(fetchDocument, readStrict) where

import           Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt


@@ 9,10 9,10 @@ import qualified Data.Text.IO as Txt
import           Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import           Network.URI.Fetch.XML (fetchDocument)
import           Network.URI.Charset
import qualified Data.Map as M
import qualified Data.Set as Set


@@ 26,220 26,16 @@ import Data.CSS.Preprocessor.Conditions (conditionalStyles)

import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import System.FilePath ((</>))
import Data.FileEmbed

import Table -- Apply table sorting here...

-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

fetchDocument http referer mime uri@URI { uriScheme = 'n':'o':'c':'a':'c':'h':'e':'+':scheme } =
    fetchDocument http { cachingEnabled = False } referer mime uri { uriScheme = scheme }
fetchDocument http referer mime URI { uriScheme = "app:", uriPath = appID } = do
    dispatchByApp http Application {
        name = "", icon = nullURI, description = "",
        appId = appID
      } (pageMIME referer) $ Types.url referer
    return referer -- TODO play an error or success sound
fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument' referer http

parseDocument' ref@Page {visitedURLs = hist} sess resp@(URI {uriFragment = anchor}, mime, _) = do
    page <- parseDocument ref sess resp >>= logHistory hist
    apps' <- appsForMIME sess mime
    return $ attachHistory $ page {
        pageMIME = mime,
        apps = apps',
        html = applySortDoc anchor $ html page
    }
  where
    attachHistory x@Page { Types.url = uri'} | Types.url x == uri' = x
        | ((_, back):backs) <- backStack ref, back == uri' =
            x { backStack = backs, forwardStack = entry x:forwardStack ref }
        | ((_, next):nexts) <- forwardStack ref, next == uri' =
            x { forwardStack = nexts, backStack = entry x:backStack ref }
        | otherwise =
            x { forwardStack = entry x:forwardStack ref, backStack = backStack ref }
    entry x = (pageTitle x, Types.url x)
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
parseDocument _ _ (uri, "text/html", Left text) = pageForDoc uri $ HTML.parseLT $ fromStrict text
parseDocument _ _ (uri, "text/html", Right bytes) = pageForDoc uri $ HTML.parseLBS bytes
parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
    pageForDoc uri $ parseGemini (Just lang) text
parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
    pageForDoc uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini Nothing text
parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
    parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
parseDocument referer@Page {Types.url = uri'} _ (uri, "text/css", Left text)
  | URI {uriAuthority = Just host} <- Types.url referer = do
    -- Save this per-domain setting
    dir <- (</> "domain") <$> getXdgDirectory XdgConfig "rhapsode"
    createDirectoryIfMissing True dir
    Txt.writeFile (dir </> uriRegName host) $
        CSSTok.serialize $ map absolutizeCSS $ CSSTok.tokenize text

    return ret
  | otherwise = return ret
 where
  ret = referer {
        css = parseForURL (conditionalStyles uri' "document") uri text
    }
  absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text =
    CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
  absolutizeCSS tok = tok

parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
    where mime' = takeWhile (/= ';') mime
parseDocument _ _ (uri, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = pageForDoc uri doc
    | otherwise = pageForText uri text
parseDocument _ _ (uri, _, Right bytes) | Right doc <- XML.parseLBS def bytes = pageForDoc uri doc
parseDocument _ _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
    -- charset wasn't specified, so assume utf-8.
    pageForText uri $ utf8' bytes
parseDocument _ sess resp@(uri, mime, _) = do
    dir <- getCurrentDirectory -- TODO find Downloads directory.
    ret <- saveDownload nullURI {
        uriScheme = "file:",
        uriAuthority = Just (URIAuth "" "" "")
    } dir resp >>= dispatchByMIME sess mime
    pageForDoc uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret

pageForText uri txt = pageForDoc uri XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

pageForDoc uri doc = do
    -- See if the user has configured an alternate stylesheet for this domain.
    let authorStyle = return $ html2css doc uri
    styles <- case uriAuthority uri of
        Nothing -> authorStyle
        Just host -> do
            dir <- getXdgDirectory XdgConfig "rhapsode"
            let path = dir </> "domain" </> uriRegName host
            hasAltStyle <- doesFileExist path
            if not hasAltStyle then authorStyle else
                parse (conditionalStyles uri "document") <$> Txt.readFile path

    return Page {Types.url = uri, html = doc, css = styles,
        -- These fields are all blank, to be filled in later by logHistory & parseDocument'
        pageTitle = "", pageMIME = "", apps = [],
        backStack = [], forwardStack = [], visitedURLs = Set.empty}

logHistory hist ret@Page {Types.url = url', html = doc} = do
    dir <- getXdgDirectory XdgData "rhapsode"
    createDirectoryIfMissing True dir
    now <- getCurrentTime
    let title = Txt.unpack $ getTitle $ XML.documentRoot doc
    appendFile (dir </> "history.gmni") $ '\n' : intercalate " " [
        "=>", uriToStr' url', show now, title
      ]

    return ret { pageTitle = title, visitedURLs = Set.insert (Txt.pack $ uriToStr' url') hist}
  where
    getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element _ _ childs)
        | title:_ <- [getTitle el | XML.NodeElement el <- childs] = title
        | otherwise = ""

uriToStr' :: URI -> String
uriToStr' uri = uriToString id uri ""

--------
---- CSS charset sniffing
--------
applyCSScharset (charset:charsets) bytes
        | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
        | otherwise = applyCSScharset charsets bytes
    where
        text = convertCharset charset bytes
applyCSScharset _ bytes = convertCharset "utf-8" bytes
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
    | otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks

--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))

infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.fromList [
                ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""

parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
    el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
    el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
    el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines

parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
            XML.elementAttributes = M.insert "href" url M.empty
        } : parseGemini' lines
    | otherwise = (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
    where
        go (('`':.'`':.'`':._):lines) = parseGemini' lines
        go (_:lines) = go lines
        go [] = []
parseGemini' ("```":lines) = go [] lines
    where
        go texts (('`':.'`':.'`':._):lines) =
            el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (line:lines) = go (texts ++ [line]) lines
        go texts [] = []

parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []

--------
---- C API
--------


@@ 253,15 49,15 @@ c_newSession = do
c_freeSession = freeStablePtr


foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)
foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr (Page RhapsodeCSS) -> CString -> IO (StablePtr (Page RhapsodeCSS))

c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` Types.url referer
    doc <- fetchDocument session referer (words mimes) uri'
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` pageURL referer
    doc <- fetchDocument session referer uri'
    newStablePtr doc

foreign export ccall c_enableLogging :: StablePtr Session -> IO (StablePtr Session)

M src/Links.hs => src/Links.hs +9 -8
@@ 12,7 12,8 @@ import qualified Data.Text.Foreign as FTxt
import Data.Maybe
import Text.Read (readMaybe)

import Table (splitTable)
-- FIXME: Expose this API from HURL XML...
-- import Table (splitTable)

import Types
import Foreign.StablePtr


@@ 83,8 84,8 @@ extractEl _ el@(Element (Name "link" _ _) attrs [])
        let Application name _ title _ = mimeInfo "text/css"
        in [Link (pack name) (pack title) uri]
  where attrs' = M.mapKeys nameLocalName attrs
extractEl path (Element (Name "table" _ _) _ childs) =
    extractTable path (splitTable childs) ++ extractNodes (0:path) childs
-- extractEl path (Element (Name "table" _ _) _ childs) =
--    extractTable path (splitTable childs) ++ extractNodes (0:path) childs
extractEl path el@(Element _ _ children) =
    extractElAttr el "href" ++
    extractElAttr el "longdesc" ++


@@ 142,9 143,9 @@ nodesText (NodeContent text:nodes) def = text +++ nodesText nodes def
nodesText (_:nodes) def = nodesText nodes def
nodesText [] def = def

linksFromPage :: Page -> [Link]
linksFromPage :: Page RhapsodeCSS -> [Link]
linksFromPage Page {
        url = url',
        pageURL = url',
        pageTitle = title',
        html = html',
        apps = apps',


@@ 198,10 199,10 @@ readBookmarks = do

-- | Write out a file of most frequently encountered unvisited links.
-- Hopefully this'll help surfers rely less on YouTube, et al's hueristics.
updateSuggestions :: Page -> IO ()
updateSuggestions :: Page RhapsodeCSS -> IO ()
updateSuggestions page = do
    let links = extractLinks $ html page
    let domain = maybe "" show $ uriAuthority $ url page
    let domain = maybe "" show $ uriAuthority $ pageURL page

    dir <- getXdgDirectory XdgData "rhapsode"
    let path = dir </> "suggestions.gmni"


@@ 239,7 240,7 @@ outputSentences links dir = do
------
--- C API
------
foreign export ccall c_extractLinks :: StablePtr Page -> CString -> IO (CArray CString)
foreign export ccall c_extractLinks :: StablePtr (Page RhapsodeCSS) -> CString -> IO (CArray CString)

c_extractLinks c_page c_v2jProfile = do
    page <- deRefStablePtr c_page

M src/Render.hs => src/Render.hs +11 -10
@@ 22,11 22,13 @@ import qualified Data.CSS.Preprocessor.Conditions as CSSCond
import           Data.CSS.Preprocessor.Assets
import qualified Data.CSS.Preprocessor.PsuedoClasses as CSSPseudo
import qualified Data.CSS.Preprocessor.Text as CSSTxt
import           Stylist (cssPriorityAgent, cssPriorityUser)
import           Data.HTML2CSS (el2stylist)

import qualified Data.HTML2CSS as H2C
import           Network.URI
import           Network.URI.Fetch
import           Network.URI.Charset
import           Network.URI.Fetch.XML (applyCSScharset)

--- For CSS assets
import           Data.List (nub, elem)


@@ 41,7 43,6 @@ import qualified Data.CSS.Syntax.Selector as CSSSel
-- Internal Rhapsode Subcomponents
import SpeechStyle
import SSML
import Input (applyCSScharset)

-- C API
import Types


@@ 51,12 52,12 @@ import Data.ByteString (useAsCString)

renderDoc :: Style.QueryableStyleSheet (Style.VarParser (CSSTxt.TextStyle SpeechStyle)) -> XML.Element -> B.ByteString
renderDoc style html =
    renderElLBS $ styleToSSML $ CSSTxt.resolve $ inlinePseudos $ H2C.stylizeEl style html
    renderElLBS $ styleToSSML $ CSSTxt.resolve $ inlinePseudos' $ stylize style $ el2stylist html

inlinePseudos :: Style.PropertyParser s => StyleTree [(Text, Style.VarParser s)] -> StyleTree s
inlinePseudos (StyleTree self childs) = StyleTree {
inlinePseudos' :: Style.PropertyParser s => StyleTree [(Text, Style.VarParser s)] -> StyleTree s
inlinePseudos' (StyleTree self childs) = StyleTree {
        style = fromMaybe Style.temp $ Style.innerParser <$> lookup "" self,
        children = pseudo "before" ++ map inlinePseudos childs ++ pseudo "after"
        children = pseudo "before" ++ map inlinePseudos' childs ++ pseudo "after"
    } where
        pseudo n
            | Just style <- Style.innerParser <$> lookup n self,


@@ 72,7 73,7 @@ renderElLBS el = XML.renderLBS XML.def $ XML.Document {

retreiveStyles :: Session -> CSSCond.ConditionalStyles (CSSTxt.TextStyle SpeechStyle) -> IO (CSSCond.ConditionalStyles (CSSTxt.TextStyle SpeechStyle))
retreiveStyles manager authorStyle = do
    let agentStyle = H2C.cssPriorityAgent authorStyle `CSS.parse` $(embedStringFile $ buildDirFile "useragent.css")
    let agentStyle = cssPriorityAgent authorStyle `CSS.parse` $(embedStringFile $ buildDirFile "useragent.css")
    userStyle <- loadUserStyles agentStyle
    CSSCond.loadImports loadURL lowerVars lowerToks userStyle []
  where


@@ 99,7 100,7 @@ loadUserStyles styles = do
    loadDirectory _ False = return styles
    loadDirectory dir True = do
        files <- Dir.listDirectory dir
        loadFiles (H2C.cssPriorityUser styles) files
        loadFiles (cssPriorityUser styles) files
    loadFiles style (file:files) = do
        source <- readFile file
        CSS.parse style (Txt.pack source) `loadFiles` files


@@ 162,13 163,13 @@ filterMIMEs mimes cb download@(_, mime, _)
---- C API
--------

foreign export ccall c_renderDoc :: StablePtr Session -> StablePtr Page -> Bool -> IO CString -- Hard to C bindings without IO
foreign export ccall c_renderDoc :: StablePtr Session -> StablePtr (Page RhapsodeCSS) -> Bool -> IO CString -- Hard to C bindings without IO

c_renderDoc c_session c_page rewriteURLs = do
    session <- deRefStablePtr c_session
    page <- deRefStablePtr c_page
    css' <- retreiveStyles session $ css page
    let pseudoFilter = rhapsodePseudoFilter (Types.url page) $ visitedURLs page
    let pseudoFilter = rhapsodePseudoFilter (pageURL page) $ visitedURLs page
    qCSS <- if rewriteURLs then do
        assets <- downloadAssets session [
                "audio/vnd.wav"

D src/Table.hs => src/Table.hs +0 -153
@@ 1,153 0,0 @@
{-# LANGUAGE OverloadedStrings #-}
module Table(applySort, applySortDoc, splitTable) where

import Text.XML
import Data.Text
import qualified Data.Map as M

import Data.Maybe
import qualified Data.List as L
import Text.Read (readMaybe)

applySortDoc :: String -> Document -> Document
applySortDoc anchor doc@Document {documentRoot = el} = doc {documentRoot = applySort anchor el}

applySort :: String -> Element -> Element
applySort ('#':'-':'r':'h':'a':'p':'s':'-':'%':anchor) el
    | (id', ord:col) <- L.break (`elem` ['<', '>']) anchor, Just col' <- readMaybe col =
        applySort' id' (ord == '<') col' el
applySort _ el = el

applySort' :: String -> Bool -> Int -> Element -> Element
applySort' ('.':id') asc col el@Element { elementNodes = childs }
    | (ix, subpath) <- L.break (== '.') id', Just ix' <- readMaybe ix =
        el { elementNodes = setAt ix' (rewriteNode subpath) childs }
    | otherwise = el
  where
    rewriteNode p (NodeElement child) = NodeElement $ applySort' p asc col child
    rewriteNode _ x = x
applySort' "" asc col el = applySort'' asc col el

applySort' id' asc col el@Element { elementAttributes = attrs, elementNodes = childs }
    | Just actual <- "id" `M.lookup` M.mapKeys nameLocalName attrs, pack id' == actual =
        applySort'' asc col el
    | otherwise = el { elementNodes = L.map searchNode childs }
  where
    searchNode (NodeElement child) = NodeElement $ applySort' id' asc col child
    searchNode x = x

applySort'' asc col el
    | Just sortable <- table2sorttable el = el {
        elementNodes = annotateTHead header asc col ++
            (L.concatMap (L.map NodeElement . markup) $ L.sortBy compareRows sortable)
            ++ footer
      }
    | otherwise = el
  where
    compareRows (TableRow a _) (TableRow b _)
        | asc = (a !! col) `compare` (b !! col)
        | otherwise = (b !! col) `compare` (a !! col)
    (header, _, footer) = splitTable $ elementNodes el

data TableRow = TableRow { keys :: [Text], markup :: [Element] }

table2sorttable Element {
        elementName = Name "table" _ _,
        elementAttributes = attrs,
        elementNodes = childs
    } | "-rhaps-unsortable" `elem` attrs, (_, body, _) <- splitTable childs =
        trs2sorttable body
table2sorttable _ = Nothing

splitTable :: [Node] -> ([Node], [Element], [Node])
splitTable (NodeElement el@Element { elementName = Name "caption" _ _}:els) =
    let (header, body, footer) = splitTable els in (NodeElement el:header, body, footer)
splitTable (NodeElement el@Element { elementName = Name "colgroup" _ _}:els) =
    let (header, body, footer) = splitTable els in (NodeElement el:header, body, footer)
splitTable (NodeElement el@Element { elementName = Name "thead" _ _}:els) =
    let (body, footer) = splitTableBody els in ([NodeElement el], body, footer)
splitTable (NodeElement el@Element { elementName = Name "tr" _ _, elementNodes = childs}:els)
    | L.all (== "th") [nameLocalName $ elementName el | NodeElement el <- childs] =
        let (body, footer) = splitTableBody els in ([NodeElement el], body, footer)
splitTable els@(NodeElement _:_) =
    let (body, footer) = splitTableBody els in ([], body, footer)
splitTable (_:els) = splitTable els
splitTable [] = ([], [], [])

splitTableBody :: [Node] -> ([Element], [Node])
splitTableBody (NodeElement el@Element { elementName = Name "tbody" _ _, elementNodes = childs }:els) =
    ([el | NodeElement el@Element { elementName = Name "tr" _ _ } <- childs], els)
splitTableBody (NodeElement el@Element { elementName = Name "tr" _ _ }:els) =
    let (body, footer) = splitTableBody els in (el:body, footer)
splitTableBody els@(NodeElement _:_) = ([], els)
splitTableBody (_:els) = splitTableBody els
splitTableBody [] = ([], [])

annotateTHead (NodeElement el@Element { elementName = Name "thead" _ _, elementNodes = childs }:nodes) a c =
    NodeElement el { elementNodes = annotateTHead childs a c } : nodes
annotateTHead (NodeElement el@Element { elementName = Name "tr" _ _, elementNodes = childs }:nodes) a c =
    NodeElement el { elementNodes = annotateTR childs a c 0 } : nodes
annotateTHead (child:childs) a c = child:annotateTHead childs a c
annotateTHead [] _ _ = []

annotateTR (NodeElement el@Element { elementName = Name n _ _, elementAttributes = attrs }:nodes) asc col count
    | n `elem` ["th", "td"], count >= col =
        NodeElement el { elementAttributes = M.insert "aria-sort" asc' attrs }:nodes
    | n `elem` ["th", "td"] = NodeElement el:annotateTR nodes asc col (count + colspan)
  where
    colspan = fromMaybe 1 (readMaybe =<< unpack <$> M.lookup "colspan" attrs')
    attrs' = M.mapKeys nameLocalName attrs
    asc' | asc = "ascending"
        | otherwise = "descending"
annotateTR (node:nodes) a c n = node:annotateTR nodes a c n
annotateTR [] _ _ _ = []

trs2sorttable els@(el@Element { elementName = Name "tr" _ _, elementNodes = childs }:_)
    | Just keys' <- tds2keys [el | NodeElement el <- childs],
      Just (group, rest) <- groupTrs els 1,
      Just rest' <- trs2sorttable rest = Just (TableRow keys' group : rest')
trs2sorttable [] = Just []
trs2sorttable _ = Nothing

tds2keys :: [Element] -> Maybe [Text]
tds2keys (el@Element {elementName = Name n _ _, elementAttributes = attrs, elementNodes = childs }:els)
    | n `elem` ["td", "th"], Just rest <- tds2keys els =
        Just (Prelude.replicate colspan (nodesText childs) ++ rest)
  where
    colspan | Just n <- "colspan" `M.lookup` M.mapKeys nameLocalName attrs,
            Just m <- readMaybe $ unpack n = m
        | otherwise = 1
tds2keys [] = Just []
tds2keys _ = Nothing

groupTrs (el@Element {elementName = Name "tr" _ _}:els) n
    | rowRowspan n el <= 1 = Just (el:[], els)
    | Just (tail, rest) <- groupTrs els $ pred n = Just (el:tail, rest)
groupTrs (_:els) n = groupTrs els n
groupTrs _ _ = Nothing

rowRowspan n Element {elementName = Name "tr" _ _, elementNodes = childs } =
    Prelude.maximum (n : [n |
            NodeElement (Element (Name name _ _) attrs _) <- childs,
            name `elem` ["td", "th"],
            rowspan <- maybeToList ("rowspan" `M.lookup` M.mapKeys nameLocalName attrs),
            n <- maybeToList $ readMaybe $ unpack rowspan])


--- Utils

(+++) = append
nodesText :: [Node] -> Text
nodesText (NodeElement (Element _ attrs children):nodes) = nodesText children +++ nodesText nodes
nodesText (NodeContent text:nodes) = text +++ nodesText nodes
nodesText (_:nodes) = nodesText nodes
nodesText [] = ""

setAt :: Int -> (a -> a) -> [a] -> [a]
setAt i a ls
  | i < 0 = ls
  | otherwise = go i ls
  where
    go 0 (x:xs) = a x : xs
    go n (x:xs) = x : go (n-1) xs
    go _ []     = []

M src/Types.hs => src/Types.hs +12 -34
@@ 1,5 1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module Types(CArray, Page(..), Application(..), buildDirFile, readStrict) where
module Types(CArray, Page(..), Application(..), RhapsodeCSS, buildDirFile, readStrict) where

import System.Directory (getCurrentDirectory) -- default referer URI
import SpeechStyle (SpeechStyle)


@@ 8,7 8,8 @@ import Data.CSS.Preprocessor.Text (TextStyle)
import Text.XML
import qualified Data.Map.Strict as M
import Network.URI
import Network.URI.Fetch (Application(..))
import Network.URI.Fetch (Application(..), url)
import Network.URI.Fetch.XML (Page(..), loadVisited, readStrict)

-- For the in-memory history log
import qualified Data.Set as Set


@@ 26,54 27,31 @@ buildDir = "."
buildDirFile = (buildDir </>)

type CArray a = Ptr a
type RhapsodeCSS = ConditionalStyles (TextStyle SpeechStyle)

data Page = Page {
    url :: URI,
    css :: ConditionalStyles (TextStyle SpeechStyle),
    html :: Document,
    pageTitle :: String,
    pageMIME :: String,
    apps :: [Application],
    backStack :: [(String, URI)],
    forwardStack :: [(String, URI)],
    -- Probably don't need an MVar here, but let's be safe!
    visitedURLs :: Set Text
}

foreign export ccall c_initialReferer :: IO (StablePtr Page)

loadVisited :: IO (Set Text)
loadVisited = do
    dir <- getXdgDirectory XdgData "rhapsode"
    let path = dir </> "history.gmni"
    exists <- doesFileExist path

    if exists then do
        file <- readStrict path
        let hist = Set.fromList [Txt.pack uri | _:uri:_ <- map words $ lines file]
        return hist
    else return Set.empty

readStrict path = do s <- Prelude.readFile path; length s `seq` return s
foreign export ccall c_initialReferer :: IO (StablePtr (Page RhapsodeCSS))

c_initialReferer = do
    cwd <- getCurrentDirectory
    hist <- loadVisited
    hist <- loadVisited "rhapsode"
    newStablePtr $ Page {
        -- Default to URIs being relative to CWD.
        url = URI {uriScheme = "file:", uriPath = cwd,
        pageURL = URI {uriScheme = "file:", uriPath = cwd,
            uriAuthority = Nothing, uriQuery = "", uriFragment = ""},
        -- Blank values:
        css = conditionalStyles nullURI "temp",
        domain = "temp",
        html = Document {
            documentPrologue = Prologue [] Nothing [],
            documentRoot = Element "temp" M.empty [],
            documentEpilogue = []
        },
        pageTitle = "", pageMIME = "", apps = [],
        backStack = [], forwardStack = [], visitedURLs = hist
        backStack = [], forwardStack = [], visitedURLs = hist,
        initCSS = conditionalStyles,
        appName = "rhapsode"
    }

foreign export ccall c_freePage :: StablePtr Page -> IO ()
foreign export ccall c_freePage :: StablePtr (Page RhapsodeCSS) -> IO ()

c_freePage = freeStablePtr