~alcinnz/hurl

aedc142444374dae35cb4a1e568ca3462470e5c0 — Adrian Cochrane 2 years ago f605efd
Release HURL 0.2 exposing tables & parameterizing initial CSS.

While I was at it, I allowed useragent CSS to style errorpages specially.
3 files changed, 47 insertions(+), 50 deletions(-)

M hurl-xml/hurl-xml.cabal
R hurl-xml/src/Network/{MIME/XML.hs => URI/Fetch/XML.hs}
R hurl-xml/src/Network/{MIME/XML/Table.hs => URI/Fetch/XML/Table.hs}
M hurl-xml/hurl-xml.cabal => hurl-xml/hurl-xml.cabal +3 -3
@@ 10,7 10,7 @@ name:                hurl-xml
-- PVP summary:      +-+------- breaking API changes
--                   | | +----- non-breaking API additions
--                   | | | +--- code changes with no API change
version:             0.1.0.0
version:             0.2.0.0

-- A short (one-line) description of the package.
synopsis:            Fetch parsed XML & possibly CSS for a URL based on MIMEtype.


@@ 51,10 51,10 @@ cabal-version:       >=1.10

library
  -- Modules exported by the library.
  exposed-modules:     Network.MIME.XML
  exposed-modules:     Network.URI.Fetch.XML, Network.URI.Fetch.XML.Table

  -- Modules included in this library but not exported.
  other-modules:       Network.MIME.XML.Table
  other-modules:

  -- LANGUAGE extensions used by modules in this package.
  -- other-extensions:

R hurl-xml/src/Network/MIME/XML.hs => hurl-xml/src/Network/URI/Fetch/XML.hs +43 -46
@@ 1,7 1,7 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Network.MIME.XML(Page(..), loadVisited,
module Network.URI.Fetch.XML(Page(..), loadVisited,
    fetchDocument, pageForText, applyCSScharset, readStrict) where

import           Data.Text.Lazy (fromStrict)


@@ 36,13 36,14 @@ import Data.FileEmbed
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

import Network.MIME.XML.Table -- Apply table sorting here...
import Network.URI.Fetch.XML.Table -- Apply table sorting here...
import Data.HTML2CSS (html2css)

data Page styles = Page {
    pageURL :: URI,
    css :: styles,
    initCSS :: styles,
    initCSS :: URI -> String -> styles,
    domain :: String,
    html :: Document,
    pageTitle :: String,
    pageMIME :: String,


@@ 118,7 119,7 @@ shiftHistory self@Page { forwardStack = (title, url):fs } delta | delta > 0 =
shiftHistory self _ = self -- Error case.

parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragment = anch}, mime, _) = do
    page <- parseDocument ref sess resp >>= logHistory hist
    page <- parseDocument ref {domain = "document"} sess resp >>= logHistory hist
    apps' <- appsForMIME sess mime
    return $ attachHistory page {
        pageMIME = mime,


@@ 131,21 132,22 @@ parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragmen
        | otherwise = x
parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString)
        -> IO (Page s)
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
parseDocument Page {initCSS = css', appName = name} _ (uri, "text/html", Left text) =
    pageForDoc css' name uri $ HTML.parseLT $ fromStrict text
parseDocument Page {initCSS = css', appName = name} _(uri, "text/html", Right bytes) =
    pageForDoc css' name uri $ HTML.parseLBS bytes
parseDocument Page {initCSS = css', appName = name} _
parseDocument ref sess (uri, "html/x-error\t", resp) =
    parseDocument ref { domain = "error" } sess (uri, "text/html", resp)
parseDocument p _ (uri, "text/html", Left text) =
    pageForDoc p uri $ HTML.parseLT $ fromStrict text
parseDocument p _(uri, "text/html", Right bytes) =
    pageForDoc p uri $ HTML.parseLBS bytes
parseDocument p _
        (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
    pageForDoc css' name uri $ parseGemini (Just lang) text
parseDocument Page {initCSS = css', appName = name} _
    pageForDoc p uri $ parseGemini (Just lang) text
parseDocument p _
        (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
    pageForDoc css' name uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Left text) =
    pageForDoc css' name uri $ parseGemini Nothing text
parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Right bytes) =
    pageForDoc css' name uri $ parseGemini Nothing $ utf8' bytes
    pageForDoc p uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument p _ (uri, "text/gemini", Left text) =
    pageForDoc p uri $ parseGemini Nothing text
parseDocument p _ (uri, "text/gemini", Right bytes) =
    pageForDoc p uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
    parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _


@@ 161,40 163,39 @@ parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _
  | otherwise = return ret
 where
  ret = referer {
        css = parseForURL css' uri text
        css = parseForURL (css' uri' "document") uri text
    }
  absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text =
    CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
  absolutizeCSS tok = tok
parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Left body) =
    pageForDoc css' name uri $ parseDelimitedToTable ',' body
parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/tab-separated-values", Left body) =
    pageForDoc css' name uri $ parseDelimitedToTable '\t' body
parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Right body) =
    pageForDoc css' name uri $ parseDelimitedToTable ',' $ utf8' body
parseDocument ref@Page {initCSS = css', appName = name} _
        (uri, "text/tab-separated-values", Right body) =
    pageForDoc css' name uri $ parseDelimitedToTable '\t' $ utf8' body
parseDocument ref _ (uri, "text/csv", Left body) =
    pageForDoc ref uri $ parseDelimitedToTable ',' body
parseDocument ref _ (uri, "text/tab-separated-values", Left body) =
    pageForDoc ref uri $ parseDelimitedToTable '\t' body
parseDocument ref _ (uri, "text/csv", Right body) =
    pageForDoc ref uri $ parseDelimitedToTable ',' $ utf8' body
parseDocument ref _ (uri, "text/tab-separated-values", Right body) =
    pageForDoc ref uri $ parseDelimitedToTable '\t' $ utf8' body

parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
    where mime' = takeWhile (/= ';') mime
parseDocument Page {initCSS = css', appName = name} _ (uri, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' name uri doc
    | otherwise = pageForText css' name uri text
parseDocument Page {initCSS = css', appName = name} _ (uri, _, Right bytes)
    | Right doc <- XML.parseLBS def bytes = pageForDoc css' name uri doc
parseDocument Page {initCSS = css', appName = name} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
parseDocument p _ (uri, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = pageForDoc p uri doc
    | otherwise = pageForText p uri text
parseDocument p _ (uri, _, Right bytes)
    | Right doc <- XML.parseLBS def bytes = pageForDoc p uri doc
parseDocument p _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
    -- charset wasn't specified, so assume utf-8.
    pageForText css' name uri $ utf8' bytes
parseDocument Page {initCSS = css', appName = name} sess resp@(uri, mime, _) = do
    pageForText p uri $ utf8' bytes
parseDocument p sess resp@(uri, mime, _) = do
    dir <- getCurrentDirectory -- TODO find Downloads directory.
    ret <- saveDownload nullURI {
        uriScheme = "file:",
        uriAuthority = Just (URIAuth "" "" "")
    } dir resp >>= dispatchByMIME sess mime
    pageForDoc css' name uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
    pageForDoc p uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret

pageForText css' appname uri txt = pageForDoc css' appname uri XML.Document {
pageForText referer uri txt = pageForDoc referer uri XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",


@@ 204,23 205,19 @@ pageForText css' appname uri txt = pageForDoc css' appname uri XML.Document {
        XML.documentEpilogue = []
    }

pageForDoc :: StyleSheet s => s -> String -> URI -> Document -> IO (Page s)
pageForDoc css' appname uri doc = do
pageForDoc :: StyleSheet s => Page s -> URI -> Document -> IO (Page s)
pageForDoc referer@Page {initCSS = css', appName = appname, domain = d} uri doc = do
    -- See if the user has configured an alternate stylesheet for this domain.
    let authorStyle = return $ html2css doc uri css'
    let authorStyle = return $ html2css doc uri $ css' uri d
    styles <- case uriAuthority uri of
        Nothing -> authorStyle
        Just host -> do
            dir <- getXdgDirectory XdgConfig appname
            let path = dir </> "domain" </> uriRegName host
            hasAltStyle <- doesFileExist path
            if not hasAltStyle then authorStyle else parse css' <$> Txt.readFile path
            if not hasAltStyle then authorStyle else parse (css' uri d) <$> Txt.readFile path

    return Page {pageURL = uri, html = doc, css = styles,
        initCSS = css', appName = appname,
        -- These fields are all blank, to be filled in later by logHistory & parseDocument'
        pageTitle = "", pageMIME = "", apps = [],
        backStack = [], forwardStack = [], visitedURLs = Set.empty}
    return referer {pageURL = uri, html = doc, css = styles}

logHistory hist ret@Page {pageURL = url', html = doc, appName = name} = do
    dir <- getXdgDirectory XdgData name

R hurl-xml/src/Network/MIME/XML/Table.hs => hurl-xml/src/Network/URI/Fetch/XML/Table.hs +1 -1
@@ 1,5 1,5 @@
{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-}
module Network.MIME.XML.Table(applySort, applySortDoc, splitTable) where
module Network.URI.Fetch.XML.Table(applySort, applySortDoc, splitTable) where

import Text.XML
import Data.Text as Txt