From aedc142444374dae35cb4a1e568ca3462470e5c0 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Tue, 20 Sep 2022 17:38:31 +1200 Subject: [PATCH] Release HURL 0.2 exposing tables & parameterizing initial CSS. While I was at it, I allowed useragent CSS to style errorpages specially. --- hurl-xml/hurl-xml.cabal | 6 +- .../src/Network/{MIME => URI/Fetch}/XML.hs | 89 +++++++++---------- .../Network/{MIME => URI/Fetch}/XML/Table.hs | 2 +- 3 files changed, 47 insertions(+), 50 deletions(-) rename hurl-xml/src/Network/{MIME => URI/Fetch}/XML.hs (81%) rename hurl-xml/src/Network/{MIME => URI/Fetch}/XML/Table.hs (99%) diff --git a/hurl-xml/hurl-xml.cabal b/hurl-xml/hurl-xml.cabal index 7e7ad3a..fe3c779 100644 --- a/hurl-xml/hurl-xml.cabal +++ b/hurl-xml/hurl-xml.cabal @@ -10,7 +10,7 @@ name: hurl-xml -- PVP summary: +-+------- breaking API changes -- | | +----- non-breaking API additions -- | | | +--- code changes with no API change -version: 0.1.0.0 +version: 0.2.0.0 -- A short (one-line) description of the package. synopsis: Fetch parsed XML & possibly CSS for a URL based on MIMEtype. @@ -51,10 +51,10 @@ cabal-version: >=1.10 library -- Modules exported by the library. - exposed-modules: Network.MIME.XML + exposed-modules: Network.URI.Fetch.XML, Network.URI.Fetch.XML.Table -- Modules included in this library but not exported. - other-modules: Network.MIME.XML.Table + other-modules: -- LANGUAGE extensions used by modules in this package. -- other-extensions: diff --git a/hurl-xml/src/Network/MIME/XML.hs b/hurl-xml/src/Network/URI/Fetch/XML.hs similarity index 81% rename from hurl-xml/src/Network/MIME/XML.hs rename to hurl-xml/src/Network/URI/Fetch/XML.hs index a87865b..9944d70 100644 --- a/hurl-xml/src/Network/MIME/XML.hs +++ b/hurl-xml/src/Network/URI/Fetch/XML.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE PatternSynonyms, ViewPatterns #-} -module Network.MIME.XML(Page(..), loadVisited, +module Network.URI.Fetch.XML(Page(..), loadVisited, fetchDocument, pageForText, applyCSScharset, readStrict) where import Data.Text.Lazy (fromStrict) @@ -36,13 +36,14 @@ import Data.FileEmbed import Data.Maybe (fromMaybe) import Text.Read (readMaybe) -import Network.MIME.XML.Table -- Apply table sorting here... +import Network.URI.Fetch.XML.Table -- Apply table sorting here... import Data.HTML2CSS (html2css) data Page styles = Page { pageURL :: URI, css :: styles, - initCSS :: styles, + initCSS :: URI -> String -> styles, + domain :: String, html :: Document, pageTitle :: String, pageMIME :: String, @@ -118,7 +119,7 @@ shiftHistory self@Page { forwardStack = (title, url):fs } delta | delta > 0 = shiftHistory self _ = self -- Error case. parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragment = anch}, mime, _) = do - page <- parseDocument ref sess resp >>= logHistory hist + page <- parseDocument ref {domain = "document"} sess resp >>= logHistory hist apps' <- appsForMIME sess mime return $ attachHistory page { pageMIME = mime, @@ -131,21 +132,22 @@ parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragmen | otherwise = x parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString) -> IO (Page s) -parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp) -parseDocument Page {initCSS = css', appName = name} _ (uri, "text/html", Left text) = - pageForDoc css' name uri $ HTML.parseLT $ fromStrict text -parseDocument Page {initCSS = css', appName = name} _(uri, "text/html", Right bytes) = - pageForDoc css' name uri $ HTML.parseLBS bytes -parseDocument Page {initCSS = css', appName = name} _ +parseDocument ref sess (uri, "html/x-error\t", resp) = + parseDocument ref { domain = "error" } sess (uri, "text/html", resp) +parseDocument p _ (uri, "text/html", Left text) = + pageForDoc p uri $ HTML.parseLT $ fromStrict text +parseDocument p _(uri, "text/html", Right bytes) = + pageForDoc p uri $ HTML.parseLBS bytes +parseDocument p _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) = - pageForDoc css' name uri $ parseGemini (Just lang) text -parseDocument Page {initCSS = css', appName = name} _ + pageForDoc p uri $ parseGemini (Just lang) text +parseDocument p _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) = - pageForDoc css' name uri $ parseGemini (Just lang) $ utf8' bytes -parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Left text) = - pageForDoc css' name uri $ parseGemini Nothing text -parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Right bytes) = - pageForDoc css' name uri $ parseGemini Nothing $ utf8' bytes + pageForDoc p uri $ parseGemini (Just lang) $ utf8' bytes +parseDocument p _ (uri, "text/gemini", Left text) = + pageForDoc p uri $ parseGemini Nothing text +parseDocument p _ (uri, "text/gemini", Right bytes) = + pageForDoc p uri $ parseGemini Nothing $ utf8' bytes parseDocument a b (a', b'@"text/css", Right bytes) = parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes) parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _ @@ -161,40 +163,39 @@ parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _ | otherwise = return ret where ret = referer { - css = parseForURL css' uri text + css = parseForURL (css' uri' "document") uri text } absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text = CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri' absolutizeCSS tok = tok -parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Left body) = - pageForDoc css' name uri $ parseDelimitedToTable ',' body -parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/tab-separated-values", Left body) = - pageForDoc css' name uri $ parseDelimitedToTable '\t' body -parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Right body) = - pageForDoc css' name uri $ parseDelimitedToTable ',' $ utf8' body -parseDocument ref@Page {initCSS = css', appName = name} _ - (uri, "text/tab-separated-values", Right body) = - pageForDoc css' name uri $ parseDelimitedToTable '\t' $ utf8' body +parseDocument ref _ (uri, "text/csv", Left body) = + pageForDoc ref uri $ parseDelimitedToTable ',' body +parseDocument ref _ (uri, "text/tab-separated-values", Left body) = + pageForDoc ref uri $ parseDelimitedToTable '\t' body +parseDocument ref _ (uri, "text/csv", Right body) = + pageForDoc ref uri $ parseDelimitedToTable ',' $ utf8' body +parseDocument ref _ (uri, "text/tab-separated-values", Right body) = + pageForDoc ref uri $ parseDelimitedToTable '\t' $ utf8' body parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body) where mime' = takeWhile (/= ';') mime -parseDocument Page {initCSS = css', appName = name} _ (uri, _, Left text) - | Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' name uri doc - | otherwise = pageForText css' name uri text -parseDocument Page {initCSS = css', appName = name} _ (uri, _, Right bytes) - | Right doc <- XML.parseLBS def bytes = pageForDoc css' name uri doc -parseDocument Page {initCSS = css', appName = name} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) = +parseDocument p _ (uri, _, Left text) + | Right doc <- XML.parseText def $ fromStrict text = pageForDoc p uri doc + | otherwise = pageForText p uri text +parseDocument p _ (uri, _, Right bytes) + | Right doc <- XML.parseLBS def bytes = pageForDoc p uri doc +parseDocument p _ (uri, 't':'e':'x':'t':'/':_, Right bytes) = -- charset wasn't specified, so assume utf-8. - pageForText css' name uri $ utf8' bytes -parseDocument Page {initCSS = css', appName = name} sess resp@(uri, mime, _) = do + pageForText p uri $ utf8' bytes +parseDocument p sess resp@(uri, mime, _) = do dir <- getCurrentDirectory -- TODO find Downloads directory. ret <- saveDownload nullURI { uriScheme = "file:", uriAuthority = Just (URIAuth "" "" "") } dir resp >>= dispatchByMIME sess mime - pageForDoc css' name uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret + pageForDoc p uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret -pageForText css' appname uri txt = pageForDoc css' appname uri XML.Document { +pageForText referer uri txt = pageForDoc referer uri XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", @@ -204,23 +205,19 @@ pageForText css' appname uri txt = pageForDoc css' appname uri XML.Document { XML.documentEpilogue = [] } -pageForDoc :: StyleSheet s => s -> String -> URI -> Document -> IO (Page s) -pageForDoc css' appname uri doc = do +pageForDoc :: StyleSheet s => Page s -> URI -> Document -> IO (Page s) +pageForDoc referer@Page {initCSS = css', appName = appname, domain = d} uri doc = do -- See if the user has configured an alternate stylesheet for this domain. - let authorStyle = return $ html2css doc uri css' + let authorStyle = return $ html2css doc uri $ css' uri d styles <- case uriAuthority uri of Nothing -> authorStyle Just host -> do dir <- getXdgDirectory XdgConfig appname let path = dir "domain" uriRegName host hasAltStyle <- doesFileExist path - if not hasAltStyle then authorStyle else parse css' <$> Txt.readFile path + if not hasAltStyle then authorStyle else parse (css' uri d) <$> Txt.readFile path - return Page {pageURL = uri, html = doc, css = styles, - initCSS = css', appName = appname, - -- These fields are all blank, to be filled in later by logHistory & parseDocument' - pageTitle = "", pageMIME = "", apps = [], - backStack = [], forwardStack = [], visitedURLs = Set.empty} + return referer {pageURL = uri, html = doc, css = styles} logHistory hist ret@Page {pageURL = url', html = doc, appName = name} = do dir <- getXdgDirectory XdgData name diff --git a/hurl-xml/src/Network/MIME/XML/Table.hs b/hurl-xml/src/Network/URI/Fetch/XML/Table.hs similarity index 99% rename from hurl-xml/src/Network/MIME/XML/Table.hs rename to hurl-xml/src/Network/URI/Fetch/XML/Table.hs index 36af47d..b87882b 100644 --- a/hurl-xml/src/Network/MIME/XML/Table.hs +++ b/hurl-xml/src/Network/URI/Fetch/XML/Table.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-} -module Network.MIME.XML.Table(applySort, applySortDoc, splitTable) where +module Network.URI.Fetch.XML.Table(applySort, applySortDoc, splitTable) where import Text.XML import Data.Text as Txt -- 2.30.2