From b2147925d257568770d9b3cf38bc95011350b88d Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Wed, 14 Oct 2020 20:01:34 +1300 Subject: [PATCH] Improve Gemini support, upgrade HURL, log history records. --- rhapsode.cabal | 4 ++-- src/Input.hs | 63 +++++++++++++++++++++++++++++++++++++++----------- src/Render.hs | 2 +- 3 files changed, 52 insertions(+), 17 deletions(-) diff --git a/rhapsode.cabal b/rhapsode.cabal index eabbc0f..6ad4723 100644 --- a/rhapsode.cabal +++ b/rhapsode.cabal @@ -64,8 +64,8 @@ library html-conduit, xml-conduit, text, containers, data-default-class, network-uri, stylist >= 2.2 && <3, css-syntax, xml-conduit-stylist >= 2.2 && <3, scientific, - async, hurl >= 1.4.1.0, filepath, temporary, - file-embed >= 0.0.9 && < 0.1 + async, hurl >= 1.4.2.0, filepath, temporary, + file-embed >= 0.0.9 && < 0.1, time -- Directories containing source files. hs-source-dirs: src diff --git a/src/Input.hs b/src/Input.hs index 9ba75f9..f760a78 100644 --- a/src/Input.hs +++ b/src/Input.hs @@ -14,6 +14,8 @@ import Network.URI import Network.URI.Fetch import Network.URI.Charset import qualified Data.Map as M +import Data.List (intercalate) +import Data.Time.Clock -- For alternative styles import qualified Data.CSS.Syntax.Tokens as CSSTok @@ -24,6 +26,7 @@ import System.IO import System.IO.Temp import Data.Default.Class import System.Directory +import System.FilePath (()) import Data.FileEmbed -- For C API @@ -35,21 +38,27 @@ import Foreign.C.String utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes -fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument referer http +fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument referer http >>= logHistory parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp) parseDocument _ _ (uri, "text/html", Left text) = pageForDoc uri $ HTML.parseLT $ fromStrict text parseDocument _ _ (uri, "text/html", Right bytes) = pageForDoc uri $ HTML.parseLBS bytes -parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini text -parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini $ utf8' bytes +parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) = + pageForDoc uri $ parseGemini (Just lang) text +parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) = + pageForDoc uri $ parseGemini (Just lang) $ utf8' bytes +parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini Nothing text +parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini Nothing $ utf8' bytes parseDocument referer _ (uri, "text/css", Left text) = return referer { - url = uri, + Types.url = uri, css = parseForURL (conditionalStyles uri "document") uri text } parseDocument referer _ (uri, "text/css", Right bytes) = return referer { - url = uri, + Types.url = uri, css = parseForURL (conditionalStyles uri "document") uri text } where text = applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes +parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body) + where mime' = takeWhile (/= ';') mime parseDocument _ _ (uri, _, Left text) | Right doc <- XML.parseText def $ fromStrict text = pageForDoc uri doc | otherwise = pageForText uri text @@ -75,7 +84,22 @@ pageForText uri txt = pageForDoc uri XML.Document { XML.documentEpilogue = [] } -pageForDoc uri doc = return Page {url = uri, html = doc, css = html2css doc uri} +pageForDoc uri doc = return Page {Types.url = uri, html = doc, css = html2css doc uri} + +logHistory ret@Page {Types.url = url', html = doc} = do + dir <- getXdgDirectory XdgData "rhapsode" + createDirectoryIfMissing True dir + now <- getCurrentTime + appendFile (dir "history.gmni") $ intercalate " " [ + "=>", uriToString id url' "", show now, Txt.unpack $ getTitle $ XML.documentRoot doc + ] + return ret + where + getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs] + getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs] + getTitle (XML.Element _ _ childs) + | title:_ <- [getTitle el | XML.NodeElement el <- childs] = title + | otherwise = "" -------- ---- CSS charset sniffing @@ -103,17 +127,22 @@ infixr 5 :. el name text = XML.Element name M.empty [XML.NodeContent text] -parseGemini :: Txt.Text -> XML.Document -parseGemini txt = XML.Document { +parseGemini :: Maybe String -> Txt.Text -> XML.Document +parseGemini lang txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "body", - XML.elementAttributes = M.empty, + XML.elementAttributes = M.fromList [ + ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]], XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt }, XML.documentEpilogue = [] } +csv (',':_) = "" +csv (c:rest) = c:csv rest +csv "" = "" + parseGemini' :: [Txt.Text] -> [XML.Element] parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines @@ -126,6 +155,7 @@ parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines -- Not properly structured, but still sounds fine... parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines +parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines parseGemini' (('=':.'>':.line):lines) | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) { @@ -134,13 +164,18 @@ parseGemini' (('=':.'>':.line):lines) | otherwise = (el "a" $ Txt.strip line) { XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty } : parseGemini' lines -parseGemini' (('`':.'`':.'`':.line):lines) = go [line] lines +parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines where - go texts ("```":lines) = el "pre" (Txt.unlines texts) : parseGemini' lines - go texts (('`':.'`':.'`':.line):lines) = - el "pre" (Txt.unlines texts) : el "p" line : parseGemini' lines + go (('`':.'`':.'`':._):lines) = parseGemini' lines + go (_:lines) = go lines + go [] = [] +parseGemini' ("```":lines) = go [] lines + where + go texts (('`':.'`':.'`':._):lines) = + el "pre" (Txt.unlines texts) : parseGemini' lines go texts (line:lines) = go (texts ++ [line]) lines go texts [] = [] + parseGemini' (line:lines) = el "p" line : parseGemini' lines parseGemini' [] = [] @@ -164,6 +199,6 @@ c_fetchURL c_session c_mimes c_referer c_uri = do mimes <- peekCString c_mimes referer <- deRefStablePtr c_referer uri <- peekCString c_uri - let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer + let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` Types.url referer doc <- fetchDocument session referer (words mimes) uri' newStablePtr doc diff --git a/src/Render.hs b/src/Render.hs index 9be6cb7..111ed91 100644 --- a/src/Render.hs +++ b/src/Render.hs @@ -135,7 +135,7 @@ c_renderDoc c_session c_page rewriteURLs = do session <- deRefStablePtr c_session page <- deRefStablePtr c_page css' <- retreiveStyles session $ css page - let pseudoFilter = rhapsodePseudoFilter $ url page + let pseudoFilter = rhapsodePseudoFilter $ Types.url page qCSS <- if rewriteURLs then do assets <- downloadAssets session [ "audio/vnd.wav" -- 2.30.2