~alcinnz/rhapsode

b2147925d257568770d9b3cf38bc95011350b88d — Adrian Cochrane 3 years ago 0040d2b
Improve Gemini support, upgrade HURL, log history records.
3 files changed, 52 insertions(+), 17 deletions(-)

M rhapsode.cabal
M src/Input.hs
M src/Render.hs
M rhapsode.cabal => rhapsode.cabal +2 -2
@@ 64,8 64,8 @@ library
        html-conduit, xml-conduit, text, containers, data-default-class,
        network-uri,
        stylist >= 2.2 && <3, css-syntax, xml-conduit-stylist >= 2.2 && <3, scientific,
        async, hurl >= 1.4.1.0, filepath, temporary,
        file-embed >= 0.0.9 && < 0.1
        async, hurl >= 1.4.2.0, filepath, temporary,
        file-embed >= 0.0.9 && < 0.1, time
  
  -- Directories containing source files.
  hs-source-dirs:      src

M src/Input.hs => src/Input.hs +49 -14
@@ 14,6 14,8 @@ import           Network.URI
import           Network.URI.Fetch
import           Network.URI.Charset
import qualified Data.Map as M
import           Data.List (intercalate)
import           Data.Time.Clock

-- For alternative styles
import qualified Data.CSS.Syntax.Tokens as CSSTok


@@ 24,6 26,7 @@ import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import System.FilePath ((</>))
import Data.FileEmbed

-- For C API


@@ 35,21 38,27 @@ import Foreign.C.String

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument referer http
fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument referer http >>= logHistory
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
parseDocument _ _ (uri, "text/html", Left text) = pageForDoc uri $ HTML.parseLT $ fromStrict text
parseDocument _ _ (uri, "text/html", Right bytes) = pageForDoc uri $ HTML.parseLBS bytes
parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini text
parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini $ utf8' bytes
parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
    pageForDoc uri $ parseGemini (Just lang) text
parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
    pageForDoc uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini Nothing text
parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini Nothing $ utf8' bytes
parseDocument referer _ (uri, "text/css", Left text) = return referer {
        url = uri,
        Types.url = uri,
        css = parseForURL (conditionalStyles uri "document") uri text
    }
parseDocument referer _ (uri, "text/css", Right bytes) = return referer {
        url = uri,
        Types.url = uri,
        css = parseForURL (conditionalStyles uri "document") uri text
    } where text = applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes

parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
    where mime' = takeWhile (/= ';') mime
parseDocument _ _ (uri, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = pageForDoc uri doc
    | otherwise = pageForText uri text


@@ 75,7 84,22 @@ pageForText uri txt = pageForDoc uri XML.Document {
        XML.documentEpilogue = []
    }

pageForDoc uri doc = return Page {url = uri, html = doc, css = html2css doc uri}
pageForDoc uri doc = return Page {Types.url = uri, html = doc, css = html2css doc uri}

logHistory ret@Page {Types.url = url', html = doc} = do
    dir <- getXdgDirectory XdgData "rhapsode"
    createDirectoryIfMissing True dir
    now <- getCurrentTime
    appendFile (dir </> "history.gmni") $ intercalate " " [
        "=>", uriToString id url' "", show now, Txt.unpack $ getTitle $ XML.documentRoot doc
      ]
    return ret
  where
    getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element _ _ childs)
        | title:_ <- [getTitle el | XML.NodeElement el <- childs] = title
        | otherwise = ""

--------
---- CSS charset sniffing


@@ 103,17 127,22 @@ infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Txt.Text -> XML.Document
parseGemini txt = XML.Document {
parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.empty,
            XML.elementAttributes = M.fromList [
                ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""

parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
    el "h6" line : parseGemini' lines


@@ 126,6 155,7 @@ parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines

parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {


@@ 134,13 164,18 @@ parseGemini' (('=':.'>':.line):lines)
    | otherwise = (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = go [line] lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
    where
        go texts ("```":lines) = el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (('`':.'`':.'`':.line):lines) =
            el "pre" (Txt.unlines texts) : el "p" line : parseGemini' lines
        go (('`':.'`':.'`':._):lines) = parseGemini' lines
        go (_:lines) = go lines
        go [] = []
parseGemini' ("```":lines) = go [] lines
    where
        go texts (('`':.'`':.'`':._):lines) =
            el "pre" (Txt.unlines texts) : parseGemini' lines
        go texts (line:lines) = go (texts ++ [line]) lines
        go texts [] = []

parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []



@@ 164,6 199,6 @@ c_fetchURL c_session c_mimes c_referer c_uri = do
    mimes <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    uri <- peekCString c_uri
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` url referer
    let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` Types.url referer
    doc <- fetchDocument session referer (words mimes) uri'
    newStablePtr doc

M src/Render.hs => src/Render.hs +1 -1
@@ 135,7 135,7 @@ c_renderDoc c_session c_page rewriteURLs = do
    session <- deRefStablePtr c_session
    page <- deRefStablePtr c_page
    css' <- retreiveStyles session $ css page
    let pseudoFilter = rhapsodePseudoFilter $ url page
    let pseudoFilter = rhapsodePseudoFilter $ Types.url page
    qCSS <- if rewriteURLs then do
        assets <- downloadAssets session [
                "audio/vnd.wav"