{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Network.MIME.XML(Page(..), loadVisited, fetchDocument, pageForText, applyCSScharset, readStrict) where

import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text (Text)
import qualified Data.Text.IO as Txt
import Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Text.XML (Document(..))
import Network.URI
import Network.URI.Fetch
import Network.URI.Charset
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Set (Set(..))
import Data.List (intercalate)
import Data.Time.Clock

-- For alternative styles
import qualified Data.CSS.Syntax.Tokens as CSSTok
import Stylist.Parse

import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import System.FilePath ((</>))
import Data.FileEmbed
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)

import Network.MIME.XML.Table -- Apply table sorting here...
import Data.HTML2CSS (html2css)

data Page styles = Page {
    pageURL :: URI,
    css :: styles,
    initCSS :: styles,
    html :: Document,
    pageTitle :: String,
    pageMIME :: String,
    apps :: [Application],
    backStack :: [(String, URI)],
    forwardStack :: [(String, URI)],
    -- Probably don't need an MVar here, but let's be safe!
    visitedURLs :: Set Text
}

loadVisited :: IO (Set Text)
loadVisited = do
    dir <- getXdgDirectory XdgData "rhapsode"
    let path = dir </> "history.gmni"
    exists <- doesFileExist path

    if exists then do
        file <- readStrict path
        let hist = Set.fromList [Txt.pack uri | _:uri:_ <- map words $ lines file]
        return hist
    else return Set.empty

readStrict path = do s <- Prelude.readFile path; length s `seq` return s

utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes

fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "nocache" } =
    fetchDocument http { cachingEnabled = False } referer mime $ pageURL referer
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "novalidate" } =
    fetchDocument http { validateCertificates = False } referer mime $ pageURL referer
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/back" } =
    fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
  where referer' = shiftHistory referer (-1)
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/forward" } =
    fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
  where referer' = shiftHistory referer 1
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = 'h':'i':'s':'t':'o':'r':'y':'/':x }
    | Just x' <- readMaybe x, referer' <- shiftHistory referer x' =
        fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
fetchDocument http referer mime URI { uriScheme = "app:", uriPath = appID } = do
    dispatchByApp http Application {
        name = "",
        icon = nullURI,
        description = "",
        appId = appID
      } (pageMIME referer) $ pageURL referer
    return referer -- TODO play an error or success sound
fetchDocument http referer@Page { pageURL = uri0 } mime uri@URI { uriFragment = anchor }
    | uri { uriFragment = "" } == uri0 { uriFragment = "" } =
        return referer { html = applySortDoc anchor $ html referer, pageURL = uri }
fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument' referer http True
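-- A rough summary of the pseudo-URIs dispatched by the clauses above (derived
-- from those clauses themselves, not from any external spec):
--   action:nocache          refetch the current page with caching disabled
--   action:novalidate       refetch with certificate validation disabled
--   action:history/back     shift one entry back in the session history
--   action:history/forward  shift one entry forward
--   action:history/N        shift by N entries (N parsed via readMaybe)
--   app:APPID               hand the current page off to an external application
-- Any other URI falls through to a plain fetchURL' / parseDocument' round trip.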
shiftHistory :: Page style -> Integer -> Page style
shiftHistory self 0 = self
shiftHistory self@Page { backStack = (title, url):bs } delta | delta < 0 =
    shiftHistory self {
        backStack = bs,
        forwardStack = (pageTitle self, pageURL self):forwardStack self,
        pageTitle = title,
        pageURL = url
    } $ succ delta
shiftHistory self@Page { forwardStack = (title, url):fs } delta | delta > 0 =
    shiftHistory self {
        forwardStack = fs,
        backStack = (pageTitle self, pageURL self):backStack self,
        pageTitle = title,
        pageURL = url
    } $ pred delta
shiftHistory self _ = self -- Error case.

parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragment = anch}, mime, _) = do
    page <- parseDocument ref sess resp >>= logHistory hist
    apps' <- appsForMIME sess mime
    return $ attachHistory page {
        pageMIME = mime,
        apps = apps',
        html = applySortDoc anch $ html page
      }
  where
    attachHistory x@Page { pageTitle = title, pageURL = url }
        | saveHist = x { backStack = (title, url):backStack ref, forwardStack = forwardStack ref }
        | otherwise = x

parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString) -> IO (Page s)
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
parseDocument Page {initCSS = css'} _ (uri, "text/html", Left text) =
    pageForDoc css' uri $ HTML.parseLT $ fromStrict text
parseDocument Page {initCSS = css'} _ (uri, "text/html", Right bytes) =
    pageForDoc css' uri $ HTML.parseLBS bytes
parseDocument Page {initCSS = css'} _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
    pageForDoc css' uri $ parseGemini (Just lang) text
parseDocument Page {initCSS = css'} _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
    pageForDoc css' uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Left text) =
    pageForDoc css' uri $ parseGemini Nothing text
parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Right bytes) =
    pageForDoc css' uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
    parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
parseDocument referer@Page {pageURL = uri', initCSS = css'} _ (uri, "text/css", Left text)
    | URI {uriAuthority = Just host} <- pageURL referer = do
        -- Save this per-domain setting
        dir <- (</> "domain") <$> getXdgDirectory XdgConfig "rhapsode"
        createDirectoryIfMissing True dir
        Txt.writeFile (dir </> uriRegName host) $ CSSTok.serialize $
            map absolutizeCSS $ CSSTok.tokenize text
        return ret
    | otherwise = return ret
  where
    ret = referer { css = parseForURL css' uri text }
    absolutizeCSS (CSSTok.Url text)
        | Just rel <- parseRelativeReference $ Txt.unpack text =
            CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
    absolutizeCSS tok = tok
parseDocument ref sess (uri, mime, body)
    | mime' /= mime = parseDocument ref sess (uri, mime', body)
  where mime' = takeWhile (/= ';') mime
parseDocument Page {initCSS = css'} _ (uri, _, Left text)
    | Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' uri doc
    | otherwise = pageForText css' uri text
parseDocument Page {initCSS = css'} _ (uri, _, Right bytes)
    | Right doc <- XML.parseLBS def bytes = pageForDoc css' uri doc
parseDocument Page {initCSS = css'} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
    -- charset wasn't specified, so assume utf-8.
    pageForText css' uri $ utf8' bytes
parseDocument Page {initCSS = css'} sess resp@(uri, mime, _) = do
    dir <- getCurrentDirectory -- TODO find Downloads directory.
    ret <- saveDownload nullURI {
        uriScheme = "file:",
        uriAuthority = Just (URIAuth "" "" "")
      } dir resp >>= dispatchByMIME sess mime
    pageForDoc css' uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
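-- Worked example of the parameter-stripping clause above (illustrative only):
-- a response labelled "text/html;charset=utf-8" matches none of the literal
-- MIME patterns, so takeWhile (/= ';') reduces it to "text/html" and
-- parseDocument recurses on that bare type; responses that already lack
-- parameters fall through to the generic XML / plain-text / download clauses.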
pageForText css' uri txt = pageForDoc css' uri XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "pre",
            XML.elementAttributes = M.empty,
            XML.elementNodes = [XML.NodeContent txt]
        },
        XML.documentEpilogue = []
    }

pageForDoc :: StyleSheet s => s -> URI -> Document -> IO (Page s)
pageForDoc css' uri doc = do
    -- See if the user has configured an alternate stylesheet for this domain.
    let authorStyle = return $ html2css doc uri css'
    styles <- case uriAuthority uri of
        Nothing -> authorStyle
        Just host -> do
            dir <- getXdgDirectory XdgConfig "rhapsode"
            let path = dir </> "domain" </> uriRegName host
            hasAltStyle <- doesFileExist path
            if not hasAltStyle then authorStyle else parse css' <$> Txt.readFile path

    return Page {pageURL = uri, html = doc, css = styles, initCSS = css',
        -- These fields are all blank, to be filled in later by logHistory & parseDocument'
        pageTitle = "", pageMIME = "", apps = [],
        backStack = [], forwardStack = [], visitedURLs = Set.empty}

logHistory hist ret@Page {pageURL = url', html = doc} = do
    dir <- getXdgDirectory XdgData "rhapsode"
    createDirectoryIfMissing True dir
    now <- getCurrentTime
    let title = Txt.unpack $ getTitle $ XML.documentRoot doc
    appendFile (dir </> "history.gmni") $ '\n' : intercalate " " [
        "=>", uriToStr' url', show now, title
      ]
    return ret { pageTitle = title, visitedURLs = Set.insert (Txt.pack $ uriToStr' url') hist}
  where
    getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
    getTitle (XML.Element _ _ childs)
        | title:_ <- [getTitle el | XML.NodeElement el <- childs] = title
        | otherwise = ""

uriToStr' :: URI -> String
uriToStr' uri = uriToString id uri ""

--------
---- CSS charset sniffing
--------

applyCSScharset (charset:charsets) bytes
    | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
    | otherwise = applyCSScharset charsets bytes
  where text = convertCharset charset bytes
applyCSScharset _ bytes = convertCharset "utf-8" bytes

cssCharset toks
    | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
        (CSSTok.String charset:_) <- skipCSSspace toks' = charset
    | otherwise = ""

skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks

--------
---- Gemini implementation
--------

-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))

infixr 5 :.

el name text = XML.Element name M.empty [XML.NodeContent text]

parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
        XML.documentPrologue = XML.Prologue [] Nothing [],
        XML.documentRoot = XML.Element {
            XML.elementName = "body",
            XML.elementAttributes = M.fromList
                [("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
            XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
        },
        XML.documentEpilogue = []
    }

csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""
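-- A rough sketch of the gemtext line-type mapping implemented below:
-- lines starting with "#" through "######" become h1 through h6, "*" becomes
-- li, ">" becomes blockquote, "=>" becomes an <a> whose href is the first
-- word of the line, ``` fences delimit preformatted runs, and anything else
-- becomes a plain p element.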
parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#':.'#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.'#':.'#':.line):lines) = el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.'#':.line):lines) = el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines
parseGemini' (('=':.'>':.line):lines)
    | (url:text@(_:_)) <- Txt.words line =
        (el "a" $ Txt.unwords text) {
            XML.elementAttributes = M.insert "href" url M.empty
        } : parseGemini' lines
    | otherwise =
        (el "a" $ Txt.strip line) {
            XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
        } : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
  where
    go (('`':.'`':.'`':._):lines) = parseGemini' lines
    go (_:lines) = go lines
    go [] = []
-- NOTE: the clause below is shadowed by the one above (any line starting with
-- ``` matches it first), so this pre-emitting variant is effectively dead code.
parseGemini' ("```":lines) = go [] lines
  where
    go texts (('`':.'`':.'`':._):lines) = el "pre" (Txt.unlines texts) : parseGemini' lines
    go texts (line:lines) = go (texts ++ [line]) lines
    go texts [] = []
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []
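-- Illustrative only (hypothetical GHCi session, not part of the module):
--   parseGemini Nothing "# Hello\n=> gemini://example.org/ Example"
-- yields a <body> whose children are an h1 element for the heading line and
-- an <a> element with href="gemini://example.org/" and content "Example".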