{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Network.MIME.XML(Page(..), loadVisited,
fetchDocument, pageForText, applyCSScharset, readStrict) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text (Text)
import qualified Data.Text.IO as Txt
import Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Text.XML (Document(..))
import Network.URI
import Network.URI.Fetch
import Network.URI.Charset
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Set (Set(..))
import Data.List (intercalate)
import Data.Time.Clock
-- For alternative styles
import qualified Data.CSS.Syntax.Tokens as CSSTok
import Stylist.Parse
import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import System.FilePath ((</>))
import Data.FileEmbed
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)
import Network.MIME.XML.Table -- Apply table sorting here...
import Data.HTML2CSS (html2css)
data Page styles = Page {
pageURL :: URI,
css :: styles,
initCSS :: styles,
html :: Document,
pageTitle :: String,
pageMIME :: String,
apps :: [Application],
backStack :: [(String, URI)],
forwardStack :: [(String, URI)],
-- Probably don't need an MVar here, but let's be safe!
visitedURLs :: Set Text
}
loadVisited :: IO (Set Text)
loadVisited = do
dir <- getXdgDirectory XdgData "rhapsode"
let path = dir </> "history.gmni"
exists <- doesFileExist path
if exists then do
file <- readStrict path
let hist = Set.fromList [Txt.pack uri | _:uri:_ <- map words $ lines file]
return hist
else return Set.empty
readStrict path = do s <- Prelude.readFile path; length s `seq` return s
utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "nocache" } =
fetchDocument http { cachingEnabled = False } referer mime $ pageURL referer
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "novalidate" } =
fetchDocument http { validateCertificates = False } referer mime $ pageURL referer
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/back" } =
fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer (-1)
fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/forward" } =
fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer 1
fetchDocument http referer mime URI {
uriScheme = "action:", uriPath = 'h':'i':'s':'t':'o':'r':'y':'/':x
} | Just x' <- readMaybe x, referer' <- shiftHistory referer x' =
fetchURL' http mime (pageURL referer') >>= parseDocument' referer http False
fetchDocument http referer mime URI { uriScheme = "app:", uriPath = appID } = do
dispatchByApp http Application {
name = "", icon = nullURI, description = "",
appId = appID
} (pageMIME referer) $ pageURL referer
return referer -- TODO play an error or success sound
fetchDocument http referer@Page { pageURL = uri0 } mime uri@URI { uriFragment = anchor }
| uri { uriFragment = "" } == uri0 { uriFragment = "" } = return referer {
html = applySortDoc anchor $ html referer,
pageURL = uri
}
fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument' referer http True
shiftHistory :: Page style -> Integer -> Page style
shiftHistory self 0 = self
shiftHistory self@Page { backStack = (title, url):bs } delta | delta < 0 =
shiftHistory self {
backStack = bs,
forwardStack = (pageTitle self, pageURL self):forwardStack self,
pageTitle = title,
pageURL = url
} $ succ delta
shiftHistory self@Page { forwardStack = (title, url):fs } delta | delta > 0 =
shiftHistory self {
forwardStack = fs,
backStack = (pageTitle self, pageURL self):backStack self,
pageTitle = title,
pageURL = url
} $ pred delta
shiftHistory self _ = self -- Error case.
parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragment = anch}, mime, _) = do
page <- parseDocument ref sess resp >>= logHistory hist
apps' <- appsForMIME sess mime
return $ attachHistory page {
pageMIME = mime,
apps = apps',
html = applySortDoc anch $ html page
}
where
attachHistory x@Page { pageTitle = title, pageURL = url }
| saveHist = x { backStack = (title, url):backStack ref, forwardStack = forwardStack ref }
| otherwise = x
parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString)
-> IO (Page s)
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
parseDocument Page {initCSS = css'} _ (uri, "text/html", Left text) =
pageForDoc css' uri $ HTML.parseLT $ fromStrict text
parseDocument Page {initCSS = css'} _(uri, "text/html", Right bytes) =
pageForDoc css' uri $ HTML.parseLBS bytes
parseDocument Page {initCSS = css'} _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
pageForDoc css' uri $ parseGemini (Just lang) text
parseDocument Page {initCSS = css'} _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
pageForDoc css' uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Left text) =
pageForDoc css' uri $ parseGemini Nothing text
parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Right bytes) =
pageForDoc css' uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
parseDocument referer@Page {pageURL = uri', initCSS = css'} _ (uri, "text/css", Left text)
| URI {uriAuthority = Just host} <- pageURL referer = do
-- Save this per-domain setting
dir <- (</> "domain") <$> getXdgDirectory XdgConfig "rhapsode"
createDirectoryIfMissing True dir
Txt.writeFile (dir </> uriRegName host) $
CSSTok.serialize $ map absolutizeCSS $ CSSTok.tokenize text
return ret
| otherwise = return ret
where
ret = referer {
css = parseForURL css' uri text
}
absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text =
CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
absolutizeCSS tok = tok
parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
where mime' = takeWhile (/= ';') mime
parseDocument Page {initCSS = css'} _ (uri, _, Left text)
| Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' uri doc
| otherwise = pageForText css' uri text
parseDocument Page {initCSS = css'} _ (uri, _, Right bytes)
| Right doc <- XML.parseLBS def bytes = pageForDoc css' uri doc
parseDocument Page {initCSS = css'} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
-- charset wasn't specified, so assume utf-8.
pageForText css' uri $ utf8' bytes
parseDocument Page {initCSS = css'} sess resp@(uri, mime, _) = do
dir <- getCurrentDirectory -- TODO find Downloads directory.
ret <- saveDownload nullURI {
uriScheme = "file:",
uriAuthority = Just (URIAuth "" "" "")
} dir resp >>= dispatchByMIME sess mime
pageForDoc css' uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
pageForText css' uri txt = pageForDoc css' uri XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}
pageForDoc :: StyleSheet s => s -> URI -> Document -> IO (Page s)
pageForDoc css' uri doc = do
-- See if the user has configured an alternate stylesheet for this domain.
let authorStyle = return $ html2css doc uri css'
styles <- case uriAuthority uri of
Nothing -> authorStyle
Just host -> do
dir <- getXdgDirectory XdgConfig "rhapsode"
let path = dir </> "domain" </> uriRegName host
hasAltStyle <- doesFileExist path
if not hasAltStyle then authorStyle else parse css' <$> Txt.readFile path
return Page {pageURL = uri, html = doc, css = styles, initCSS = css',
-- These fields are all blank, to be filled in later by logHistory & parseDocument'
pageTitle = "", pageMIME = "", apps = [],
backStack = [], forwardStack = [], visitedURLs = Set.empty}
logHistory hist ret@Page {pageURL = url', html = doc} = do
dir <- getXdgDirectory XdgData "rhapsode"
createDirectoryIfMissing True dir
now <- getCurrentTime
let title = Txt.unpack $ getTitle $ XML.documentRoot doc
appendFile (dir </> "history.gmni") $ '\n' : intercalate " " [
"=>", uriToStr' url', show now, title
]
return ret { pageTitle = title, visitedURLs = Set.insert (Txt.pack $ uriToStr' url') hist}
where
getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
getTitle (XML.Element _ _ childs)
| title:_ <- [getTitle el | XML.NodeElement el <- childs] = title
| otherwise = ""
uriToStr' :: URI -> String
uriToStr' uri = uriToString id uri ""
--------
---- CSS charset sniffing
--------
applyCSScharset (charset:charsets) bytes
| cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
| otherwise = applyCSScharset charsets bytes
where
text = convertCharset charset bytes
applyCSScharset _ bytes = convertCharset "utf-8" bytes
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
(CSSTok.String charset:_) <- skipCSSspace toks' = charset
| otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks
--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))
infixr 5 :.
el name text = XML.Element name M.empty [XML.NodeContent text]
parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "body",
XML.elementAttributes = M.fromList [
("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
},
XML.documentEpilogue = []
}
csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""
parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines
parseGemini' (('=':.'>':.line):lines)
| (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
XML.elementAttributes = M.insert "href" url M.empty
} : parseGemini' lines
| otherwise = (el "a" $ Txt.strip line) {
XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
} : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
where
go (('`':.'`':.'`':._):lines) = parseGemini' lines
go (_:lines) = go lines
go [] = []
parseGemini' ("```":lines) = go [] lines
where
go texts (('`':.'`':.'`':._):lines) =
el "pre" (Txt.unlines texts) : parseGemini' lines
go texts (line:lines) = go (texts ++ [line]) lines
go texts [] = []
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []