{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Network.URI.Fetch.XML(Page(..), loadVisited,
fetchDocument, pageForText, applyCSScharset, readStrict) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text (Text)
import qualified Data.Text.IO as Txt
import Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Text.XML (Document(..))
import Network.URI
import Network.URI.Fetch
import Network.URI.Charset
import qualified Data.Map as M
import qualified Data.Set as Set
import Data.Set (Set(..))
import Data.List (intercalate)
import Data.Time.Clock
-- For alternative styles
import qualified Data.CSS.Syntax.Tokens as CSSTok
import Stylist.Parse
import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import System.FilePath ((</>))
import Data.FileEmbed
import Data.Maybe (fromMaybe)
import Text.Read (readMaybe)
import Network.URI.Fetch.XML.Table -- Apply table sorting here...
import Data.HTML2CSS (html2css)
data Page styles = Page {
pageURL :: URI,
css :: styles,
initCSS :: URI -> String -> styles,
domain :: String,
html :: Document,
pageTitle :: String,
pageMIME :: String,
apps :: [Application],
backStack :: [(String, URI)],
forwardStack :: [(String, URI)],
-- Probably don't need an MVar here, but let's be safe!
visitedURLs :: Set Text,
appName :: String
}
loadVisited :: String -> IO (Set Text)
loadVisited appname = do
dir <- getXdgDirectory XdgData appname
let path = dir </> "history.gmni"
exists <- doesFileExist path
if exists then do
file <- readStrict path
let hist = Set.fromList [Txt.pack uri | _:uri:_ <- map words $ lines file]
return hist
else return Set.empty
readStrict path = do s <- Prelude.readFile path; length s `seq` return s
utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes
aCCEPT = ["text/xml", "application/xml", "text/html", "text/gemini",
"text/csv", "text/tab-separated-values", "text/css", "text/*", "*/*"]
fetchDocument http referer URI { uriScheme = "action:", uriPath = "nocache" } =
fetchDocument http { cachingEnabled = False } referer $ pageURL referer
fetchDocument http referer URI { uriScheme = "action:", uriPath = "novalidate" } =
fetchDocument http { validateCertificates = False } referer $ pageURL referer
fetchDocument http referer URI { uriScheme = "action:", uriPath = "history/back" } =
fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer (-1)
fetchDocument http referer URI { uriScheme = "action:", uriPath = "history/forward" } =
fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer 1
fetchDocument http referer URI {
uriScheme = "action:", uriPath = 'h':'i':'s':'t':'o':'r':'y':'/':x
} | Just x' <- readMaybe x, referer' <- shiftHistory referer x' =
fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer http False
fetchDocument http referer URI { uriScheme = "app:", uriPath = appID } = do
dispatchByApp http Application {
name = "", icon = nullURI, description = "",
appId = appID
} (pageMIME referer) $ pageURL referer
return referer -- TODO play an error or success sound
fetchDocument http referer@Page { pageURL = uri0 } uri@URI { uriFragment = anchor }
| uri { uriFragment = "" } == uri0 { uriFragment = "" } = return referer {
html = applySortDoc anchor $ html referer,
pageURL = uri
}
fetchDocument http referer uri = fetchURL' http aCCEPT uri >>= parseDocument' referer http True
shiftHistory :: Page style -> Integer -> Page style
shiftHistory self 0 = self
shiftHistory self@Page { backStack = (title, url):bs } delta | delta < 0 =
shiftHistory self {
backStack = bs,
forwardStack = (pageTitle self, pageURL self):forwardStack self,
pageTitle = title,
pageURL = url
} $ succ delta
shiftHistory self@Page { forwardStack = (title, url):fs } delta | delta > 0 =
shiftHistory self {
forwardStack = fs,
backStack = (pageTitle self, pageURL self):backStack self,
pageTitle = title,
pageURL = url
} $ pred delta
shiftHistory self _ = self -- Error case.
parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragment = anch}, mime, _) = do
page <- parseDocument ref {domain = "document"} sess resp >>= logHistory hist
apps' <- appsForMIME sess mime
return $ attachHistory page {
pageMIME = mime,
apps = apps',
html = applySortDoc anch $ html page
}
where
attachHistory x@Page { pageTitle = title, pageURL = url }
| saveHist = x { backStack = (title, url):backStack ref, forwardStack = forwardStack ref }
| otherwise = x
parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString)
-> IO (Page s)
parseDocument ref sess (uri, "html/x-error\t", resp) =
parseDocument ref { domain = "error" } sess (uri, "text/html", resp)
parseDocument p _ (uri, "text/html", Left text) =
pageForDoc p uri $ HTML.parseLT $ fromStrict text
parseDocument p _(uri, "text/html", Right bytes) =
pageForDoc p uri $ HTML.parseLBS bytes
parseDocument p _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
pageForDoc p uri $ parseGemini (Just lang) text
parseDocument p _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
pageForDoc p uri $ parseGemini (Just lang) $ utf8' bytes
parseDocument p _ (uri, "text/gemini", Left text) =
pageForDoc p uri $ parseGemini Nothing text
parseDocument p _ (uri, "text/gemini", Right bytes) =
pageForDoc p uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _
(uri, "text/css", Left text)
| URI {uriAuthority = Just host} <- pageURL referer = do
-- Save this per-domain setting
dir <- (</> "domain") <$> getXdgDirectory XdgConfig name
createDirectoryIfMissing True dir
Txt.writeFile (dir </> uriRegName host) $
CSSTok.serialize $ map absolutizeCSS $ CSSTok.tokenize text
return ret
| otherwise = return ret
where
ret = referer {
css = parseForURL (css' uri' "document") uri text
}
absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text =
CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
absolutizeCSS tok = tok
parseDocument ref _ (uri, "text/csv", Left body) =
pageForDoc ref uri $ parseDelimitedToTable ',' body
parseDocument ref _ (uri, "text/tab-separated-values", Left body) =
pageForDoc ref uri $ parseDelimitedToTable '\t' body
parseDocument ref _ (uri, "text/csv", Right body) =
pageForDoc ref uri $ parseDelimitedToTable ',' $ utf8' body
parseDocument ref _ (uri, "text/tab-separated-values", Right body) =
pageForDoc ref uri $ parseDelimitedToTable '\t' $ utf8' body
parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
where mime' = takeWhile (/= ';') mime
parseDocument p _ (uri, _, Left text)
| Right doc <- XML.parseText def $ fromStrict text = pageForDoc p uri doc
| otherwise = pageForText p uri text
parseDocument p _ (uri, _, Right bytes)
| Right doc <- XML.parseLBS def bytes = pageForDoc p uri doc
parseDocument p _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
-- charset wasn't specified, so assume utf-8.
pageForText p uri $ utf8' bytes
parseDocument p sess resp@(uri, mime, _) = do
dir <- getCurrentDirectory -- TODO find Downloads directory.
ret <- saveDownload nullURI {
uriScheme = "file:",
uriAuthority = Just (URIAuth "" "" "")
} dir resp >>= dispatchByMIME sess mime
pageForDoc p uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
pageForText referer uri txt = pageForDoc referer uri XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}
pageForDoc :: StyleSheet s => Page s -> URI -> Document -> IO (Page s)
pageForDoc referer@Page {initCSS = css', appName = appname, domain = d} uri doc = do
-- See if the user has configured an alternate stylesheet for this domain.
let authorStyle = return $ html2css doc uri $ css' uri d
styles <- case uriAuthority uri of
Nothing -> authorStyle
Just host -> do
dir <- getXdgDirectory XdgConfig appname
let path = dir </> "domain" </> uriRegName host
hasAltStyle <- doesFileExist path
if not hasAltStyle then authorStyle else parse (css' uri d) <$> Txt.readFile path
return referer {pageURL = uri, html = doc, css = styles}
logHistory hist ret@Page {pageURL = url', html = doc, appName = name} = do
dir <- getXdgDirectory XdgData name
createDirectoryIfMissing True dir
now <- getCurrentTime
let title = Txt.unpack $ getTitle $ XML.documentRoot doc
appendFile (dir </> "history.gmni") $ '\n' : intercalate " " [
"=>", uriToStr' url', show now, title
]
return ret { pageTitle = title, visitedURLs = Set.insert (Txt.pack $ uriToStr' url') hist}
where
getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs]
getTitle (XML.Element _ _ childs)
| title:_ <- [getTitle el | XML.NodeElement el <- childs] = title
| otherwise = ""
uriToStr' :: URI -> String
uriToStr' uri = uriToString id uri ""
--------
---- CSS charset sniffing
--------
applyCSScharset (charset:charsets) bytes
| cssCharset (CSSTok.tokenize text) == Txt.pack charset = text
| otherwise = applyCSScharset charsets bytes
where
text = convertCharset charset bytes
applyCSScharset _ bytes = convertCharset "utf-8" bytes
cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
(CSSTok.String charset:_) <- skipCSSspace toks' = charset
| otherwise = ""
skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks
skipCSSspace toks = toks
--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))
infixr 5 :.
el name text = XML.Element name M.empty [XML.NodeContent text]
parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "body",
XML.elementAttributes = M.fromList [
("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
},
XML.documentEpilogue = []
}
csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""
parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines
parseGemini' (('=':.'>':.line):lines)
| (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
XML.elementAttributes = M.insert "href" url M.empty
} : parseGemini' lines
| otherwise = (el "a" $ Txt.strip line) {
XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
} : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
where
go (('`':.'`':.'`':._):lines) = parseGemini' lines
go (_:lines) = go lines
go [] = []
parseGemini' ("```":lines) = go [] lines
where
go texts (('`':.'`':.'`':._):lines) =
el "pre" (Txt.unlines texts) : parseGemini' lines
go texts (line:lines) = go (texts ++ [line]) lines
go texts [] = []
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []
--------
---- TSV, CSV, etc
--------
parseDelimitedValues _ "" row rows = reverse (reverse row : rows)
parseDelimitedValues delim ('\r':.cs) row rows = parseDelimitedValues delim cs row rows
parseDelimitedValues delim ('\n':.cs) row rows = parseDelimitedValues delim cs [] (reverse row : rows)
parseDelimitedValues delim (c:.'"':.cs) row rows | c == delim =
let (value, cs') = inner cs in parseDelimitedValues delim cs' (value:row) rows
where
inner (x:.y:.cs) | x == delim && y == delim = let (a, b) = inner cs in (delim `Txt.cons` a, b)
inner (c:.cs) | c == delim = ("", cs)
| otherwise = let (a, b) = inner cs in (c `Txt.cons` a, b)
inner "" = ("", "")
parseDelimitedValues delim (c:.cs) row rows | c == delim =
let (value, cs') = Txt.break (`elem` ['\r', '\n', delim]) cs
in parseDelimitedValues delim cs' (value:row) rows
parseDelimitedValues delim cs row rows =
let (value, cs') = Txt.break (`elem` ['\r', '\n', delim]) cs
in parseDelimitedValues delim cs (value:row) rows
escapeDelimitedValues delim source = map (map inner) $ parseDelimitedValues delim source [] []
where
inner = Txt.strip . Txt.replace "\\\\" "\\" . Txt.replace "\\n" "\n" .
Txt.replace "\\t" "\t" . Txt.replace "\\r" "\r"
parseDelimitedToTable delim source
| (head:body) <- filter (not . null) $ escapeDelimitedValues delim source =
XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "table",
XML.elementAttributes = M.empty,
XML.elementNodes = rowToTr "th" head : map (rowToTr "td") body
},
XML.documentEpilogue = []
}
| otherwise = XML.Document { -- Empty TSV/CSV/etc
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element "table" M.empty [],
XML.documentEpilogue = []
}
rowToTr tagname values = XML.NodeElement $ XML.Element "tr" M.empty $ map inner values
where
inner = XML.NodeElement . XML.Element tagname M.empty . singleton . XML.NodeContent
singleton a = [a]