{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE PatternSynonyms, ViewPatterns #-}
module Input(fetchDocument, pageForText, applyCSScharset) where
import Data.Text.Lazy (fromStrict)
import qualified Data.Text as Txt
import Data.Text.Encoding
import qualified Data.Text.Lazy as LTxt
import qualified Data.ByteString.Lazy as B
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import Network.URI
import Network.URI.Fetch
import Network.URI.Charset
import qualified Data.Map as M
-- For alternative styles
import qualified Data.CSS.Syntax.Tokens as CSSTok
import Data.CSS.Syntax.StyleSheet
import Data.CSS.Preprocessor.Conditions (conditionalStyles)
import System.IO
import System.IO.Temp
import Data.Default.Class
import System.Directory
import Data.FileEmbed
-- For C API
import Types
import Data.HTML2CSS (html2css)
import Data.Maybe (fromMaybe)
import Foreign.StablePtr
import Foreign.C.String
-- | Convert a lazy bytestring to text using the "utf-8" charset.
utf8' :: B.ByteString -> Txt.Text
utf8' = convertCharset "utf-8" . B.toStrict
-- | Fetch @uri@ through the session @http@, offering the MIME types in
-- @mime@, then hand the response to 'parseDocument' together with the
-- referring page.
fetchDocument http referer mime uri = do
    response <- fetchURL' http mime uri
    parseDocument referer http response
-- | Build a page from a fetched response @(uri, mime, body)@, where the body
-- is either already-decoded text ('Left') or raw bytes ('Right').
--
-- Dispatch is on the MIME type; clauses are tried top to bottom, and the
-- guarded XML clause falls through to the ones below it when parsing fails.

-- Error pages are handled exactly like ordinary HTML.
parseDocument prev session (loc, "html/x-error\t", body) =
    parseDocument prev session (loc, "text/html", body)
parseDocument _ _ (loc, "text/html", body) = pageForDoc loc $ case body of
    Left chars -> HTML.parseLT (fromStrict chars)
    Right octets -> HTML.parseLBS octets
-- Gemini bytes are decoded as utf-8 before parsing.
parseDocument _ _ (loc, "text/gemini", body) =
    pageForDoc loc $ parseGemini $ either id utf8' body
-- A stylesheet replaces the CSS (and URL) of the referring page rather than
-- producing a fresh page.
parseDocument prev _ (loc, "text/css", body) = return prev {
        url = loc,
        css = parseForURL (conditionalStyles loc "document") loc sheet
    }
  where
    sheet = case body of
        Left chars -> chars
        -- Undecoded bytes: sniff the charset from the stylesheet itself.
        Right octets ->
            applyCSScharset (map Txt.unpack charsets) (B.toStrict octets)
-- Any other decoded text: try XML first, otherwise show it as plain text.
parseDocument _ _ (loc, _, Left chars) =
    case XML.parseText def (fromStrict chars) of
        Right doc -> pageForDoc loc doc
        Left _ -> pageForText loc chars
-- Any other bytes: try XML; on failure fall through to the clauses below.
parseDocument _ _ (loc, _, Right octets)
    | Right doc <- XML.parseLBS def octets = pageForDoc loc doc
parseDocument _ _ (loc, 't':'e':'x':'t':'/':_, Right octets) =
    -- charset wasn't specified, so assume utf-8.
    pageForText loc (utf8' octets)
-- Everything else is saved to disk and dispatched by MIME type.
parseDocument _ session reply@(loc, kind, _) = do
    -- TODO find Downloads directory.
    cwd <- getCurrentDirectory
    let fileRoot = nullURI {
            uriScheme = "file:",
            uriAuthority = Just (URIAuth "" "" "")
          }
    saved <- saveDownload fileRoot cwd reply
    outcome <- dispatchByMIME session kind saved
    pageForDoc loc $ HTML.parseLT $ LTxt.pack $
        fromMaybe "Unsupported filetype" outcome
-- | Wrap plain text in a document whose root is a single @pre@ element
-- (no attributes, one text node) and build a page from it via 'pageForDoc'.
pageForText uri txt = pageForDoc uri (XML.Document prologue root [])
  where
    prologue = XML.Prologue [] Nothing []
    root = XML.Element "pre" M.empty [XML.NodeContent txt]
-- | Build a 'Page' for a parsed document: records the URL and the document,
-- and derives the page's CSS from the document with 'html2css'.
pageForDoc uri doc = return page
  where
    page = Page {
        url = uri,
        html = doc,
        css = html2css doc uri
      }
--------
---- CSS charset sniffing
--------
-- | Decode a CSS stylesheet by sniffing its \@charset rule.
--
-- Each candidate charset is tried in order: the bytes are decoded with it,
-- and if the decoded stylesheet starts with an \@charset rule naming that
-- same charset, that decoding is used. If no candidate agrees with the
-- stylesheet (or the list is empty), the bytes are decoded as "utf-8".
--
-- The declared name is compared case-insensitively, so that e.g.
-- @\@charset "ISO-8859-1";@ matches the candidate @"iso-8859-1"@.
applyCSScharset (candidate:others) bytes
    | Txt.toLower declared == Txt.toLower (Txt.pack candidate) = decoded
    | otherwise = applyCSScharset others bytes
  where
    decoded = convertCharset candidate bytes
    -- Charset named by the stylesheet when read under this candidate
    -- ("" if there is no leading \@charset rule).
    declared = cssCharset (CSSTok.tokenize decoded)
applyCSScharset [] bytes = convertCharset "utf-8" bytes
-- | Extract the charset named by a leading \@charset rule in a CSS token
-- stream: optional whitespace, the at-keyword @charset@, optional whitespace,
-- then a string token. Yields "" when the stream does not start that way.
cssCharset toks = case skipCSSspace toks of
    CSSTok.AtKeyword "charset" : afterKeyword
        | CSSTok.String name : _ <- skipCSSspace afterKeyword -> name
    _ -> ""
-- | Drop any leading whitespace tokens from a CSS token stream.
skipCSSspace = dropWhile isSpaceTok
  where
    isSpaceTok CSSTok.Whitespace = True
    isSpaceTok _ = False
--------
---- Gemini implementation
--------
-- Copied from css-syntax.
-- Match-only pattern synonym that lets strict text be taken apart like a
-- list: @c :. rest@ matches non-empty text, binding its first character and
-- the remainder (via 'Txt.uncons'). Right-associative, so patterns chain as
-- @'#' :. '#' :. rest@.
infixr 5 :.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern c :. rest <- (Txt.uncons -> Just (c, rest))
-- | Build an element with the given name, no attributes, and a single text
-- node as its only child.
el name text = XML.Element {
    XML.elementName = name,
    XML.elementAttributes = M.empty,
    XML.elementNodes = [XML.NodeContent text]
  }
-- | Parse Gemini text into a document whose root is a @body@ element holding
-- one child element per parsed block (see 'parseGemini'').
parseGemini :: Txt.Text -> XML.Document
parseGemini txt = XML.Document prologue body []
  where
    prologue = XML.Prologue [] Nothing []
    body = XML.Element "body" M.empty blocks
    blocks = [XML.NodeElement block | block <- parseGemini' (Txt.lines txt)]
-- | Convert the lines of a Gemini document into a flat list of elements.
--
-- Line prefixes are recognised as follows (longest heading prefix first):
--
-- * one to six @#@ characters: @h1@ .. @h6@
-- * @*@: @li@
-- * @=>@: @a@ with an @href@ attribute
-- * three backticks: start of a preformatted block, emitted as @pre@
-- * anything else: @p@
--
-- A preformatted block that is still open at the end of input is emitted
-- with whatever lines were collected, rather than being dropped.
parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):rest) =
    el "h6" line : parseGemini' rest
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):rest) =
    el "h5" line : parseGemini' rest
parseGemini' (('#':.'#':.'#' :. '#':.line):rest) =
    el "h4" line : parseGemini' rest
parseGemini' (('#':.'#':.'#':.line):rest) = el "h3" line : parseGemini' rest
parseGemini' (('#':.'#':.line):rest) = el "h2" line : parseGemini' rest
parseGemini' (('#':.line):rest) = el "h1" line : parseGemini' rest
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):rest) = el "li" line : parseGemini' rest
parseGemini' (('=':.'>':.line):rest) = link : parseGemini' rest
  where
    link = (el "a" label) { XML.elementAttributes = M.singleton "href" target }
    -- The first word is the link target and the remaining words the label;
    -- with fewer than two words the whole stripped line serves as both.
    (target, label) = case Txt.words line of
        (href:caption@(_:_)) -> (href, Txt.unwords caption)
        _ -> (Txt.strip line, Txt.strip line)
parseGemini' (('`':.'`':.'`':.line):rest) = go [line] rest
  where
    -- @acc@ holds the block's lines collected so far, most recent first.
    -- It starts with whatever followed the opening backticks.
    go acc ("```":remaining) = pre acc : parseGemini' remaining
    -- Closing fence with trailing text: the text becomes its own paragraph.
    go acc (('`':.'`':.'`':.trailer):remaining) =
        pre acc : el "p" trailer : parseGemini' remaining
    go acc (content:remaining) = go (content:acc) remaining
    -- Unterminated block: keep the collected text instead of discarding it.
    go acc [] = [pre acc]
    pre acc = el "pre" (Txt.unlines (reverse acc))
parseGemini' (line:rest) = el "p" line : parseGemini' rest
parseGemini' [] = []
--------
---- C API
--------
-- | Create a new session whose about-pages are the files embedded at compile
-- time from the "about" directory, and return it to C as a stable pointer.
foreign export ccall c_newSession :: IO (StablePtr Session)
c_newSession = do
    session <- newSession
    -- The embedded file contents are strict; the session takes lazy bytestrings.
    let pages = [(path, B.fromStrict bytes) | (path, bytes) <- $(embedDir "about")]
    newStablePtr session {aboutPages = pages}

-- | Release a stable pointer to a session.
foreign export ccall c_freeSession :: StablePtr Session -> IO ()
c_freeSession = freeStablePtr
-- | C entry point for loading a page.
--
-- Takes the session, a whitespace-separated list of acceptable MIME types,
-- the referring page, and the URI to load. The URI is parsed as a URI
-- reference (falling back to 'nullURI' if it does not parse) and resolved
-- relative to the referring page's URL. The resulting page is returned as a
-- newly created stable pointer; neither input pointer is freed here.
foreign export ccall c_fetchURL
    :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page)
c_fetchURL c_session c_mimes c_referer c_uri = do
    session <- deRefStablePtr c_session
    accepted <- peekCString c_mimes
    referer <- deRefStablePtr c_referer
    target <- peekCString c_uri
    let parsed = fromMaybe nullURI (parseURIReference target)
        resolved = relativeTo parsed (url referer)
    page <- fetchDocument session referer (words accepted) resolved
    newStablePtr page