{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE PatternSynonyms, ViewPatterns #-} module Input(fetchDocument, pageForText, applyCSScharset) where import Data.Text.Lazy (fromStrict) import qualified Data.Text as Txt import Data.Text.Encoding import qualified Data.Text.Lazy as LTxt import qualified Data.ByteString.Lazy as B import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI import Network.URI.Fetch import Network.URI.Charset import qualified Data.Map as M import Data.List (intercalate) import Data.Time.Clock -- For alternative styles import qualified Data.CSS.Syntax.Tokens as CSSTok import Data.CSS.Syntax.StyleSheet import Data.CSS.Preprocessor.Conditions (conditionalStyles) import System.IO import System.IO.Temp import Data.Default.Class import System.Directory import System.FilePath (()) import Data.FileEmbed -- For C API import Types import Data.HTML2CSS (html2css) import Data.Maybe (fromMaybe) import Foreign.StablePtr import Foreign.C.String utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument referer http >>= logHistory parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp) parseDocument _ _ (uri, "text/html", Left text) = pageForDoc uri $ HTML.parseLT $ fromStrict text parseDocument _ _ (uri, "text/html", Right bytes) = pageForDoc uri $ HTML.parseLBS bytes parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) = pageForDoc uri $ parseGemini (Just lang) text parseDocument _ _ (uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) = pageForDoc uri $ parseGemini (Just lang) $ utf8' bytes parseDocument _ _ (uri, "text/gemini", Left text) = pageForDoc uri $ parseGemini Nothing text parseDocument _ _ (uri, "text/gemini", Right bytes) = pageForDoc uri $ parseGemini Nothing $ utf8' bytes parseDocument referer _ (uri, "text/css", Left text) = return referer { Types.url = uri, css = parseForURL (conditionalStyles uri "document") uri text } parseDocument referer _ (uri, "text/css", Right bytes) = return referer { Types.url = uri, css = parseForURL (conditionalStyles uri "document") uri text } where text = applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body) where mime' = takeWhile (/= ';') mime parseDocument _ _ (uri, _, Left text) | Right doc <- XML.parseText def $ fromStrict text = pageForDoc uri doc | otherwise = pageForText uri text parseDocument _ _ (uri, _, Right bytes) | Right doc <- XML.parseLBS def bytes = pageForDoc uri doc parseDocument _ _ (uri, 't':'e':'x':'t':'/':_, Right bytes) = -- charset wasn't specified, so assume utf-8. pageForText uri $ utf8' bytes parseDocument _ sess resp@(uri, mime, _) = do dir <- getCurrentDirectory -- TODO find Downloads directory. ret <- saveDownload nullURI { uriScheme = "file:", uriAuthority = Just (URIAuth "" "" "") } dir resp >>= dispatchByMIME sess mime pageForDoc uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret pageForText uri txt = pageForDoc uri XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] } pageForDoc uri doc = return Page {Types.url = uri, html = doc, css = html2css doc uri} logHistory ret@Page {Types.url = url', html = doc} = do dir <- getXdgDirectory XdgData "rhapsode" createDirectoryIfMissing True dir now <- getCurrentTime appendFile (dir "history.gmni") $ intercalate " " [ "=>", uriToString id url' "", show now, Txt.unpack $ getTitle $ XML.documentRoot doc ] return ret where getTitle (XML.Element "title" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs] getTitle (XML.Element "h1" _ childs) = Txt.concat [txt | XML.NodeContent txt <- childs] getTitle (XML.Element _ _ childs) | title:_ <- [getTitle el | XML.NodeElement el <- childs] = title | otherwise = "" -------- ---- CSS charset sniffing -------- applyCSScharset (charset:charsets) bytes | cssCharset (CSSTok.tokenize text) == Txt.pack charset = text | otherwise = applyCSScharset charsets bytes where text = convertCharset charset bytes applyCSScharset _ bytes = convertCharset "utf-8" bytes cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks, (CSSTok.String charset:_) <- skipCSSspace toks' = charset | otherwise = "" skipCSSspace (CSSTok.Whitespace:toks) = skipCSSspace toks skipCSSspace toks = toks -------- ---- Gemini implementation -------- -- Copied from css-syntax. pattern (:.) :: Char -> Txt.Text -> Txt.Text pattern x :. xs <- (Txt.uncons -> Just (x, xs)) infixr 5 :. el name text = XML.Element name M.empty [XML.NodeContent text] parseGemini :: Maybe String -> Txt.Text -> XML.Document parseGemini lang txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "body", XML.elementAttributes = M.fromList [ ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]], XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt }, XML.documentEpilogue = [] } csv (',':_) = "" csv (c:rest) = c:csv rest csv "" = "" parseGemini' :: [Txt.Text] -> [XML.Element] parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) = el "h5" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.line):lines) = el "h4" line : parseGemini' lines parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines -- Not properly structured, but still sounds fine... parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines parseGemini' (('=':.'>':.line):lines) | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) { XML.elementAttributes = M.insert "href" url M.empty } : parseGemini' lines | otherwise = (el "a" $ Txt.strip line) { XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty } : parseGemini' lines parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines where go (('`':.'`':.'`':._):lines) = parseGemini' lines go (_:lines) = go lines go [] = [] parseGemini' ("```":lines) = go [] lines where go texts (('`':.'`':.'`':._):lines) = el "pre" (Txt.unlines texts) : parseGemini' lines go texts (line:lines) = go (texts ++ [line]) lines go texts [] = [] parseGemini' (line:lines) = el "p" line : parseGemini' lines parseGemini' [] = [] -------- ---- C API -------- foreign export ccall c_newSession :: IO (StablePtr Session) foreign export ccall c_freeSession :: StablePtr Session -> IO () c_newSession = do sess <- newSession newStablePtr $ sess {aboutPages = map lazify $(embedDir "about")} where lazify (a, b) = (a, B.fromStrict b) c_freeSession = freeStablePtr foreign export ccall c_fetchURL :: StablePtr Session -> CString -> StablePtr Page -> CString -> IO (StablePtr Page) c_fetchURL c_session c_mimes c_referer c_uri = do session <- deRefStablePtr c_session mimes <- peekCString c_mimes referer <- deRefStablePtr c_referer uri <- peekCString c_uri let uri' = nullURI `fromMaybe` parseURIReference uri `relativeTo` Types.url referer doc <- fetchDocument session referer (words mimes) uri' newStablePtr doc foreign export ccall c_enableLogging :: StablePtr Session -> IO () c_enableLogging c_session = deRefStablePtr c_session >>= enableLogging foreign export ccall c_writeLog :: CString -> StablePtr Session -> IO () c_writeLog c_path c_session = do path <- peekCString c_path withFile path AppendMode (\logfile -> deRefStablePtr c_session >>= writeLog logfile)