{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-} module Internal.Load(load, load', back, next, parseAbsoluteURI) where import Internal import Control.Concurrent.MVar import System.Timeout (timeout) import Control.Monad.IO.Class import Data.Aeson import Data.Text (Text, pack) import qualified Data.Text as Txt import Data.Text.Lazy (fromStrict) import GHC.Generics import Data.Maybe (fromMaybe) import qualified Data.ByteString.Lazy as B (toStrict) import qualified Data.HashMap.Strict as HM import Network.URI as URI import Network.URI.Fetch as URI import Network.URI.Charset (convertCharset) import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import qualified Data.Map as M mime = words "text/html text/xml application/xml application/xhtml+xml text/plain" load' :: Internal.Session -> URI -> IO () load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do resp@(redirected, _, _) <- fetchURL' (loader session') mime uri return $ session' { currentURL = redirected, document = parseDocument resp, knownEls = HM.empty } maybeTimeout :: Session' -> URI -> IO Session' -> IO Session' maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session = -- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds. fromMaybe session <$> timeout (delay * 1000) act maybeTimeout _ _ act = act --- load :: Internal.Session -> URI -> IO () load session uri = do modifyMVar_ session $ return . inner load' session uri where inner session'@Session {backStack = backStack', currentURL = currentURL' } = session' { backStack = currentURL' : backStack' } back :: Internal.Session -> IO () back session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } = (session' { backStack = bs, nextStack = n:ns }, b) next :: Internal.Session -> IO () next session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } = (session' { backStack = b:bs, nextStack = ns }, n) --- parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp) parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp) where mime' = takeWhile (/= ';') mime parseDocument (_, _, Left text) | Right doc <- XML.parseText XML.def $ fromStrict text = doc | otherwise = pageForText text parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8. parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)") pageForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] } utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes -------- ---- Gemini implementation -------- -- Copied from css-syntax. pattern (:.) :: Char -> Txt.Text -> Txt.Text pattern x :. xs <- (Txt.uncons -> Just (x, xs)) infixr 5 :. el name text = XML.Element name M.empty [XML.NodeContent text] parseGemini :: Maybe String -> Txt.Text -> XML.Document parseGemini lang txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "body", XML.elementAttributes = M.fromList [ ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]], XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt }, XML.documentEpilogue = [] } csv (',':_) = "" csv (c:rest) = c:csv rest csv "" = "" parseGemini' :: [Txt.Text] -> [XML.Element] parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) = el "h5" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.line):lines) = el "h4" line : parseGemini' lines parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines -- Not properly structured, but still sounds fine... parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines parseGemini' (('=':.'>':.line):lines) | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) { XML.elementAttributes = M.insert "href" url M.empty } : parseGemini' lines | otherwise = (el "a" $ Txt.strip line) { XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty } : parseGemini' lines parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines where go (('`':.'`':.'`':._):lines) = parseGemini' lines go (_:lines) = go lines go [] = [] parseGemini' ("```":lines) = go [] lines where go texts (('`':.'`':.'`':._):lines) = el "pre" (Txt.unlines texts) : parseGemini' lines go texts (line:lines) = go (texts ++ [line]) lines go texts [] = [] parseGemini' (line:lines) = el "p" line : parseGemini' lines parseGemini' [] = []