{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-} module Internal.Load(load, load', back, next, parseAbsoluteURI, clickEl, isClickableEl) where import Internal import Control.Concurrent.MVar import System.Timeout (timeout) import Control.Monad.IO.Class import Data.Aeson import Data.Text (Text, pack) import qualified Data.Text as Txt import Data.Text.Lazy (fromStrict) import GHC.Generics import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.ByteString.Lazy as B (toStrict) import qualified Data.HashMap.Strict as HM import Network.URI as URI import Network.URI.Fetch as URI import Network.URI.Charset (convertCharset) import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import qualified Text.XML.Cursor as XC import qualified Text.XML.Cursor ((>=>)) import qualified Data.Map as M import Internal.Forms mime = words "text/html text/xml application/xml application/xhtml+xml text/plain" load' :: Internal.Session -> URI -> IO () load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do resp@(redirected, _, _) <- fetchURL' (loader session') mime uri let doc = parseDocument resp return $ session' { currentURL = redirected, document = doc, knownEls = HM.empty, id2els = HM.fromList $ indexedIDs doc } maybeTimeout :: Session' -> URI -> IO Session' -> IO Session' maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session = -- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds. fromMaybe session <$> timeout (delay * 1000) act maybeTimeout _ _ act = act --- load :: Internal.Session -> URI -> IO () load session uri = do modifyMVar_ session $ return . inner load' session uri where inner session'@Session {backStack = backStack', currentURL = currentURL' } = session' { backStack = currentURL' : backStack' } back :: Internal.Session -> IO () back session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } = (session' { backStack = bs, nextStack = n:ns }, b) next :: Internal.Session -> IO () next session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } = (session' { backStack = b:bs, nextStack = ns }, n) --- parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp) parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp) where mime' = takeWhile (/= ';') mime parseDocument (_, _, Left text) | Right doc <- XML.parseText XML.def $ fromStrict text = doc | otherwise = pageForText text parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8. parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)") pageForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] } utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes -------- ---- Gemini implementation -------- -- Copied from css-syntax. pattern (:.) :: Char -> Txt.Text -> Txt.Text pattern x :. xs <- (Txt.uncons -> Just (x, xs)) infixr 5 :. el name text = XML.Element name M.empty [XML.NodeContent text] parseGemini :: Maybe String -> Txt.Text -> XML.Document parseGemini lang txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "body", XML.elementAttributes = M.fromList [ ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]], XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt }, XML.documentEpilogue = [] } csv (',':_) = "" csv (c:rest) = c:csv rest csv "" = "" parseGemini' :: [Txt.Text] -> [XML.Element] parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) = el "h5" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.line):lines) = el "h4" line : parseGemini' lines parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines -- Not properly structured, but still sounds fine... parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines parseGemini' (('=':.'>':.line):lines) | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) { XML.elementAttributes = M.insert "href" url M.empty } : parseGemini' lines | otherwise = (el "a" $ Txt.strip line) { XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty } : parseGemini' lines parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines where go (('`':.'`':.'`':._):lines) = parseGemini' lines go (_:lines) = go lines go [] = [] parseGemini' ("```":lines) = go [] lines where go texts (('`':.'`':.'`':._):lines) = el "pre" (Txt.unlines texts) : parseGemini' lines go texts (line:lines) = go (texts ++ [line]) lines go texts [] = [] parseGemini' (line:lines) = el "p" line : parseGemini' lines parseGemini' [] = [] ---- clickEl :: Internal.Session -> XC.Cursor -> IO () clickEl session el | XML.NodeElement el' <- XC.node el = clickEl' session el' | otherwise = return () clickEl' session (XML.Element _ attrs _) | Just href <- "href" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack href = do base <- withMVar session (return . Internal.currentURL) load session $ URI.relativeTo uri base | Just src <- "src" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack src = do base <- withMVar session (return . Internal.currentURL) load session $ URI.relativeTo uri base -- There's more nuances to links in Rhapsode, but that covered most of them. | otherwise = return () where attrs' = M.mapKeys XML.nameLocalName attrs -- Keep inline with clickEl isClickableEl :: XC.Cursor -> Bool isClickableEl el | XML.NodeElement el' <- XC.node el = isClickableEl' el' | otherwise = False isClickableEl' (XML.Element _ attrs _) | Just href <- "href" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack href = True | Just src <- "src" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack src = True | otherwise = False where attrs' = M.mapKeys XML.nameLocalName attrs indexedIDs doc = mapMaybe extractId $ XC.orSelf XC.descendant $ XC.fromDocument doc where extractId cursor | XML.NodeElement el@(XML.Element _ attrs _) <- XC.node cursor, Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs = Just (id, cursor) | otherwise = Nothing