{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-} module Internal.Load(load, load', back, next, parseAbsoluteURI, clickEl, isClickableEl) where import Internal import Control.Concurrent.MVar import System.Timeout (timeout) import Control.Monad.IO.Class import Data.Aeson import Data.Text (Text, pack) import qualified Data.Text as Txt import Data.Text.Lazy (fromStrict) import GHC.Generics import Data.Maybe (fromMaybe, mapMaybe) import qualified Data.ByteString.Lazy as B (toStrict) import qualified Data.HashMap.Strict as HM import Data.List (delete) import Network.URI as URI import Network.URI.Fetch as URI import Network.URI.Charset (convertCharset) import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import qualified Text.XML.Cursor as XC import qualified Text.XML.Cursor ((>=>)) import qualified Data.Map as M import Internal.Forms mime = words "text/html text/xml application/xml application/xhtml+xml text/plain" load' :: Internal.Session -> URI -> IO () load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do resp@(redirected, _, _) <- fetchURL' (loader session') mime uri let doc = parseDocument resp return $ session' { currentURL = redirected, document = doc, knownEls = HM.empty, id2els = HM.fromList $ indexedIDs doc } maybeTimeout :: Session' -> URI -> IO Session' -> IO Session' maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session = -- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds. fromMaybe session <$> timeout (delay * 1000) act maybeTimeout _ _ act = act --- load :: Internal.Session -> URI -> IO () load session uri = do modifyMVar_ session $ return . inner load' session uri where inner session'@Session {backStack = backStack', currentURL = currentURL' } = session' { backStack = currentURL' : backStack' } back :: Internal.Session -> IO () back session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } = (session' { backStack = bs, nextStack = n:ns }, b) next :: Internal.Session -> IO () next session = do uri <- modifyMVar session $ return . inner load' session uri where inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } = (session' { backStack = b:bs, nextStack = ns }, n) --- parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp) parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp) where mime' = takeWhile (/= ';') mime parseDocument (_, _, Left text) | Right doc <- XML.parseText XML.def $ fromStrict text = doc | otherwise = pageForText text parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8. parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)") pageForText txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "pre", XML.elementAttributes = M.empty, XML.elementNodes = [XML.NodeContent txt] }, XML.documentEpilogue = [] } utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes -------- ---- Gemini implementation -------- -- Copied from css-syntax. pattern (:.) :: Char -> Txt.Text -> Txt.Text pattern x :. xs <- (Txt.uncons -> Just (x, xs)) infixr 5 :. el name text = XML.Element name M.empty [XML.NodeContent text] parseGemini :: Maybe String -> Txt.Text -> XML.Document parseGemini lang txt = XML.Document { XML.documentPrologue = XML.Prologue [] Nothing [], XML.documentRoot = XML.Element { XML.elementName = "body", XML.elementAttributes = M.fromList [ ("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]], XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt }, XML.documentEpilogue = [] } csv (',':_) = "" csv (c:rest) = c:csv rest csv "" = "" parseGemini' :: [Txt.Text] -> [XML.Element] parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) = el "h6" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) = el "h5" line : parseGemini' lines parseGemini' (('#':.'#':.'#' :. '#':.line):lines) = el "h4" line : parseGemini' lines parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines -- Not properly structured, but still sounds fine... parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines parseGemini' (('=':.'>':.line):lines) | (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) { XML.elementAttributes = M.insert "href" url M.empty } : parseGemini' lines | otherwise = (el "a" $ Txt.strip line) { XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty } : parseGemini' lines parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines where go (('`':.'`':.'`':._):lines) = parseGemini' lines go (_:lines) = go lines go [] = [] parseGemini' ("```":lines) = go [] lines where go texts (('`':.'`':.'`':._):lines) = el "pre" (Txt.unlines texts) : parseGemini' lines go texts (line:lines) = go (texts ++ [line]) lines go texts [] = [] parseGemini' (line:lines) = el "p" line : parseGemini' lines parseGemini' [] = [] ---- clickEl :: Internal.Session -> XC.Cursor -> IO () clickEl session el | XML.NodeElement el' <- XC.node el = clickEl' session el' el | otherwise = return () clickEl' session (XML.Element (XML.Name name _ _) attrs _) cursor -- LINKS -- There's more nuances to links in Rhapsode, but that covered most of them. | Just href <- "href" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack href = do base <- withMVar session (return . Internal.currentURL) load session $ URI.relativeTo uri base | Just src <- "src" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack src = do base <- withMVar session (return . Internal.currentURL) load session $ URI.relativeTo uri base -- FORMS -- TODO type=submit | name `elem` ["button", "input"], Just "reset" <- "type" `M.lookup` attrs' = modifyMVar_ session $ \session' -> do let form = findForm cursor $ id2els session' let blanked = prefillForm form $ XC.fromDocument $ document session' return $ case form of XML.Element (XML.Name "form" _ _) _ _ -> session' { forms = M.insert form blanked $ forms session' } _ -> session' | name == "input", Just "radio" <- "type" `M.lookup` attrs', Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' = modifyForm (const [value]) name cursor session | name == "input", Just "checkbox" <- "type" `M.lookup` attrs', Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' = modifyForm (toggleOption value) name cursor session | otherwise = return () where attrs' = M.mapKeys XML.nameLocalName attrs toggleOption value old | value `elem` old = delete value old | otherwise = value:old -- Keep inline with clickEl isClickableEl :: XC.Cursor -> Bool isClickableEl el | XML.NodeElement el' <- XC.node el = isClickableEl' el' | otherwise = False isClickableEl' (XML.Element _ attrs _) | Just href <- "href" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack href = True | Just src <- "src" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack src = True | otherwise = False where attrs' = M.mapKeys XML.nameLocalName attrs indexedIDs doc = mapMaybe extractId $ XC.orSelf XC.descendant $ XC.fromDocument doc where extractId cursor | XML.NodeElement el@(XML.Element _ attrs _) <- XC.node cursor, Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs = Just (id, cursor) | otherwise = Nothing