{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-}
module Internal.Load(load, load', back, next, parseAbsoluteURI, clickEl, isClickableEl) where
import Internal
import Control.Concurrent.MVar
import System.Timeout (timeout)
import Control.Monad.IO.Class
import Data.Aeson
import Data.Text (Text, pack)
import qualified Data.Text as Txt
import Data.Text.Lazy (fromStrict)
import GHC.Generics
import Data.Maybe (fromMaybe, mapMaybe)
import qualified Data.ByteString.Lazy as B (toStrict)
import qualified Data.HashMap.Strict as HM
import Data.List (delete)
import Network.URI as URI
import Network.URI.Fetch as URI
import Network.URI.Charset (convertCharset)
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import qualified Text.XML.Cursor as XC
import qualified Text.XML.Cursor ((>=>))
import qualified Data.Map as M
import Internal.Forms
mime = words "text/html text/xml application/xml application/xhtml+xml text/plain"
load' :: Internal.Session -> URI -> IO ()
load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do
resp@(redirected, _, _) <- fetchURL' (loader session') mime uri
let doc = parseDocument resp
return $ session' { currentURL = redirected, document = doc,
knownEls = HM.empty, id2els = HM.fromList $ indexedIDs doc }
maybeTimeout :: Session' -> URI -> IO Session' -> IO Session'
maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session =
-- WebDriver deals in terms of milliseconds, I think?, Haskell deals in terms of microseconds.
fromMaybe session <$> timeout (delay * 1000) act
maybeTimeout _ _ act = act
---
load :: Internal.Session -> URI -> IO ()
load session uri = do
modifyMVar_ session $ return . inner
load' session uri
where
inner session'@Session {backStack = backStack', currentURL = currentURL' } =
session' { backStack = currentURL' : backStack' }
back :: Internal.Session -> IO ()
back session = do
uri <- modifyMVar session $ return . inner
load' session uri
where
inner session'@Session { backStack = b:bs, currentURL = n, nextStack = ns } =
(session' { backStack = bs, nextStack = n:ns }, b)
next :: Internal.Session -> IO ()
next session = do
uri <- modifyMVar session $ return . inner
load' session uri
where
inner session'@Session { backStack = bs, currentURL = b, nextStack = n:ns } =
(session' { backStack = b:bs, nextStack = ns }, n)
---
parseDocument (uri, "html/x-error\t", resp) = parseDocument (uri, "text/html", resp)
parseDocument (_, "text/html", Left text) = HTML.parseLT $ fromStrict text
parseDocument (_, "text/html", Right bytes) = HTML.parseLBS bytes
parseDocument (uri, mime, resp) | mime /= mime' = parseDocument (uri, mime', resp)
where mime' = takeWhile (/= ';') mime
parseDocument (_, _, Left text)
| Right doc <- XML.parseText XML.def $ fromStrict text = doc
| otherwise = pageForText text
parseDocument (_, _, Right bytes) | Right doc <- XML.parseLBS XML.def bytes = doc
parseDocument (_, 't':'e':'x':'t':'/':_, Right bytes) = pageForText $ utf8' bytes -- charset wasn't specified, so assume utf-8.
parseDocument (_, mime, Right _) = pageForText $ pack ('(':mime ++ " binary data)")
pageForText txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
XML.elementAttributes = M.empty,
XML.elementNodes = [XML.NodeContent txt]
},
XML.documentEpilogue = []
}
utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes
--------
---- Gemini implementation
--------
-- Copied from css-syntax.
pattern (:.) :: Char -> Txt.Text -> Txt.Text
pattern x :. xs <- (Txt.uncons -> Just (x, xs))
infixr 5 :.
el name text = XML.Element name M.empty [XML.NodeContent text]
parseGemini :: Maybe String -> Txt.Text -> XML.Document
parseGemini lang txt = XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "body",
XML.elementAttributes = M.fromList [
("lang", Txt.pack lang') | Just langs <- [lang], lang' <- [csv langs]],
XML.elementNodes = map XML.NodeElement $ parseGemini' $ Txt.lines txt
},
XML.documentEpilogue = []
}
csv (',':_) = ""
csv (c:rest) = c:csv rest
csv "" = ""
parseGemini' :: [Txt.Text] -> [XML.Element]
parseGemini' (('#':.'#':.'#' :. '#':.'#':.'#':.line):lines) =
el "h6" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.'#':.line):lines) =
el "h5" line : parseGemini' lines
parseGemini' (('#':.'#':.'#' :. '#':.line):lines) =
el "h4" line : parseGemini' lines
parseGemini' (('#':.'#':.'#':.line):lines) = el "h3" line : parseGemini' lines
parseGemini' (('#':.'#':.line):lines) = el "h2" line : parseGemini' lines
parseGemini' (('#':.line):lines) = el "h1" line : parseGemini' lines
-- Not properly structured, but still sounds fine...
parseGemini' (('*':.line):lines) = el "li" line : parseGemini' lines
parseGemini' (('>':.line):lines) = el "blockquote" line : parseGemini' lines
parseGemini' (('=':.'>':.line):lines)
| (url:text@(_:_)) <- Txt.words line = (el "a" $ Txt.unwords text) {
XML.elementAttributes = M.insert "href" url M.empty
} : parseGemini' lines
| otherwise = (el "a" $ Txt.strip line) {
XML.elementAttributes = M.insert "href" (Txt.strip line) M.empty
} : parseGemini' lines
parseGemini' (('`':.'`':.'`':.line):lines) = el "p" line : go lines
where
go (('`':.'`':.'`':._):lines) = parseGemini' lines
go (_:lines) = go lines
go [] = []
parseGemini' ("```":lines) = go [] lines
where
go texts (('`':.'`':.'`':._):lines) =
el "pre" (Txt.unlines texts) : parseGemini' lines
go texts (line:lines) = go (texts ++ [line]) lines
go texts [] = []
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []
----
clickEl :: Internal.Session -> XC.Cursor -> IO ()
clickEl session el | XML.NodeElement el' <- XC.node el = clickEl' session el' el
| otherwise = return ()
clickEl' session (XML.Element (XML.Name name _ _) attrs _) cursor
-- LINKS
-- There's more nuances to links in Rhapsode, but that covered most of them.
| Just href <- "href" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack href = do
base <- withMVar session (return . Internal.currentURL)
load session $ URI.relativeTo uri base
| Just src <- "src" `M.lookup` attrs', Just uri <- URI.parseURIReference $ Txt.unpack src = do
base <- withMVar session (return . Internal.currentURL)
load session $ URI.relativeTo uri base
-- FORMS
| name `elem` ["button", "input"], Just "submit" <- "type" `M.lookup` attrs' = do
case ("name" `M.lookup` attrs', "value" `M.lookup` attrs') of
(Just name, Just value) -> modifyForm ((:) value) name cursor session
_ -> return ()
(uri, query, method) <- formAction cursor session
load session uri { uriQuery = '?':Txt.unpack query }
| name `elem` ["button", "input"], Just "reset" <- "type" `M.lookup` attrs' =
modifyMVar_ session $ \session' -> do
let form = findForm cursor $ id2els session'
let blanked = prefillForm form $ XC.fromDocument $ document session'
return $ case form of
XML.Element (XML.Name "form" _ _) _ _ ->
session' { forms = M.insert form blanked $ forms session' }
_ -> session'
| name == "input", Just "radio" <- "type" `M.lookup` attrs',
Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' =
modifyForm (const [value]) name cursor session
| name == "input", Just "checkbox" <- "type" `M.lookup` attrs',
Just name <- "name" `M.lookup` attrs', Just value <- "value" `M.lookup` attrs' =
modifyForm (toggleOption value) name cursor session
| name == "option", Just value <- "value" `M.lookup` attrs',
(datalist:_) <- (XC.checkName (== "datalist") XC.>=> XC.ancestor) cursor,
XML.NodeElement (XML.Element "datalist" attrs2 _) <- XC.node datalist,
Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs2 = do
root <- withMVar session (\session' -> return . XC.fromDocument $ document session')
let refsList (XML.Element _ attrs3 _) = M.lookup "list" (M.mapKeys XML.nameLocalName attrs3) == Just id
let input = XC.checkElement refsList XC.>=> XC.checkName (== "input") XC.>=> XC.descendant
let (input', attrs4') = case input root of {
(input':_) | XML.NodeElement (XML.Element "input" attrs4 _) <- XC.node input' ->
(input', M.mapKeys XML.nameLocalName attrs4);
_ -> (root, M.empty)
}
case "name" `M.lookup` attrs4' of
Just name -> modifyForm (const [value]) name input' session
Nothing -> return ()
| name == "option", (select:_) <- XC.parent cursor,
XML.NodeElement select'@(XML.Element "select" attrs2 _) <- XC.node select,
let attrs2' = M.mapKeys XML.nameLocalName attrs2,
Just name <- "name" `M.lookup` attrs2, Just value <- "value" `M.lookup` attrs' =
if "multiple" `M.member` attrs2'
then modifyForm (toggleOption value) name cursor session
else modifyForm (const [value]) name cursor session
| name == "label", Just for <- "for" `M.lookup` attrs' = do
input <- withMVar session (return . HM.lookup for . id2els)
case input of { Just input' -> clickEl session input'; Nothing -> return () }
| otherwise = return ()
where
attrs' = M.mapKeys XML.nameLocalName attrs
toggleOption value old | value `elem` old = delete value old
| otherwise = value:old
-- Keep inline with clickEl
isClickableEl :: XC.Cursor -> Bool
isClickableEl el | XML.NodeElement el' <- XC.node el = isClickableEl' el'
| otherwise = False
isClickableEl' (XML.Element name attrs _)
| Just href <- "href" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack href = True
| Just src <- "src" `M.lookup` attrs', Just _ <- URI.parseURIReference $ Txt.unpack src = True
| name `elem` ["input", "label", "button", "option"] = True
| otherwise = False
where attrs' = M.mapKeys XML.nameLocalName attrs
indexedIDs doc = mapMaybe extractId $ XC.orSelf XC.descendant $ XC.fromDocument doc
where
extractId cursor | XML.NodeElement el@(XML.Element _ attrs _) <- XC.node cursor,
Just id <- "id" `M.lookup` M.mapKeys XML.nameLocalName attrs = Just (id, cursor)
| otherwise = Nothing