From 38e35ef9d734707812e882609d87acb401c82ef2 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Thu, 24 Jun 2021 16:48:46 +1200 Subject: [PATCH] Add support for querying elements by CSS selector. Uses a fork of the hackage, altered to use as a lexer. --- amphiarao.cabal | 8 ++- src/Internal.hs | 23 ++++++-- src/Internal/Elements.hs | 24 +++++++-- src/Internal/Load.hs | 3 +- src/Webdriver.hs | 17 +++++- src/XML/LICENSE | 1 + src/XML/Selectors/CSS.hs | 64 ++++++++++++++++++++++ src/XML/Selectors/CSS/Parse.y | 87 ++++++++++++++++++++++++++++++ src/XML/Selectors/CSS/Tokens.hs | 42 +++++++++++++++ src/XML/Selectors/CSS/Types.hs | 94 +++++++++++++++++++++++++++++++++ 10 files changed, 353 insertions(+), 10 deletions(-) create mode 100644 src/XML/LICENSE create mode 100644 src/XML/Selectors/CSS.hs create mode 100644 src/XML/Selectors/CSS/Parse.y create mode 100644 src/XML/Selectors/CSS/Tokens.hs create mode 100644 src/XML/Selectors/CSS/Types.hs diff --git a/amphiarao.cabal b/amphiarao.cabal index b274928..5e3c218 100644 --- a/amphiarao.cabal +++ b/amphiarao.cabal @@ -56,7 +56,8 @@ executable amphiarao -- Modules included in this executable, other than Main. other-modules: Webdriver, Capabilities, JSON, Messages, Internal, Internal.Load, Internal.Elements, - UI.Templates, UI.Search + UI.Templates, UI.Search, + XML.Selectors.CSS, XML.Selectors.CSS.Parse, XML.Selectors.CSS.Tokens, XML.Selectors.CSS.Types -- LANGUAGE extensions used by modules in this package. -- other-extensions: @@ -66,7 +67,10 @@ executable amphiarao aeson >= 1.5.0 && <1.6, text, bytestring, unordered-containers, vector, containers >=0.5 && <0.7, uuid >=1.3 && <1.4, blaze-html, xml-conduit >= 1.8 && < 1.9, html-conduit >= 1.3 && <1.4, - hurl >= 2.1 && <3, network-uri + hurl >= 2.1 && <3, network-uri, + css-syntax, array >=0.4 + + build-tools: happy -- Directories containing source files. hs-source-dirs: src diff --git a/src/Internal.hs b/src/Internal.hs index 946a7ac..0397ea6 100644 --- a/src/Internal.hs +++ b/src/Internal.hs @@ -1,7 +1,7 @@ {-# LANGUAGE OverloadedStrings, ScopedTypeVariables, DeriveGeneric #-} module Internal(Session(..), Session'(..), Sessions(..), initSessions, createSession, delSession, getSession, withSession, - Timeouts(..)) where + Timeouts(..), registerEl, serializeEl) where import qualified Data.HashMap.Strict as M import Data.UUID as ID @@ -17,6 +17,7 @@ import GHC.Generics import qualified Network.URI as URI import qualified Network.URI.Fetch as URI import qualified Text.XML as XML +import qualified Text.XML.Cursor as XML import qualified Data.Map as M' type Sessions = MVar (M.HashMap UUID Session) @@ -28,7 +29,8 @@ data Session' = Session { currentURL :: URI.URI, backStack :: [URI.URI], nextStack :: [URI.URI], - document :: XML.Document + document :: XML.Document, + knownEls :: M.HashMap UUID XML.Cursor } initSessions :: IO Sessions @@ -54,7 +56,8 @@ createSession sessions caps = do XML.elementNodes = [] }, XML.documentEpilogue = [] - } + }, + knownEls = M.empty } session' <- newMVar session modifyMVar_ sessions (return . M.insert uuid session') @@ -83,3 +86,17 @@ data Timeouts = Timeouts { } deriving Generic instance FromJSON Timeouts instance ToJSON Timeouts + +registerEl :: Session -> XML.Cursor -> IO UUID +registerEl session el = modifyMVar session $ \session' -> do + uuid <- ID.nextRandom + return (session' { + knownEls = M.insert uuid el $ knownEls session' + }, uuid) + +serializeEl :: Session -> XML.Cursor -> IO Object +serializeEl session el = do + uuid <- registerEl session el + return $ M.fromList [ + ("element-6066-11e4-a52e-4f735466cecf", String $ pack $ ID.toString uuid) + ] diff --git a/src/Internal/Elements.hs b/src/Internal/Elements.hs index 5435ead..82493c5 100644 --- a/src/Internal/Elements.hs +++ b/src/Internal/Elements.hs @@ -1,13 +1,21 @@ -{-# LANGUAGE OverloadedStrings #-} -module Internal.Elements (getTitle) where +{-# LANGUAGE OverloadedStrings, DeriveGeneric #-} +module Internal.Elements (getTitle, Find(..), find) where import Text.XML import qualified Data.Map as M -import Data.Text as Txt +import Data.Text as Txt hiding (find) import Control.Concurrent.MVar import Internal +import Data.Aeson +import Data.Text (Text, pack) +import GHC.Generics + +-- Selector engines +import qualified Text.XML.Cursor as X +import qualified XML.Selectors.CSS as CSS + getTitle :: Session -> IO Text getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session @@ -17,3 +25,13 @@ getTitle' (Element _ _ childs) -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition. | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title | otherwise = "" + +--- + +data Find = Find { using :: Text, value :: String } deriving Generic +instance FromJSON Find +find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor] +find (Find "css selector" sel) root = case CSS.parsePath sel of + Right sel' -> Right $ CSS.toAxis sel' root + Left msg -> Left (True, msg) +find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_) diff --git a/src/Internal/Load.hs b/src/Internal/Load.hs index c04d027..d19b344 100644 --- a/src/Internal/Load.hs +++ b/src/Internal/Load.hs @@ -15,6 +15,7 @@ import GHC.Generics import Data.Maybe (fromMaybe) import qualified Data.ByteString.Lazy as B (toStrict) +import qualified Data.HashMap.Strict as HM import Network.URI as URI import Network.URI.Fetch as URI @@ -28,7 +29,7 @@ mime = words "text/html text/xml application/xml application/xhtml+xml text/plai load' :: Internal.Session -> URI -> IO () load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do resp@(redirected, _, _) <- fetchURL' (loader session') mime uri - return $ session' { currentURL = redirected, document = parseDocument resp } + return $ session' { currentURL = redirected, document = parseDocument resp, knownEls = HM.empty } maybeTimeout :: Session' -> URI -> IO Session' -> IO Session' maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session = diff --git a/src/Webdriver.hs b/src/Webdriver.hs index e1500ca..bd5b6b4 100644 --- a/src/Webdriver.hs +++ b/src/Webdriver.hs @@ -13,10 +13,12 @@ import Data.UUID as ID import Data.UUID.V4 import Control.Monad.IO.Class (liftIO) +import Control.Monad (mapM) import Data.Maybe (fromMaybe, isJust) import qualified Network.URI as URI import qualified Network.URI.Fetch as URI +import qualified Text.XML.Cursor as XC import Capabilities (processCaps) import JSON @@ -53,7 +55,8 @@ serveSession sessions = WD.withSession fail (\uuid session -> msum [ dir "minimize" $ unsupportedOp, dir "fullscreen" $ unsupportedOp ], - dir "frame" $ msum [noSuchFrame, dir "parent" $ ok $ toResponse ()] -- Noops + dir "frame" $ msum [noSuchFrame, dir "parent" $ ok $ toResponse ()], -- Noops + dir "element" $ findFromRoot session ]) sessions where fail uuid'| Just _ <- ID.fromString uuid' = errJSON 404 "invalid session ID" $ ( @@ -171,3 +174,15 @@ unsupportedOp = do errJSON 400 "unsupported operation" "Windowsize is meaningless to Rhapsode" ---- + +findFromRoot session = do + method POST + nullDir + req <- getJSON + session' <- liftIO $ readMVar session + case req of + Just req' -> case WDE.find req' $ XC.fromDocument $ WD.document session' of + Right res -> okJSON =<< mapM (liftIO . WD.serializeEl session) res + Left (True, msg) -> errJSON 400 "invalid selector" msg + Left (False, msg) -> errJSON 400 "invalid argument" msg + Nothing -> errJSON 400 "invalid argument" "Failed to parse JSON" diff --git a/src/XML/LICENSE b/src/XML/LICENSE new file mode 100644 index 0000000..d60208c --- /dev/null +++ b/src/XML/LICENSE @@ -0,0 +1 @@ +BSD-3 Clause, forked from `selectors` hackage due to installation struggles. diff --git a/src/XML/Selectors/CSS.hs b/src/XML/Selectors/CSS.hs new file mode 100644 index 0000000..504d073 --- /dev/null +++ b/src/XML/Selectors/CSS.hs @@ -0,0 +1,64 @@ +-- | This module exports functions for parsing and executing CSS selector +-- expressions in pure Haskell. TH QuasiQuoters are provided in +-- "XML.Selectors.CSS.TH" for validation and static-checking of CSS selectors. + +{-# LANGUAGE OverloadedStrings #-} +module XML.Selectors.CSS ( + toAxis, + parsePath + ) where + +import XML.Selectors.CSS.Parse +import XML.Selectors.CSS.Types +import Text.XML +import Text.XML.Cursor +import Data.List +import Data.String +import qualified Data.Map as M +import qualified Data.Text as T + +-- Axes that match nodes and their ancestors could result in duplicate nodes +-- in following descendant axes + +-- | Convert CSS 'Selector' to an 'Axis'. +toAxis :: Selector -> Axis +toAxis selector = descendant >=> toAxis' selector + +mhead :: Monad m => [a] -> m a +mhead [] = fail "empty" +mhead (a:_) = return a + +toAxis' (Selector selector) = simpleAxis selector +toAxis' (Combinator simple comb selector) = axis where + axis = simpleAxis simple >=> combaxis >=> toAxis' selector + combaxis = case comb of + Descendant -> descendant + Child -> child + AnySibling -> followingSibling + FollowingSibling -> mhead . followingSibling + +simpleAxis (SimpleSelector mbelem specs mbpseudo) = axis where + axis = elemaxis >=> specaxis >=> pseuaxis + elemaxis = case mbelem of + Nothing -> anyElement + Just nm -> element (fromString nm) + pseuaxis = case mbpseudo of + Nothing -> return + Just FirstChild -> mhead . child + Just LastChild -> return . last . child + specaxis = loop specs + loop [] = return + loop (spec:ss) = toaxis spec >=> loop ss + toaxis (ID id) = attributeIs "id" (fromString id) + toaxis (Class cls) = toaxis (Attrib "class" $ Pred Includes cls) + toaxis (Attrib attr pred) = \c -> case node c of + NodeElement (Element _ as _) | Just v <- M.lookup (fromString attr) as -> case pred of + None -> [c] + Pred op val | Equals <- op, val' == v -> [c] + | Includes <- op, val' `elem` T.words v -> [c] + | BeginsWith <- op, val' == T.take vallen v -> [c] + | EndsWith <- op, val' == T.drop (T.length v - vallen) v -> [c] + | otherwise -> [] where + val' = fromString val + vallen = T.length val' + _ -> [] diff --git a/src/XML/Selectors/CSS/Parse.y b/src/XML/Selectors/CSS/Parse.y new file mode 100644 index 0000000..2613843 --- /dev/null +++ b/src/XML/Selectors/CSS/Parse.y @@ -0,0 +1,87 @@ +{ module XML.Selectors.CSS.Parse (parsePath) where + +import XML.Selectors.CSS.Tokens (lexer) +import XML.Selectors.CSS.Types +} + +%name cssPath +%tokentype { Token } +%error { parseError } +%monad { Either String } +%token + sp { TokenSpace } + name { TokenName $$ } + string { TokenString $$ } + '+' { TokenPlus } + '-' { TokenMinus } + '/' { TokenSlash } + '>' { TokenChild } + '~' { TokenAnySibling } + '#' { TokenHash } + firstchild { TokenFirstChild } + lastchild { TokenLastChild } + '.' { TokenDot } + '=' { TokenEquals } + "~=" { TokenIncludes } + "^=" { TokenBeginsWith } + "$=" { TokenEndsWith } + "|=" { TokenDashMatch } + ':' { TokenPseudo } + '(' { TokenOP } + ')' { TokenCP } + '[' { TokenOB } + ']' { TokenCB } + nat { TokenDigits $$ } + '*' { TokenAster } + +%% + +Selector : SimpleSelector { Selector $1 } + | SimpleSelector Combinator Selector { Combinator $1 $2 $3 } + +Combinator : sp { Descendant } + | sp Combinator { $2 } + | '+' { FollowingSibling } + | '+' sp { FollowingSibling } + | '~' { AnySibling } + | '~' sp { AnySibling } + | '>' { Child } + | '>' sp { Child } + +SimpleSelector : name { SimpleSelector (Just $1) [] Nothing } + | name specs { SimpleSelector (Just $1) $2 Nothing } + | name specs Pseudo { SimpleSelector (Just $1) $2 (Just $3) } + | specs { SimpleSelector Nothing $1 Nothing } + | specs Pseudo { SimpleSelector Nothing $1 (Just $2) } + | '*' { SimpleSelector Nothing [] Nothing } + +specs : Specifier { [$1] } + | specs Specifier { $2 : $1 } + +Specifier : '#' name { ID $2 } + | '.' name { Class $2 } + | '[' attr ']' { $2 } + | '[' attr sp ']' { $2 } + +attr : name Pred { Attrib $1 $2 } + | sp attr { $2 } + +Pred : sp Pred { $2 } + | PredOp sp string { Pred $1 $3 } + | PredOp string { Pred $1 $2 } + +PredOp : '=' { Equals } + | "~=" { Includes } + | "|=" { DashMatch } + | "^=" { BeginsWith } + | "$=" { EndsWith } + +Pseudo : ':' firstchild { FirstChild } + | ':' lastchild { LastChild } + +{ +parseError toks = Left $ "parse error: " ++ show toks + +parsePath :: String -> Either String Selector +parsePath str = lexer str >>= cssPath +} diff --git a/src/XML/Selectors/CSS/Tokens.hs b/src/XML/Selectors/CSS/Tokens.hs new file mode 100644 index 0000000..a1ba0f8 --- /dev/null +++ b/src/XML/Selectors/CSS/Tokens.hs @@ -0,0 +1,42 @@ +{-# LANGUAGE OverloadedStrings #-} +module XML.Selectors.CSS.Tokens where + +import XML.Selectors.CSS.Types +import Data.CSS.Syntax.Tokens as T +import Data.Text (pack, unpack) + +lexer str = let ret = lexer' $ T.tokenize $ pack str in if TokenEOF `elem` ret + then Left "invalid token!" else Right ret + +lexer' (T.Whitespace:ts) = TokenSpace:lexer' ts +lexer' (T.Ident n:ts) = TokenName (unpack n):lexer' ts +lexer' (T.Delim n:T.Ident ns:ts) | n /= ':' = TokenName (n:unpack ns):lexer' ts +lexer' (T.Hash _ n:ts) = TokenName (unpack n):lexer' ts +lexer' (T.String txt:ts) = TokenString (unpack txt):lexer' ts +lexer' (T.Delim '+':ts) = TokenPlus:lexer' ts +lexer' (T.Delim '-':ts) = TokenMinus:lexer' ts +lexer' (T.Delim '/':ts) = TokenSlash:lexer' ts +lexer' (T.Delim '*':ts) = TokenAster:lexer' ts +lexer' (T.Delim '>':ts) = TokenChild:lexer' ts +lexer' (T.Delim ':':T.Function "nth-child":ts) = TokenNthChild:TokenOP:lexer' ts +lexer' (T.Delim ':':T.Ident "first-child":ts) = TokenFirstChild:lexer' ts +lexer' (T.Delim ':':T.Ident "last-child":ts) = TokenLastChild:lexer' ts +lexer' (T.Delim ':':T.Function "nth-last-child":ts) = TokenNthLastChild:TokenOP:lexer' ts +lexer' (T.Delim '~':ts) = TokenAnySibling:lexer' ts +lexer' (T.LeftSquareBracket:ts) = TokenOB:lexer' ts +lexer' (T.RightSquareBracket:ts) = TokenCB:lexer' ts +lexer' (T.LeftParen:ts) = TokenOP:lexer' ts +lexer' (T.RightParen:ts) = TokenCP:lexer' ts +lexer' (T.Number txt _:ts) = TokenDigits (unpack txt):lexer' ts +lexer' (T.Dimension txt _ unit:ts) = TokenDigits (unpack txt):TokenName (unpack unit):lexer' ts +lexer' (T.Delim '#':ts) = TokenHash:lexer' ts +lexer' (T.Delim '.':ts) = TokenDot:lexer' ts +lexer' (T.Delim '=':ts) = TokenEquals:lexer' ts +lexer' (T.IncludeMatch:ts) = TokenIncludes:lexer' ts +lexer' (T.DashMatch:ts) = TokenDashMatch:lexer' ts +lexer' (T.PrefixMatch:ts) = TokenBeginsWith:lexer' ts +lexer' (T.SuffixMatch:ts) = TokenEndsWith:lexer' ts +lexer' (T.BadString:ts) = TokenQuote:lexer' ts +lexer' (T.Delim ':':ts) = TokenPseudo:lexer' ts +lexer' (_:ts) = TokenEOF:lexer' ts +lexer' [] = [] diff --git a/src/XML/Selectors/CSS/Types.hs b/src/XML/Selectors/CSS/Types.hs new file mode 100644 index 0000000..b43e594 --- /dev/null +++ b/src/XML/Selectors/CSS/Types.hs @@ -0,0 +1,94 @@ +-- | Definitions of 'Token's in the CSS selector grammar as well as the data types +-- representing the parse result. +{-# LANGUAGE DeriveDataTypeable #-} +module XML.Selectors.CSS.Types where + +import Data.Typeable +import Data.Data + +-- | Top level entity representing the parse tree of a CSS path expression. +-- Composed of one or more 'SimpleSelector's combined with a 'Comb' relation. +data Selector = Selector SimpleSelector + | Combinator SimpleSelector Comb Selector + deriving (Show, Typeable, Data) + +-- | Type representing the relationship between two 'Selector's. +data Comb + -- | Any descendant; represented by plain whitespace in CSS + = Descendant + -- | Any direct child; represented by @ > @ in CSS + | Child + -- | Directly following sibling node; represented by @ + @ in CSS + | FollowingSibling + -- | Any following sibling node; represented by @ ~ @ in CSS + | AnySibling + deriving (Show, Typeable, Data) + +-- | Type representing a single set of filters for selecting nodes. +-- Contain an optional single element name, a sequence of id, class, and attribute +-- 'Specifier's, and an optional pseudo-element selector. +data SimpleSelector = SimpleSelector (Maybe String) [Specifier] (Maybe Pseudo) + deriving (Show, Typeable, Data) + + +-- | Type representing id, class, and attribute filters. +data Specifier = ID String + | Class String + | Attrib String Pred + deriving (Show, Typeable, Data) + +-- | Type representing boolean operations on attribute values. +data Pred + -- | Simple existence test + = None + -- | String comparison test + | Pred PredOp String + deriving (Show, Typeable, Data) + +data PredOp + -- | String equality + = Equals + -- | Contains word; @ ~= @ + | Includes + -- | String equality, optional dash following; @ |= @ + | DashMatch + -- | String begins with; @ ^= @ + | BeginsWith + -- | String ends with; @ $= @ + | EndsWith + deriving (Show, Typeable, Data) + +-- | Only supporting two pseudoelement selectors. +data Pseudo = FirstChild + | LastChild + deriving (Show, Typeable, Data) + +data Token = TokenSpace + | TokenName String + | TokenString String + | TokenPlus + | TokenMinus + | TokenSlash + | TokenAster + | TokenChild + | TokenNthChild + | TokenFirstChild + | TokenLastChild + | TokenNthLastChild + | TokenAnySibling + | TokenOB + | TokenCB + | TokenOP + | TokenCP + | TokenDigits String + | TokenHash + | TokenDot + | TokenEquals + | TokenIncludes + | TokenDashMatch + | TokenBeginsWith + | TokenEndsWith + | TokenQuote + | TokenEOF + | TokenPseudo + deriving (Eq, Show) -- 2.30.2