~alcinnz/amphiarao

38e35ef9d734707812e882609d87acb401c82ef2 — Adrian Cochrane 3 years ago 8e19504
Add support for querying elements by CSS selector.

Uses a fork of the  hackage, altered to use  as a lexer.
M amphiarao.cabal => amphiarao.cabal +6 -2
@@ 56,7 56,8 @@ executable amphiarao
  -- Modules included in this executable, other than Main.
  other-modules:       Webdriver, Capabilities, JSON, Messages,
        Internal, Internal.Load, Internal.Elements,
        UI.Templates, UI.Search
        UI.Templates, UI.Search,
        XML.Selectors.CSS, XML.Selectors.CSS.Parse, XML.Selectors.CSS.Tokens, XML.Selectors.CSS.Types
  
  -- LANGUAGE extensions used by modules in this package.
  -- other-extensions:    


@@ 66,7 67,10 @@ executable amphiarao
        aeson >= 1.5.0 && <1.6, text, bytestring, unordered-containers, vector,
        containers >=0.5 && <0.7, uuid >=1.3 && <1.4,
        blaze-html, xml-conduit >= 1.8 && < 1.9, html-conduit >= 1.3 && <1.4,
        hurl >= 2.1 && <3, network-uri
        hurl >= 2.1 && <3, network-uri,
        css-syntax, array >=0.4

  build-tools: happy
  
  -- Directories containing source files.
  hs-source-dirs:      src

M src/Internal.hs => src/Internal.hs +20 -3
@@ 1,7 1,7 @@
{-# LANGUAGE OverloadedStrings, ScopedTypeVariables, DeriveGeneric #-}
module Internal(Session(..), Session'(..), Sessions(..),
        initSessions, createSession, delSession, getSession, withSession,
        Timeouts(..)) where
        Timeouts(..), registerEl, serializeEl) where

import qualified Data.HashMap.Strict as M
import Data.UUID as ID


@@ 17,6 17,7 @@ import GHC.Generics
import qualified Network.URI as URI
import qualified Network.URI.Fetch as URI
import qualified Text.XML as XML
import qualified Text.XML.Cursor as XML
import qualified Data.Map as M'

type Sessions = MVar (M.HashMap UUID Session)


@@ 28,7 29,8 @@ data Session' = Session {
    currentURL :: URI.URI,
    backStack :: [URI.URI],
    nextStack :: [URI.URI],
    document :: XML.Document
    document :: XML.Document,
    knownEls :: M.HashMap UUID XML.Cursor
  }

initSessions :: IO Sessions


@@ 54,7 56,8 @@ createSession sessions caps = do
                XML.elementNodes = []
            },
            XML.documentEpilogue = []
        }
        },
        knownEls = M.empty
      }
    session' <- newMVar session
    modifyMVar_ sessions (return . M.insert uuid session')


@@ 83,3 86,17 @@ data Timeouts = Timeouts {
  } deriving Generic
instance FromJSON Timeouts
instance ToJSON Timeouts

registerEl :: Session -> XML.Cursor -> IO UUID
registerEl session el = modifyMVar session $ \session' -> do
    uuid <- ID.nextRandom
    return (session' {
        knownEls = M.insert uuid el $ knownEls session'
      }, uuid)

serializeEl :: Session -> XML.Cursor -> IO Object
serializeEl session el = do
    uuid <- registerEl session el
    return $ M.fromList [
        ("element-6066-11e4-a52e-4f735466cecf", String $ pack $ ID.toString uuid)
      ]

M src/Internal/Elements.hs => src/Internal/Elements.hs +21 -3
@@ 1,13 1,21 @@
{-# LANGUAGE OverloadedStrings #-}
module Internal.Elements (getTitle) where
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Internal.Elements (getTitle, Find(..), find) where

import Text.XML
import qualified Data.Map as M
import Data.Text as Txt
import Data.Text as Txt hiding (find)
import Control.Concurrent.MVar

import Internal

import Data.Aeson
import Data.Text (Text, pack)
import GHC.Generics

-- Selector engines
import qualified Text.XML.Cursor as X
import qualified XML.Selectors.CSS as CSS

getTitle :: Session -> IO Text
getTitle session = getTitle' <$> documentRoot <$> document <$> readMVar session



@@ 17,3 25,13 @@ getTitle' (Element _ _ childs)
    -- FIXME: Caught Rhapsode bug repaired here, needs that filtering condition.
    | title:_ <- [getTitle' el | NodeElement el <- childs, getTitle' el /= ""] = title
    | otherwise = ""

---

data Find = Find { using :: Text, value :: String } deriving Generic
instance FromJSON Find
find :: Find -> X.Cursor -> Either (Bool, String) [X.Cursor]
find (Find "css selector" sel) root = case CSS.parsePath sel of
    Right sel' -> Right $ CSS.toAxis sel' root
    Left msg -> Left (True, msg)
find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_)

M src/Internal/Load.hs => src/Internal/Load.hs +2 -1
@@ 15,6 15,7 @@ import GHC.Generics

import Data.Maybe (fromMaybe)
import qualified Data.ByteString.Lazy as B (toStrict)
import qualified Data.HashMap.Strict as HM

import Network.URI as URI
import Network.URI.Fetch as URI


@@ 28,7 29,7 @@ mime = words "text/html text/xml application/xml application/xhtml+xml text/plai
load' :: Internal.Session -> URI -> IO ()
load' session uri = modifyMVar_ session $ \session' -> maybeTimeout session' uri $ do
    resp@(redirected, _, _) <- fetchURL' (loader session') mime uri
    return $ session' { currentURL = redirected, document = parseDocument resp }
    return $ session' { currentURL = redirected, document = parseDocument resp, knownEls = HM.empty }

maybeTimeout :: Session' -> URI -> IO Session' -> IO Session'
maybeTimeout session uri act | currentURL session /= uri, Just delay <- pageLoad $ timeouts session =

M src/Webdriver.hs => src/Webdriver.hs +16 -1
@@ 13,10 13,12 @@ import Data.UUID as ID
import Data.UUID.V4

import Control.Monad.IO.Class (liftIO)
import Control.Monad (mapM)
import Data.Maybe (fromMaybe, isJust)

import qualified Network.URI as URI
import qualified Network.URI.Fetch as URI
import qualified Text.XML.Cursor as XC

import Capabilities (processCaps)
import JSON


@@ 53,7 55,8 @@ serveSession sessions = WD.withSession fail (\uuid session -> msum [
            dir "minimize" $ unsupportedOp,
            dir "fullscreen" $ unsupportedOp
        ],
        dir "frame" $ msum [noSuchFrame, dir "parent" $ ok $ toResponse ()] -- Noops
        dir "frame" $ msum [noSuchFrame, dir "parent" $ ok $ toResponse ()], -- Noops
        dir "element" $ findFromRoot session
    ]) sessions
  where
    fail uuid'| Just _ <- ID.fromString uuid' = errJSON 404 "invalid session ID" $ (


@@ 171,3 174,15 @@ unsupportedOp = do
    errJSON 400 "unsupported operation" "Windowsize is meaningless to Rhapsode"

----

findFromRoot session = do
    method POST
    nullDir
    req <- getJSON
    session' <- liftIO $ readMVar session
    case req of
        Just req' -> case WDE.find req' $ XC.fromDocument $ WD.document session' of
            Right res -> okJSON =<< mapM (liftIO . WD.serializeEl session) res
            Left (True, msg) -> errJSON 400 "invalid selector" msg
            Left (False, msg) -> errJSON 400 "invalid argument" msg
        Nothing -> errJSON 400 "invalid argument" "Failed to parse JSON"

A src/XML/LICENSE => src/XML/LICENSE +1 -0
@@ 0,0 1,1 @@
BSD-3 Clause, forked from `selectors` hackage due to installation struggles.

A src/XML/Selectors/CSS.hs => src/XML/Selectors/CSS.hs +64 -0
@@ 0,0 1,64 @@
-- | This module exports functions for parsing and executing CSS selector
-- expressions in pure Haskell. TH QuasiQuoters are provided in
-- "XML.Selectors.CSS.TH" for validation and static-checking of CSS selectors.

{-# LANGUAGE OverloadedStrings #-}
module XML.Selectors.CSS (
    toAxis,
    parsePath
    ) where

import XML.Selectors.CSS.Parse
import XML.Selectors.CSS.Types
import Text.XML
import Text.XML.Cursor
import Data.List
import Data.String
import qualified Data.Map as M
import qualified Data.Text as T

-- Axes that match nodes and their ancestors could result in duplicate nodes
-- in following descendant axes

-- | Convert CSS 'Selector' to an 'Axis'.
toAxis :: Selector -> Axis
toAxis selector = descendant >=> toAxis' selector

mhead :: Monad m => [a] -> m a
mhead [] = fail "empty"
mhead (a:_) = return a

toAxis' (Selector selector) = simpleAxis selector
toAxis' (Combinator simple comb selector) = axis where
    axis = simpleAxis simple >=> combaxis >=> toAxis' selector
    combaxis = case comb of
        Descendant -> descendant
        Child -> child
        AnySibling -> followingSibling
        FollowingSibling -> mhead . followingSibling

simpleAxis (SimpleSelector mbelem specs mbpseudo) = axis where
    axis = elemaxis >=> specaxis >=> pseuaxis
    elemaxis = case mbelem of
        Nothing -> anyElement
        Just nm -> element (fromString nm)
    pseuaxis = case mbpseudo of
        Nothing -> return
        Just FirstChild -> mhead . child
        Just LastChild -> return . last . child
    specaxis = loop specs
    loop [] = return
    loop (spec:ss) = toaxis spec >=> loop ss
    toaxis (ID id) = attributeIs "id" (fromString id)
    toaxis (Class cls) = toaxis (Attrib "class" $ Pred Includes cls)
    toaxis (Attrib attr pred) = \c -> case node c of
        NodeElement (Element _ as _) | Just v <- M.lookup (fromString attr) as -> case pred of
            None -> [c]
            Pred op val | Equals <- op, val' == v -> [c]
                        | Includes <- op, val' `elem` T.words v -> [c]
                        | BeginsWith <- op, val' == T.take vallen v -> [c]
                        | EndsWith <- op, val' == T.drop (T.length v - vallen) v -> [c]
                        | otherwise -> [] where
                val' = fromString val
                vallen = T.length val'
        _ -> []

A src/XML/Selectors/CSS/Parse.y => src/XML/Selectors/CSS/Parse.y +87 -0
@@ 0,0 1,87 @@
{ module XML.Selectors.CSS.Parse (parsePath) where

import XML.Selectors.CSS.Tokens (lexer)
import XML.Selectors.CSS.Types
}

%name cssPath
%tokentype { Token }
%error { parseError }
%monad { Either String }
%token
    sp                      { TokenSpace }
    name                    { TokenName $$ }
    string                  { TokenString $$ }
    '+'                     { TokenPlus }
    '-'                     { TokenMinus }
    '/'                     { TokenSlash }
    '>'                     { TokenChild }
    '~'                     { TokenAnySibling }
    '#'                     { TokenHash }
    firstchild              { TokenFirstChild }
    lastchild               { TokenLastChild }
    '.'                     { TokenDot }
    '='                     { TokenEquals }
    "~="                    { TokenIncludes }
    "^="                    { TokenBeginsWith }
    "$="                    { TokenEndsWith }
    "|="                    { TokenDashMatch }
    ':'                     { TokenPseudo }
    '('                     { TokenOP }
    ')'                     { TokenCP }
    '['                     { TokenOB }
    ']'                     { TokenCB }
    nat                     { TokenDigits $$ }
    '*'                     { TokenAster }

%%

Selector    : SimpleSelector                        { Selector $1 }
            | SimpleSelector Combinator Selector    { Combinator $1 $2 $3 }

Combinator  : sp                                    { Descendant }
            | sp Combinator                         { $2 }
            | '+'                                   { FollowingSibling }
            | '+' sp                                { FollowingSibling }
            | '~'                                   { AnySibling }
            | '~' sp                                { AnySibling }
            | '>'                                   { Child }
            | '>' sp                                { Child }

SimpleSelector  : name                              { SimpleSelector (Just $1) [] Nothing }
                | name specs                        { SimpleSelector (Just $1) $2 Nothing }
                | name specs Pseudo                 { SimpleSelector (Just $1) $2 (Just $3) }
                | specs                             { SimpleSelector Nothing $1 Nothing }
                | specs Pseudo                      { SimpleSelector Nothing $1 (Just $2) }
                | '*'                               { SimpleSelector Nothing [] Nothing }

specs   : Specifier                                 { [$1] }
        | specs Specifier                           { $2 : $1 }

Specifier   : '#' name                              { ID $2 }
            | '.' name                              { Class $2 }
            | '[' attr ']'                          { $2 }
            | '[' attr sp ']'                       { $2 }

attr    : name Pred                                 { Attrib $1 $2 }
        | sp attr                                   { $2 }

Pred    : sp Pred                                   { $2 }
        | PredOp sp string                          { Pred $1 $3 }
        | PredOp string                             { Pred $1 $2 }

PredOp  : '='                                       { Equals }
        | "~="                                      { Includes }
        | "|="                                      { DashMatch }
        | "^="                                      { BeginsWith }
        | "$="                                      { EndsWith }

Pseudo  : ':' firstchild                            { FirstChild }
        | ':' lastchild                             { LastChild }

{
parseError toks = Left $ "parse error: " ++ show toks

parsePath :: String -> Either String Selector
parsePath str = lexer str >>= cssPath
}

A src/XML/Selectors/CSS/Tokens.hs => src/XML/Selectors/CSS/Tokens.hs +42 -0
@@ 0,0 1,42 @@
{-# LANGUAGE OverloadedStrings #-}
module XML.Selectors.CSS.Tokens where

import XML.Selectors.CSS.Types
import Data.CSS.Syntax.Tokens as T
import Data.Text (pack, unpack)

lexer str = let ret = lexer' $ T.tokenize $ pack str in if TokenEOF `elem` ret
    then Left "invalid token!" else Right ret

lexer' (T.Whitespace:ts) = TokenSpace:lexer' ts
lexer' (T.Ident n:ts) = TokenName (unpack n):lexer' ts
lexer' (T.Delim n:T.Ident ns:ts) | n /= ':' = TokenName (n:unpack ns):lexer' ts
lexer' (T.Hash _ n:ts) = TokenName (unpack n):lexer' ts
lexer' (T.String txt:ts) = TokenString (unpack txt):lexer' ts
lexer' (T.Delim '+':ts) = TokenPlus:lexer' ts
lexer' (T.Delim '-':ts) = TokenMinus:lexer' ts
lexer' (T.Delim '/':ts) = TokenSlash:lexer' ts
lexer' (T.Delim '*':ts) = TokenAster:lexer' ts
lexer' (T.Delim '>':ts) = TokenChild:lexer' ts
lexer' (T.Delim ':':T.Function "nth-child":ts) = TokenNthChild:TokenOP:lexer' ts
lexer' (T.Delim ':':T.Ident "first-child":ts) = TokenFirstChild:lexer' ts
lexer' (T.Delim ':':T.Ident "last-child":ts) = TokenLastChild:lexer' ts
lexer' (T.Delim ':':T.Function "nth-last-child":ts) = TokenNthLastChild:TokenOP:lexer' ts
lexer' (T.Delim '~':ts) = TokenAnySibling:lexer' ts
lexer' (T.LeftSquareBracket:ts) = TokenOB:lexer' ts
lexer' (T.RightSquareBracket:ts) = TokenCB:lexer' ts
lexer' (T.LeftParen:ts) = TokenOP:lexer' ts
lexer' (T.RightParen:ts) = TokenCP:lexer' ts
lexer' (T.Number txt _:ts) = TokenDigits (unpack txt):lexer' ts
lexer' (T.Dimension txt _ unit:ts) = TokenDigits (unpack txt):TokenName (unpack unit):lexer' ts
lexer' (T.Delim '#':ts) = TokenHash:lexer' ts
lexer' (T.Delim '.':ts) = TokenDot:lexer' ts
lexer' (T.Delim '=':ts) = TokenEquals:lexer' ts
lexer' (T.IncludeMatch:ts) = TokenIncludes:lexer' ts
lexer' (T.DashMatch:ts) = TokenDashMatch:lexer' ts
lexer' (T.PrefixMatch:ts) = TokenBeginsWith:lexer' ts
lexer' (T.SuffixMatch:ts) = TokenEndsWith:lexer' ts
lexer' (T.BadString:ts) = TokenQuote:lexer' ts
lexer' (T.Delim ':':ts) = TokenPseudo:lexer' ts
lexer' (_:ts) = TokenEOF:lexer' ts
lexer' [] = []

A src/XML/Selectors/CSS/Types.hs => src/XML/Selectors/CSS/Types.hs +94 -0
@@ 0,0 1,94 @@
-- | Definitions of 'Token's in the CSS selector grammar as well as the data types
-- representing the parse result.
{-# LANGUAGE DeriveDataTypeable #-}
module XML.Selectors.CSS.Types where

import Data.Typeable
import Data.Data

-- | Top level entity representing the parse tree of a CSS path expression.
-- Composed of one or more 'SimpleSelector's combined with a 'Comb' relation.
data Selector   = Selector SimpleSelector
                | Combinator SimpleSelector Comb Selector
    deriving (Show, Typeable, Data)

-- | Type representing the relationship between two 'Selector's.
data Comb 
            -- | Any descendant; represented by plain whitespace in CSS
            = Descendant
            -- | Any direct child; represented by @ > @ in CSS
            | Child
            -- | Directly following sibling node; represented by @ + @ in CSS
            | FollowingSibling
            -- | Any following sibling node; represented by @ ~ @ in CSS
            | AnySibling
    deriving (Show, Typeable, Data)

-- | Type representing a single set of filters for selecting nodes.
-- Contain an optional single element name, a sequence of id, class, and attribute
-- 'Specifier's, and an optional pseudo-element selector.
data SimpleSelector = SimpleSelector (Maybe String) [Specifier] (Maybe Pseudo)
    deriving (Show, Typeable, Data)


-- | Type representing id, class, and attribute filters.
data Specifier  = ID String
                | Class String
                | Attrib String Pred
    deriving (Show, Typeable, Data)

-- | Type representing boolean operations on attribute values.
data Pred   
            -- | Simple existence test
            = None
            -- | String comparison test
            | Pred PredOp String
    deriving (Show, Typeable, Data)

data PredOp 
            -- | String equality
            = Equals
            -- | Contains word; @ ~= @
            | Includes
            -- | String equality, optional dash following; @ |= @
            | DashMatch
            -- | String begins with; @ ^= @
            | BeginsWith
            -- | String ends with; @ $= @
            | EndsWith
    deriving (Show, Typeable, Data)

-- | Only supporting two pseudoelement selectors.
data Pseudo = FirstChild
            | LastChild
    deriving (Show, Typeable, Data)

data Token  = TokenSpace
            | TokenName String
            | TokenString String
            | TokenPlus
            | TokenMinus
            | TokenSlash
            | TokenAster
            | TokenChild
            | TokenNthChild
            | TokenFirstChild
            | TokenLastChild
            | TokenNthLastChild
            | TokenAnySibling
            | TokenOB
            | TokenCB
            | TokenOP
            | TokenCP
            | TokenDigits String
            | TokenHash
            | TokenDot
            | TokenEquals
            | TokenIncludes
            | TokenDashMatch
            | TokenBeginsWith
            | TokenEndsWith
            | TokenQuote
            | TokenEOF
            | TokenPseudo
    deriving (Eq, Show)