From 2e0a2343f591f1e45ba55eae2b3a06d9648c13ff Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Tue, 21 Dec 2021 16:11:23 +1300 Subject: [PATCH] Integrate HXT for working XPath support. --- amphiarao.cabal | 3 +- src/Internal/Elements.hs | 35 +- src/Internal/Elements/XPath.hs | 567 ----------------------- src/Internal/Elements/XPathParse.hs | 680 ---------------------------- src/UI/Search.hs | 2 +- 5 files changed, 33 insertions(+), 1254 deletions(-) delete mode 100644 src/Internal/Elements/XPath.hs delete mode 100644 src/Internal/Elements/XPathParse.hs diff --git a/amphiarao.cabal b/amphiarao.cabal index b59bbdf..3830ff6 100644 --- a/amphiarao.cabal +++ b/amphiarao.cabal @@ -56,7 +56,6 @@ executable amphiarao -- Modules included in this executable, other than Main. other-modules: Webdriver, Capabilities, JSON, Messages, Internal, Internal.Load, Internal.Elements, Internal.Forms, - Internal.Elements.XPath, Internal.Elements.XPathParse, UI.Templates, UI.Search, XML.Selectors.CSS, XML.Selectors.CSS.Parse, XML.Selectors.CSS.Tokens, XML.Selectors.CSS.Types @@ -74,7 +73,7 @@ executable amphiarao hurl >= 2.1.1 && <3, network-uri, http-client, -- Parse & query XML/HTML xml-conduit >= 1.8, html-conduit >= 1.3, css-syntax, array >=0.4, - attoparsec, time + attoparsec, time, hxt-xpath, hxt build-tools: happy diff --git a/src/Internal/Elements.hs b/src/Internal/Elements.hs index 974149a..1c76129 100644 --- a/src/Internal/Elements.hs +++ b/src/Internal/Elements.hs @@ -2,6 +2,8 @@ module Internal.Elements (getTitle, Find(..), find) where import Text.XML +import Text.XML.HXT.DOM.TypeDefs +import Data.Tree.NTree.TypeDefs (NTree(..)) import qualified Data.Map as M import Data.Text as Txt hiding (find) import Control.Concurrent.MVar @@ -15,7 +17,7 @@ import GHC.Generics -- Selector engines import qualified Text.XML.Cursor as X import qualified XML.Selectors.CSS as CSS -import qualified Internal.Elements.XPath as XPath +import Text.XML.HXT.XPath.XPathEval (getXPath', parseXPathExpr) import Network.URI (parseURIReference) import Data.Maybe @@ -47,9 +49,10 @@ find (Find "css selector" sel) root = case CSS.parsePath sel of find (Find "link text" sel) root = Right $ allLinks (== pack sel) root find (Find "partial link text" sel) root = Right $ allLinks (Txt.isInfixOf $ pack sel) root find (Find "tag name" sel) root = Right (X.descendant root >>= X.checkName (== fromString sel)) -find (Find "xpath" sel) root = case XPath.runXPath root $ Txt.pack sel of - Right res -> Right res - Left msgs -> Left (True, Prelude.unlines msgs) +-- XML Conduit doesn't have an XPath implementation, but HXT does. So convert back & forth! +find (Find "xpath" sel) root = case parseXPathExpr sel of + Right expr -> Right $ hxts2cursors root $ getXPath' expr $ conduit2hxt $ X.node root + Left err -> Left (True, err) find (Find type_ _) _ = Left (False, "Invalid selector type: " ++ Txt.unpack type_) allLinks test = X.descendant X.>=> @@ -64,3 +67,27 @@ allLinks test = X.descendant X.>=> test (Txt.unwords $ Txt.words $ nodesText childs) union a b cursor = a cursor ++ b cursor + +--- XPath Support + +map' = Prelude.map + +conduit2hxt (NodeElement (Element name attrs childs)) = + NTree (XTag (n2qn name) $ map' attr2hxt $ M.toList attrs) $ map' conduit2hxt childs +conduit2hxt (NodeInstruction (Instruction target dat)) = + NTree (XPi (mkName $ unpack target) []) [NTree (XText $ unpack dat) []] +conduit2hxt (NodeContent txt) = NTree (XText $ unpack txt) [] +conduit2hxt (NodeComment txt) = NTree (XCmt $ unpack txt) [] + +attr2hxt (name, value) = NTree (XAttr $ n2qn name) [NTree (XText $ unpack value) []] + +n2qn (Name local (Just namespace) (Just prefix)) = + mkQName (unpack prefix) (unpack local) (unpack namespace) +n2qn (Name local Nothing (Just prefix)) = + mkName (unpack prefix ++ ':' : unpack local) +n2qn (Name local (Just namespace) Nothing) = mkNsName (unpack local) (unpack namespace) +n2qn (Name local Nothing Nothing) = mkName $ unpack local + +hxt2cursor node hxt = lookup hxt [ + (conduit2hxt $ X.node cursor, cursor) | cursor <- X.orSelf X.descendant node] +hxts2cursors node hxts = catMaybes $ map' (hxt2cursor node) hxts diff --git a/src/Internal/Elements/XPath.hs b/src/Internal/Elements/XPath.hs deleted file mode 100644 index f2efaad..0000000 --- a/src/Internal/Elements/XPath.hs +++ /dev/null @@ -1,567 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -module Internal.Elements.XPath where - -import Internal.Elements.XPathParse -import qualified Data.Text as Txt -import Data.Text (Text) - -import Text.XML.Cursor hiding (bool) -import Text.XML -import qualified Data.Map.Strict as M - -import Data.Time -import Data.Maybe -import Data.Either -import Data.List (nub) -import Text.Read (readMaybe) - --- Loosely transliterated from https://www.w3.org/TR/xpath-3/#eval_context --- TODO How much are noops? -data Context = Context { - item :: [Value], - vars :: M.Map Name [Value], - funcs :: M.Map (Name, Int) (Context -> [[Value]] -> [Value]) - } - ----- Typesystem/coercion - -atomize :: Value -> [Value] -atomize ret@(Number _) = [ret] -atomize ret@(String _) = [ret] -atomize ret@(Bool _) = [ret] -atomize ret@(Error _ _ _) = [ret] -atomize (Node n) = [String $ Txt.unpack $ nodeText $ node n] -- For a proper implementation, should be parsed via XSD? -atomize (Array vals) = atomsize $ concat vals -atomize (Map _) = [Error "err:FOTY0013" "Unnaceptable map item" ""] -atomize (Function _ _ _ _) = [Error "err:FOTY0013" "Unacceptable function item" ""] -atomize (FuncRef _ _) = [Error "err:FOTY0013" "Unacceptable function item" ""] -atomsize :: [Value] -> [Value] -atomsize = concatMap atomize - -nodeText (NodeElement (Element _ _ nodes)) = Txt.concat $ map nodeText' nodes -nodeText (NodeInstruction (Instruction _ ret)) = ret -nodeText (NodeContent ret) = ret -nodeText (NodeComment ret) = ret -nodeText' (NodeElement (Element _ _ nodes)) = Txt.concat $ map nodeText' nodes -nodeText' (NodeContent ret) = ret -nodeText' _ = "" - -maybeName (NodeElement (Element n _ _)) = n -maybeName _ = "" - -nameMatch (Name "*" (Just "*") (Just "*")) _ = True -nameMatch (Name "*" (Just "*") (Just prefixA)) (Name _ _ (Just prefixB)) = prefixA == prefixB -nameMatch (Name "*" Nothing (Just prefixA)) (Name _ _ (Just prefixB)) = prefixA == prefixB -nameMatch (Name "*" (Just "*") Nothing) (Name _ _ prefix) = isNothing prefix -nameMatch (Name "*" (Just nsA) _) (Name _ (Just nsB) _) = nsA == nsB -nameMatch (Name "*" Nothing _) (Name _ ns _) = isNothing ns -nameMatch (Name nameA (Just "*") (Just "*")) (Name nameB _ _) = nameA == nameB -nameMatch (Name nameA (Just "*") (Just prefixA)) (Name nameB _ (Just prefixB)) = nameA == nameB && prefixA == prefixB -nameMatch (Name nameA Nothing (Just prefixA)) (Name nameB _ (Just prefixB)) = nameA == nameB && prefixA == prefixB -nameMatch (Name nameA (Just "*") Nothing) (Name nameB _ prefix) = nameA == nameB && isNothing prefix -nameMatch (Name nameA (Just nsA) _) (Name nameB (Just nsB) _) = nameA == nameB && nsA == nsB -nameMatch (Name nameA Nothing _) (Name nameB ns _) = nameA == nameB && isNothing ns -nameMatch _ _ = False - -hasName name node = nameMatch (maybeName node) name - -attribute' name (NodeElement (Element _ attrs _)) = - [String $ Txt.unpack val | (key, val) <- M.toList attrs, nameMatch name key] -attribute' _ _ = [] - -bool [] = Right False -bool (Node _:_) = Right True -bool [Bool ret] = Right ret -bool [String ret] = Right $ not $ null ret -bool [Number x] = Right $ not (isNaN x || x == 0) -bool vals | ret@(_:_) <- [err | err@(Error _ _ _) <- vals] = Left ret -bool (val:_) = Left [Error "err:FORG0006" "Invalid argument type" $ show $ val2type val] - -nums items | ret@(_:_) <- [err | err@(Error _ _ _) <- items] = Left ret -nums (Number x:items) = (x:) <$> nums items -nums (String str:items) | Just x <- readMaybe str = (x:) <$> nums items - | otherwise = Left [Error "err:FORG0001" "Invalid cast to number" str] -nums (Bool True:items) = (1:) <$> nums items -nums (Bool False:items) = (0:) <$> nums items -nums [] = Right [] -nums items = nums $ atomsize items -num items = case nums items of - Left errs -> Left errs - Right (x:_) -> Right x - Right [] -> Right (0/0) -- NaN - -withNums2 x y cb = withNums2' (nums x) (nums y) cb -withNums2' (Right xs) (Right ys) cb = cb xs ys -withNums2' x y _ = fromLeft [] x ++ fromLeft [] y -withNum2 x y cb = withNum2' (num x) (num y) cb -withNum2' (Right x) (Right y) cb = cb x y -withNum2' x y _ = fromLeft [] x ++ fromLeft [] y - -text :: [Value] -> Either [Value] [String] -text items | ret@(_:_) <- [err | err@(Error _ _ _) <- items] = Left ret -text (Number x:items) = (show x:) <$> text items -text (String ret:items) = (ret:) <$> text items -text (Bool b:items) = (show b:) <$> text items -text [] = Right [] -text items = text $ atomsize items - ----Type system - -cast (TypeAtomic ty) [x] - | ty `elem` ["xsd:integer", "xsd:float", "xsd:double", "xsd:decimal"], Right x <- num [x] = [Number x] - | ty == "xsd:bool", Right b <- bool [x] = [Bool b] - | Right [str] <- text [x] = [String str] -cast TypeText [x] | Right [str] <- text [x] = [String str] -cast ty [_] = [Error "err:XPTY0004" "Invalid type to cast to." $ show ty] -cast _ [] = [] -cast _ items = [Error "err:XPTY0004" "Too many items in cast." $ show $ length items] -cast' :: Quantity -> [Value] -> [Value] -cast' ty val | instanceOf' ty val = val -cast' (Quantity ty _ Many) _ = [Error "err:XPDY0050" "Can't cast to non-optional multiplicity" ""] -cast' (Quantity ty _ None) _ = [] -cast' (Quantity ty Many One) _ = [Error "err:XPDY0050" "Can't cast to non-optional multiplicity" ""] -cast' (Quantity ty One One) val | null $ cast ty $ atomsize val = cast ty $ atomsize val - | otherwise = [Error "err:XPTY0004" "Expected non-zero items in value." "0"] -cast' (Quantity ty None One) val = cast ty $ atomsize val -tryCast' (Just ty) val = let val' = cast (TypeAtomic ty) [val] in if any isErr val' then [] else val' -tryCast' Nothing val = [val] - -instanceOf :: XType -> Value -> Bool -instanceOf (TypeDocument type_) (Node cursor) | null $ ancestor cursor = instanceOf type_ $ Node cursor -instanceOf (TypeElement (Just name) type_) (Node cursor) - | NodeElement (Element n _ _) <- node cursor = instanceOf (TypeElement Nothing type_) $ Node cursor -instanceOf (TypeElement Nothing (Just "xsd:bool")) val - | Right res <- text [val] = res `elem` [["true"], ["false"]] -instanceOf (TypeElement Nothing (Just "xsd:integer")) val - | Right x <- num [val] = fromIntegral (floor x) == x -instanceOf (TypeElement Nothing (Just type_)) val - | type_ `elem` ["xsd:double", "xsd:float", "xsd:decimal"] = isRight $ num [val] - | otherwise = True -instanceOf (TypeArray Nothing) (Array _) = True -instanceOf (TypeArray (Just type_)) (Array vals) = all (instanceOf' type_) vals -instanceOf (TypeMap Nothing') (Map _) = True -instanceOf (TypeMap (Some' _ valType)) (Map m) = all (instanceOf' valType) $ M.elems m -- TODO check keytype -instanceOf (TypeFunction Nothing') (Array _) = True -instanceOf (TypeFunction Nothing') (Map _) = True -instanceOf (TypeFunction Nothing') (PartialFunc _ _) = True -instanceOf (TypeFunction Nothing') (FuncRef _ _) = True -instanceOf (TypeFunction Nothing') (Function _ _ _ _) = True -instanceOf (TypeFunction (Some' [Quantity (TypeAtomic "xsd:integer") One One] retty)) (Array vals) = - all (instanceOf' retty) vals -instanceOf (TypeFunction (Some' [Quantity (TypeAtomic _) One One] retty)) (Map m) = - all (instanceOf' retty) $ M.elems m -instanceOf (TypeFunction (Some' [Quantity TypeText One One] retty)) (Map m) = - all (instanceOf' retty) $ M.elems m --- FIXME doesn't actually check types, that requires deeper introspection than I currently model. -instanceOf (TypeFunction (Some' argtys _)) (PartialFunc args _) = length argtys == length args -instanceOf (TypeFunction (Some' argtys _)) (FuncRef _ nargs) = length argtys == nargs -instanceOf (TypeFunction (Some' argtys retty)) (Function args retty' _ _) = - retty == retty' && and [ty == ty' | (ty, (_, ty')) <- zip argtys args] -instanceOf (TypeSchemaElement _) _ = False -- TODO What's this? -instanceOf TypeNode (Node _) = True -instanceOf TypeNamespaceNode _ = False -- Don't think I support this yet. -instanceOf TypeText (String _) = True -instanceOf TypeText (Node n) | NodeContent _ <- node n = True -instanceOf TypeComment (Node n) | NodeComment _ <- node n = True -instanceOf (TypePI Nothing) (Node n) | NodeInstruction _ <- node n = True -instanceOf (TypePI (Just name)) (Node n) | NodeInstruction (Instruction name' _) <- node n = name == name' -instanceOf (TypeSchemaAttribute _) _ = False -- Don't think I support this yet. -instanceOf (TypeAttribute _ _) _ = False -- XML Conduit's datamodel had me support this indirectly. -instanceOf (TypeAtomic "xsd:bool") (Bool _) = True -instanceOf (TypeAtomic "xsd:integer") (Number x) = fromIntegral (floor x) == x -instanceOf (TypeAtomic type_) (Number _) = type_ `elem` ["xsd:double", "xsd:float", "xsd:decimal"] -instanceOf (TypeAtomic type_) (String _) = type_ `notElem` ["xsd:bool", "xsd:double", "xsd:float", "xsd:decimal"] -instanceOf TypeItem _ = True -instanceOf _ _ = False -instanceOf' :: Quantity -> [Value] -> Bool -instanceOf' (Quantity ty Many high) vals@(_:_:_) = instanceOf' (Quantity ty None high) vals -instanceOf' (Quantity ty One high) vals@(_:_) = instanceOf' (Quantity ty None high) vals -instanceOf' (Quantity type_ None Many) vals = all (instanceOf type_) vals -instanceOf' (Quantity type_ None One) vals | length vals <= 1 = all (instanceOf type_) vals -instanceOf' (Quantity _ None None) [] = True -instanceOf' _ _ = False - ----- Interpretor - -evals ctx = concatMap $ eval ctx -eval :: Context -> Expression -> [Value] -eval ctx (ExprRange from to) = withNums2 (eval ctx from) (eval ctx to) inner - where - inner (from:_) (to:_) = map Number [from..to] - inner _ _ = [] -eval ctx (ExprUnion exps) = nub $ concat $ map (eval ctx) exps -eval ctx (ExprIntersect a b) = let b' = eval ctx b in - [v | v <- eval ctx a, v `elem` b' || isErr v] ++ [err | err <- b', isErr err] -eval ctx (ExprExcept a b) = let b' = eval ctx b in - [v | v <- eval ctx a, v `notElem` b' || isErr v] ++ [err | err <- b', isErr err] - -eval ctx (ExprSum exps) = let (errs, xs) = partitionEithers $ map (num . eval ctx) exps in - if null errs then [Number $ sum xs] else concat errs -eval ctx (ExprMul a b) = withNum2 (eval ctx a) (eval ctx b) $ \x y -> [Number (x*y)] -eval ctx (ExprDiv a b) = withNum2 (eval ctx a) (eval ctx b) $ \x y -> [Number (x/y)] -eval ctx (ExprIDiv a b) = withNum2 (eval ctx a) (eval ctx b) $ - \x y -> [Number $ fromIntegral (floor x `div` floor y)] -eval ctx (ExprMod a b) = withNum2 (eval ctx a) (eval ctx b) $ - \x y -> [Number $ fromIntegral (floor x `rem` floor y)] -eval ctx (ExprNegate a) = case num $ eval ctx a of - Left errs -> errs - Right x -> [Number $ negate x] - -eval ctx (ExprStrConcat texts) = let (errs, texts') = partitionEithers $ map (text . eval ctx) texts - in if null errs then [String $ concat $ map concat texts'] else concat errs - -eval ctx (ExprGe' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' >= b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' >= b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprGt' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' > b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' > b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprLe' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' <= b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' <= b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprLt' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' < b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' < b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprNe' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' /= b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' /= b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprEq' a b) - | (Number a':_) <- atomsize $ eval ctx a, (Number b':_) <- atomsize $ eval ctx b = [Bool (a' == b')] - | Right a' <- num $ eval ctx a, Right b' <- num $ eval ctx b = [Bool (a' == b')] - | a' <- num $ eval ctx a, b' <- num $ eval ctx b = fromLeft [] a' ++ fromLeft [] b' -eval ctx (ExprGe a b) = withNums2 (eval ctx a) (eval ctx b) $ \a' b' -> [Bool (maximum a' >= minimum b')] -eval ctx (ExprGt a b) = withNums2 (eval ctx a) (eval ctx b) $ \a' b' -> [Bool (maximum a' > minimum b')] -eval ctx (ExprLe a b) = withNums2 (eval ctx a) (eval ctx b) $ \a' b' -> [Bool (minimum a' <= maximum b')] -eval ctx (ExprLt a b) = withNums2 (eval ctx a) (eval ctx b) $ \a' b' -> [Bool (minimum a' < maximum b')] -eval ctx (ExprNe a b) = let (a', b') = (atomsize $ eval ctx a, atomsize $ eval ctx b) - in if any isErr a' || any isErr b' then [err | err <- a', isErr err] ++ [err | err <- b', isErr err] - else [Bool $ any (`notElem` b') a'] -eval ctx (ExprEq a b) = let (a', b') = (atomsize $ eval ctx a, atomsize $ eval ctx b) - in if any isErr a' || any isErr b' then [err | err <- a', isErr err] ++ [err | err <- b', isErr err] - else [Bool $ any (`elem` b') a'] --- TODO node order comparisons. - -eval ctx (ExprAnd exps) = let (errs, bools) = partitionEithers $ map (bool . eval ctx) exps - in if null errs then [Bool $ and bools] else concat errs -eval ctx (ExprOr exps) = let (errs, bools) = partitionEithers $ map (bool . eval ctx) exps - in if null errs then [Bool $ or bools] else concat errs - -eval ctx (ExprInstanceOf a type_) = [Bool $ instanceOf' type_ $ eval ctx a] -eval ctx (ExprTreatAs a type_) | instanceOf' type_ $ eval ctx a = eval ctx a - | otherwise = [Error "err:XPDY0050" "Expression not of expected type" ("Expected " ++ show type_)] -eval ctx (ExprCastAs a type_) = cast' type_ $ eval ctx a -eval ctx (ExprCastableAs a type_) = let val = eval ctx a in if any isErr val - then [err | err <- val, isErr err] else [Bool $ not $ any isErr $ cast' type_ val] - -eval ctx (ExprFor name expr body) = concat $ map inner $ eval ctx expr - where inner subitem = eval ctx { vars = M.insert name [subitem] $ vars ctx } body -eval ctx (ExprLet name expr body) = eval ctx { vars = vars' } body - where vars' = M.insert name (eval ctx expr) $ vars ctx -eval ctx (ExprIf cond pass fail) = case bool $ evals ctx cond of - Left errs -> errs - Right cond' -> eval ctx $ if cond' then pass else fail -eval ctx (ExprQuantified variant tests) = - let (errs, res) = partitionEithers $ map (\x -> bool [x]) $ eval ctx tests - in if null errs then [Bool $ (case variant of {Some -> or; Every -> and }) res] - else concat errs - -eval ctx (ExprMap a b) = concatMap (\a' -> eval ctx { item = [a'] } b) $ eval ctx a -eval ctx (ExprArrow self func args) = - evalPostfix ctx { item = [retrieveArrow ctx func $ succ $ length args] } $ PostCall (self:args) -eval ctx (ExprPath (path:paths)) = eval ctx { item = evalPath ctx path } $ ExprPath paths -eval ctx (ExprPath []) = item ctx -eval _ _ = [] -- TODO - -retrieveArrow _ (ArrowName n) arity = FuncRef n arity -retrieveArrow ctx (ArrowVar n) _ | Just [ret] <- n `M.lookup` vars ctx = ret - | otherwise = Error "err:XPST0008" "Undefined name" $ show n -retrieveArrow ctx (ArrowExpr exps) _ | [ret] <- evals ctx exps = ret - | otherwise = Error "err:XPTY0004" "Too many (or no) items in collection identifying function to call" "" - -evalPrimary _ (Literal x) = [x] -evalPrimary ctx (VarRef n) | Just val <- n `M.lookup` vars ctx = val - | otherwise = [Error "err:XPST0008" "Undefined name" $ show n] -evalPrimary ctx (Expr exprs) = evals ctx exprs -evalPrimary ctx ContextItem | null $ item ctx = [Error "err:XPDY0002" "Absent context item" ""] - | otherwise = item ctx -evalPrimary ctx (FunctionCall n args) - | Just cb <- (n, length args) `M.lookup` funcs ctx = cb ctx $ map (eval ctx) args - | otherwise = [Error "err:XPST0017" "Undefined function" (show n ++ '#':show (length args))] --- NOTE: No recursion! Great! Not Turing Complete, hopefully! -evalPrimary ctx (FunctionItem args retty body) = [Function args retty body $ vars ctx] -evalPrimary ctx (MapConstruct entries) - | any isErr $ concat $ map (eval ctx . fst) entries = filter isErr $ concat $ map (eval ctx . fst) entries - | any isErr $ concat $ map (eval ctx . snd) entries = filter isErr $ concat $ map (eval ctx . snd) entries - | otherwise = [Map $ M.fromList $ map buildEntry entries] - where buildEntry (key, val) = (concat $ fromRight [] $ text $ eval ctx key, eval ctx val) -evalPrimary ctx (ArrayConstruct entries) - | any isErr $ concat $ map (eval ctx) entries = filter isErr $ concat $ map (eval ctx) entries - | otherwise = [Array $ map (eval ctx) entries] -evalPrimary ctx (UnaryLookup key) = evalPostfix ctx (PostLookup key) -- Both the same, both operating on `item ctx` - --- It's a little tricky to get errors thrown correctly this branch... -evalPostfix ctx (PostFilter cond) = concatMap inner $ zip [0..] $ item ctx - where - inner (i, item) = case evals ctx { item = [item] } cond of - [Number x] -> test (x == i) - v -> case bool v of - Left ret -> ret - Right ret -> test ret - where test cond = if cond then [item] else [] -evalPostfix ctx (PostCall _) | length (item ctx) /= 1 = [ - Error "err:XPTY0004" "Function calls require singleton values" (show (length $ item ctx) ++ " items")] - | not $ null [1 | Error _ _ _ <- item ctx] = [err | err@(Error _ _ _) <- item ctx] -evalPostfix ctx (PostCall args) | not $ null [1 | ArgPlaceholder <- args] = [PartialFunc args $ head $ item ctx] -evalPostfix ctx (PostCall args) = case head $ item ctx of - Array vals | [arg] <- args -> case nums $ eval ctx arg of - Left errs -> errs - Right [i] | i >= 1.0 && floor i <= length vals -> vals !! floor i - Right [i] -> [Error "err:FOAY0001" "Array index out of bounds" $ show i] - Right items -> [Error "err:XPTY0004" "Too many items in index" $ show $ length items] - Map m | [arg] <- args -> case text $ eval ctx arg of - Left errs -> errs - Right [key] -> fromMaybe [] $ M.lookup key m - Right items -> [Error "err:XPTY0004" "Too many items in key" $ show $ length items] - PartialFunc tpl val -> case expandPartial tpl args of - Left False -> [Error "err:XPTY0004" "Too few arguments to partial" $ show $ length args] - Left True -> [Error "err:XPTY0004" "Too many arguments to partial" $ show $ length args] - Right args' -> evalPostfix ctx { item = [val] } $ PostCall args' - FuncRef name arity | arity /= length args -> [ - Error "err:XPTY0004" "Wrong number of args to referenced function." (show arity ++ " not " ++ show (length args))] - FuncRef name arity | Just cb <- (name, arity) `M.lookup` funcs ctx -> cb ctx $ map (eval ctx) args - FuncRef name arity -> [Error "err:XPST0017" "Undfined function" (show name ++ '#':show arity)] - Function params retty body closure -> case assignArgs params args closure ctx of - Left False -> [Error "err:XPTY0004" "Too few arguments to dynamic function" $ show $ length args] - Left True -> [Error "err:XPTY0004" "Too many arguments to dynamic function" $ show $ length args] - Right vars -> cast' retty $ evals ctx { item = [], vars = vars } body - _ -> [Error "err:XPTY0004" "Invalid function type" ""] -evalPostfix ctx (PostLookup (KeyExpr exprs)) = case atomsize $ evals ctx exprs of - [key] -> evalPostfix ctx $ PostLookup $ KeyLiteral key - items -> [Error "err:XPTY0004" "Too many items in key" $ show $ length items] -evalPostfix ctx (PostLookup KeyWildcard) = case item ctx of - [Array vals] -> concat $ vals - [Map m] -> concat $ M.elems m - _ -> [Error "err:XPTY0004" "Invalid collection type or arity" ""] -evalPostfix ctx (PostLookup (KeyLiteral k)) = case item ctx of - [Array vals] -> case nums [k] of - Left errs -> errs - Right [i] | i >= 1.0 && floor i <= length vals -> vals !! floor i - Right [i] -> [Error "err:FOAY0001" "Array index out of bounds" $ show i] - Right items -> [Error "err:XPTY0004" "Too many items in index" $ show $ length items] - [Map m] -> case text [k] of - Left errs -> errs - Right [key] -> fromMaybe [] $ M.lookup key m - Right items -> [Error "err:XPTY0004" "Too many items in key" $ show $ length items] - _ -> [Error "err:XPTY0004" "Invalid collection type or arity" ""] -evalPostfix' ctx (op:ops) = evalPostfix' ctx { item = evalPostfix ctx op } ops -evalPostfix' ctx [] = item ctx - -evalPath ctx (PostExpr primary postfix) = evalPostfix' ctx { item = evalPrimary ctx primary } postfix -evalPath ctx (PathStep (Axis axis, test) filters) = - [n | Node n <- item ctx] >>= - axis >>= - check (case test of {Left type_ -> instanceOf type_ . Node; Right name -> hasName name . node }) >>= - return . Node >>= - filter' filters ctx -evalPath ctx (PathStep (Attribute, Right n) filters) = - [node n | Node n <- item ctx] >>= attribute' n >>= filter' filters ctx -evalPath ctx (PathStep (Attribute, Left (TypeAttribute (Just n) type_)) filters) = - [node n | Node n <- item ctx] >>= attribute' n >>= tryCast' type_ >>= filter' filters ctx -evalPath ctx (PathStep (Attribute, Left (TypeAttribute Nothing type_)) filters) = - [String $ Txt.unpack val | - Node n <- item ctx, - NodeElement (Element _ attrs _) <- [node n], - (_, val) <- M.toList attrs] >>= - tryCast' type_ >>= - filter' filters ctx -evalPath _ (PathStep (Attribute, Left _) _) = [] -evalPath _ (PathStep (Namespace, _) _) = [Error "err:XPST0010" "Namespace axis is unsupported" ""] - -filter' filters ctx v = - let (errs, pred) = partitionEithers $ map (bool . evals ctx {item = [v]}) filters - in [v | and pred] ++ concat errs - ----- Dynamic function call support -expandPartial (ArgPlaceholder:tpls) (arg:args) = (arg:) <$> expandPartial tpls args -expandPartial (ArgPlaceholder:_) [] = Left False -expandPartial (tpl:tpls) args = (tpl:) <$> expandPartial tpls args -expandPartial [] (_:_) = Left True -expandPartial [] [] = Right [] - -assignArgs ((name, type_):params) (arg:args) vars ctx = - let vars' = M.insert name (cast' type_ $ eval ctx arg) vars - in assignArgs params args vars' ctx -assignArgs (_:_) [] _ _ = Left False -assignArgs [] (_:_) _ _ = Left True -assignArgs [] [] vars _ = Right vars - -----Utils - -fromLeft _ (Left ret) = ret -fromLeft ret (Right _) = ret -fromRight _ (Right ret) = ret -fromRight ret (Left _) = ret - ---- standard library - -stdLib :: M.Map (Name, Int) (Context -> [[Value]] -> [Value]) -stdLib = M.fromList [ - (("fn:node-name", 1), fnNodeName), - (("fn:node-name", 0), fnNodeName), --- (("fn:nilled", 1), fnNilled), --- (("fn:nilled", 0), fnNilled), - (("fn:string", 1), fnString), - (("fn:string", 0), fnString), - (("fn:data", 1), fnData), - (("fn:data", 0), fnData), --- (("fn:base-uri", 1), fnBaseURI), --- (("fn:document-uri", 1), fnDocumentURI), - - (("fn:error", 0), fnError), - (("fn:error", 1), fnError), - (("fn:error", 2), fnError), - (("fn:error", 3), fnError), - - (("op:numeric-add", 2), fnNumericAdd), - (("op:numeric-subtract", 2), fnNumericSubtract), - (("op:numeric-multiply", 2), fnNumericMultiply), - (("op:numeric-divide", 2), fnNumericDivide), - (("op:numeric-integer-divide", 2), fnNumericIntegerDivide), - (("op:numeric-mod", 2), fnNumericMod), - (("op:numeric-unary-plus", 1), fnNumericUnaryPlus), - (("op:numeric-unary-minus", 1), fnNumericUnaryMinus), - - (("op:numeric-equal", 2), opNumericEqual), - (("op:numeric-less-than", 2), opNumericLessThan), - (("op:numeric-greater-than", 2), opNumericGreaterThan), - - (("fn:abs", 1), fnAbs), - (("fn:ceiling", 1), fnCeiling), - (("fn:floor", 1), fnFloor), - (("fn:round", 1), fnRound), - (("fn:round-half-to-even", 1), fnRoundToEven), - - (("fn:number", 1), fnNumericUnaryPlus), --- (("fn:format-integer", 2), fnFormatInteger), --- (("fn:format-integer", 3), fnFormatInteger), --- (("fn:format-number", 2), fnFormatNumber), --- (("fn:format-number", 3), fnFormatNumber), - - (("math:pi", 0), fnMathPi), - (("math:exp", 1), fnMathExp), - (("math:exp10", 1), fnMathExp10), - (("math:log", 1), fnMathLog), - (("math:log10", 1), fnMathLog10), - (("math:pow", 2), fnMathPow), - (("math:sqrt", 1), fnMathSqrt), - (("math:sin", 1), fnMathSin), - (("math:cos", 1), fnMathCos), - (("math:tan", 1), fnMathTan), - (("math:asin", 1), fnMathASin), - (("math:acos", 1), fnMathACos), - (("math:atan", 1), fnMathATan), - (("math:atan2", 2), fnMathATan2) - ] -typeError = [Error "fn:XPTY0004" "Invalid type signature for function" ""] - -fnNodeName _ [[Node c]] | NodeElement (Element name _ _) <- node c = [String $ show name] - | otherwise = typeError -fnNodeName ctx [] = fnNodeName ctx [item ctx] -fnNodeName _ _ = typeError -fnString _ [[Node c]] = [String $ Txt.unpack $ nodeText $ node c] -fnString ctx [] = fnString ctx [item ctx] -fnString _ _ = typeError -fnData _ [val] = atomsize val -fnData ctx [] = atomsize $ item ctx -fnData _ _ = typeError - -fnError _ [] = [Error "err:user" "" ""] -fnError _ [code'] | Right [code] <- text code' = - [Error (fromRight "err:user" $ parseQName $ Txt.pack code) "" ""] -fnError _ [code', msg'] | Right [code] <- text code', Right [msg] <- text msg' = - [Error (fromRight "err:user" $ parseQName $ Txt.pack code) msg ""] -fnError _ [code', msg', items] | Right [code] <- text code', Right [msg] <- text msg' = - [Error (fromRight "err:user" $ parseQName $ Txt.pack code) msg $ concat $ fromRight [] $ text items] -fnError _ _ = typeError - -fnNumericAdd _ [x', y'] | Right x <- num x', Right y <- num y' = [Number (x + y)] -fnNumericAdd _ _ = typeError -fnNumericSubtract _ [x', y'] | Right x <- num x', Right y <- num y' = [Number (x - y)] -fnNumericSubtract _ _ = typeError -fnNumericMultiply _ [x', y'] | Right x <- num x', Right y <- num y' = [Number (x * y)] -fnNumericMultiply _ _ = typeError -fnNumericDivide _ [x', y'] | Right x <- num x', Right y <- num y' = [Number (x / y)] -fnNumericDivide _ _ = typeError -fnNumericIntegerDivide _ [x', y'] | Right x <- num x', Right y <- num y' = [Number $ fromIntegral (floor x `div` floor y)] -fnNumericIntegerDivide _ _ = typeError -fnNumericMod _ [x', y'] | Right x <- num x', Right y <- num y' = [Number $ fromIntegral (floor x `rem` floor y)] -fnNumericMod _ _ = typeError -fnNumericUnaryPlus _ [x'] | Right x <- num x' = [Number x] -fnNumericUnaryPlus _ _ = typeError -fnNumericUnaryMinus _ [x'] | Right x <- num x' = [Number $ negate x] -fnNumericUnaryMinus _ _ = typeError - -opNumericEqual _ [x', y'] | Right x <- num x', Right y <- num y' = [Bool (x == y)] -opNumericEqual _ _ = typeError -opNumericLessThan _ [x', y'] | Right x <- num x', Right y <- num y' = [Bool (x < y)] -opNumericLessThan _ _ = typeError -opNumericGreaterThan _ [x', y'] | Right x <- num x', Right y <- num y' = [Bool (x > y)] -opNumericGreaterThan _ _ = typeError - -fnAbs _ [x'] | Right x <- num x' = [Number $ abs x] -fnAbs _ _ = typeError -fnCeiling _ [x'] | Right x <- num x' = [Number $ fromIntegral $ ceiling x] -fnCeiling _ _ = typeError -fnFloor _ [x'] | Right x <- num x' = [Number $ fromIntegral $ floor x] -fnFloor _ _ = typeError -fnRound _ [x'] | Right x <- num x' = [Number $ fromIntegral $ round x] -fnRound _ _ = typeError -fnRoundToEven _ [x'] | Right x <- num x' = [Number $ fromIntegral $ toEven $ round x] - where toEven y = if even y then y else succ y -fnRoundToEven _ _ = typeError - -fnMathPi _ [] = [Number pi] -fnMathPi _ _ = typeError -fnMathExp _ [x'] | Right x <- num x' = [Number $ exp x] -fnMathExp _ _ = typeError -fnMathExp10 _ [x'] | Right x <- num x' = [Number (10 ** x)] -fnMathExp10 _ _ = typeError -fnMathLog _ [x'] | Right x <- num x' = [Number $ log x] -fnMathLog _ _ = typeError -fnMathLog10 _ [x'] | Right x <- num x' = [Number $ logBase 10 x] -fnMathLog10 _ _ = typeError -fnMathPow _ [x', y'] | Right x <- num x', Right y <- num y' = [Number (x ** y)] -fnMathPow _ _ = typeError -fnMathSqrt _ [x'] | Right x <- num x' = [Number $ sqrt x] -fnMathSqrt _ _ = typeError -fnMathSin _ [x'] | Right x <- num x' = [Number $ sin x] -fnMathSin _ _ = typeError -fnMathCos _ [x'] | Right x <- num x' = [Number $ cos x] -fnMathCos _ _ = typeError -fnMathTan _ [x'] | Right x <- num x' = [Number $ tan x] -fnMathTan _ _ = typeError -fnMathASin _ [x'] | Right x <- num x' = [Number $ asin x] -fnMathASin _ _ = typeError -fnMathACos _ [x'] | Right x <- num x' = [Number $ acos x] -fnMathACos _ _ = typeError -fnMathATan _ [x'] | Right x <- num x' = [Number $ atan x] -fnMathATan _ _ = typeError -fnMathATan2 _ [y', x'] | Right x <- num x', Right y <- num y' = [Number $ atan2 y x] -fnMathATan2 _ _ = typeError - ----- Public API -runXPath' node sel = case parseXPath sel of - Right ast -> evals Context { - item = [Node node], - vars = M.empty, - funcs = stdLib - } ast - Left err -> [Error "err:syntax" err ""] -runXPath node sel = let ret = runXPath' node sel - in if any isErr ret then Left [msg | Error name msg _ <- ret] - else Right [n | Node n <- ret] diff --git a/src/Internal/Elements/XPathParse.hs b/src/Internal/Elements/XPathParse.hs deleted file mode 100644 index 99da36b..0000000 --- a/src/Internal/Elements/XPathParse.hs +++ /dev/null @@ -1,680 +0,0 @@ -{-# LANGUAGE OverloadedStrings #-} -module Internal.Elements.XPathParse(parseXPath, parseQName, - Quantifier(..), Quantity(..), MaybePair(..), XType(..), Value(..), val2type, isErr, - Expression(..), Arrow(..), PathComp(..), PrimaryExpr(..), PostExpr(..), - Key(..), Axis'(..), SomeEvery(..)) where - -import Data.Attoparsec.Text.Lazy -import Control.Applicative -import Data.Text (Text(..), pack) - -import Text.XML.Cursor -import Text.XML (Name(..), Node(..)) -import qualified Data.Map.Strict as M - -skipSpace' = do - skipSpace - option () $ do - char '(' - char ':' - manyTill anyChar $ string ":)" - skipSpace -pSpaced :: Parser a -> Parser a -pSpaced act = do - skipSpace' - ret <- act - skipSpace' - return ret --- Transliterated from grammar at https://www.w3.org/TR/xpath-3/#nt-bnf -parseXPath = parseOnly pExpr - -pParamList = do - head <- pParam - tail <- many' $ do - char ',' - pParam - return (head:tail) -pParam = do - skipSpace' - char '$' - name <- pEQName - skipSpace' - type_ <- option (Quantity TypeItem None Many) $ pTypeDeclaration - return (name, type_) -pFunctionBody = pEnclosedExpr -pEnclosedExpr = pSpaced $ do - char '{' - ret <- option [] $ pExpr - char '}' - return ret -pExpr :: Parser [Expression] -pExpr = do - head <- pExprSingle - tail <- many' $ do - char ',' - pExprSingle - return (head:tail) -pExprSingle = pSpaced (pForExpr <|> pLetExpr <|> pQuantifiedExpr <|> pIfExpr <|> pOrExpr) - -pForExpr = do - header <- pSimpleForClause - string "return" - body <- pExprSingle - return $ foldr (\(var, val) body' -> ExprFor var val body') body header -pSimpleForClause = do - string "for" - head <- pSimpleForBinding - tail <- many' $ do - char ',' - pSimpleForBinding - return (head:tail) -pSimpleForBinding = do - skipSpace' - char '$' - var <- pVarName - skipSpace' >> char 'i' >> char 'n' - val <- pExprSingle - return (var, val) - -pLetExpr = do - header <- pSimpleLetClause - string "return" - body <- pExprSingle - return $ foldr (\(var, val) body' -> ExprLet var val body') body header -pSimpleLetClause = do - string "let" - head <- pSimpleLetBinding - tail <- many' $ do - char ',' - pSimpleLetBinding - return (head:tail) -pSimpleLetBinding = do - skipSpace' - char '$' - var <- pVarName - skipSpace' >> char ':' >> char '=' - val <- pExprSingle - return (var, val) - -pQuantifiedExpr = do - quantifier <- (string "some" >> return Some) <|> (string "every" >> return Every) - head <- pSimpleForBinding - tail <- many' $ do - char ',' - pSimpleForBinding - string "satisfies" - body <- pExprSingle - return $ ExprQuantified quantifier $ - foldr (\(var, val) body' -> ExprFor var val body') body (head:tail) - -pIfExpr = do - char 'i' >> char 'f' - skipSpace' >> char '(' - test <- pExpr - char ')' >> skipSpace' - string "then" - pass <- pExprSingle - string "else" - fail <- pExprSingle - return $ ExprIf test pass fail - -pOrExpr = do - a <- pAndExpr - bs <- many' $ do - pSpaced (char 'o' >> char 'r') - pAndExpr - return $ if null bs then a else ExprOr (a:bs) -pAndExpr = do - a <- pComparisonExpr - bs <- many' $ do - pSpaced $ string "and" - pComparisonExpr - return $ if null bs then a else ExprAnd (a:bs) -pComparisonExpr = do - a <- pStringConcatExpr - option a $ do - skipSpace' - op <- pValueComp <|> pGeneralComp <|> pNodeComp -- TODO implement comparisons - skipSpace' - b <- pStringConcatExpr - return $ op a b -pStringConcatExpr = do - a <- pRangeExpr - bs <- many' $ do - pSpaced (char '|' >> char '|') - pRangeExpr - return $ if null bs then a else ExprStrConcat (a:bs) -pRangeExpr = do - a <- pAdditiveExpr - option a $ do - pSpaced (char 't' >> char 'o') - b <- pAdditiveExpr - return $ ExprRange a b -pAdditiveExpr = do - a <- pMultiplicativeExpr - bs <- many' $ do - op <- pSpaced ((char '-' >> return ExprNegate) <|> (char '+' >> return id)) - b <- pMultiplicativeExpr - return $ op b - return $ if null bs then a else ExprSum (a:bs) -pMultiplicativeExpr = do - a <- pUnionExpr - bs <- many' $ do - skipSpace' - op <- (char '*' >> return ExprMul) <|> (string "div" >> return ExprDiv) <|> - (string "idiv" >> return ExprIDiv) <|> (string "mode" >> return ExprMod) - skipSpace' - b <- pUnionExpr - return (`op` b) - return $ foldl (\a b -> b a) a bs -pUnionExpr = do - a <- pIntersectExceptExpr - bs <- many' $ do - pSpaced (string "union" <|> string "|") - pIntersectExceptExpr - return $ ExprUnion (a:bs) -pIntersectExceptExpr = do - a <- pInstanceOfExpr - bs <- many' $ do - op <- pSpaced ((string "intersect" >> return ExprIntersect) <|> - (string "except" >> return ExprExcept)) - b <- pInstanceOfExpr - return (`op` b) - return $ foldl (\a b -> b a) a bs -pInstanceOfExpr = do - a <- pTreatExpr - option a $ do - pSpaced (string "instance" >> skipSpace' >> char 'o' >> char 'f') - type_ <- pSequenceType - return $ ExprInstanceOf a type_ -pAs = pSpaced (char 'a' >> char 's') -pTreatExpr = do - a <- pCastableExpr - option a $ do - skipSpace' >> string "treat" >> pAs - type_ <- pSequenceType - return $ ExprTreatAs a type_ -pCastableExpr = do - a <- pCastExpr - option a $ do - skipSpace' >> string "castable" >> pAs - type_ <- pSingleType - return $ ExprCastableAs a type_ -pCastExpr = do - a <- pArrowExpr - option a $ do - skipSpace' >> string "cast" >> pAs - type_ <- pSingleType - return $ ExprCastAs a type_ -pArrowExpr = do - a <- pUnaryExpr - bs <- many' $ do - pSpaced (char '=' >> char '>') - spec <- pArrowFunctionSpecifier - args <- pArgumentList - return (spec, args) - return $ foldl (\a (spec, args) -> ExprArrow a spec args) a bs -pUnaryExpr = do - ops <- many' $ pSpaced (char '+' <|> char '-') - a <- pValueExpr - return $ if length (filter (== '-') ops) `rem` 2 == 1 then ExprNegate a else a -pValueExpr :: Parser Expression -pValueExpr = pSimpleMapExpr - -char' ch ret = char ch >> return ret -char'eq ch ret = char ch >> char '=' >> return ret -char'2 ch ch2 ret= char ch >> char ch2 >> return ret -pGeneralComp = char' '=' ExprEq <|> char'eq '!' ExprNe <|> char' '<' ExprLt <|> - char'eq '<' ExprLe <|> char' '>' ExprGt <|> char'eq '>' ExprGe -pValueComp = char'2 'e' 'q' ExprEq' <|> char'2 'n' 'e' ExprNe' <|> char'2 'l' 't' ExprLt' <|> - char'2 'l' 'e' ExprLe' <|> char'2 'g' 't' ExprGt' <|> char'2 'g' 'e' ExprGe' -pNodeComp = char'2 'i' 's' ExprIs <|> char'2 '<' '<' ExprPrecedes <|> char'2 '>' '>' ExprFollows - -pSimpleMapExpr = do - a <- pPathExpr - bs <- many' $ do - pSpaced $ char '!' - pPathExpr - return $ foldl ExprMap a bs -pathHead = PathStep (Axis root, Left $ TypeDocument $ TypeElement Nothing Nothing) [] -pPathExpr = do - skipSpace' - char '/' - tail <- (skipSpace' >> return []) <|> pRelativePathExpr - return $ ExprPath (pathHead:tail) - <|> do - skipSpace' - let head2 = PathStep (Axis $ orSelf descendant, Left TypeNode) [] - char '/' >> char '/' - tail <- pRelativePathExpr - return $ ExprPath (pathHead:head2:tail) - <|> (ExprPath <$> pRelativePathExpr) -pRelativePathExpr = do - a <- pStepExpr - bs <- many' $ do - char '/' - descend <- option False (char '/' >> return True) - b <- pStepExpr - return $ if descend then [PathStep (Axis $ orSelf descendant, Left TypeNode) [], b] else [b] - return (a:concat bs) -pStepExpr = pPostfixExpr <|> pAxisStep -pAxisStep = do - step <- pReverseStep <|> pForwardStep - preds <- pPredicateList - return $ PathStep step preds -pForwardStep = do - axis <- pForwardAxis - test <- pNodeTest - return (axis, test) - <|> pAbbrevForwardStep -string' text ret = string text >> return ret -pForwardAxis = pSpaced $ do - ret <- string' "child" (Axis child) <|> - string' "descendant" (Axis descendant) <|> - string' "attribute" Attribute <|> - string' "self" (Axis self) <|> - string' "descendant-or-self" (Axis $ orSelf $ descendant) <|> - string' "following-sibling" (Axis followingSibling) <|> - string' "following" (Axis following) <|> - string' "namespace" Namespace - skipSpace' >> char ':' >> char ':' - return ret -pAbbrevForwardStep = do - attrs <- pSpaced $ option (Axis child) (char '@' >> return Attribute) - test <- pNodeTest - return (attrs, test) -pReverseStep = do - axis <- pReverseAxis - test <- pNodeTest - return (axis, test) - <|> pAbbrevReverseStep -pReverseAxis = pSpaced $ do - ret <- string' "parent" (Axis parent) <|> - string' "ancestor" (Axis ancestor) <|> - string' "preceding-sibling" (Axis precedingSibling) <|> - string' "preceding" (Axis preceding) <|> - string' "ancestor-or-self" (Axis $ orSelf $ ancestor) - skipSpace' >> char ':' >> char ':' - return ret -pAbbrevReverseStep = do - pSpaced (char '.' >> char '.') - return (Axis parent, Left TypeNode) - -pNodeTest = (Left <$> pKindTest) <|> (Right <$> pNameTest) -pNameTest = pEQName' True - -pPostfixExpr = do - a <- pPrimaryExpr - bs <- many' ((PostFilter <$> pPredicate) <|> (PostCall <$> pArgumentList) <|> (PostLookup <$> pLookup)) - return $ PostExpr a bs -pArgumentList = pSpaced $ do - char '(' - head <- pArgument - tail <- many' $ do - char ',' - pArgument - char ')' - return (head:tail) -pPredicateList = many' pPredicate -pPredicate = pSpaced $ do - skipSpace' >> char '[' - ret <- pExpr - char ']' >> skipSpace' - return ret -pLookup = do - pSpaced $ char '?' - pKeySpecifier -pKeySpecifier = (KeyLiteral <$> String <$> show <$> pNCName) <|> (KeyLiteral <$> Number <$> fromIntegral <$> pIntegerLiteral) <|> - (KeyExpr <$> pParenthesizedExpr) <|> (pSpaced $ char '*' >> return KeyWildcard) - -pArrowFunctionSpecifier = ArrowName <$> pEQName <|> ArrowVar <$> pVarRef <|> ArrowExpr <$> pParenthesizedExpr -pPrimaryExpr = Literal <$> pLiteral <|> VarRef <$> pVarRef <|> Expr <$> pParenthesizedExpr - <|> pContextItemExpr <|> pFunctionCall <|> pFunctionItemExpr <|> UnaryLookup <$> pUnaryLookup <|> - MapConstruct <$> pMapConstructor <|> ArrayConstruct <$> pArrayConstructor -pLiteral = Number <$> pNumericLiteral <|> String <$> pStringLiteral -pNumericLiteral = double -- I think this matches... - -pVarRef = char '$' >> pVarName -pVarName = pEQName -pParenthesizedExpr = pSpaced $ do - char '(' - ret <- option [] pExpr - char ')' - return ret -pContextItemExpr = char '.' >> return ContextItem -pFunctionCall = do - callee <- pEQName - args <- pArgumentList - return $ FunctionCall callee args -pArgument = pExprSingle <|> pArgumentPlaceholder -pArgumentPlaceholder = char '?' >> return ArgPlaceholder -pFunctionItemExpr = pNamedFunctionRef <|> pInlineFunctionExpr -pNamedFunctionRef = do - name <- pEQName - pSpaced $ char '#' - index <- pIntegerLiteral - return $ Literal $ FuncRef name index -pInlineFunctionExpr = do - pSpaced $ string "function" - char '(' - params <- pParamList - char ')' - type_ <- option (Quantity TypeItem None Many) (pAs >> pSequenceType) - body <- pFunctionBody - return $ FunctionItem params type_ body -pMapConstructor = do - pSpaced $ string "map" - char '{' - head <- pMapConstructorEntry - tail <- many' $ do - char ',' - pMapConstructorEntry - char '}' - return (head:tail) -pMapConstructorEntry = do - key <- pExprSingle - char ':' - value <- pExprSingle - return (key, value) -pArrayConstructor = pSquareArrayConstructor <|> pCurlyArrayConstructor -pSquareArrayConstructor = pSpaced $ do - char '[' - ret <- option [] pExpr - char ']' - return ret -pCurlyArrayConstructor = do - skipSpace' >> string "array" - pEnclosedExpr -pUnaryLookup = do - skipSpace' >> char '?' - pKeySpecifier -pSingleType = do - type_ <- pSimpleTypeName - optional <- option False (char '?' >> skipSpace' >> return True) - return $ if optional then Quantity type_ None One else Quantity type_ One One - -pTypeDeclaration = pAs >> pSequenceType -pSequenceType :: Parser Quantity -pSequenceType = do - skipSpace' >> string "empty-sequence" >> pParens - return emptySequence - <|> do - type_ <- pItemType - (low, high) <- option (One, One) $ parseQuantity <$> pOccurrenceIndicator - return $ Quantity type_ low high -pOccurrenceIndicator = char '?' <|> char '*' <|> char '+' -pParens = pSpaced (char '(' >> skipSpace' >> char ')') >> return () -pItemType = pKindTest <|> do - skipSpace' >> string "item-type" - pParens - return TypeItem - <|> pFunctionTest <|> pMapTest <|> pArrayTest <|> pAtomicOrUnionType <|> pParenthesizedItemType -pAtomicOrUnionType = TypeAtomic <$> pEQName -pKindTest = pDocumentTest <|> pElementTest <|> pAttributeTest <|> pSchemaElementTest <|> - pSchemaAttributeTest <|> pPITest <|> pCommentTest <|> pTextTest <|> pNamespaceNodeTest <|> pAnyKindTest -pAnyKindTest = skipSpace' >> string "node" >> pParens >> return TypeNode -pDocumentTest = pSpaced $ do - string "document-node" - skipSpace' >> char '(' - arg <- pElementTest <|> pSchemaElementTest <|> return (TypeElement Nothing Nothing) -- FIXME What should the default be? - char ')' - return $ TypeDocument arg -pTextTest = skipSpace' >> string "text" >> pParens >> return TypeText -pCommentTest = skipSpace' >> string "comment" >> pParens >> return TypeComment -pNamespaceNodeTest = skipSpace' >> string "namespace-node" >> pParens >> return TypeNamespaceNode -pPITest = pSpaced $ do - string "processing-instruction" - skipSpace' >> char '(' - arg <- Just <$> pNCName <|> Just <$> pack <$> pStringLiteral <|> return Nothing - char ')' - return $ TypePI arg -pAttributeTest = pSpaced $ do - string "attribute" - skipSpace' >> char '(' - (attribName, type_) <- option (Nothing, Nothing) $ do - attribName <- pAttribNameOrWildcard - type_ <- option Nothing $ do - char ',' - Just <$> pTypeName - return (attribName, type_) - char ')' - return $ TypeAttribute attribName type_ -pAttribNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) -pAttributeName = pEQName -pSchemaAttributeTest = pSpaced $ do - string "schema-attribute" - skipSpace' >> char '(' - arg <- pAttributeDeclaration - char ')' - return $ TypeSchemaAttribute arg -pAttributeDeclaration = pAttributeName -pElementTest = pSpaced $ do - string "element" - pSpaced $ char '(' - (elName, type_) <- option (Nothing, Nothing) $ do - elName <- pElementNameOrWildcard - type_ <- option Nothing $ do - char ',' - Just <$> pTypeName - return (elName, type_) - char ')' >> skipSpace' - return $ TypeElement elName type_ -pElementNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) -pElementName = pEQName -pSchemaElementTest = pSpaced $ do - string "schema-element" - skipSpace' >> char '(' - ret <- pElementDeclaration - char ')' - return $ TypeSchemaElement ret -pElementDeclaration = pElementName -pSimpleTypeName = TypeAtomic <$> pTypeName -pTypeName = pEQName -pFunctionTest = pAnyFunctionTest <|> pTypedFunctionTest -pAnyFunctionTest = pSpaced $ do - string "function" - skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' - return $ TypeFunction Nothing' -pTypedFunctionTest = pSpaced $ do - string "function" - pSpaced $ char '(' - args <- option [] $ do - head <- pSequenceType - tail <- many' $ do - char ',' - pSequenceType - return (head:tail) - char ')' - ret <- option emptySequence $ do - pAs - pSequenceType - return $ TypeFunction $ Some' args ret -pMapTest = pAnyMapTest <|> pTypedMapTest -pAnyMapTest = pSpaced $ do - string "map" - skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' - return $ TypeMap Nothing' -pTypedMapTest = pSpaced $ do - string "map" - skipSpace' >> char '(' - keyType <- pAtomicOrUnionType - char ',' - valType <- pSequenceType - char ')' - return $ TypeMap $ Some' keyType valType -pArrayTest = pAnyArrayTest <|> pTypedArrayTest -pAnyArrayTest = pSpaced $ do - skipSpace' >> string "array" - skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' - return $ TypeArray Nothing -pTypedArrayTest = pSpaced $ do - string "array" >> skipSpace' - char '(' - type_ <- pSequenceType - char ')' - return $ TypeArray $ Just type_ -pParenthesizedItemType = pSpaced $ do - char '(' - ret <- pItemType - char ')' - return ret -pEQName = pEQName' False -pEQName' wildcard = pQName wildcard <|> pURIQualifiedName wildcard - ---- Terminals -pIntegerLiteral = read <$> pSpaced pDigits -pStringLiteral :: Parser String -pStringLiteral = pSpaced (pStringLiteral' '\'' <|> pStringLiteral' '"') -pStringLiteral' qt = do - char qt - ret <- many' (pEscapeQuote qt <|> notChar qt) - char qt - return ret -pBracedURILiteral = pSpaced $ do - char 'Q' - char '{' - ret <- takeWhile1 (`notElem` ['{', '}']) - char '}' - return ret -pEscapeQuote qt = char qt >> char qt -pDigits = many1 digit - - --- XML grammar rules referenced by XPath --- This is copied from XML Conduit rather than transliterated from the standard --- because Attoparsec can't recognize (but can preserve) Unicode characters -pNCName = pSpaced $ pNCName' False -pNCName' isWildcard = takeWhile1 valid "identifier" - where - valid '*' = isWildcard - valid '&' = False - valid '<' = False - valid '>' = False - valid ':' = False - valid '?' = False - valid '=' = False - valid '"' = False - valid '\'' = False - valid '/' = False - valid ';' = False - valid '#' = False - valid c = not $ isXMLSpace c - -isXMLSpace :: Char -> Bool -isXMLSpace ' ' = True -isXMLSpace '\t' = True -isXMLSpace '\r' = True -isXMLSpace '\n' = True -isXMLSpace _ = False - -parseQName :: Text -> Either String Name -parseQName = parseOnly $ pQName False -pQName isWildcard = pSpaced $ do - ns <- option Nothing $ do - ret <- pNCName' isWildcard - char ':' - return $ Just ret - name <- pNCName' isWildcard - return $ Name name Nothing ns -pURIQualifiedName isWildcard = pSpaced $ do - ns <- pBracedURILiteral - name <- pNCName' isWildcard - return $ Name name (Just ns) Nothing - ---- Type Modelling -data Quantifier = None | One | Many deriving (Eq, Show) -parseQuantity '?' = (None, One) -parseQuantity '*' = (None, Many) -parseQuantity '+' = (One, Many) - -data Quantity = Quantity XType Quantifier Quantifier deriving (Eq, Show) -emptySequence = Quantity TypeItem None None - -data MaybePair a b = Nothing' | Some' a b deriving (Eq, Show) - -data XType = TypeDocument XType | - TypeElement (Maybe Name) (Maybe Name) | - TypeArray (Maybe Quantity) | - TypeMap (MaybePair XType Quantity) | - TypeFunction (MaybePair [Quantity] Quantity) | - TypeSchemaElement Name | - TypeNode | - TypeNamespaceNode | - TypeText | - TypeComment | - TypePI (Maybe Text) | - TypeSchemaAttribute Name | - TypeAttribute (Maybe Name) (Maybe Name) | - TypeAtomic Name | - TypeItem deriving (Eq, Show) - ---- Abstract Syntax Tree -data Expression = ExprFor Name Expression Expression | - ArgPlaceholder | - ExprLet Name Expression Expression | - ExprQuantified SomeEvery Expression | - ExprIf [Expression] Expression Expression | - ExprOr [Expression] | - ExprAnd [Expression] | - ExprStrConcat [Expression] | - ExprRange Expression Expression | - ExprNegate Expression | - ExprSum [Expression] | - ExprFollows Expr Expr | ExprPrecedes Expr Expr | ExprIs Expr Expr | - ExprGe Expr Expr | ExprGt Expr Expr | ExprLe Expr Expr | ExprLt Expr Expr | ExprNe Expr Expr | ExprEq Expr Expr | - ExprGe' Expr Expr | ExprGt' Expr Expr | ExprLe' Expr Expr | ExprLt' Expr Expr | ExprNe' Expr Expr | ExprEq' Expr Expr | - ExprMod Expr Expr | ExprIDiv Expr Expr | ExprDiv Expr Expr | ExprMul Expr Expr | - ExprUnion [Expression] | ExprIntersect Expr Expr | ExprExcept Expr Expr | - ExprArrow Expression Arrow [Expression] | - ExprCastAs Expr Quantity | ExprCastableAs Expr Quantity | ExprTreatAs Expr Quantity | ExprInstanceOf Expr Quantity | - ExprMap Expression Expression | - ExprPath [PathComp] -type Expr = Expression - -data SomeEvery = Some | Every - -data Arrow = ArrowName Name | ArrowVar Name | ArrowExpr [Expression] - -data PathComp = PostExpr PrimaryExpr [PostExpr] | PathStep (Axis', Either XType Name) [[Expression]] - -data PrimaryExpr = Literal Value | VarRef Name | Expr [Expression] | ContextItem | - FunctionCall Name [Expression] | FunctionItem [(Name, Quantity)] Quantity [Expression] | - MapConstruct [(Expr, Expr)] | ArrayConstruct [Expression] | UnaryLookup Key - -data PostExpr = PostFilter [Expression] | PostCall [Expression] | PostLookup Key - -data Key = KeyExpr [Expression] | KeyWildcard | KeyLiteral Value - -data Axis' = Axis Axis | Attribute | Namespace - ---- Runtime - -data Value = Number Double | String String | Bool Bool | Node Cursor | Error Name String String | - Array [[Value]] | Map (M.Map String [Value]) | PartialFunc [Expression] Value | - FuncRef Name Int | Function [(Name, Quantity)] Quantity [Expression] (M.Map Name [Value]) - -instance Eq Value where - Number x == Number y = x == y - String x == String y = x == y - Bool x == Bool y = x == y - Node x == Node y = node x == node y - Error x0 x1 x2 == Error y0 y1 y2 = x0 == y0 && x1 == y1 && x2 == y2 - Array x == Array y = x == y - Map x == Map y = x == y - FuncRef x0 x1 == FuncRef y0 y1 = x0 == y0 && x1 == y1 - _ == _ = False - -val2type (Number _) = TypeAtomic "xsd:double" -val2type (String _) = TypeText -val2type (Bool _) = TypeAtomic "xsd:bool" -val2type (Node _) = TypeNode -val2type (Error _ _ _) = TypeAtomic "xsd:error" -val2type (Array _) = TypeArray Nothing -val2type (Map _) = TypeMap Nothing' -val2type (Function _ _ _ _) = TypeFunction Nothing' - -isErr (Error _ _ _) = True -isErr _ = False - -self cursor = [cursor] -root = ancestor >=> check (null . ancestor) diff --git a/src/UI/Search.hs b/src/UI/Search.hs index 605c2ea..d8c532c 100644 --- a/src/UI/Search.hs +++ b/src/UI/Search.hs @@ -28,7 +28,7 @@ engines = [ (const "CSS", queryEls "css selector"), (flip l LinkSearchExact, queryEls "link text"), -- Only useful if it promotes results higher. (flip l LinkSearch, queryEls "partial link text"), - (const "XPath", queryEls "xpath") -- FIXME parser infinite loops... + (const "XPath", queryEls "xpath") ] result href' = a ! href (stringValue href') ! target "preview" -- 2.30.2