{-# LANGUAGE OverloadedStrings #-} module Internal.Elements.XPath(parseXPath) where import Data.Attoparsec.Text.Lazy import Control.Applicative import Data.Text (Text(..), pack) import Text.XML.Cursor import Text.XML (Name(..), Node(..)) import qualified Data.Map.Strict as M pSpaced :: Parser a -> Parser a pSpaced act = do skipSpace ret <- act skipSpace return ret -- Transliterated from grammar at https://www.w3.org/TR/xpath-3/#nt-bnf parseXPath = parse pExpr pParamList = do head <- pParam tail <- many' $ do char ',' pParam return (head:tail) pParam = do skipSpace char '$' name <- pEQName skipSpace type_ <- option Nothing $ Just <$> pTypeDeclaration return (name, type_) pFunctionBody = pEnclosedExpr pEnclosedExpr = pSpaced $ do char '{' ret <- option [] $ pExpr char '}' return ret pExpr :: Parser [Expression] pExpr = do head <- pExprSingle tail <- many' $ do char ',' pExprSingle return (head:tail) pExprSingle = pSpaced (pForExpr <|> pLetExpr <|> pQuantifiedExpr <|> pIfExpr <|> pOrExpr) pForExpr = do header <- pSimpleForClause string "return" body <- pExprSingle return $ ExprFor header body pSimpleForClause = do string "for" head <- pSimpleForBinding tail <- many' $ do char ',' pSimpleForBinding return (head:tail) pSimpleForBinding = do skipSpace char '$' var <- pVarName skipSpace >> char 'i' >> char 'n' val <- pExprSingle return (var, val) pLetExpr = do header <- pSimpleLetClause string "return" body <- pExprSingle return $ ExprLet header body pSimpleLetClause = do string "let" head <- pSimpleLetBinding tail <- many' $ do char ',' pSimpleLetBinding return (head:tail) pSimpleLetBinding = do skipSpace char '$' var <- pVarName skipSpace >> char ':' >> char '=' val <- pExprSingle return (var, val) pQuantifiedExpr = do quantifier <- (string "some" >> return ExprSome) <|> (string "every" >> return ExprEvery) head <- pSimpleForBinding tail <- many' $ do char ',' pSimpleForBinding string "satisfies" body <- pExprSingle return $ quantifier (head:tail) body pIfExpr = do char 'i' >> char 'f' skipSpace >> char '(' test <- pExpr char ')' >> skipSpace string "then" pass <- pExprSingle string "else" fail <- pExprSingle return $ ExprIf test pass fail pOrExpr = do a <- pAndExpr bs <- many' $ do pSpaced (char 'o' >> char 'r') pAndExpr return $ if null bs then a else ExprOr (a:bs) pAndExpr = do a <- pComparisonExpr bs <- many' $ do pSpaced $ string "and" pComparisonExpr return $ if null bs then a else ExprAnd (a:bs) pComparisonExpr = do a <- pStringConcatExpr option a $ do skipSpace op <- pValueComp <|> pGeneralComp <|> pNodeComp -- TODO implement comparisons skipSpace b <- pStringConcatExpr return $ op a b pStringConcatExpr = do a <- pRangeExpr bs <- many' $ do pSpaced (char '|' >> char '|') pRangeExpr return $ if null bs then a else ExprStrConcat (a:bs) pRangeExpr = do a <- pAdditiveExpr option a $ do pSpaced (char 't' >> char 'o') b <- pAdditiveExpr return $ ExprRange a b pAdditiveExpr = do a <- pAdditiveExpr bs <- many' $ do op <- pSpaced ((char '-' >> return ExprNegate) <|> (char '+' >> return id)) b <- pAdditiveExpr return $ op b return $ if null bs then a else ExprSum (a:bs) pMultiplicativeExpr = do a <- pUnionExpr bs <- many' $ do skipSpace op <- (char '*' >> return ExprMul) <|> (string "div" >> return ExprDiv) <|> (string "idiv" >> return ExprIDiv) <|> (string "mode" >> return ExprMode) skipSpace b <- pUnionExpr return (`op` b) return $ foldl (\a b -> b a) a bs pUnionExpr = do a <- pIntersectExceptExpr bs <- many' $ do pSpaced (string "union" <|> string "|") pIntersectExceptExpr return $ ExprUnion (a:bs) pIntersectExceptExpr = do a <- pInstanceOfExpr bs <- many' $ do op <- pSpaced ((string "intersect" >> return ExprIntersect) <|> (string "except" >> return ExprExcept)) b <- pInstanceOfExpr return (`op` b) return $ foldl (\a b -> b a) a bs pInstanceOfExpr = do a <- pTreatExpr option a $ do pSpaced (string "instance" >> skipSpace >> char 'o' >> char 'f') type_ <- pSequenceType return $ ExprInstanceOf a type_ pAs = pSpaced (char 'a' >> char 's') pTreatExpr = do a <- pCastableExpr option a $ do skipSpace >> string "treat" >> pAs type_ <- pSequenceType return $ ExprTreatAs a type_ pCastableExpr = do a <- pCastExpr option a $ do skipSpace >> string "castable" >> pAs type_ <- pSingleType return $ ExprCastableAs a type_ pCastExpr = do a <- pArrowExpr option a $ do skipSpace >> string "cast" >> pAs type_ <- pSingleType return $ ExprCastAs a type_ pArrowExpr = do a <- pUnaryExpr bs <- many' $ do pSpaced (char '=' >> char '>') spec <- pArrowFunctionSpecifier args <- pArgumentList return (spec, args) return $ foldl (\a (spec, args) -> ExprArrow a spec args) a bs pUnaryExpr = do ops <- many' $ pSpaced (char '+' <|> char '-') a <- pValueExpr return $ if length (filter (== '-') ops) `rem` 2 == 1 then ExprNegate a else a pValueExpr :: Parser Expression pValueExpr = pSimpleMapExpr char' ch ret = char ch >> return ret char'eq ch ret = char ch >> char '=' >> return ret char'2 ch ch2 ret= char ch >> char ch2 >> return ret pGeneralComp = char' '=' ExprEq <|> char'eq '!' ExprNe <|> char' '<' ExprLt <|> char'eq '<' ExprLe <|> char' '>' ExprGt <|> char'eq '>' ExprGe pValueComp = char'2 'e' 'q' ExprEq' <|> char'2 'n' 'e' ExprNe' <|> char'2 'l' 't' ExprLt' <|> char'2 'l' 'e' ExprLe' <|> char'2 'g' 't' ExprGt' <|> char'2 'g' 'e' ExprGe' pNodeComp = char'2 'i' 's' ExprIs <|> char'2 '<' '<' ExprPrecedes <|> char'2 '>' '>' ExprFollows pSimpleMapExpr = do a <- pPathExpr bs <- many' $ do pSpaced $ char '!' pPathExpr return $ foldl ExprMap a bs pPathExpr = do skipSpace char '/' -- let head = ... tail <- (skipSpace >> return []) <|> pRelativePathExpr return $ ExprPath tail <|> do skipSpace -- let head = ... char '/' >> char '/' tail <- pRelativePathExpr return $ ExprPath tail <|> (ExprPath <$> pRelativePathExpr) pRelativePathExpr = do a <- pStepExpr bs <- many' $ do char '/' descend <- option False (char '/' >> return True) b <- pStepExpr return [b] -- if descend then [(orSelf descendant, PathType Type Node), b] else [b] return (a:concat bs) pStepExpr = pPostfixExpr <|> pAxisStep pAxisStep = do step <- pReverseStep <|> pForwardStep preds <- pPredicateList return $ PathStep step preds pForwardStep = do axis <- pForwardAxis test <- pNodeTest return (axis, test) <|> pAbbrevForwardStep string' text ret = string text >> return ret pForwardAxis = pSpaced $ do ret <- string' "child" (Axis child) <|> string' "descendant" (Axis descendant) <|> string' "attribute" Attribute <|> string' "self" (Axis self) <|> string' "descendant-or-self" (Axis $ orSelf $ descendant) <|> string' "following-sibling" (Axis followingSibling) <|> string' "following" (Axis following) <|> string' "namespace" Namespace skipSpace >> char ':' >> char ':' return ret pAbbrevForwardStep = do attrs <- pSpaced $ option (Axis child) (char '@' >> return Attribute) test <- pNodeTest return (attrs, test) pReverseStep = do axis <- pReverseAxis test <- pNodeTest return (axis, test) <|> pAbbrevReverseStep pReverseAxis = pSpaced $ do ret <- string' "parent" (Axis parent) <|> string' "ancestor" (Axis ancestor) <|> string' "preceding-sibling" (Axis precedingSibling) <|> string' "preceding" (Axis preceding) <|> string' "ancestor-or-self" (Axis $ orSelf $ ancestor) skipSpace >> char ':' >> char ':' return ret pAbbrevReverseStep = do pSpaced (char '.' >> char '.') return (Axis parent, Left TypeNode) pNodeTest = (Left <$> pKindTest) <|> (Right <$> pNameTest) pNameTest = pEQName' True pPostfixExpr = do a <- pPrimaryExpr bs <- many' ((PostFilter <$> pPredicate) <|> (PostCall <$> pArgumentList) <|> (PostLookup <$> pLookup)) return $ PostExpr a bs pArgumentList = pSpaced $ do char '(' head <- pArgument tail <- many' $ do char ',' pArgument char ')' return (head:tail) pPredicateList = many' pPredicate pPredicate = pSpaced $ do skipSpace >> char '[' ret <- pExpr char ']' >> skipSpace return ret pLookup = do pSpaced $ char '?' pKeySpecifier pKeySpecifier = (KeyName <$> pNCName) <|> (KeyInt <$> pIntegerLiteral) <|> (KeyExpr <$> pParenthesizedExpr) <|> (pSpaced $ char '*' >> return KeyWildcard) pArrowFunctionSpecifier = ArrowName <$> pEQName <|> ArrowVar <$> pVarRef <|> ArrowExpr <$> pParenthesizedExpr pPrimaryExpr = Literal <$> pLiteral <|> VarRef <$> pVarRef <|> Expr <$> pParenthesizedExpr <|> pContextItemExpr <|> pFunctionCall <|> pFunctionItemExpr <|> UnaryLookup <$> pUnaryLookup <|> MapConstruct <$> pMapConstructor <|> ArrayConstruct <$> pArrayConstructor pLiteral = Number <$> pNumericLiteral <|> String <$> pStringLiteral pNumericLiteral = double -- I think this matches... pVarRef = char '$' >> pVarName pVarName = pEQName pParenthesizedExpr = pSpaced $ do char '(' ret <- option [] pExpr char ')' return ret pContextItemExpr = char '.' >> return ContextItem pFunctionCall = do callee <- pEQName args <- pArgumentList return $ FunctionCall callee args pArgument = pExprSingle <|> pArgumentPlaceholder pArgumentPlaceholder = char '?' >> return ArgPlaceholder pFunctionItemExpr = do name <- pEQName pSpaced $ char '#' index <- pIntegerLiteral return $ FunctionItem name index pInlineFunctionExpr = do pSpaced $ string "function" char '(' params <- pParamList char ')' type_ <- option Nothing $ do pAs Just <$> pSequenceType pFunctionBody return () pMapConstructor = do pSpaced $ string "map" char '{' head <- pMapConstructorEntry tail <- many' $ do char ',' pMapConstructorEntry char '}' return (head:tail) pMapConstructorEntry = do key <- pExprSingle char ':' value <- pExprSingle return (key, value) pArrayConstructor = pSquareArrayConstructor <|> pCurlyArrayConstructor pSquareArrayConstructor = pSpaced $ do char '[' ret <- option [] pExpr char ']' return $ ExprArray ret pCurlyArrayConstructor = do skipSpace >> string "array" ExprArray <$> pEnclosedExpr pUnaryLookup = do skipSpace >> char '?' pKeySpecifier pSingleType = do type_ <- pSimpleTypeName optional <- option False (char '?' >> skipSpace >> return True) return $ if optional then Quantity type_ None One else Quantity type_ One One pTypeDeclaration = pAs >> pSequenceType pSequenceType :: Parser Quantity pSequenceType = do skipSpace >> string "empty-sequence" >> pParens return emptySequence <|> do type_ <- pItemType (low, high) <- option (One, One) $ parseQuantity <$> pOccurrenceIndicator return $ Quantity type_ low high pOccurrenceIndicator = char '?' <|> char '*' <|> char '+' pParens = pSpaced (char '(' >> skipSpace >> char ')') >> return () pItemType = pKindTest <|> do skipSpace >> string "item-type" pParens return TypeItem <|> pFunctionTest <|> pMapTest <|> pArrayTest <|> pAtomicOrUnionType <|> pParenthesizedItemType pAtomicOrUnionType = TypeAtomic <$> pEQName pKindTest = pDocumentTest <|> pElementTest <|> pAttributeTest <|> pSchemaElementTest <|> pSchemaAttributeTest <|> pPITest <|> pCommentTest <|> pTextTest <|> pNamespaceNodeTest <|> pAnyKindTest pAnyKindTest = skipSpace >> string "node" >> pParens >> return TypeNode pDocumentTest = pSpaced $ do string "document-node" skipSpace >> char '(' arg <- pElementTest <|> pSchemaElementTest <|> return (TypeElement Nothing Nothing) -- FIXME What should the default be? char ')' return $ TypeDocument arg pTextTest = skipSpace >> string "text" >> pParens >> return TypeText pCommentTest = skipSpace >> string "comment" >> pParens >> return TypeComment pNamespaceNodeTest = skipSpace >> string "namespace-node" >> pParens >> return TypeNamespaceNode pPITest = pSpaced $ do string "processing-instruction" skipSpace >> char '(' arg <- Just <$> pNCName <|> Just <$> pack <$> pStringLiteral <|> return Nothing char ')' return $ TypePI arg pAttributeTest = pSpaced $ do string "attribute" skipSpace >> char '(' (attribName, type_) <- option (Nothing, Nothing) $ do attribName <- pAttribNameOrWildcard type_ <- option Nothing $ do char ',' Just <$> pTypeName return (attribName, type_) char ')' return $ TypeAttribute attribName type_ pAttribNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) pAttributeName = pEQName pSchemaAttributeTest = pSpaced $ do string "schema-attribute" skipSpace >> char '(' arg <- pAttributeDeclaration char ')' return $ TypeSchemaAttribute arg pAttributeDeclaration = pAttributeName pElementTest = pSpaced $ do string "element" pSpaced $ char '(' (elName, type_) <- option (Nothing, Nothing) $ do elName <- pElementNameOrWildcard type_ <- option Nothing $ do char ',' Just <$> pTypeName return (elName, type_) char ')' >> skipSpace return $ TypeElement elName type_ pElementNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) pElementName = pEQName pSchemaElementTest = pSpaced $ do string "schema-element" skipSpace >> char '(' ret <- pElementDeclaration char ')' return $ TypeSchemaElement ret pElementDeclaration = pElementName pSimpleTypeName = TypeAtomic <$> pTypeName pTypeName = pEQName pFunctionTest = pAnyFunctionTest <|> pTypedFunctionTest pAnyFunctionTest = pSpaced $ do string "function" skipSpace >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeFunction Nothing' pTypedFunctionTest = pSpaced $ do string "function" pSpaced $ char '(' args <- option [] $ do head <- pSequenceType tail <- many' $ do char ',' pSequenceType return (head:tail) char ')' ret <- option emptySequence $ do pAs pSequenceType return $ TypeFunction $ Some' args ret pMapTest = pAnyMapTest <|> pTypedMapTest pAnyMapTest = pSpaced $ do string "map" skipSpace >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeMap Nothing' pTypedMapTest = pSpaced $ do string "map" skipSpace >> char '(' keyType <- pAtomicOrUnionType char ',' valType <- pSequenceType char ')' return $ TypeMap $ Some' keyType valType pArrayTest = pAnyArrayTest <|> pTypedArrayTest pAnyArrayTest = pSpaced $ do skipSpace >> string "array" skipSpace >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeArray Nothing pTypedArrayTest = pSpaced $ do string "array" >> skipSpace char '(' type_ <- pSequenceType char ')' return $ TypeArray $ Just type_ pParenthesizedItemType = pSpaced $ do char '(' ret <- pItemType char ')' return ret pEQName = pEQName' False pEQName' wildcard = pQName wildcard <|> pURIQualifiedName wildcard --- Terminals pIntegerLiteral = read <$> pSpaced pDigits pStringLiteral :: Parser String pStringLiteral = pSpaced (pStringLiteral' '\'' <|> pStringLiteral' '"') pStringLiteral' qt = do char qt ret <- many' (pEscapeQuote qt <|> notChar qt) char qt return ret pBracedURILiteral = pSpaced $ do char 'Q' char '{' ret <- takeWhile1 (`notElem` ['{', '}']) char '}' return ret pEscapeQuote qt = char qt >> char qt pDigits = many1 digit -- XML grammar rules referenced by XPath -- This is copied from XML Conduit rather than transliterated from the standard -- because Attoparsec can't recognize (but can preserve) Unicode characters pNCName = pSpaced $ pNCName' False pNCName' isWildcard = takeWhile1 valid "identifier" where valid '*' = isWildcard valid '&' = False valid '<' = False valid '>' = False valid ':' = False valid '?' = False valid '=' = False valid '"' = False valid '\'' = False valid '/' = False valid ';' = False valid '#' = False valid c = not $ isXMLSpace c isXMLSpace :: Char -> Bool isXMLSpace ' ' = True isXMLSpace '\t' = True isXMLSpace '\r' = True isXMLSpace '\n' = True isXMLSpace _ = False pQName isWildcard = pSpaced $ do ns <- option Nothing $ do ret <- pNCName' isWildcard char ':' return $ Just ret name <- pNCName' isWildcard return $ Name name Nothing ns pURIQualifiedName isWildcard = pSpaced $ do ns <- pBracedURILiteral name <- pNCName' isWildcard return $ Name name (Just ns) Nothing --- Type Modelling data Quantifier = None | One | Many parseQuantity '?' = (None, One) parseQuantity '*' = (None, Many) parseQuantity '+' = (One, Many) data Quantity = Quantity XType Quantifier Quantifier emptySequence = Quantity TypeItem None None data MaybePair a b = Nothing' | Some' a b data XType = TypeDocument XType | TypeElement (Maybe Name) (Maybe Name) | TypeArray (Maybe Quantity) | TypeMap (MaybePair XType Quantity) | TypeFunction (MaybePair [Quantity] Quantity) | TypeSchemaElement Name | TypeNode | TypeNamespaceNode | TypeText | TypeComment | TypePI (Maybe Text) | TypeSchemaAttribute Name | TypeAttribute (Maybe Name) (Maybe Name) | TypeAtomic Name | TypeItem --- Abstract Syntax Tree data Expression = ExprFor [(Name, Expression)] Expression | ArgPlaceholder | ExprLet [(Name, Expression)] Expression | ExprSome [(Name, Expression)] Expression | ExprEvery [(Name, Expression)] Expression | ExprIf [Expression] Expression Expression | ExprOr [Expression] | ExprAnd [Expression] | ExprStrConcat [Expression] | ExprRange Expression Expression | ExprNegate Expression | ExprSum [Expression] | ExprArray [Expression] | ExprFollows Expr Expr | ExprPrecedes Expr Expr | ExprIs Expr Expr | ExprGe Expr Expr | ExprGt Expr Expr | ExprLe Expr Expr | ExprLt Expr Expr | ExprNe Expr Expr | ExprEq Expr Expr | ExprGe' Expr Expr | ExprGt' Expr Expr | ExprLe' Expr Expr | ExprLt' Expr Expr | ExprNe' Expr Expr | ExprEq' Expr Expr | ExprMode Expr Expr | ExprIDiv Expr Expr | ExprDiv Expr Expr | ExprMul Expr Expr | ExprUnion [Expression] | ExprIntersect Expr Expr | ExprExcept Expr Expr | ExprArrow Expression Arrow [Expression] | ExprCastAs Expr Quantity | ExprCastableAs Expr Quantity | ExprTreatAs Expr Quantity | ExprInstanceOf Expr Quantity | ExprMap Expression Expression | ExprPath [PathComp] type Expr = Expression data Arrow = ArrowName Name | ArrowVar Name | ArrowExpr [Expression] data PathComp = PostExpr PrimaryExpr [PostExpr] | PathStep (Axis', Either XType Name) [[Expression]] data PrimaryExpr = Literal Value | VarRef Name | Expr [Expression] | ContextItem | FunctionCall Name [Expression] | FunctionItem Name Int | MapConstruct [(Expr, Expr)] | ArrayConstruct Expression | UnaryLookup Key data PostExpr = PostFilter [Expression] | PostCall [Expression] | PostLookup Key data Key = KeyName Text | KeyInt Int | KeyExpr [Expression] | KeyWildcard data Axis' = Axis Axis | Attribute | Namespace --- Runtime data Value = Number Double | String String | Nodes [Node] | Array [Value] -- | ... self cursor = [cursor]