{-# LANGUAGE OverloadedStrings #-} module Internal.Elements.XPathParse(parseXPath, parseQName, Quantifier(..), Quantity(..), MaybePair(..), XType(..), Value(..), val2type, isErr, Expression(..), Arrow(..), PathComp(..), PrimaryExpr(..), PostExpr(..), Key(..), Axis'(..), SomeEvery(..)) where import Data.Attoparsec.Text.Lazy import Control.Applicative import Data.Text (Text(..), pack) import Text.XML.Cursor import Text.XML (Name(..), Node(..)) import qualified Data.Map.Strict as M skipSpace' = do skipSpace option () $ do char '(' char ':' manyTill anyChar $ string ":)" skipSpace pSpaced :: Parser a -> Parser a pSpaced act = do skipSpace' ret <- act skipSpace' return ret -- Transliterated from grammar at https://www.w3.org/TR/xpath-3/#nt-bnf parseXPath = parseOnly pExpr pParamList = do head <- pParam tail <- many' $ do char ',' pParam return (head:tail) pParam = do skipSpace' char '$' name <- pEQName skipSpace' type_ <- option (Quantity TypeItem None Many) $ pTypeDeclaration return (name, type_) pFunctionBody = pEnclosedExpr pEnclosedExpr = pSpaced $ do char '{' ret <- option [] $ pExpr char '}' return ret pExpr :: Parser [Expression] pExpr = do head <- pExprSingle tail <- many' $ do char ',' pExprSingle return (head:tail) pExprSingle = pSpaced (pForExpr <|> pLetExpr <|> pQuantifiedExpr <|> pIfExpr <|> pOrExpr) pForExpr = do header <- pSimpleForClause string "return" body <- pExprSingle return $ foldr (\(var, val) body' -> ExprFor var val body') body header pSimpleForClause = do string "for" head <- pSimpleForBinding tail <- many' $ do char ',' pSimpleForBinding return (head:tail) pSimpleForBinding = do skipSpace' char '$' var <- pVarName skipSpace' >> char 'i' >> char 'n' val <- pExprSingle return (var, val) pLetExpr = do header <- pSimpleLetClause string "return" body <- pExprSingle return $ foldr (\(var, val) body' -> ExprLet var val body') body header pSimpleLetClause = do string "let" head <- pSimpleLetBinding tail <- many' $ do char ',' pSimpleLetBinding return (head:tail) pSimpleLetBinding = do skipSpace' char '$' var <- pVarName skipSpace' >> char ':' >> char '=' val <- pExprSingle return (var, val) pQuantifiedExpr = do quantifier <- (string "some" >> return Some) <|> (string "every" >> return Every) head <- pSimpleForBinding tail <- many' $ do char ',' pSimpleForBinding string "satisfies" body <- pExprSingle return $ ExprQuantified quantifier $ foldr (\(var, val) body' -> ExprFor var val body') body (head:tail) pIfExpr = do char 'i' >> char 'f' skipSpace' >> char '(' test <- pExpr char ')' >> skipSpace' string "then" pass <- pExprSingle string "else" fail <- pExprSingle return $ ExprIf test pass fail pOrExpr = do a <- pAndExpr bs <- many' $ do pSpaced (char 'o' >> char 'r') pAndExpr return $ if null bs then a else ExprOr (a:bs) pAndExpr = do a <- pComparisonExpr bs <- many' $ do pSpaced $ string "and" pComparisonExpr return $ if null bs then a else ExprAnd (a:bs) pComparisonExpr = do a <- pStringConcatExpr option a $ do skipSpace' op <- pValueComp <|> pGeneralComp <|> pNodeComp -- TODO implement comparisons skipSpace' b <- pStringConcatExpr return $ op a b pStringConcatExpr = do a <- pRangeExpr bs <- many' $ do pSpaced (char '|' >> char '|') pRangeExpr return $ if null bs then a else ExprStrConcat (a:bs) pRangeExpr = do a <- pAdditiveExpr option a $ do pSpaced (char 't' >> char 'o') b <- pAdditiveExpr return $ ExprRange a b pAdditiveExpr = do a <- pMultiplicativeExpr bs <- many' $ do op <- pSpaced ((char '-' >> return ExprNegate) <|> (char '+' >> return id)) b <- pMultiplicativeExpr return $ op b return $ if null bs then a else ExprSum (a:bs) pMultiplicativeExpr = do a <- pUnionExpr bs <- many' $ do skipSpace' op <- (char '*' >> return ExprMul) <|> (string "div" >> return ExprDiv) <|> (string "idiv" >> return ExprIDiv) <|> (string "mode" >> return ExprMod) skipSpace' b <- pUnionExpr return (`op` b) return $ foldl (\a b -> b a) a bs pUnionExpr = do a <- pIntersectExceptExpr bs <- many' $ do pSpaced (string "union" <|> string "|") pIntersectExceptExpr return $ ExprUnion (a:bs) pIntersectExceptExpr = do a <- pInstanceOfExpr bs <- many' $ do op <- pSpaced ((string "intersect" >> return ExprIntersect) <|> (string "except" >> return ExprExcept)) b <- pInstanceOfExpr return (`op` b) return $ foldl (\a b -> b a) a bs pInstanceOfExpr = do a <- pTreatExpr option a $ do pSpaced (string "instance" >> skipSpace' >> char 'o' >> char 'f') type_ <- pSequenceType return $ ExprInstanceOf a type_ pAs = pSpaced (char 'a' >> char 's') pTreatExpr = do a <- pCastableExpr option a $ do skipSpace' >> string "treat" >> pAs type_ <- pSequenceType return $ ExprTreatAs a type_ pCastableExpr = do a <- pCastExpr option a $ do skipSpace' >> string "castable" >> pAs type_ <- pSingleType return $ ExprCastableAs a type_ pCastExpr = do a <- pArrowExpr option a $ do skipSpace' >> string "cast" >> pAs type_ <- pSingleType return $ ExprCastAs a type_ pArrowExpr = do a <- pUnaryExpr bs <- many' $ do pSpaced (char '=' >> char '>') spec <- pArrowFunctionSpecifier args <- pArgumentList return (spec, args) return $ foldl (\a (spec, args) -> ExprArrow a spec args) a bs pUnaryExpr = do ops <- many' $ pSpaced (char '+' <|> char '-') a <- pValueExpr return $ if length (filter (== '-') ops) `rem` 2 == 1 then ExprNegate a else a pValueExpr :: Parser Expression pValueExpr = pSimpleMapExpr char' ch ret = char ch >> return ret char'eq ch ret = char ch >> char '=' >> return ret char'2 ch ch2 ret= char ch >> char ch2 >> return ret pGeneralComp = char' '=' ExprEq <|> char'eq '!' ExprNe <|> char' '<' ExprLt <|> char'eq '<' ExprLe <|> char' '>' ExprGt <|> char'eq '>' ExprGe pValueComp = char'2 'e' 'q' ExprEq' <|> char'2 'n' 'e' ExprNe' <|> char'2 'l' 't' ExprLt' <|> char'2 'l' 'e' ExprLe' <|> char'2 'g' 't' ExprGt' <|> char'2 'g' 'e' ExprGe' pNodeComp = char'2 'i' 's' ExprIs <|> char'2 '<' '<' ExprPrecedes <|> char'2 '>' '>' ExprFollows pSimpleMapExpr = do a <- pPathExpr bs <- many' $ do pSpaced $ char '!' pPathExpr return $ foldl ExprMap a bs pathHead = PathStep (Axis root, Left $ TypeDocument $ TypeElement Nothing Nothing) [] pPathExpr = do skipSpace' char '/' tail <- (skipSpace' >> return []) <|> pRelativePathExpr return $ ExprPath (pathHead:tail) <|> do skipSpace' let head2 = PathStep (Axis $ orSelf descendant, Left TypeNode) [] char '/' >> char '/' tail <- pRelativePathExpr return $ ExprPath (pathHead:head2:tail) <|> (ExprPath <$> pRelativePathExpr) pRelativePathExpr = do a <- pStepExpr bs <- many' $ do char '/' descend <- option False (char '/' >> return True) b <- pStepExpr return $ if descend then [PathStep (Axis $ orSelf descendant, Left TypeNode) [], b] else [b] return (a:concat bs) pStepExpr = pPostfixExpr <|> pAxisStep pAxisStep = do step <- pReverseStep <|> pForwardStep preds <- pPredicateList return $ PathStep step preds pForwardStep = do axis <- pForwardAxis test <- pNodeTest return (axis, test) <|> pAbbrevForwardStep string' text ret = string text >> return ret pForwardAxis = pSpaced $ do ret <- string' "child" (Axis child) <|> string' "descendant" (Axis descendant) <|> string' "attribute" Attribute <|> string' "self" (Axis self) <|> string' "descendant-or-self" (Axis $ orSelf $ descendant) <|> string' "following-sibling" (Axis followingSibling) <|> string' "following" (Axis following) <|> string' "namespace" Namespace skipSpace' >> char ':' >> char ':' return ret pAbbrevForwardStep = do attrs <- pSpaced $ option (Axis child) (char '@' >> return Attribute) test <- pNodeTest return (attrs, test) pReverseStep = do axis <- pReverseAxis test <- pNodeTest return (axis, test) <|> pAbbrevReverseStep pReverseAxis = pSpaced $ do ret <- string' "parent" (Axis parent) <|> string' "ancestor" (Axis ancestor) <|> string' "preceding-sibling" (Axis precedingSibling) <|> string' "preceding" (Axis preceding) <|> string' "ancestor-or-self" (Axis $ orSelf $ ancestor) skipSpace' >> char ':' >> char ':' return ret pAbbrevReverseStep = do pSpaced (char '.' >> char '.') return (Axis parent, Left TypeNode) pNodeTest = (Left <$> pKindTest) <|> (Right <$> pNameTest) pNameTest = pEQName' True pPostfixExpr = do a <- pPrimaryExpr bs <- many' ((PostFilter <$> pPredicate) <|> (PostCall <$> pArgumentList) <|> (PostLookup <$> pLookup)) return $ PostExpr a bs pArgumentList = pSpaced $ do char '(' head <- pArgument tail <- many' $ do char ',' pArgument char ')' return (head:tail) pPredicateList = many' pPredicate pPredicate = pSpaced $ do skipSpace' >> char '[' ret <- pExpr char ']' >> skipSpace' return ret pLookup = do pSpaced $ char '?' pKeySpecifier pKeySpecifier = (KeyLiteral <$> String <$> show <$> pNCName) <|> (KeyLiteral <$> Number <$> fromIntegral <$> pIntegerLiteral) <|> (KeyExpr <$> pParenthesizedExpr) <|> (pSpaced $ char '*' >> return KeyWildcard) pArrowFunctionSpecifier = ArrowName <$> pEQName <|> ArrowVar <$> pVarRef <|> ArrowExpr <$> pParenthesizedExpr pPrimaryExpr = Literal <$> pLiteral <|> VarRef <$> pVarRef <|> Expr <$> pParenthesizedExpr <|> pContextItemExpr <|> pFunctionCall <|> pFunctionItemExpr <|> UnaryLookup <$> pUnaryLookup <|> MapConstruct <$> pMapConstructor <|> ArrayConstruct <$> pArrayConstructor pLiteral = Number <$> pNumericLiteral <|> String <$> pStringLiteral pNumericLiteral = double -- I think this matches... pVarRef = char '$' >> pVarName pVarName = pEQName pParenthesizedExpr = pSpaced $ do char '(' ret <- option [] pExpr char ')' return ret pContextItemExpr = char '.' >> return ContextItem pFunctionCall = do callee <- pEQName args <- pArgumentList return $ FunctionCall callee args pArgument = pExprSingle <|> pArgumentPlaceholder pArgumentPlaceholder = char '?' >> return ArgPlaceholder pFunctionItemExpr = pNamedFunctionRef <|> pInlineFunctionExpr pNamedFunctionRef = do name <- pEQName pSpaced $ char '#' index <- pIntegerLiteral return $ Literal $ FuncRef name index pInlineFunctionExpr = do pSpaced $ string "function" char '(' params <- pParamList char ')' type_ <- option (Quantity TypeItem None Many) (pAs >> pSequenceType) body <- pFunctionBody return $ FunctionItem params type_ body pMapConstructor = do pSpaced $ string "map" char '{' head <- pMapConstructorEntry tail <- many' $ do char ',' pMapConstructorEntry char '}' return (head:tail) pMapConstructorEntry = do key <- pExprSingle char ':' value <- pExprSingle return (key, value) pArrayConstructor = pSquareArrayConstructor <|> pCurlyArrayConstructor pSquareArrayConstructor = pSpaced $ do char '[' ret <- option [] pExpr char ']' return ret pCurlyArrayConstructor = do skipSpace' >> string "array" pEnclosedExpr pUnaryLookup = do skipSpace' >> char '?' pKeySpecifier pSingleType = do type_ <- pSimpleTypeName optional <- option False (char '?' >> skipSpace' >> return True) return $ if optional then Quantity type_ None One else Quantity type_ One One pTypeDeclaration = pAs >> pSequenceType pSequenceType :: Parser Quantity pSequenceType = do skipSpace' >> string "empty-sequence" >> pParens return emptySequence <|> do type_ <- pItemType (low, high) <- option (One, One) $ parseQuantity <$> pOccurrenceIndicator return $ Quantity type_ low high pOccurrenceIndicator = char '?' <|> char '*' <|> char '+' pParens = pSpaced (char '(' >> skipSpace' >> char ')') >> return () pItemType = pKindTest <|> do skipSpace' >> string "item-type" pParens return TypeItem <|> pFunctionTest <|> pMapTest <|> pArrayTest <|> pAtomicOrUnionType <|> pParenthesizedItemType pAtomicOrUnionType = TypeAtomic <$> pEQName pKindTest = pDocumentTest <|> pElementTest <|> pAttributeTest <|> pSchemaElementTest <|> pSchemaAttributeTest <|> pPITest <|> pCommentTest <|> pTextTest <|> pNamespaceNodeTest <|> pAnyKindTest pAnyKindTest = skipSpace' >> string "node" >> pParens >> return TypeNode pDocumentTest = pSpaced $ do string "document-node" skipSpace' >> char '(' arg <- pElementTest <|> pSchemaElementTest <|> return (TypeElement Nothing Nothing) -- FIXME What should the default be? char ')' return $ TypeDocument arg pTextTest = skipSpace' >> string "text" >> pParens >> return TypeText pCommentTest = skipSpace' >> string "comment" >> pParens >> return TypeComment pNamespaceNodeTest = skipSpace' >> string "namespace-node" >> pParens >> return TypeNamespaceNode pPITest = pSpaced $ do string "processing-instruction" skipSpace' >> char '(' arg <- Just <$> pNCName <|> Just <$> pack <$> pStringLiteral <|> return Nothing char ')' return $ TypePI arg pAttributeTest = pSpaced $ do string "attribute" skipSpace' >> char '(' (attribName, type_) <- option (Nothing, Nothing) $ do attribName <- pAttribNameOrWildcard type_ <- option Nothing $ do char ',' Just <$> pTypeName return (attribName, type_) char ')' return $ TypeAttribute attribName type_ pAttribNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) pAttributeName = pEQName pSchemaAttributeTest = pSpaced $ do string "schema-attribute" skipSpace' >> char '(' arg <- pAttributeDeclaration char ')' return $ TypeSchemaAttribute arg pAttributeDeclaration = pAttributeName pElementTest = pSpaced $ do string "element" pSpaced $ char '(' (elName, type_) <- option (Nothing, Nothing) $ do elName <- pElementNameOrWildcard type_ <- option Nothing $ do char ',' Just <$> pTypeName return (elName, type_) char ')' >> skipSpace' return $ TypeElement elName type_ pElementNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing)) pElementName = pEQName pSchemaElementTest = pSpaced $ do string "schema-element" skipSpace' >> char '(' ret <- pElementDeclaration char ')' return $ TypeSchemaElement ret pElementDeclaration = pElementName pSimpleTypeName = TypeAtomic <$> pTypeName pTypeName = pEQName pFunctionTest = pAnyFunctionTest <|> pTypedFunctionTest pAnyFunctionTest = pSpaced $ do string "function" skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeFunction Nothing' pTypedFunctionTest = pSpaced $ do string "function" pSpaced $ char '(' args <- option [] $ do head <- pSequenceType tail <- many' $ do char ',' pSequenceType return (head:tail) char ')' ret <- option emptySequence $ do pAs pSequenceType return $ TypeFunction $ Some' args ret pMapTest = pAnyMapTest <|> pTypedMapTest pAnyMapTest = pSpaced $ do string "map" skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeMap Nothing' pTypedMapTest = pSpaced $ do string "map" skipSpace' >> char '(' keyType <- pAtomicOrUnionType char ',' valType <- pSequenceType char ')' return $ TypeMap $ Some' keyType valType pArrayTest = pAnyArrayTest <|> pTypedArrayTest pAnyArrayTest = pSpaced $ do skipSpace' >> string "array" skipSpace' >> char '(' >> pSpaced (char '*') >> char ')' return $ TypeArray Nothing pTypedArrayTest = pSpaced $ do string "array" >> skipSpace' char '(' type_ <- pSequenceType char ')' return $ TypeArray $ Just type_ pParenthesizedItemType = pSpaced $ do char '(' ret <- pItemType char ')' return ret pEQName = pEQName' False pEQName' wildcard = pQName wildcard <|> pURIQualifiedName wildcard --- Terminals pIntegerLiteral = read <$> pSpaced pDigits pStringLiteral :: Parser String pStringLiteral = pSpaced (pStringLiteral' '\'' <|> pStringLiteral' '"') pStringLiteral' qt = do char qt ret <- many' (pEscapeQuote qt <|> notChar qt) char qt return ret pBracedURILiteral = pSpaced $ do char 'Q' char '{' ret <- takeWhile1 (`notElem` ['{', '}']) char '}' return ret pEscapeQuote qt = char qt >> char qt pDigits = many1 digit -- XML grammar rules referenced by XPath -- This is copied from XML Conduit rather than transliterated from the standard -- because Attoparsec can't recognize (but can preserve) Unicode characters pNCName = pSpaced $ pNCName' False pNCName' isWildcard = takeWhile1 valid "identifier" where valid '*' = isWildcard valid '&' = False valid '<' = False valid '>' = False valid ':' = False valid '?' = False valid '=' = False valid '"' = False valid '\'' = False valid '/' = False valid ';' = False valid '#' = False valid c = not $ isXMLSpace c isXMLSpace :: Char -> Bool isXMLSpace ' ' = True isXMLSpace '\t' = True isXMLSpace '\r' = True isXMLSpace '\n' = True isXMLSpace _ = False parseQName :: Text -> Either String Name parseQName = parseOnly $ pQName False pQName isWildcard = pSpaced $ do ns <- option Nothing $ do ret <- pNCName' isWildcard char ':' return $ Just ret name <- pNCName' isWildcard return $ Name name Nothing ns pURIQualifiedName isWildcard = pSpaced $ do ns <- pBracedURILiteral name <- pNCName' isWildcard return $ Name name (Just ns) Nothing --- Type Modelling data Quantifier = None | One | Many deriving (Eq, Show) parseQuantity '?' = (None, One) parseQuantity '*' = (None, Many) parseQuantity '+' = (One, Many) data Quantity = Quantity XType Quantifier Quantifier deriving (Eq, Show) emptySequence = Quantity TypeItem None None data MaybePair a b = Nothing' | Some' a b deriving (Eq, Show) data XType = TypeDocument XType | TypeElement (Maybe Name) (Maybe Name) | TypeArray (Maybe Quantity) | TypeMap (MaybePair XType Quantity) | TypeFunction (MaybePair [Quantity] Quantity) | TypeSchemaElement Name | TypeNode | TypeNamespaceNode | TypeText | TypeComment | TypePI (Maybe Text) | TypeSchemaAttribute Name | TypeAttribute (Maybe Name) (Maybe Name) | TypeAtomic Name | TypeItem deriving (Eq, Show) --- Abstract Syntax Tree data Expression = ExprFor Name Expression Expression | ArgPlaceholder | ExprLet Name Expression Expression | ExprQuantified SomeEvery Expression | ExprIf [Expression] Expression Expression | ExprOr [Expression] | ExprAnd [Expression] | ExprStrConcat [Expression] | ExprRange Expression Expression | ExprNegate Expression | ExprSum [Expression] | ExprFollows Expr Expr | ExprPrecedes Expr Expr | ExprIs Expr Expr | ExprGe Expr Expr | ExprGt Expr Expr | ExprLe Expr Expr | ExprLt Expr Expr | ExprNe Expr Expr | ExprEq Expr Expr | ExprGe' Expr Expr | ExprGt' Expr Expr | ExprLe' Expr Expr | ExprLt' Expr Expr | ExprNe' Expr Expr | ExprEq' Expr Expr | ExprMod Expr Expr | ExprIDiv Expr Expr | ExprDiv Expr Expr | ExprMul Expr Expr | ExprUnion [Expression] | ExprIntersect Expr Expr | ExprExcept Expr Expr | ExprArrow Expression Arrow [Expression] | ExprCastAs Expr Quantity | ExprCastableAs Expr Quantity | ExprTreatAs Expr Quantity | ExprInstanceOf Expr Quantity | ExprMap Expression Expression | ExprPath [PathComp] type Expr = Expression data SomeEvery = Some | Every data Arrow = ArrowName Name | ArrowVar Name | ArrowExpr [Expression] data PathComp = PostExpr PrimaryExpr [PostExpr] | PathStep (Axis', Either XType Name) [[Expression]] data PrimaryExpr = Literal Value | VarRef Name | Expr [Expression] | ContextItem | FunctionCall Name [Expression] | FunctionItem [(Name, Quantity)] Quantity [Expression] | MapConstruct [(Expr, Expr)] | ArrayConstruct [Expression] | UnaryLookup Key data PostExpr = PostFilter [Expression] | PostCall [Expression] | PostLookup Key data Key = KeyExpr [Expression] | KeyWildcard | KeyLiteral Value data Axis' = Axis Axis | Attribute | Namespace --- Runtime data Value = Number Double | String String | Bool Bool | Node Cursor | Error Name String String | Array [[Value]] | Map (M.Map String [Value]) | PartialFunc [Expression] Value | FuncRef Name Int | Function [(Name, Quantity)] Quantity [Expression] (M.Map Name [Value]) instance Eq Value where Number x == Number y = x == y String x == String y = x == y Bool x == Bool y = x == y Node x == Node y = node x == node y Error x0 x1 x2 == Error y0 y1 y2 = x0 == y0 && x1 == y1 && x2 == y2 Array x == Array y = x == y Map x == Map y = x == y FuncRef x0 x1 == FuncRef y0 y1 = x0 == y0 && x1 == y1 _ == _ = False val2type (Number _) = TypeAtomic "xsd:double" val2type (String _) = TypeText val2type (Bool _) = TypeAtomic "xsd:bool" val2type (Node _) = TypeNode val2type (Error _ _ _) = TypeAtomic "xsd:error" val2type (Array _) = TypeArray Nothing val2type (Map _) = TypeMap Nothing' val2type (Function _ _ _ _) = TypeFunction Nothing' isErr (Error _ _ _) = True isErr _ = False self cursor = [cursor] root = ancestor >=> check (null . ancestor)