{-# LANGUAGE OverloadedStrings #-}
module Internal.Elements.XPath(parseXPath) where
import Data.Attoparsec.Text.Lazy
import Control.Applicative
import Data.Text (Text(..), pack)
import Text.XML.Cursor
import Text.XML (Name(..), Node(..))
import qualified Data.Map.Strict as M
pSpaced :: Parser a -> Parser a
pSpaced act = do
skipSpace
ret <- act
skipSpace
return ret
-- Transliterated from grammar at https://www.w3.org/TR/xpath-3/#nt-bnf
parseXPath = parse pExpr
pParamList = do
head <- pParam
tail <- many' $ do
char ','
pParam
return (head:tail)
pParam = do
skipSpace
char '$'
name <- pEQName
skipSpace
type_ <- option Nothing $ Just <$> pTypeDeclaration
return (name, type_)
pFunctionBody = pEnclosedExpr
pEnclosedExpr = pSpaced $ do
char '{'
ret <- option [] $ pExpr
char '}'
return ret
pExpr :: Parser [Expression]
pExpr = do
head <- pExprSingle
tail <- many' $ do
char ','
pExprSingle
return (head:tail)
pExprSingle = pSpaced (pForExpr <|> pLetExpr <|> pQuantifiedExpr <|> pIfExpr <|> pOrExpr)
pForExpr = do
header <- pSimpleForClause
string "return"
body <- pExprSingle
return $ ExprFor header body
pSimpleForClause = do
string "for"
head <- pSimpleForBinding
tail <- many' $ do
char ','
pSimpleForBinding
return (head:tail)
pSimpleForBinding = do
skipSpace
char '$'
var <- pVarName
skipSpace >> char 'i' >> char 'n'
val <- pExprSingle
return (var, val)
pLetExpr = do
header <- pSimpleLetClause
string "return"
body <- pExprSingle
return $ ExprLet header body
pSimpleLetClause = do
string "let"
head <- pSimpleLetBinding
tail <- many' $ do
char ','
pSimpleLetBinding
return (head:tail)
pSimpleLetBinding = do
skipSpace
char '$'
var <- pVarName
skipSpace >> char ':' >> char '='
val <- pExprSingle
return (var, val)
pQuantifiedExpr = do
quantifier <- (string "some" >> return ExprSome) <|> (string "every" >> return ExprEvery)
head <- pSimpleForBinding
tail <- many' $ do
char ','
pSimpleForBinding
string "satisfies"
body <- pExprSingle
return $ quantifier (head:tail) body
pIfExpr = do
char 'i' >> char 'f'
skipSpace >> char '('
test <- pExpr
char ')' >> skipSpace
string "then"
pass <- pExprSingle
string "else"
fail <- pExprSingle
return $ ExprIf test pass fail
pOrExpr = do
a <- pAndExpr
bs <- many' $ do
pSpaced (char 'o' >> char 'r')
pAndExpr
return $ if null bs then a else ExprOr (a:bs)
pAndExpr = do
a <- pComparisonExpr
bs <- many' $ do
pSpaced $ string "and"
pComparisonExpr
return $ if null bs then a else ExprAnd (a:bs)
pComparisonExpr = do
a <- pStringConcatExpr
option a $ do
skipSpace
op <- pValueComp <|> pGeneralComp <|> pNodeComp -- TODO implement comparisons
skipSpace
b <- pStringConcatExpr
return $ op a b
pStringConcatExpr = do
a <- pRangeExpr
bs <- many' $ do
pSpaced (char '|' >> char '|')
pRangeExpr
return $ if null bs then a else ExprStrConcat (a:bs)
pRangeExpr = do
a <- pAdditiveExpr
option a $ do
pSpaced (char 't' >> char 'o')
b <- pAdditiveExpr
return $ ExprRange a b
pAdditiveExpr = do
a <- pAdditiveExpr
bs <- many' $ do
op <- pSpaced ((char '-' >> return ExprNegate) <|> (char '+' >> return id))
b <- pAdditiveExpr
return $ op b
return $ if null bs then a else ExprSum (a:bs)
pMultiplicativeExpr = do
a <- pUnionExpr
bs <- many' $ do
skipSpace
op <- (char '*' >> return ExprMul) <|> (string "div" >> return ExprDiv) <|>
(string "idiv" >> return ExprIDiv) <|> (string "mode" >> return ExprMode)
skipSpace
b <- pUnionExpr
return (`op` b)
return $ foldl (\a b -> b a) a bs
pUnionExpr = do
a <- pIntersectExceptExpr
bs <- many' $ do
pSpaced (string "union" <|> string "|")
pIntersectExceptExpr
return $ ExprUnion (a:bs)
pIntersectExceptExpr = do
a <- pInstanceOfExpr
bs <- many' $ do
op <- pSpaced ((string "intersect" >> return ExprIntersect) <|>
(string "except" >> return ExprExcept))
b <- pInstanceOfExpr
return (`op` b)
return $ foldl (\a b -> b a) a bs
pInstanceOfExpr = do
a <- pTreatExpr
option a $ do
pSpaced (string "instance" >> skipSpace >> char 'o' >> char 'f')
type_ <- pSequenceType
return $ ExprInstanceOf a type_
pAs = pSpaced (char 'a' >> char 's')
pTreatExpr = do
a <- pCastableExpr
option a $ do
skipSpace >> string "treat" >> pAs
type_ <- pSequenceType
return $ ExprTreatAs a type_
pCastableExpr = do
a <- pCastExpr
option a $ do
skipSpace >> string "castable" >> pAs
type_ <- pSingleType
return $ ExprCastableAs a type_
pCastExpr = do
a <- pArrowExpr
option a $ do
skipSpace >> string "cast" >> pAs
type_ <- pSingleType
return $ ExprCastAs a type_
pArrowExpr = do
a <- pUnaryExpr
bs <- many' $ do
pSpaced (char '=' >> char '>')
spec <- pArrowFunctionSpecifier
args <- pArgumentList
return (spec, args)
return $ foldl (\a (spec, args) -> ExprArrow a spec args) a bs
pUnaryExpr = do
ops <- many' $ pSpaced (char '+' <|> char '-')
a <- pValueExpr
return $ if length (filter (== '-') ops) `rem` 2 == 1 then ExprNegate a else a
pValueExpr :: Parser Expression
pValueExpr = pSimpleMapExpr
char' ch ret = char ch >> return ret
char'eq ch ret = char ch >> char '=' >> return ret
char'2 ch ch2 ret= char ch >> char ch2 >> return ret
pGeneralComp = char' '=' ExprEq <|> char'eq '!' ExprNe <|> char' '<' ExprLt <|>
char'eq '<' ExprLe <|> char' '>' ExprGt <|> char'eq '>' ExprGe
pValueComp = char'2 'e' 'q' ExprEq' <|> char'2 'n' 'e' ExprNe' <|> char'2 'l' 't' ExprLt' <|>
char'2 'l' 'e' ExprLe' <|> char'2 'g' 't' ExprGt' <|> char'2 'g' 'e' ExprGe'
pNodeComp = char'2 'i' 's' ExprIs <|> char'2 '<' '<' ExprPrecedes <|> char'2 '>' '>' ExprFollows
pSimpleMapExpr = do
a <- pPathExpr
bs <- many' $ do
pSpaced $ char '!'
pPathExpr
return $ foldl ExprMap a bs
pPathExpr = do
skipSpace
char '/'
-- let head = ...
tail <- (skipSpace >> return []) <|> pRelativePathExpr
return $ ExprPath tail
<|> do
skipSpace
-- let head = ...
char '/' >> char '/'
tail <- pRelativePathExpr
return $ ExprPath tail
<|> (ExprPath <$> pRelativePathExpr)
pRelativePathExpr = do
a <- pStepExpr
bs <- many' $ do
char '/'
descend <- option False (char '/' >> return True)
b <- pStepExpr
return [b] -- if descend then [(orSelf descendant, PathType Type Node), b] else [b]
return (a:concat bs)
pStepExpr = pPostfixExpr <|> pAxisStep
pAxisStep = do
step <- pReverseStep <|> pForwardStep
preds <- pPredicateList
return $ PathStep step preds
pForwardStep = do
axis <- pForwardAxis
test <- pNodeTest
return (axis, test)
<|> pAbbrevForwardStep
string' text ret = string text >> return ret
pForwardAxis = pSpaced $ do
ret <- string' "child" (Axis child) <|>
string' "descendant" (Axis descendant) <|>
string' "attribute" Attribute <|>
string' "self" (Axis self) <|>
string' "descendant-or-self" (Axis $ orSelf $ descendant) <|>
string' "following-sibling" (Axis followingSibling) <|>
string' "following" (Axis following) <|>
string' "namespace" Namespace
skipSpace >> char ':' >> char ':'
return ret
pAbbrevForwardStep = do
attrs <- pSpaced $ option (Axis child) (char '@' >> return Attribute)
test <- pNodeTest
return (attrs, test)
pReverseStep = do
axis <- pReverseAxis
test <- pNodeTest
return (axis, test)
<|> pAbbrevReverseStep
pReverseAxis = pSpaced $ do
ret <- string' "parent" (Axis parent) <|>
string' "ancestor" (Axis ancestor) <|>
string' "preceding-sibling" (Axis precedingSibling) <|>
string' "preceding" (Axis preceding) <|>
string' "ancestor-or-self" (Axis $ orSelf $ ancestor)
skipSpace >> char ':' >> char ':'
return ret
pAbbrevReverseStep = do
pSpaced (char '.' >> char '.')
return (Axis parent, Left TypeNode)
pNodeTest = (Left <$> pKindTest) <|> (Right <$> pNameTest)
pNameTest = pEQName' True
pPostfixExpr = do
a <- pPrimaryExpr
bs <- many' ((PostFilter <$> pPredicate) <|> (PostCall <$> pArgumentList) <|> (PostLookup <$> pLookup))
return $ PostExpr a bs
pArgumentList = pSpaced $ do
char '('
head <- pArgument
tail <- many' $ do
char ','
pArgument
char ')'
return (head:tail)
pPredicateList = many' pPredicate
pPredicate = pSpaced $ do
skipSpace >> char '['
ret <- pExpr
char ']' >> skipSpace
return ret
pLookup = do
pSpaced $ char '?'
pKeySpecifier
pKeySpecifier = (KeyName <$> pNCName) <|> (KeyInt <$> pIntegerLiteral) <|>
(KeyExpr <$> pParenthesizedExpr) <|> (pSpaced $ char '*' >> return KeyWildcard)
pArrowFunctionSpecifier = ArrowName <$> pEQName <|> ArrowVar <$> pVarRef <|> ArrowExpr <$> pParenthesizedExpr
pPrimaryExpr = Literal <$> pLiteral <|> VarRef <$> pVarRef <|> Expr <$> pParenthesizedExpr
<|> pContextItemExpr <|> pFunctionCall <|> pFunctionItemExpr <|> UnaryLookup <$> pUnaryLookup <|>
MapConstruct <$> pMapConstructor <|> ArrayConstruct <$> pArrayConstructor
pLiteral = Number <$> pNumericLiteral <|> String <$> pStringLiteral
pNumericLiteral = double -- I think this matches...
pVarRef = char '$' >> pVarName
pVarName = pEQName
pParenthesizedExpr = pSpaced $ do
char '('
ret <- option [] pExpr
char ')'
return ret
pContextItemExpr = char '.' >> return ContextItem
pFunctionCall = do
callee <- pEQName
args <- pArgumentList
return $ FunctionCall callee args
pArgument = pExprSingle <|> pArgumentPlaceholder
pArgumentPlaceholder = char '?' >> return ArgPlaceholder
pFunctionItemExpr = do
name <- pEQName
pSpaced $ char '#'
index <- pIntegerLiteral
return $ FunctionItem name index
pInlineFunctionExpr = do
pSpaced $ string "function"
char '('
params <- pParamList
char ')'
type_ <- option Nothing $ do
pAs
Just <$> pSequenceType
pFunctionBody
return ()
pMapConstructor = do
pSpaced $ string "map"
char '{'
head <- pMapConstructorEntry
tail <- many' $ do
char ','
pMapConstructorEntry
char '}'
return (head:tail)
pMapConstructorEntry = do
key <- pExprSingle
char ':'
value <- pExprSingle
return (key, value)
pArrayConstructor = pSquareArrayConstructor <|> pCurlyArrayConstructor
pSquareArrayConstructor = pSpaced $ do
char '['
ret <- option [] pExpr
char ']'
return $ ExprArray ret
pCurlyArrayConstructor = do
skipSpace >> string "array"
ExprArray <$> pEnclosedExpr
pUnaryLookup = do
skipSpace >> char '?'
pKeySpecifier
pSingleType = do
type_ <- pSimpleTypeName
optional <- option False (char '?' >> skipSpace >> return True)
return $ if optional then Quantity type_ None One else Quantity type_ One One
pTypeDeclaration = pAs >> pSequenceType
pSequenceType :: Parser Quantity
pSequenceType = do
skipSpace >> string "empty-sequence" >> pParens
return emptySequence
<|> do
type_ <- pItemType
(low, high) <- option (One, One) $ parseQuantity <$> pOccurrenceIndicator
return $ Quantity type_ low high
pOccurrenceIndicator = char '?' <|> char '*' <|> char '+'
pParens = pSpaced (char '(' >> skipSpace >> char ')') >> return ()
pItemType = pKindTest <|> do
skipSpace >> string "item-type"
pParens
return TypeItem
<|> pFunctionTest <|> pMapTest <|> pArrayTest <|> pAtomicOrUnionType <|> pParenthesizedItemType
pAtomicOrUnionType = TypeAtomic <$> pEQName
pKindTest = pDocumentTest <|> pElementTest <|> pAttributeTest <|> pSchemaElementTest <|>
pSchemaAttributeTest <|> pPITest <|> pCommentTest <|> pTextTest <|> pNamespaceNodeTest <|> pAnyKindTest
pAnyKindTest = skipSpace >> string "node" >> pParens >> return TypeNode
pDocumentTest = pSpaced $ do
string "document-node"
skipSpace >> char '('
arg <- pElementTest <|> pSchemaElementTest <|> return (TypeElement Nothing Nothing) -- FIXME What should the default be?
char ')'
return $ TypeDocument arg
pTextTest = skipSpace >> string "text" >> pParens >> return TypeText
pCommentTest = skipSpace >> string "comment" >> pParens >> return TypeComment
pNamespaceNodeTest = skipSpace >> string "namespace-node" >> pParens >> return TypeNamespaceNode
pPITest = pSpaced $ do
string "processing-instruction"
skipSpace >> char '('
arg <- Just <$> pNCName <|> Just <$> pack <$> pStringLiteral <|> return Nothing
char ')'
return $ TypePI arg
pAttributeTest = pSpaced $ do
string "attribute"
skipSpace >> char '('
(attribName, type_) <- option (Nothing, Nothing) $ do
attribName <- pAttribNameOrWildcard
type_ <- option Nothing $ do
char ','
Just <$> pTypeName
return (attribName, type_)
char ')'
return $ TypeAttribute attribName type_
pAttribNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing))
pAttributeName = pEQName
pSchemaAttributeTest = pSpaced $ do
string "schema-attribute"
skipSpace >> char '('
arg <- pAttributeDeclaration
char ')'
return $ TypeSchemaAttribute arg
pAttributeDeclaration = pAttributeName
pElementTest = pSpaced $ do
string "element"
pSpaced $ char '('
(elName, type_) <- option (Nothing, Nothing) $ do
elName <- pElementNameOrWildcard
type_ <- option Nothing $ do
char ','
Just <$> pTypeName
return (elName, type_)
char ')' >> skipSpace
return $ TypeElement elName type_
pElementNameOrWildcard = pSpaced ((Just <$> pAttributeName) <|> (char '*' >> return Nothing))
pElementName = pEQName
pSchemaElementTest = pSpaced $ do
string "schema-element"
skipSpace >> char '('
ret <- pElementDeclaration
char ')'
return $ TypeSchemaElement ret
pElementDeclaration = pElementName
pSimpleTypeName = TypeAtomic <$> pTypeName
pTypeName = pEQName
pFunctionTest = pAnyFunctionTest <|> pTypedFunctionTest
pAnyFunctionTest = pSpaced $ do
string "function"
skipSpace >> char '(' >> pSpaced (char '*') >> char ')'
return $ TypeFunction Nothing'
pTypedFunctionTest = pSpaced $ do
string "function"
pSpaced $ char '('
args <- option [] $ do
head <- pSequenceType
tail <- many' $ do
char ','
pSequenceType
return (head:tail)
char ')'
ret <- option emptySequence $ do
pAs
pSequenceType
return $ TypeFunction $ Some' args ret
pMapTest = pAnyMapTest <|> pTypedMapTest
pAnyMapTest = pSpaced $ do
string "map"
skipSpace >> char '(' >> pSpaced (char '*') >> char ')'
return $ TypeMap Nothing'
pTypedMapTest = pSpaced $ do
string "map"
skipSpace >> char '('
keyType <- pAtomicOrUnionType
char ','
valType <- pSequenceType
char ')'
return $ TypeMap $ Some' keyType valType
pArrayTest = pAnyArrayTest <|> pTypedArrayTest
pAnyArrayTest = pSpaced $ do
skipSpace >> string "array"
skipSpace >> char '(' >> pSpaced (char '*') >> char ')'
return $ TypeArray Nothing
pTypedArrayTest = pSpaced $ do
string "array" >> skipSpace
char '('
type_ <- pSequenceType
char ')'
return $ TypeArray $ Just type_
pParenthesizedItemType = pSpaced $ do
char '('
ret <- pItemType
char ')'
return ret
pEQName = pEQName' False
pEQName' wildcard = pQName wildcard <|> pURIQualifiedName wildcard
--- Terminals
pIntegerLiteral = read <$> pSpaced pDigits
pStringLiteral :: Parser String
pStringLiteral = pSpaced (pStringLiteral' '\'' <|> pStringLiteral' '"')
pStringLiteral' qt = do
char qt
ret <- many' (pEscapeQuote qt <|> notChar qt)
char qt
return ret
pBracedURILiteral = pSpaced $ do
char 'Q'
char '{'
ret <- takeWhile1 (`notElem` ['{', '}'])
char '}'
return ret
pEscapeQuote qt = char qt >> char qt
pDigits = many1 digit
-- XML grammar rules referenced by XPath
-- This is copied from XML Conduit rather than transliterated from the standard
-- because Attoparsec can't recognize (but can preserve) Unicode characters
pNCName = pSpaced $ pNCName' False
pNCName' isWildcard = takeWhile1 valid <?> "identifier"
where
valid '*' = isWildcard
valid '&' = False
valid '<' = False
valid '>' = False
valid ':' = False
valid '?' = False
valid '=' = False
valid '"' = False
valid '\'' = False
valid '/' = False
valid ';' = False
valid '#' = False
valid c = not $ isXMLSpace c
isXMLSpace :: Char -> Bool
isXMLSpace ' ' = True
isXMLSpace '\t' = True
isXMLSpace '\r' = True
isXMLSpace '\n' = True
isXMLSpace _ = False
pQName isWildcard = pSpaced $ do
ns <- option Nothing $ do
ret <- pNCName' isWildcard
char ':'
return $ Just ret
name <- pNCName' isWildcard
return $ Name name Nothing ns
pURIQualifiedName isWildcard = pSpaced $ do
ns <- pBracedURILiteral
name <- pNCName' isWildcard
return $ Name name (Just ns) Nothing
--- Type Modelling
data Quantifier = None | One | Many
parseQuantity '?' = (None, One)
parseQuantity '*' = (None, Many)
parseQuantity '+' = (One, Many)
data Quantity = Quantity XType Quantifier Quantifier
emptySequence = Quantity TypeItem None None
data MaybePair a b = Nothing' | Some' a b
data XType = TypeDocument XType |
TypeElement (Maybe Name) (Maybe Name) |
TypeArray (Maybe Quantity) |
TypeMap (MaybePair XType Quantity) |
TypeFunction (MaybePair [Quantity] Quantity) |
TypeSchemaElement Name |
TypeNode |
TypeNamespaceNode |
TypeText |
TypeComment |
TypePI (Maybe Text) |
TypeSchemaAttribute Name |
TypeAttribute (Maybe Name) (Maybe Name) |
TypeAtomic Name |
TypeItem
--- Abstract Syntax Tree
data Expression = ExprFor [(Name, Expression)] Expression |
ArgPlaceholder |
ExprLet [(Name, Expression)] Expression |
ExprSome [(Name, Expression)] Expression |
ExprEvery [(Name, Expression)] Expression |
ExprIf [Expression] Expression Expression |
ExprOr [Expression] |
ExprAnd [Expression] |
ExprStrConcat [Expression] |
ExprRange Expression Expression |
ExprNegate Expression |
ExprSum [Expression] |
ExprArray [Expression] |
ExprFollows Expr Expr | ExprPrecedes Expr Expr | ExprIs Expr Expr |
ExprGe Expr Expr | ExprGt Expr Expr | ExprLe Expr Expr | ExprLt Expr Expr | ExprNe Expr Expr | ExprEq Expr Expr |
ExprGe' Expr Expr | ExprGt' Expr Expr | ExprLe' Expr Expr | ExprLt' Expr Expr | ExprNe' Expr Expr | ExprEq' Expr Expr |
ExprMode Expr Expr | ExprIDiv Expr Expr | ExprDiv Expr Expr | ExprMul Expr Expr |
ExprUnion [Expression] | ExprIntersect Expr Expr | ExprExcept Expr Expr |
ExprArrow Expression Arrow [Expression] |
ExprCastAs Expr Quantity | ExprCastableAs Expr Quantity | ExprTreatAs Expr Quantity | ExprInstanceOf Expr Quantity |
ExprMap Expression Expression |
ExprPath [PathComp]
type Expr = Expression
data Arrow = ArrowName Name | ArrowVar Name | ArrowExpr [Expression]
data PathComp = PostExpr PrimaryExpr [PostExpr] | PathStep (Axis', Either XType Name) [[Expression]]
data PrimaryExpr = Literal Value | VarRef Name | Expr [Expression] | ContextItem |
FunctionCall Name [Expression] | FunctionItem Name Int |
MapConstruct [(Expr, Expr)] | ArrayConstruct Expression | UnaryLookup Key
data PostExpr = PostFilter [Expression] | PostCall [Expression] | PostLookup Key
data Key = KeyName Text | KeyInt Int | KeyExpr [Expression] | KeyWildcard
data Axis' = Axis Axis | Attribute | Namespace
--- Runtime
data Value = Number Double | String String | Nodes [Node] | Array [Value] -- | ...
self cursor = [cursor]