{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where
import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
import Text.XML (Document, Name(..), Node(..))
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Data.Char (isDigit)
import Data.List (singleton)
import Text.Read (readMaybe)
import Data.Function (on)
import Network.URI (parseURIReference, URI, nullURI)
import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt)
import Text.Regex.TDFA.Text (compile)
data Form = Form {
action :: URI,
enctype :: Text,
method :: Text,
validate :: Bool,
target :: Text,
acceptCharset :: [Text],
autocomplete :: Bool,
formName :: Text,
rel :: Text,
inputs :: [Input]
}
data Input = Input {
-- Core attributes
label :: Node,
description :: Node,
inputType :: Text,
dirname :: Text,
inputName :: Text,
-- State
value :: Text,
inputAutocomplete :: Text,
autofocus :: Bool,
checked :: Bool,
disabled :: Bool,
readonly :: Bool,
-- Input behaviour
multiple :: Bool,
formAction :: Maybe URI,
formEnctype :: Maybe Text,
formMethod :: Maybe Text,
formValidate :: Bool,
formTarget :: Maybe Text,
inputMode :: Text,
list :: [OptionGroup],
-- Validation
range :: (Maybe Text, Maybe Text),
step :: Maybe Text,
lengthRange :: (Maybe Int, Maybe Int),
pattern :: Maybe Regex,
required :: Bool,
-- Presentation
placeholder :: Text,
-- sort by tabindex?
title :: Text,
size :: Maybe Int,
fileData :: FileSelector,
imageData :: ImageData,
textArea :: TextArea
}
data OptionGroup = OptGroup {
optsLabel :: Text,
optsDisabled :: Bool,
subopts :: [Option]
}
data Option = Option {
optLabel :: Text,
optValue :: Text,
optSelected :: Bool,
optDisabled :: Bool
}
data FileSelector = FileSelector {
fileAccept :: [Text],
fileCapture :: Text
}
defaultFileData :: FileSelector
defaultFileData = FileSelector [] ""
data ImageData = ImageData {
imgAlt :: Maybe Text,
imgSize :: (Maybe Int, Maybe Int),
imgSrc :: Maybe Text
}
defaultImageData :: ImageData
defaultImageData = ImageData Nothing (Nothing, Nothing) Nothing
data TextArea = TextArea {
autocorrect :: Bool,
rows :: Maybe Int,
spellcheck :: Maybe Bool,
textwrap :: Maybe Bool
}
defaultTextArea :: TextArea
defaultTextArea = TextArea True Nothing Nothing Nothing
attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
| otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
attr'' :: Text -> Cursor -> (String -> a) -> Text -> a
attr'' n el cb def = attr' n el (cb . Txt.unpack) def
hasAttr :: Name -> Cursor -> Bool
hasAttr n = not . null . hasAttribute n
mAttr :: Text -> Cursor -> Maybe Text
mAttr n = listToMaybe . laxAttribute n
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".",
enctype = attr "enctype" el "",
method = attr "method" el "GET",
validate = null $ hasAttribute "novalidate" el,
target = attr "target" el "_self",
acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
autocomplete = hasAttr "autocomplete" el,
formName = attr "name" el "",
rel = attr "rel" el "",
inputs = mapMaybe parseInput $ queryInputs el
}
| otherwise = Nothing
root :: Axis
root = singleton . last . ancestor
laxElements :: [Text] -> Axis
laxElements ns = checkName (\x -> or [
on (==) Txt.toCaseFold n $ nameLocalName x | n <- ns])
queryInputs :: Cursor -> [Cursor]
queryInputs form = (allInputs >=> inForm) form
where
allInputs = root >=> descendant >=> laxElements [
"input", "textarea", "button", "select"]
inForm = check (\x ->
laxAttribute "form" x == laxAttribute "id" form ||
nestedInForm x)
nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form
parseInput :: Cursor -> Maybe Input
parseInput el | _:_ <- laxElement "input" el = Just Input {
label = fromMaybe (mkEl $ attr "name" el "") $ fmap node $
listToMaybe ((ancestor >=> laxElement "label") el) *>
elByAttr "for" (attr "id" el ""),
description = fromMaybe (mkEl "") $ fmap node $
elByID $ attr "aria-describedby" el "",
inputType = attr "type" el "text",
value = attr "value" el "",
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
checked = hasAttr "checked" el,
disabled = hasAttr "disabled" el,
readonly = hasAttr "readonly" el,
multiple = hasAttr "multiple" el,
dirname = attr "dirname" el "",
inputName = attr "name" el "",
formAction = if hasAttr "formaction" el
then attr' "formaction" el (parseURIReference . Txt.unpack) ""
else Nothing,
formEnctype = mAttr "formenctype" el,
formMethod = mAttr "formmethod" el,
formValidate = not $ hasAttr "formnovalidate" el,
formTarget = mAttr "formtarget" el,
inputMode = attr "inputmode" el "text",
list = fromMaybe [] $ fmap parseOptions (elByID =<< mAttr "list" el),
range = (mAttr "min" el, mAttr "max" el),
step = mAttr "step" el,
lengthRange = (attr'' "minlength" el readMaybe "",
attr'' "maxLength" el readMaybe ""),
pattern = attr' "pattern" el
(rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*",
required = hasAttr "required" el,
placeholder = attr "placeholder" el "",
title = attr "title" el "",
size = attr'' "size" el readMaybe "",
fileData = FileSelector {
fileAccept = attr' "accept" el Txt.words "*",
fileCapture = attr "capture" el ""
},
imageData = ImageData {
imgAlt = mAttr "alt" el,
imgSize = (attr'' "width" el readMaybe "",
attr'' "height" el readMaybe ""),
imgSrc = mAttr "src" el
},
textArea = defaultTextArea
}
| _:_ <- laxElement "textarea" el = Just Input {
inputType = "<textarea>",
label = fromMaybe (mkEl $ attr "name" el "") $ fmap node $
listToMaybe ((ancestor >=> laxElement "label") el) *>
elByAttr "for" (attr "id" el ""),
description = fromMaybe (mkEl "") $ fmap node $
elByID $ attr "aria-describedby" el "",
value = Txt.concat $ (descendant >=> content) el,
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
size = attr'' "cols" el readMaybe "",
dirname = attr "dirname" el "",
disabled = hasAttr "disabled" el,
lengthRange = (attr'' "minLength" el readMaybe "",
attr'' "maxLength" el readMaybe ""),
inputName = attr "name" el "",
placeholder = attr "placeholder" el "",
readonly = hasAttr "readonly" el,
required = hasAttr "required" el,
title = attr "title" el "",
inputMode = attr "inputMode" el "text",
textArea = TextArea {
autocorrect = attr "autocorrect" el "on" /= "off",
rows = attr'' "rows" el readMaybe "",
spellcheck = attr' "spellcheck" el (\x -> case x of
"true" -> Just True
"false" -> Just False
"default" -> Nothing
_ -> Nothing) "default",
textwrap = attr' "wrap" el (\x -> case x of
"hard" -> Just True
"soft" -> Just False
"off" -> Nothing
_ -> Just False) "soft"
},
checked = True,
multiple = False,
formAction = Nothing,
formEnctype = Nothing,
formMethod = Nothing,
formValidate = False,
formTarget = Nothing,
list = [],
range = (Nothing, Nothing),
step = Nothing,
pattern = Nothing,
fileData = defaultFileData,
imageData = defaultImageData
}
| _:_ <- laxElement "button" el = Just Input {
-- Fallingback to the input itself as its label allow for
-- the full richness of its children to be rendered!
label = fromMaybe (node el) $ fmap node $
listToMaybe ((ancestor >=> laxElement "label") el) *>
elByAttr "for" (attr "id" el ""),
description = fromMaybe (node el) $ fmap node $
elByID $ attr "aria-describedby" el "",
autofocus = hasAttr "autofocus" el,
disabled = hasAttr "disabled" el,
formAction = if hasAttr "formaction" el
then attr' "formaction" el (parseURIReference . Txt.unpack) ""
else Nothing,
formEnctype = mAttr "formenctype" el,
formMethod = mAttr "formmethod" el,
formValidate = not $ hasAttr "formnovalidate" el,
formTarget = mAttr "formtarget" el,
inputName = attr "name" el "",
-- Popover buttons should be handled by HTML engine, not form engine.
inputType = attr "type" el "submit",
value = attr "value" el "",
title = attr "title" el "",
-- Placeholder makes sense as a place to put the label...
placeholder = Txt.concat $ (descendant >=> content) el,
dirname = "",
inputAutocomplete = "",
checked = False, -- Switch to true for the activated button!
readonly = False,
multiple = False,
inputMode = "",
list = [],
range = (Nothing, Nothing),
step = Nothing,
lengthRange = (Nothing, Nothing),
pattern = Nothing,
required = False,
size = Nothing,
fileData = defaultFileData,
imageData = defaultImageData,
textArea = defaultTextArea
}
| _:_ <- laxElement "select" el = Just Input {
inputType = "<select>",
label = fromMaybe (mkEl $ attr "name" el "") $ fmap node $
listToMaybe ((ancestor >=> laxElement "label") el) *>
elByAttr "for" (attr "id" el ""),
description = fromMaybe (mkEl "") $ fmap node $
elByID $ attr "aria-describedby" el "",
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
disabled = hasAttr "disabled" el,
multiple = hasAttr "multiple" el,
inputName = attr "name" el "",
required = hasAttr "required" el,
size = attr'' "size" el readMaybe "",
list = parseOptions el,
title = attr "title" el "",
dirname = "",
value = "", -- Sourced from list...
checked = True,
readonly = False,
formAction = Nothing,
formEnctype = Nothing,
formMethod = Nothing,
formValidate = False,
formTarget = Nothing,
inputMode = "",
range = (Nothing, Nothing),
step = Nothing,
lengthRange = (Nothing, Nothing),
pattern = Nothing,
placeholder = "",
fileData = defaultFileData,
imageData = defaultImageData,
textArea = defaultTextArea
}
| otherwise = Nothing
where
elByAttr k v = listToMaybe $ (root >=> descendant >=> attributeIs k v) el
elByID = elByAttr "id"
mkEl txt = NodeContent txt
parseOptions :: Cursor -> [OptionGroup]
parseOptions el = [parseGroup opt
| opt <- (descendant >=> laxElements ["option", "optgroup"] >=>
checkNot (parent >=> laxElement "optgroup")) el]
where
checkNot test = check (not . bool . test)
parseGroup opt
| _:_ <- laxElement "option" opt =
OptGroup "" False [parseOption opt False]
| _:_ <- laxElement "optgroup" opt = OptGroup {
optsLabel = attr "label" opt "",
optsDisabled = hasAttr "disabled" opt,
subopts = [parseOption o $ hasAttr "disabled" opt | o <- child opt]
}
| otherwise = OptGroup "" True [] -- Shouldn't happen!
parseOption opt disabledOverride = Option {
optLabel = attr "label" opt text,
optValue = attr "value" opt text,
optSelected = hasAttr "selected" opt,
optDisabled = hasAttr "disabled" opt || disabledOverride
} where text = Txt.concat $ (descendant >=> content) opt
read' :: Read a => Text -> a
read' = read . Txt.unpack
parseDocument :: Document -> Text -> Maybe Form
parseDocument doc n
| Txt.all isDigit n = parseElement (forms doc' !! read' n)
| el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el
| otherwise = Nothing
where
forms = orSelf descendant >=> laxElement "form"
doc' = fromDocument doc
rightToMaybe :: Either a b -> Maybe b
rightToMaybe (Left _) = Nothing
rightToMaybe (Right x) = Just x
instance Eq Cursor where
a == b = node a == node b