{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
module Text.HTML.Form (Form(..), Input(..), OptionGroup(..), Option(..),
FileSelector(..), defaultFileData, ImageData(..), defaultImageData,
TextArea(..), defaultTextArea, parseElement, parseDocument) where
import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
import Text.XML (Document, Name(..), Node(..))
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Data.List (singleton)
import Text.Read (readMaybe)
import Data.Function (on)
import Network.URI (parseURIReference, URI, nullURI)
import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt)
import Text.Regex.TDFA.Text (compile)
data Form = Form {
action :: URI,
enctype :: Text,
method :: Text,
validate :: Bool,
target :: Text,
acceptCharset :: [Text],
autocomplete :: Bool,
formName :: Text,
rel :: Text,
inputs :: [Input]
}
data Input = Input {
-- Core attributes
label :: Text,
description :: Node,
inputType :: Text,
dirname :: Text,
inputName :: Text,
-- State
value :: Text,
inputAutocomplete :: Text,
autofocus :: Bool,
checked :: Bool,
disabled :: Bool,
readonly :: Bool,
-- Input behaviour
multiple :: Bool,
formAction :: Maybe URI,
formEnctype :: Maybe Text,
formMethod :: Maybe Text,
formValidate :: Bool,
formTarget :: Maybe Text,
inputMode :: Text,
list :: [OptionGroup],
-- Validation
range :: (Maybe Text, Maybe Text),
step :: Maybe Text,
lengthRange :: (Maybe Int, Maybe Int),
pattern :: Maybe Regex,
required :: Bool,
-- Presentation
placeholder :: Text,
-- sort by tabindex?
title :: Text,
size :: Maybe Int,
fileData :: FileSelector,
imageData :: ImageData,
textArea :: TextArea
}
data OptionGroup = OptGroup {
optsLabel :: Text,
optsDisabled :: Bool,
subopts :: [Option]
}
data Option = Option {
optLabel :: Text,
optValue :: Text,
optSelected :: Bool,
optDisabled :: Bool
}
data FileSelector = FileSelector {
fileAccept :: [Text],
fileCapture :: Text
}
defaultFileData :: FileSelector
defaultFileData = FileSelector [] ""
data ImageData = ImageData {
imgAlt :: Maybe Text,
imgSize :: (Maybe Int, Maybe Int),
imgSrc :: Maybe Text
}
defaultImageData :: ImageData
defaultImageData = ImageData Nothing (Nothing, Nothing) Nothing
data TextArea = TextArea {
autocorrect :: Bool,
rows :: Maybe Int,
spellcheck :: Maybe Bool,
textwrap :: Maybe Bool
}
defaultTextArea :: TextArea
defaultTextArea = TextArea True Nothing Nothing Nothing
attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
| otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
attr'' :: Text -> Cursor -> (String -> a) -> Text -> a
attr'' n el cb def = attr' n el (cb . Txt.unpack) def
hasAttr :: Name -> Cursor -> Bool
hasAttr n = not . null . hasAttribute n
mAttr :: Text -> Cursor -> Maybe Text
mAttr n = listToMaybe . laxAttribute n
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".",
enctype = attr "enctype" el "",
method = attr "method" el "GET",
validate = null $ hasAttribute "novalidate" el,
target = attr "target" el "_self",
acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
autocomplete = hasAttr "autocomplete" el,
formName = attr "name" el "",
rel = attr "rel" el "",
inputs = mapMaybe parseInput $ queryInputs el
}
| otherwise = Nothing
root :: Axis
root = singleton . last . orSelf ancestor
laxElements :: [Text] -> Axis
laxElements ns = checkName (\x -> or [
on (==) Txt.toCaseFold n $ nameLocalName x | n <- ns])
queryInputs :: Cursor -> [Cursor]
queryInputs form = (allInputs >=> inForm) form
where
allInputs = root >=> descendant >=> laxElements [
"input", "textarea", "button", "select"]
inForm = check (\x ->
laxAttribute "form" x == laxAttribute "id" form ||
nestedInForm x)
nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form
parseInput :: Cursor -> Maybe Input
parseInput el | _:_ <- laxElement "input" el = Just Input {
label = fromMaybe
-- Additional fallbacks are primarily for buttons
(attr "name" el $ attr "value" el $ attr "alt" el $
attr "type" el "text") $ fmap text label',
description = fromMaybe (mkEl $ attr "title" el "") $ fmap node $
elByID (attr "aria-describedby" el "") `orElse` label',
inputType = attr "type" el "text",
value = attr "value" el "",
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
checked = hasAttr "checked" el,
-- NOTE: No remaining harm in displaying hidden inputs,
-- might be informative...
disabled = hasAttr "disabled" el || attr "type" el "" == "hidden",
readonly = hasAttr "readonly" el || attr "type" el "" == "hidden",
multiple = hasAttr "multiple" el,
dirname = attr "dirname" el "",
inputName = attr "name" el "",
formAction = if hasAttr "formaction" el
then attr' "formaction" el (parseURIReference . Txt.unpack) ""
else Nothing,
formEnctype = mAttr "formenctype" el,
formMethod = mAttr "formmethod" el,
formValidate = not $ hasAttr "formnovalidate" el,
formTarget = mAttr "formtarget" el,
inputMode = attr "inputmode" el "text",
list = fromMaybe [] $ fmap parseOptions (elByID =<< mAttr "list" el),
range = (mAttr "min" el, mAttr "max" el),
step = mAttr "step" el,
lengthRange = (attr'' "minlength" el readMaybe "",
attr'' "maxLength" el readMaybe ""),
pattern = attr' "pattern" el
(rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*",
required = hasAttr "required" el,
placeholder = attr "placeholder" el "",
title = attr "title" el "",
size = attr'' "size" el readMaybe "",
fileData = FileSelector {
fileAccept = attr' "accept" el Txt.words "*",
fileCapture = attr "capture" el ""
},
imageData = ImageData {
imgAlt = mAttr "alt" el,
imgSize = (attr'' "width" el readMaybe "",
attr'' "height" el readMaybe ""),
imgSrc = mAttr "src" el
},
textArea = defaultTextArea
}
| _:_ <- laxElement "textarea" el = Just Input {
inputType = "<textarea>",
label = fromMaybe (attr "name" el "") $ fmap text label',
description = fromMaybe (mkEl $ attr "title" el "") $ fmap node $
elByID (attr "aria-describedby" el "") `orElse` label',
value = text el,
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
size = attr'' "cols" el readMaybe "",
dirname = attr "dirname" el "",
disabled = hasAttr "disabled" el,
lengthRange = (attr'' "minLength" el readMaybe "",
attr'' "maxLength" el readMaybe ""),
inputName = attr "name" el "",
placeholder = attr "placeholder" el "",
readonly = hasAttr "readonly" el,
required = hasAttr "required" el,
title = attr "title" el "",
inputMode = attr "inputMode" el "text",
textArea = TextArea {
autocorrect = attr "autocorrect" el "on" /= "off",
rows = attr'' "rows" el readMaybe "",
spellcheck = attr' "spellcheck" el (\x -> case x of
"true" -> Just True
"false" -> Just False
"default" -> Nothing
_ -> Nothing) "default",
textwrap = attr' "wrap" el (\x -> case x of
"hard" -> Just True
"soft" -> Just False
"off" -> Nothing
_ -> Just False) "soft"
},
checked = False,
multiple = True,
formAction = Nothing,
formEnctype = Nothing,
formMethod = Nothing,
formValidate = False,
formTarget = Nothing,
list = [],
range = (Nothing, Nothing),
step = Nothing,
pattern = Nothing,
fileData = defaultFileData,
imageData = defaultImageData
}
| _:_ <- laxElement "button" el = Just Input {
-- Fallingback to the input itself as its label allow for
-- the full richness of its children to be rendered!
label = fromMaybe (text el) $ fmap text label',
description = fromMaybe (node el) $ fmap node $
elByID $ attr "aria-describedby" el "",
autofocus = hasAttr "autofocus" el,
disabled = hasAttr "disabled" el,
formAction = if hasAttr "formaction" el
then attr' "formaction" el (parseURIReference . Txt.unpack) ""
else Nothing,
formEnctype = mAttr "formenctype" el,
formMethod = mAttr "formmethod" el,
formValidate = not $ hasAttr "formnovalidate" el,
formTarget = mAttr "formtarget" el,
inputName = attr "name" el "",
-- Popover buttons should be handled by HTML engine, not form engine.
inputType = attr "type" el "submit",
value = attr "value" el "",
title = attr "title" el "",
-- Placeholder makes sense as a place to put the label...
placeholder = Txt.concat $ (descendant >=> content) el,
dirname = "",
inputAutocomplete = "",
checked = False, -- Switch to true for the activated button!
readonly = False,
multiple = False,
inputMode = "",
list = [],
range = (Nothing, Nothing),
step = Nothing,
lengthRange = (Nothing, Nothing),
pattern = Nothing,
required = False,
size = Nothing,
fileData = defaultFileData,
imageData = defaultImageData,
textArea = defaultTextArea
}
| _:_ <- laxElement "select" el = Just Input {
inputType = "<select>",
label = fromMaybe (attr "name" el "") $ fmap Txt.concat $
fmap filterSelect label',
description = fromMaybe (mkEl $ attr "title" el "") $ fmap node $
elByID $ attr "aria-describedby" el "",
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
disabled = hasAttr "disabled" el,
multiple = hasAttr "multiple" el,
inputName = attr "name" el "",
required = hasAttr "required" el,
size = attr'' "size" el readMaybe "",
list = parseOptions el,
title = attr "title" el "",
dirname = "",
value = "", -- Sourced from list...
checked = False,
readonly = False,
formAction = Nothing,
formEnctype = Nothing,
formMethod = Nothing,
formValidate = False,
formTarget = Nothing,
inputMode = "",
range = (Nothing, Nothing),
step = Nothing,
lengthRange = (Nothing, Nothing),
pattern = Nothing,
placeholder = "",
fileData = defaultFileData,
imageData = defaultImageData,
textArea = defaultTextArea
}
| otherwise = Nothing
where
elByAttr k v = listToMaybe $ (root >=> descendant >=> attributeIs k v) el
elByID = elByAttr "id"
label' = elByAttr "for" (attr "id" el "") `orElse`
listToMaybe $ (ancestor >=> laxElement "label") el
filterSelect = descendant >=>
checkNot (orSelf ancestor >=> laxElement "select") >=>
content
parseOptions :: Cursor -> [OptionGroup]
parseOptions el = [parseGroup opt
| opt <- (descendant >=> laxElements ["option", "optgroup"] >=>
checkNot (parent >=> laxElement "optgroup")) el]
where
parseGroup opt
| _:_ <- laxElement "option" opt =
OptGroup "" False [parseOption opt False]
| _:_ <- laxElement "optgroup" opt = OptGroup {
optsLabel = attr "label" opt "",
optsDisabled = hasAttr "disabled" opt,
subopts = [parseOption o $ hasAttr "disabled" opt | o <- child opt]
}
| otherwise = OptGroup "" True [] -- Shouldn't happen!
parseOption opt disabledOverride = Option {
optLabel = attr "label" opt $ text opt,
optValue = attr "value" opt $ text opt,
optSelected = hasAttr "selected" opt,
optDisabled = hasAttr "disabled" opt || disabledOverride
}
parseDocument :: Document -> Text -> Maybe Form
parseDocument doc n
| Just n' <- readMaybe $ Txt.unpack n, n' < length (forms doc') =
parseElement (forms doc' !! n')
| el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el
| otherwise = Nothing
where
forms = orSelf descendant >=> laxElement "form"
doc' = fromDocument doc
checkNot :: Boolean b => (Cursor -> b) -> Axis
checkNot test = check (not . bool . test)
rightToMaybe :: Either a b -> Maybe b
rightToMaybe (Left _) = Nothing
rightToMaybe (Right x) = Just x
instance Eq Cursor where
a == b = node a == node b
orElse :: Maybe a -> Maybe a -> Maybe a
orElse ret@(Just _) _ = ret
orElse _ ret = ret
infixr 0 `orElse`
text :: Cursor -> Text
text = Txt.concat . (descendant >=> content)
mkEl :: Text -> Node
mkEl = NodeContent