{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where
import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
import Text.XML (Document, Name)
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Network.URI (parseURIReference, URI, nullURI)
import Data.Char (isDigit)
import Data.List (nub)
import Text.Read (readMaybe)
import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt)
import Text.Regex.TDFA.Text (compile)
data Form = Form {
action :: URI,
enctype :: Text,
method :: Text,
validate :: Bool,
target :: Text,
acceptCharset :: [Text],
autocomplete :: Bool,
formName :: Text,
rel :: Text,
inputs :: [Input]
}
data Input = Input {
-- Core attributes
inputType :: Text,
dirname :: Text,
inputName :: Text,
-- State
value :: Text,
inputAutocomplete :: Text,
autofocus :: Bool,
checked :: Bool,
disabled :: Bool,
readonly :: Bool,
-- Input behaviour
multiple :: Bool,
formAction :: Maybe URI,
formEnctype :: Maybe Text,
formMethod :: Maybe Text,
formValidate :: Bool,
formTarget :: Maybe Text,
inputMode :: Text,
list :: [(Text, Text)],
-- Validation
range :: (Maybe Text, Maybe Text),
step :: Maybe Text,
lengthRange :: (Maybe Int, Maybe Int),
pattern :: Maybe Regex,
required :: Bool,
-- Presentation
placeholder :: Text,
-- sort by tabindex?
title :: Text,
size :: Maybe Int,
fileData :: FileSelector,
imageData :: ImageData
}
data FileSelector = FileSelector {
fileAccept :: [Text],
fileCapture :: Text
}
data ImageData = ImageData {
imgAlt :: Maybe Text,
imgSize :: (Maybe Int, Maybe Int),
imgSrc :: Maybe Text
}
attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
| otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
attr'' :: Text -> Cursor -> (String -> a) -> Text -> a
attr'' n el cb def = attr' n el (cb . Txt.unpack) def
hasAttr :: Name -> Cursor -> Bool
hasAttr n = not . null . hasAttribute n
mAttr :: Text -> Cursor -> Maybe Text
mAttr n = listToMaybe . laxAttribute n
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".",
enctype = attr "enctype" el "",
method = attr "method" el "GET",
validate = null $ hasAttribute "novalidate" el,
target = attr "target" el "_self",
acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
autocomplete = hasAttr "autocomplete" el,
formName = attr "name" el "",
rel = attr "rel" el "",
inputs = mapMaybe parseInput $ queryInputs el
}
| otherwise = Nothing
queryInputs :: Cursor -> [Cursor]
queryInputs form = (allInputs >=> inForm) form
where
allInputs = last' ancestor >=> descendant >=> laxElement "input"
inForm = check nestedInForm &++
check (\x -> laxAttribute "form" x == laxAttribute "id" form)
nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form
last' f x | ret:_ <- reverse $ f x = [ret]
| otherwise = []
f &++ g = \x -> nub (f x ++ g x)
parseInput :: Cursor -> Maybe Input
parseInput el | _:_ <- laxElement "input" el = Just Input {
inputType = attr "type" el "text",
value = attr "value" el "",
inputAutocomplete = attr "autocomplete" el "on",
autofocus = hasAttr "autofocus" el,
checked = hasAttr "checked" el,
disabled = hasAttr "disabled" el,
readonly = hasAttr "readonly" el,
multiple = hasAttr "multiple" el,
dirname = attr "dirname" el "",
inputName = attr "name" el "",
formAction = if hasAttr "formaction" el
then attr' "formaction" el (parseURIReference . Txt.unpack) ""
else Nothing,
formEnctype = mAttr "formenctype" el,
formMethod = mAttr "formmethod" el,
formValidate = not $ hasAttr "formnovalidate" el,
formTarget = mAttr "formtarget" el,
inputMode = attr "inputmode" el "text",
list = [], -- TODO
range = (mAttr "min" el, mAttr "max" el),
step = mAttr "step" el,
lengthRange = (attr'' "minlength" el readMaybe "",
attr'' "maxLength" el readMaybe ""),
pattern = attr' "pattern" el
(rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*",
required = hasAttr "required" el,
placeholder = attr "placeholder" el "",
title = attr "title" el "",
size = attr'' "size" el readMaybe "",
fileData = FileSelector {
fileAccept = attr' "accept" el Txt.words "*",
fileCapture = attr "capture" el ""
},
imageData = ImageData {
imgAlt = mAttr "alt" el,
imgSize = (attr'' "width" el readMaybe "",
attr'' "height" el readMaybe ""),
imgSrc = mAttr "src" el
}
}
| otherwise = Nothing
read' :: Read a => Text -> a
read' = read . Txt.unpack
parseDocument :: Document -> Text -> Maybe Form
parseDocument doc n
| Txt.all isDigit n = parseElement (forms doc' !! read' n)
| el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el
| otherwise = Nothing
where
forms = orSelf descendant >=> laxElement "form"
doc' = fromDocument doc
rightToMaybe :: Either a b -> Maybe b
rightToMaybe (Left _) = Nothing
rightToMaybe (Right x) = Just x
instance Eq Cursor where
a == b = node a == node b