@@ 1,14 1,19 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where
import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
-import Text.XML (Document)
+import Text.XML (Document, Name)
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Network.URI (parseURIReference, URI, nullURI)
import Data.Char (isDigit)
+import Data.List (nub)
+import Text.Read (readMaybe)
+
+import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt)
+import Text.Regex.TDFA.Text (compile)
data Form = Form {
action :: URI,
@@ 18,31 23,133 @@ data Form = Form {
target :: Text,
acceptCharset :: [Text],
autocomplete :: Bool,
- name :: Text,
+ formName :: Text,
rel :: Text,
inputs :: [Input]
}
-data Input = Input
+data Input = Input {
+ -- Core attributes
+ inputType :: Text,
+ dirname :: Text,
+ inputName :: Text,
+ -- State
+ value :: Text,
+ inputAutocomplete :: Text,
+ autofocus :: Bool,
+ checked :: Bool,
+ disabled :: Bool,
+ readonly :: Bool,
+ -- Input behaviour
+ multiple :: Bool,
+ formAction :: Maybe URI,
+ formEnctype :: Maybe Text,
+ formMethod :: Maybe Text,
+ formValidate :: Bool,
+ formTarget :: Maybe Text,
+ inputMode :: Text,
+ list :: [(Text, Text)],
+ -- Validation
+ range :: (Maybe Text, Maybe Text),
+ step :: Maybe Text,
+ lengthRange :: (Maybe Int, Maybe Int),
+ pattern :: Maybe Regex,
+ required :: Bool,
+ -- Presentation
+ placeholder :: Text,
+ -- sort by tabindex?
+ title :: Text,
+ size :: Maybe Int,
+ fileData :: FileSelector,
+ imageData :: ImageData
+}
+data FileSelector = FileSelector {
+ fileAccept :: [Text],
+ fileCapture :: Text
+}
+data ImageData = ImageData {
+ imgAlt :: Maybe Text,
+ imgSize :: (Maybe Int, Maybe Int),
+ imgSrc :: Maybe Text
+}
attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
| otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
+attr'' :: Text -> Cursor -> (String -> a) -> Text -> a
+attr'' n el cb def = attr' n el (cb . Txt.unpack) def
+hasAttr :: Name -> Cursor -> Bool
+hasAttr n = not . null . hasAttribute n
+mAttr :: Text -> Cursor -> Maybe Text
+mAttr n = listToMaybe . laxAttribute n
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
- action = attr' "action" el
- (fromMaybe nullURI . parseURIReference . Txt.unpack) ".",
+ action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".",
enctype = attr "enctype" el "",
method = attr "method" el "GET",
validate = null $ hasAttribute "novalidate" el,
target = attr "target" el "_self",
acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
- autocomplete = not $ null $ hasAttribute "autocomplete" el,
- name = attr "name" el "",
+ autocomplete = hasAttr "autocomplete" el,
+ formName = attr "name" el "",
rel = attr "rel" el "",
- inputs = []
+ inputs = mapMaybe parseInput $ queryInputs el
+ }
+ | otherwise = Nothing
+
+queryInputs :: Cursor -> [Cursor]
+queryInputs form = (allInputs >=> inForm) form
+ where
+ allInputs = last' ancestor >=> descendant >=> laxElement "input"
+ inForm = check nestedInForm &++
+ check (\x -> laxAttribute "form" x == laxAttribute "id" form)
+ nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form
+ last' f x | ret:_ <- reverse $ f x = [ret]
+ | otherwise = []
+ f &++ g = \x -> nub (f x ++ g x)
+parseInput :: Cursor -> Maybe Input
+parseInput el | _:_ <- laxElement "input" el = Just Input {
+ inputType = attr "type" el "text",
+ value = attr "value" el "",
+ inputAutocomplete = attr "autocomplete" el "on",
+ autofocus = hasAttr "autofocus" el,
+ checked = hasAttr "checked" el,
+ disabled = hasAttr "disabled" el,
+ readonly = hasAttr "readonly" el,
+ multiple = hasAttr "multiple" el,
+ dirname = attr "dirname" el "",
+ inputName = attr "name" el "",
+ formAction = if hasAttr "formaction" el
+ then attr' "formaction" el (parseURIReference . Txt.unpack) ""
+ else Nothing,
+ formEnctype = mAttr "formenctype" el,
+ formMethod = mAttr "formmethod" el,
+ formValidate = not $ hasAttr "formnovalidate" el,
+ formTarget = mAttr "formtarget" el,
+ inputMode = attr "inputmode" el "text",
+ list = [], -- TODO
+ range = (mAttr "min" el, mAttr "max" el),
+ step = mAttr "step" el,
+ lengthRange = (attr'' "minlength" el readMaybe "",
+ attr'' "maxLength" el readMaybe ""),
+ pattern = attr' "pattern" el
+ (rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*",
+ required = hasAttr "required" el,
+ placeholder = attr "placeholder" el "",
+ title = attr "title" el "",
+ size = attr'' "size" el readMaybe "",
+ fileData = FileSelector {
+ fileAccept = attr' "accept" el Txt.words "*",
+ fileCapture = attr "capture" el ""
+ },
+ imageData = ImageData {
+ imgAlt = mAttr "alt" el,
+ imgSize = (attr'' "width" el readMaybe "",
+ attr'' "height" el readMaybe ""),
+ imgSrc = mAttr "src" el
+ }
}
| otherwise = Nothing
@@ 56,3 163,9 @@ parseDocument doc n
where
forms = orSelf descendant >=> laxElement "form"
doc' = fromDocument doc
+
+rightToMaybe :: Either a b -> Maybe b
+rightToMaybe (Left _) = Nothing
+rightToMaybe (Right x) = Just x
+instance Eq Cursor where
+ a == b = node a == node b