{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-} module Text.HTML.Form (Form(..), Input(..), OptionGroup(..), Option(..), FileSelector(..), defaultFileData, ImageData(..), defaultImageData, TextArea(..), defaultTextArea, parseElement, parseDocument) where import Data.Text (Text) import qualified Data.Text as Txt import Text.XML.Cursor import Text.XML (Document, Name(..), Node(..)) import Data.Maybe (fromMaybe, listToMaybe, mapMaybe) import Data.List (singleton) import Text.Read (readMaybe) import Data.Function (on) import Network.URI (parseURIReference, URI, nullURI) import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt) import Text.Regex.TDFA.Text (compile) data Form = Form { action :: URI, enctype :: Text, method :: Text, validate :: Bool, target :: Text, acceptCharset :: [Text], autocomplete :: Bool, formName :: Text, rel :: Text, inputs :: [Input] } data Input = Input { -- Core attributes label :: Text, description :: Node, inputType :: Text, dirname :: Text, inputName :: Text, -- State value :: Text, inputAutocomplete :: Text, autofocus :: Bool, checked :: Bool, disabled :: Bool, readonly :: Bool, -- Input behaviour multiple :: Bool, formAction :: Maybe URI, formEnctype :: Maybe Text, formMethod :: Maybe Text, formValidate :: Bool, formTarget :: Maybe Text, inputMode :: Text, list :: [OptionGroup], -- Validation range :: (Maybe Text, Maybe Text), step :: Maybe Text, lengthRange :: (Maybe Int, Maybe Int), pattern :: Maybe Regex, required :: Bool, -- Presentation placeholder :: Text, -- sort by tabindex? title :: Text, size :: Maybe Int, fileData :: FileSelector, imageData :: ImageData, textArea :: TextArea } data OptionGroup = OptGroup { optsLabel :: Text, optsDisabled :: Bool, subopts :: [Option] } data Option = Option { optLabel :: Text, optValue :: Text, optSelected :: Bool, optDisabled :: Bool } data FileSelector = FileSelector { fileAccept :: [Text], fileCapture :: Text } defaultFileData :: FileSelector defaultFileData = FileSelector [] "" data ImageData = ImageData { imgAlt :: Maybe Text, imgSize :: (Maybe Int, Maybe Int), imgSrc :: Maybe Text } defaultImageData :: ImageData defaultImageData = ImageData Nothing (Nothing, Nothing) Nothing data TextArea = TextArea { autocorrect :: Bool, rows :: Maybe Int, spellcheck :: Maybe Bool, textwrap :: Maybe Bool } defaultTextArea :: TextArea defaultTextArea = TextArea True Nothing Nothing Nothing attr :: Text -> Cursor -> Text -> Text attr n el def | [ret] <- n `laxAttribute` el = ret | otherwise = def attr' :: Text -> Cursor -> (Text -> a) -> Text -> a attr' n el cb def = cb $ attr n el def attr'' :: Text -> Cursor -> (String -> a) -> Text -> a attr'' n el cb def = attr' n el (cb . Txt.unpack) def hasAttr :: Name -> Cursor -> Bool hasAttr n = not . null . hasAttribute n mAttr :: Text -> Cursor -> Maybe Text mAttr n = listToMaybe . laxAttribute n parseElement :: Cursor -> Maybe Form parseElement el | _:_ <- laxElement "form" el = Just Form { action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".", enctype = attr "enctype" el "", method = attr "method" el "GET", validate = null $ hasAttribute "novalidate" el, target = attr "target" el "_self", acceptCharset = attr' "accept-charset" el Txt.words "utf-8", autocomplete = hasAttr "autocomplete" el, formName = attr "name" el "", rel = attr "rel" el "", inputs = mapMaybe parseInput $ queryInputs el } | otherwise = Nothing root :: Axis root = singleton . last . ancestor laxElements :: [Text] -> Axis laxElements ns = checkName (\x -> or [ on (==) Txt.toCaseFold n $ nameLocalName x | n <- ns]) queryInputs :: Cursor -> [Cursor] queryInputs form = (allInputs >=> inForm) form where allInputs = root >=> descendant >=> laxElements [ "input", "textarea", "button", "select"] inForm = check (\x -> laxAttribute "form" x == laxAttribute "id" form || nestedInForm x) nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form parseInput :: Cursor -> Maybe Input parseInput el | _:_ <- laxElement "input" el = Just Input { label = fromMaybe (attr "name" el "") $ fmap text label', description = fromMaybe (mkEl $ attr "title" el "") $ fmap node $ elByID (attr "aria-describedby" el "") `orElse` label', inputType = attr "type" el "text", value = attr "value" el "", inputAutocomplete = attr "autocomplete" el "on", autofocus = hasAttr "autofocus" el, checked = hasAttr "checked" el, disabled = hasAttr "disabled" el, readonly = hasAttr "readonly" el, multiple = hasAttr "multiple" el, dirname = attr "dirname" el "", inputName = attr "name" el "", formAction = if hasAttr "formaction" el then attr' "formaction" el (parseURIReference . Txt.unpack) "" else Nothing, formEnctype = mAttr "formenctype" el, formMethod = mAttr "formmethod" el, formValidate = not $ hasAttr "formnovalidate" el, formTarget = mAttr "formtarget" el, inputMode = attr "inputmode" el "text", list = fromMaybe [] $ fmap parseOptions (elByID =<< mAttr "list" el), range = (mAttr "min" el, mAttr "max" el), step = mAttr "step" el, lengthRange = (attr'' "minlength" el readMaybe "", attr'' "maxLength" el readMaybe ""), pattern = attr' "pattern" el (rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*", required = hasAttr "required" el, placeholder = attr "placeholder" el "", title = attr "title" el "", size = attr'' "size" el readMaybe "", fileData = FileSelector { fileAccept = attr' "accept" el Txt.words "*", fileCapture = attr "capture" el "" }, imageData = ImageData { imgAlt = mAttr "alt" el, imgSize = (attr'' "width" el readMaybe "", attr'' "height" el readMaybe ""), imgSrc = mAttr "src" el }, textArea = defaultTextArea } | _:_ <- laxElement "textarea" el = Just Input { inputType = "