{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-} module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where import Data.Text (Text) import qualified Data.Text as Txt import Text.XML.Cursor import Text.XML (Document, Name) import Data.Maybe (fromMaybe, listToMaybe, mapMaybe) import Network.URI (parseURIReference, URI, nullURI) import Data.Char (isDigit) import Data.List (nub) import Text.Read (readMaybe) import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt) import Text.Regex.TDFA.Text (compile) data Form = Form { action :: URI, enctype :: Text, method :: Text, validate :: Bool, target :: Text, acceptCharset :: [Text], autocomplete :: Bool, formName :: Text, rel :: Text, inputs :: [Input] } data Input = Input { -- Core attributes inputType :: Text, dirname :: Text, inputName :: Text, -- State value :: Text, inputAutocomplete :: Text, autofocus :: Bool, checked :: Bool, disabled :: Bool, readonly :: Bool, -- Input behaviour multiple :: Bool, formAction :: Maybe URI, formEnctype :: Maybe Text, formMethod :: Maybe Text, formValidate :: Bool, formTarget :: Maybe Text, inputMode :: Text, list :: [(Text, Text)], -- Validation range :: (Maybe Text, Maybe Text), step :: Maybe Text, lengthRange :: (Maybe Int, Maybe Int), pattern :: Maybe Regex, required :: Bool, -- Presentation placeholder :: Text, -- sort by tabindex? title :: Text, size :: Maybe Int, fileData :: FileSelector, imageData :: ImageData } data FileSelector = FileSelector { fileAccept :: [Text], fileCapture :: Text } data ImageData = ImageData { imgAlt :: Maybe Text, imgSize :: (Maybe Int, Maybe Int), imgSrc :: Maybe Text } attr :: Text -> Cursor -> Text -> Text attr n el def | [ret] <- n `laxAttribute` el = ret | otherwise = def attr' :: Text -> Cursor -> (Text -> a) -> Text -> a attr' n el cb def = cb $ attr n el def attr'' :: Text -> Cursor -> (String -> a) -> Text -> a attr'' n el cb def = attr' n el (cb . Txt.unpack) def hasAttr :: Name -> Cursor -> Bool hasAttr n = not . null . hasAttribute n mAttr :: Text -> Cursor -> Maybe Text mAttr n = listToMaybe . laxAttribute n parseElement :: Cursor -> Maybe Form parseElement el | _:_ <- laxElement "form" el = Just Form { action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".", enctype = attr "enctype" el "", method = attr "method" el "GET", validate = null $ hasAttribute "novalidate" el, target = attr "target" el "_self", acceptCharset = attr' "accept-charset" el Txt.words "utf-8", autocomplete = hasAttr "autocomplete" el, formName = attr "name" el "", rel = attr "rel" el "", inputs = mapMaybe parseInput $ queryInputs el } | otherwise = Nothing queryInputs :: Cursor -> [Cursor] queryInputs form = (allInputs >=> inForm) form where allInputs = last' ancestor >=> descendant >=> laxElement "input" inForm = check nestedInForm &++ check (\x -> laxAttribute "form" x == laxAttribute "id" form) nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form last' f x | ret:_ <- reverse $ f x = [ret] | otherwise = [] f &++ g = \x -> nub (f x ++ g x) parseInput :: Cursor -> Maybe Input parseInput el | _:_ <- laxElement "input" el = Just Input { inputType = attr "type" el "text", value = attr "value" el "", inputAutocomplete = attr "autocomplete" el "on", autofocus = hasAttr "autofocus" el, checked = hasAttr "checked" el, disabled = hasAttr "disabled" el, readonly = hasAttr "readonly" el, multiple = hasAttr "multiple" el, dirname = attr "dirname" el "", inputName = attr "name" el "", formAction = if hasAttr "formaction" el then attr' "formaction" el (parseURIReference . Txt.unpack) "" else Nothing, formEnctype = mAttr "formenctype" el, formMethod = mAttr "formmethod" el, formValidate = not $ hasAttr "formnovalidate" el, formTarget = mAttr "formtarget" el, inputMode = attr "inputmode" el "text", list = [], -- TODO range = (mAttr "min" el, mAttr "max" el), step = mAttr "step" el, lengthRange = (attr'' "minlength" el readMaybe "", attr'' "maxLength" el readMaybe ""), pattern = attr' "pattern" el (rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*", required = hasAttr "required" el, placeholder = attr "placeholder" el "", title = attr "title" el "", size = attr'' "size" el readMaybe "", fileData = FileSelector { fileAccept = attr' "accept" el Txt.words "*", fileCapture = attr "capture" el "" }, imageData = ImageData { imgAlt = mAttr "alt" el, imgSize = (attr'' "width" el readMaybe "", attr'' "height" el readMaybe ""), imgSrc = mAttr "src" el } } | otherwise = Nothing read' :: Read a => Text -> a read' = read . Txt.unpack parseDocument :: Document -> Text -> Maybe Form parseDocument doc n | Txt.all isDigit n = parseElement (forms doc' !! read' n) | el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el | otherwise = Nothing where forms = orSelf descendant >=> laxElement "form" doc' = fromDocument doc rightToMaybe :: Either a b -> Maybe b rightToMaybe (Left _) = Nothing rightToMaybe (Right x) = Just x instance Eq Cursor where a == b = node a == node b