~alcinnz/bureaucromancy

ref: 47000929c9a4b46e8757d5daf0c23ede3fd30199 bureaucromancy/src/Text/HTML/Form.hs -rw-r--r-- 5.6 KiB
47000929 — Adrian Cochrane Parse <input> tags. 1 year, 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
{-# LANGUAGE OverloadedStrings, TypeSynonymInstances, FlexibleInstances #-}
module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where

import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
import Text.XML (Document, Name)

import Data.Maybe (fromMaybe, listToMaybe, mapMaybe)
import Network.URI (parseURIReference, URI, nullURI)
import Data.Char (isDigit)
import Data.List (nub)
import Text.Read (readMaybe)

import Text.Regex.TDFA (Regex, defaultCompOpt, defaultExecOpt)
import Text.Regex.TDFA.Text (compile)

data Form = Form {
    action :: URI,
    enctype :: Text,
    method :: Text,
    validate :: Bool,
    target :: Text,
    acceptCharset :: [Text],
    autocomplete :: Bool,
    formName :: Text,
    rel :: Text,
    inputs :: [Input]
}

data Input = Input {
    -- Core attributes
    inputType :: Text,
    dirname :: Text,
    inputName :: Text,
    -- State
    value :: Text,
    inputAutocomplete :: Text,
    autofocus :: Bool,
    checked :: Bool,
    disabled :: Bool,
    readonly :: Bool,
    -- Input behaviour
    multiple :: Bool,
    formAction :: Maybe URI,
    formEnctype :: Maybe Text,
    formMethod :: Maybe Text,
    formValidate :: Bool,
    formTarget :: Maybe Text,
    inputMode :: Text,
    list :: [(Text, Text)],
    -- Validation
    range :: (Maybe Text, Maybe Text),
    step :: Maybe Text,
    lengthRange :: (Maybe Int, Maybe Int),
    pattern :: Maybe Regex,
    required :: Bool,
    -- Presentation
    placeholder :: Text,
    -- sort by tabindex?
    title :: Text,
    size :: Maybe Int,
    fileData :: FileSelector,
    imageData :: ImageData
}
data FileSelector = FileSelector {
    fileAccept :: [Text],
    fileCapture :: Text
}
data ImageData = ImageData {
    imgAlt :: Maybe Text,
    imgSize :: (Maybe Int, Maybe Int),
    imgSrc :: Maybe Text
}

attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
    | otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
attr'' :: Text -> Cursor -> (String -> a) -> Text -> a
attr'' n el cb def = attr' n el (cb . Txt.unpack) def
hasAttr :: Name -> Cursor -> Bool
hasAttr n = not . null . hasAttribute n
mAttr :: Text -> Cursor -> Maybe Text
mAttr n = listToMaybe . laxAttribute n
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
        action = attr'' "action" el (fromMaybe nullURI . parseURIReference) ".",
        enctype = attr "enctype" el "",
        method = attr "method" el "GET",
        validate = null $ hasAttribute "novalidate" el,
        target = attr "target" el "_self",
        acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
        autocomplete = hasAttr "autocomplete" el,
        formName = attr "name" el "",
        rel = attr "rel" el "",
        inputs = mapMaybe parseInput $ queryInputs el
      }
    | otherwise = Nothing

queryInputs :: Cursor -> [Cursor]
queryInputs form = (allInputs >=> inForm) form
  where
    allInputs = last' ancestor >=> descendant >=> laxElement "input"
    inForm = check nestedInForm &++
        check (\x -> laxAttribute "form" x == laxAttribute "id" form)
    nestedInForm x = listToMaybe ((ancestor >=> laxElement "form") x) == Just form
    last' f x | ret:_ <- reverse $ f x = [ret]
        | otherwise = []
    f &++ g = \x -> nub (f x ++ g x)
parseInput :: Cursor -> Maybe Input
parseInput el | _:_ <- laxElement "input" el = Just Input {
        inputType = attr "type" el "text",
        value = attr "value" el "",
        inputAutocomplete = attr "autocomplete" el "on",
        autofocus = hasAttr "autofocus" el,
        checked = hasAttr "checked" el,
        disabled = hasAttr "disabled" el,
        readonly = hasAttr "readonly" el,
        multiple = hasAttr "multiple" el,
        dirname = attr "dirname" el "",
        inputName = attr "name" el "",
        formAction = if hasAttr "formaction" el
            then attr' "formaction" el (parseURIReference . Txt.unpack) ""
            else Nothing,
        formEnctype = mAttr "formenctype" el,
        formMethod = mAttr "formmethod" el,
        formValidate = not $ hasAttr "formnovalidate" el,
        formTarget = mAttr "formtarget" el,
        inputMode = attr "inputmode" el "text",
        list = [], -- TODO
        range = (mAttr "min" el, mAttr "max" el),
        step = mAttr "step" el,
        lengthRange = (attr'' "minlength" el readMaybe "",
            attr'' "maxLength" el readMaybe ""),
        pattern = attr' "pattern" el
            (rightToMaybe . compile defaultCompOpt defaultExecOpt) ".*",
        required = hasAttr "required" el,
        placeholder = attr "placeholder" el "",
        title = attr "title" el "",
        size = attr'' "size" el readMaybe "",
        fileData = FileSelector {
            fileAccept = attr' "accept" el Txt.words "*",
            fileCapture = attr "capture" el ""
        },
        imageData = ImageData {
            imgAlt = mAttr "alt" el,
            imgSize = (attr'' "width" el readMaybe "",
                attr'' "height" el readMaybe ""),
            imgSrc = mAttr "src" el
        }
      }
    | otherwise = Nothing

read' :: Read a => Text -> a
read' = read . Txt.unpack
parseDocument :: Document -> Text -> Maybe Form
parseDocument doc n
    | Txt.all isDigit n = parseElement (forms doc' !! read' n)
    | el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el
    | otherwise = Nothing
  where
    forms = orSelf descendant >=> laxElement "form"
    doc' = fromDocument doc

rightToMaybe :: Either a b -> Maybe b
rightToMaybe (Left _)  = Nothing
rightToMaybe (Right x) = Just x
instance Eq Cursor where
    a == b = node a == node b