~alcinnz/bureaucromancy

ref: 4e46e68a618bedfb66e14ca4e8b975198175ab44 bureaucromancy/src/Text/HTML/Form.hs -rw-r--r-- 1.8 KiB
4e46e68a — Adrian Cochrane Add infrastructure for parsing documents. 1 year, 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
{-# LANGUAGE OverloadedStrings #-}
module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where

import Data.Text (Text)
import qualified Data.Text as Txt
import Text.XML.Cursor
import Text.XML (Document)

import Data.Maybe (fromMaybe)
import Network.URI (parseURIReference, URI, nullURI)
import Data.Char (isDigit)

data Form = Form {
    action :: URI,
    enctype :: Text,
    method :: Text,
    validate :: Bool,
    target :: Text,
    acceptCharset :: [Text],
    autocomplete :: Bool,
    name :: Text,
    rel :: Text,
    inputs :: [Input]
}

data Input = Input

attr :: Text -> Cursor -> Text -> Text
attr n el def | [ret] <- n `laxAttribute` el = ret
    | otherwise = def
attr' :: Text -> Cursor -> (Text -> a) -> Text -> a
attr' n el cb def = cb $ attr n el def
parseElement :: Cursor -> Maybe Form
parseElement el | _:_ <- laxElement "form" el = Just Form {
        action = attr' "action" el
            (fromMaybe nullURI . parseURIReference . Txt.unpack) ".",
        enctype = attr "enctype" el "",
        method = attr "method" el "GET",
        validate = null $ hasAttribute "novalidate" el,
        target = attr "target" el "_self",
        acceptCharset = attr' "accept-charset" el Txt.words "utf-8",
        autocomplete = not $ null $ hasAttribute "autocomplete" el,
        name = attr "name" el "",
        rel = attr "rel" el "",
        inputs = []
      }
    | otherwise = Nothing

read' :: Read a => Text -> a
read' = read . Txt.unpack
parseDocument :: Document -> Text -> Maybe Form
parseDocument doc n
    | Txt.all isDigit n = parseElement (forms doc' !! read' n)
    | el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el
    | otherwise = Nothing
  where
    forms = orSelf descendant >=> laxElement "form"
    doc' = fromDocument doc