From 4e46e68a618bedfb66e14ca4e8b975198175ab44 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Fri, 1 Sep 2023 15:51:33 +1200 Subject: [PATCH] Add infrastructure for parsing documents. --- app/Main.hs | 19 +++++++++++--- bureaucromancy.cabal | 5 ++-- src/Text/HTML/Form.hs | 59 +++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 76 insertions(+), 7 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 076722f..4c76ac8 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -4,17 +4,30 @@ module Main where import Network.Wai.Handler.Warp import Network.Wai import Network.HTTP.Types +import System.Environment (getArgs) import Text.HTML.Form.WebApp +import Text.HTML.Form + import Data.Text.Lazy.Encoding (encodeUtf8) import Data.Text.Lazy (fromStrict) +import qualified Data.Text as Txt +import Text.XML (readFile, def) +import Data.Maybe (fromJust) main :: IO () main = do - runEnv 2018 servePage + args <- getArgs + let (filename, ident) = case args of + n:anchor:_ -> (n, anchor) + [n] -> (n, "0") + [] -> ("form.html", "0") + doc <- Text.XML.readFile def filename + runEnv 2018 $ servePage $ fromJust $ parseDocument doc $ Txt.pack ident -servePage req respond = do - ret <- renderPage Form (pathInfo req) (queryString req) +servePage :: Form -> Application +servePage form req respond = do + ret <- renderPage form (pathInfo req) (queryString req) case ret of Just txt -> respond $ responseLBS status200 [] $ encodeUtf8 $ fromStrict txt Nothing -> respond $ responseLBS status404 [] "Unknown input or operation!" diff --git a/bureaucromancy.cabal b/bureaucromancy.cabal index be79257..ace7556 100644 --- a/bureaucromancy.cabal +++ b/bureaucromancy.cabal @@ -71,7 +71,8 @@ library -- other-extensions: -- Other library packages from which modules are imported. - build-depends: base ^>=4.16.4.0, ginger, bytestring, text + build-depends: base ^>=4.16.4.0, ginger, + bytestring, text, xml-conduit, network-uri -- Directories containing source files. hs-source-dirs: src @@ -95,7 +96,7 @@ executable bureaucromancy -- Other library packages from which modules are imported. build-depends: base ^>=4.16.4.0, - bureaucromancy, warp, wai, http-types, text + bureaucromancy, warp, wai, http-types, text, xml-conduit -- Directories containing source files. hs-source-dirs: app diff --git a/src/Text/HTML/Form.hs b/src/Text/HTML/Form.hs index 9bf71c0..5b28fdc 100644 --- a/src/Text/HTML/Form.hs +++ b/src/Text/HTML/Form.hs @@ -1,3 +1,58 @@ -module Text.HTML.Form (Form(..)) where +{-# LANGUAGE OverloadedStrings #-} +module Text.HTML.Form (Form(..), Input(..), parseElement, parseDocument) where -data Form = Form +import Data.Text (Text) +import qualified Data.Text as Txt +import Text.XML.Cursor +import Text.XML (Document) + +import Data.Maybe (fromMaybe) +import Network.URI (parseURIReference, URI, nullURI) +import Data.Char (isDigit) + +data Form = Form { + action :: URI, + enctype :: Text, + method :: Text, + validate :: Bool, + target :: Text, + acceptCharset :: [Text], + autocomplete :: Bool, + name :: Text, + rel :: Text, + inputs :: [Input] +} + +data Input = Input + +attr :: Text -> Cursor -> Text -> Text +attr n el def | [ret] <- n `laxAttribute` el = ret + | otherwise = def +attr' :: Text -> Cursor -> (Text -> a) -> Text -> a +attr' n el cb def = cb $ attr n el def +parseElement :: Cursor -> Maybe Form +parseElement el | _:_ <- laxElement "form" el = Just Form { + action = attr' "action" el + (fromMaybe nullURI . parseURIReference . Txt.unpack) ".", + enctype = attr "enctype" el "", + method = attr "method" el "GET", + validate = null $ hasAttribute "novalidate" el, + target = attr "target" el "_self", + acceptCharset = attr' "accept-charset" el Txt.words "utf-8", + autocomplete = not $ null $ hasAttribute "autocomplete" el, + name = attr "name" el "", + rel = attr "rel" el "", + inputs = [] + } + | otherwise = Nothing + +read' :: Read a => Text -> a +read' = read . Txt.unpack +parseDocument :: Document -> Text -> Maybe Form +parseDocument doc n + | Txt.all isDigit n = parseElement (forms doc' !! read' n) + | el:_ <- (forms >=> attributeIs "name" n) doc' = parseElement el + | otherwise = Nothing + where + forms = orSelf descendant >=> laxElement "form" + doc' = fromDocument doc -- 2.30.2