From 12c81bf0660acab500c2c9c234803e0fd1a47a0d Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Sat, 23 Jul 2022 20:32:14 +1200 Subject: [PATCH] Bug/crash/feature fixes for submitting forms. Now longer crashes when attempting to save cookies either in-memory or on-disk. On-disk cookie storage is now resilient to changes in http-client internals. HURL now handles encoding form submissions as per caller-specified MIMEtype. HURL now supports a choice of HTTP method when submitting forms. Bugfix means now content negotiation, caching, & form data are now actually sent to the server. --- Main2.hs | 30 ++++++++ hurl.cabal | 19 ++++- src/Network/URI/CookiesDB.hs | 135 +++++++++++++++++++++++++++++++++++ src/Network/URI/Fetch.hs | 96 +++++++++++++++++++------ 4 files changed, 258 insertions(+), 22 deletions(-) create mode 100644 Main2.hs create mode 100644 src/Network/URI/CookiesDB.hs diff --git a/Main2.hs b/Main2.hs new file mode 100644 index 0000000..a8fa325 --- /dev/null +++ b/Main2.hs @@ -0,0 +1,30 @@ +{-# LANGUAGE OverloadedStrings #-} +module Main where + +import Network.URI.Fetch +-- Input parsing +import System.Environment (getArgs) +import Network.URI (parseURI, nullURI) +import Data.Maybe (fromJust) +-- Where to save files +import System.Directory (getCurrentDirectory) +import qualified Data.ByteString.Char8 as C8 + +main :: IO () +main = do + url:encoding:args <- getArgs + let url' = fromJust $ parseURI url + putStrLn encoding + session <- newSession + dir <- getCurrentDirectory + + resp <- submitURL' session ["*/*"] url' "POST" (C8.pack encoding) $map parseArg args + res <- saveDownload nullURI dir resp + putStrLn $ show res + +parseArg ('-':arg) | (key, '=':value) <- break (== '=') arg = (key, Left value) + | otherwise = (arg, Left "") +parseArg ('+':arg) | (key, '=':value) <- break (== '=') arg = (key, Right value) + | otherwise = (arg, Left "") +parseArg arg | (key, '=':value) <- break (== '=') arg = (key, Left value) + | otherwise = (arg, Left "") diff --git a/hurl.cabal b/hurl.cabal index 2526329..501fa5a 100644 --- a/hurl.cabal +++ b/hurl.cabal @@ -114,8 +114,8 @@ library if flag(http) CPP-options: -DWITH_HTTP_URI build-depends: http-client, http-types >= 0.12 && <0.13, - http-client-openssl, HsOpenSSL - other-modules: Network.URI.Cache + http-client-openssl, HsOpenSSL, time, cookie + other-modules: Network.URI.Cache, Network.URI.CookiesDB if flag(gemini) CPP-options: -DWITH_GEMINI_URI -DWITH_RAW_CONNECTIONS build-depends: HsOpenSSL, openssl-streams >= 1.2 && < 1.3, io-streams >= 1.5 && < 1.6 @@ -152,3 +152,18 @@ executable hurl default-language: Haskell2010 ghc-options: -threaded + +executable hurl-post + -- .hs file containing the Main module + main-is: Main2.hs + + -- Other library packages from which modules are imported + build-depends: base >= 4.9 && <5, hurl, network-uri, directory, bytestring + + -- Directories containing source files. + hs-source-dirs: . + + -- Base languages which the package is written in. + default-language: Haskell2010 + + ghc-options: -threaded diff --git a/src/Network/URI/CookiesDB.hs b/src/Network/URI/CookiesDB.hs new file mode 100644 index 0000000..b7b5365 --- /dev/null +++ b/src/Network/URI/CookiesDB.hs @@ -0,0 +1,135 @@ +-- | Read & write Netscape Navigator cookies format. +{-# LANGUAGE OverloadedStrings, RecordWildCards #-} +module Network.URI.CookiesDB (readCookies, writeCookies) where +import qualified Data.ByteString as B +import qualified Data.ByteString.Char8 as C +import Network.HTTP.Client +import System.Directory (doesFileExist) + +import Web.Cookie (formatCookieExpires, parseCookieExpires) +import Data.Maybe (fromMaybe, mapMaybe) +import Data.Time.Clock (nominalDay, getCurrentTime, addUTCTime, UTCTime) + +readCookies :: FilePath -> IO CookieJar +readCookies filepath = do + exists <- doesFileExist filepath + if exists then do + file <- B.readFile filepath + now <- getCurrentTime + return $ createCookieJar $ readCookies' now file + else return $ createCookieJar [] +readCookies' :: UTCTime -> B.ByteString -> [Cookie] +readCookies' now = mapMaybe (readCookie' now) . C.lines +readCookie' :: UTCTime -> B.ByteString -> Maybe Cookie +readCookie' now = readCookie now . C.split '\t' +readCookie :: UTCTime -> [B.ByteString] -> Maybe Cookie +readCookie now [domain, _, path, secure, expiration, name, value] = + Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + + cookie_creation_time = now, + cookie_last_access_time = now, + cookie_persistent = True, + cookie_host_only = False, + cookie_http_only = False + } +readCookie now [domain, _, path, secure, expiration, name, value, httpOnly, session] = + Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + cookie_http_only = b httpOnly, + cookie_persistent = not $ b session, + + cookie_creation_time = now, + cookie_last_access_time = now, + cookie_host_only = False + } +readCookie now [domain, _, path, secure, expiration, name, value, + httpOnly, session, sameSite] = Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + cookie_http_only = b httpOnly, + cookie_persistent = not $ b session, + cookie_host_only = sameSite == "STRICT", + + cookie_creation_time = now, + cookie_last_access_time = now + } +readCookie now [domain, _, path, secure, expiration, name, value, + httpOnly, session, sameSite, _] = Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + cookie_http_only = b httpOnly, + cookie_persistent = not $ b session, + cookie_host_only = sameSite == "STRICT", + + cookie_creation_time = now, + cookie_last_access_time = now + } +readCookie now [domain, _, path, secure, expiration, name, value, + httpOnly, session, sameSite, _, creation] = Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + cookie_http_only = b httpOnly, + cookie_persistent = not $ b session, + cookie_host_only = sameSite == "STRICT", + cookie_creation_time = fromMaybe now $ parseCookieExpires creation, + cookie_last_access_time = fromMaybe now $ parseCookieExpires creation + } +readCookie now (domain:_:path:secure:expiration:name:value: + httpOnly:session:sameSite:_:creation:access:_) = Just Cookie { + cookie_domain = domain, + cookie_path = path, + cookie_secure_only = b secure, + cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration, + cookie_name = name, + cookie_value = value, + cookie_http_only = b httpOnly, + cookie_persistent = not $ b session, + cookie_host_only = sameSite == "STRICT", + cookie_creation_time = fromMaybe now $ parseCookieExpires creation, + cookie_last_access_time = fromMaybe now $ parseCookieExpires access + } +readCookie _ _ = Nothing +b "TRUE" = True +b _ = False + +writeCookies :: FilePath -> CookieJar -> Bool -> IO () +writeCookies filepath cookies isSession = do + B.writeFile filepath $ writeCookies' isSession $ destroyCookieJar cookies +writeCookies' :: Bool -> [Cookie] -> B.ByteString +writeCookies' isSession = C.unlines . map writeCookie' . filter shouldSaveCookie + where + shouldSaveCookie | isSession = cookie_persistent + | otherwise = const True +writeCookie' :: Cookie -> B.ByteString +writeCookie' Cookie {..} = C.intercalate "\t" [ + cookie_domain, "TRUE", cookie_path, b' cookie_secure_only, + formatCookieExpires cookie_expiry_time, cookie_name, cookie_value, + b' cookie_http_only, b' $ not cookie_persistent, + if cookie_host_only then "STRICT" else "LAX", "MEDIUM", + formatCookieExpires cookie_creation_time, + formatCookieExpires cookie_last_access_time] +b' True = "TRUE" +b' False = "FALSE" diff --git a/src/Network/URI/Fetch.hs b/src/Network/URI/Fetch.hs index 45404b9..a243ab9 100644 --- a/src/Network/URI/Fetch.hs +++ b/src/Network/URI/Fetch.hs @@ -3,7 +3,7 @@ -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession, - fetchURL, fetchURL', fetchURLs, submitURL, mimeERR, htmlERR, + fetchURL, fetchURL', fetchURLs, submitURL, submitURL', mimeERR, htmlERR, dispatchByMIME, appsForMIME, Application(..), dispatchByApp, saveDownload, downloadToURI, -- logging API @@ -13,16 +13,20 @@ import Network.URI.Types import qualified Data.Text as Txt import Data.Text (Text) +import qualified Data.Text.Encoding as Txt import Network.URI import qualified Data.ByteString as Strict import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Char8 as C8 +import qualified Data.ByteString.Builder as Builder import Network.URI.Charset import Control.Exception import System.IO.Error (isEOFError) import Control.Concurrent.Async (forConcurrently) +import qualified Data.Maybe as M + -- for about: URIs & port parsing, all standard lib import Data.Maybe (fromMaybe, listToMaybe, isJust) import Data.Either (isLeft) @@ -44,6 +48,7 @@ import Data.List as L #ifdef WITH_HTTP_URI import qualified Network.HTTP.Client as HTTP +import qualified Network.HTTP.Client.MultipartFormData as HTTP import qualified Network.HTTP.Client.OpenSSL as TLS import qualified OpenSSL.Session as TLS import Network.HTTP.Types @@ -51,6 +56,7 @@ import Data.List (intercalate) import Control.Concurrent (forkIO) import Network.URI.Cache +import Network.URI.CookiesDB #endif #ifdef WITH_RAW_CONNECTIONS @@ -132,12 +138,10 @@ newSession' appname = do TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt - cookiesDir <- getXdgDirectory XdgData "nz.geek.adrian.hurl.cookies" + cookiesDir <- getXdgDirectory XdgData "nz.geek.adrian.hurl.cookies2" let cookiesPath' = cookiesDir appname - cookiesExist <- doesFileExist cookiesPath' - cookies <- if cookiesExist then readMaybe <$> readFile cookiesPath' else return Nothing + cookies' <- readCookies cookiesPath' now <- getCurrentTime - let cookies' = HTTP.createCookieJar $ fromMaybe [] cookies cookieJar <- newMVar $ HTTP.evictExpiredCookies cookies' now cookieJar' <- newMVar $ HTTP.createCookieJar [] #endif @@ -217,24 +221,75 @@ mimeERR = "txt/x-error\t" htmlERR = "html/x-error\t" submitURL :: Session -> [String] -> URI -> Text -> String -> IO (URI, String, Either Text ByteString) +-- | See submitURL', preserved for backwards compatability. +-- This is a little more cumbersome to use, & doesn't support file uploads. +-- Was designed naively based on convenience of initial caller. +submitURL s a u m q = + submitURL' s a u (Txt.encodeUtf8 m) "application/x-www-form-urlencoded" $ + Prelude.map parseQuery $ Txt.splitOn "&" $ Txt.pack q + where + parseQuery q = let (key, value) = Txt.breakOn "=" q in if Txt.null value + then (decode key, Left "") + else (decode key, Left $ decode $ Txt.tail value) + decode = unEscapeString . Txt.unpack +-- | Uploads given key-value pairs to the specified URL using the specified HTTP method & encoding. +-- The key-value pairs may specify filepaths, in which case the method must be "POST" +-- and the encoding must be "multipart/form-data" for that data to get sent. +-- +-- Unsupported encodings (values other than "application/x-www-form-urlencoded", +-- "text/plain", or "multipart/form-data") omits the key-value pairs from the request. +submitURL' :: Session -> [String] -> URI -> Strict.ByteString -> Strict.ByteString -> + [(String, Either String FilePath)] -> IO (URI, String, Either Text ByteString) #ifdef WITH_HTTP_URI -submitURL session accept uri "POST" query | uriScheme uri `elem` ["http:", "https:"] = do +addHTTPBody mime body req = return req { + HTTP.requestHeaders = (hContentType, mime) : + Prelude.filter (\(x, _) -> x /= hContentType) (HTTP.requestHeaders req), + HTTP.requestBody = HTTP.RequestBodyBS $ C8.pack body + } +packQuery :: [(String, Either String FilePath)] -> C8.ByteString -> HTTP.Request -> IO HTTP.Request +packQuery query mime@"application/x-www-form-urlencoded" = + addHTTPBody mime $ encodeQuery query +packQuery query mime@"text/plain" = addHTTPBody mime $ + Prelude.unlines [value | (key, Left value) <- query, not $ null value] +packQuery q "multipart/form-data" = HTTP.formDataBody $ Prelude.map encodePart q + where + encodePart (key, Left value) = HTTP.partBS (Txt.pack key) (C8.pack value) + encodePart (key, Right value) = + -- C:\fakepath\ is part of the webstandards now & I might as well use it. + -- Ancient browsers exposed the full filepath which was a security vulnerability. + -- Now to avoid breaking servers relying on this behaviour we send + -- a fake Windows filepath. + HTTP.partFileRequestBodyM (Txt.pack key) ("C:\\fakepath\\" ++ takeFileName value) $ do + size <- fromInteger <$> withBinaryFile value ReadMode hFileSize + body <- B.readFile value + return $ HTTP.RequestBodyBuilder size $ Builder.lazyByteString body +packQuery _ _ = return -- Do not upload data if requested to do so in an invalid format. +submitURL' session accept uri method encoding query | uriScheme uri `elem` ["http:", "https:"] = do -- HURL is very strict on when it allows cookies to be set: Only POST HTTP requests are considered consent. -- For the sake of most webframeworks' CSRF protection, cookies from retrieving the form are retroactively set. csrfCookies <- case retroactiveCookies session of { Just cookies -> Just <$> readMVar cookies; Nothing -> return Nothing } - fetchHTTPCached session accept uri (\req -> req { + fetchHTTPCached session False accept uri (\req -> do + ret <- packQuery query encoding req + return ret { HTTP.cookieJar = firstJust csrfCookies $ HTTP.cookieJar req, - HTTP.method = "POST", - HTTP.requestBody = HTTP.RequestBodyBS $ C8.pack query - }) $ \resp -> do + HTTP.method = method + }) $ \resp -> if method /= "POST" then return () else do let cookies = HTTP.responseCookieJar resp - putMVar (globalCookieJar session) cookies - writeFile (cookiesPath session) $ show $ HTTP.destroyCookieJar cookies + swapMVar (globalCookieJar session) cookies + writeCookies (cookiesPath session) cookies False #endif -submitURL session mimes uri _method query = fetchURL' session mimes uri { uriQuery = '?':query } +submitURL' session mimes uri _method _encoding query = fetchURL' session mimes uri { + uriQuery = '?':encodeQuery query } +encodeQuery :: [(String, Either String FilePath)] -> String +encodeQuery query = intercalate "&" $ M.mapMaybe encodePart query + where + encodePart (key, Left "") = Just $ escape key + encodePart (key, Left value) = Just (escape key ++ '=':escape value) + encodePart _ = Nothing + escape = escapeURIString isUnescapedInURIComponent -- | As per `fetchURL`, but also returns the redirected URI. fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString) @@ -289,10 +344,11 @@ fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath #ifdef WITH_HTTP_URI fetchURL' session accept uri | uriScheme uri `elem` ["http:", "https:"] = - fetchHTTPCached session accept uri id saveCookies + fetchHTTPCached session (cachingEnabled session) accept uri return saveCookies where saveCookies resp - | Just cookies <- retroactiveCookies session = putMVar cookies $ HTTP.responseCookieJar resp + | Just cookies <- retroactiveCookies session = + void $swapMVar cookies $HTTP.responseCookieJar resp | otherwise = return () #endif @@ -395,14 +451,14 @@ dispatchByApp _ _ _ _ = return False #endif #ifdef WITH_HTTP_URI -fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do - cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing) +fetchHTTPCached session shouldCache accept@(defaultMIME:_) uri cbReq cbResp = do + cached <- if shouldCache then readCacheHTTP uri else return (Nothing, Nothing) response <- case cached of (Just (mime, body), Nothing) -> return $ Right (mime, body) (cached, cachingHeaders) -> do request <- HTTP.requestFromURI uri cookieJar <- readMVar $ globalCookieJar session - let request' = cbReq $ request { + request'<- cbReq request { HTTP.cookieJar = Just $ cookieJar, HTTP.requestHeaders = [ ("Accept", C8.pack $ intercalate ", " accept), @@ -410,7 +466,7 @@ fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do ] ++ fromMaybe [] cachingHeaders, HTTP.redirectCount = 0 } - response <- HTTP.httpLbs request $ managerHTTP session + response <- HTTP.httpLbs request' $ managerHTTP session cbResp response case ( HTTP.responseStatus response, @@ -442,7 +498,7 @@ fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime in return $ resolveCharset' uri mime' body `catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e) -fetchHTTPCached session [] uri _ _ = +fetchHTTPCached session _ [] uri _ _ = return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ UnsupportedMIME "") #endif -- 2.30.2