From ac4767de156fadaf2af635723a36ad6dce96077e Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Tue, 22 Dec 2020 20:49:38 +1300 Subject: [PATCH] Integrate new HTTP cache. And while I'm at it, implement HTTP redirects myself to report correct URIs to the caller & cache. --- src/Network/URI/Cache.hs | 43 ++++++++++++++++-------- src/Network/URI/Fetch.hs | 71 +++++++++++++++++++++++++++++----------- 2 files changed, 80 insertions(+), 34 deletions(-) diff --git a/src/Network/URI/Cache.hs b/src/Network/URI/Cache.hs index 01b47b1..908597f 100644 --- a/src/Network/URI/Cache.hs +++ b/src/Network/URI/Cache.hs @@ -1,5 +1,5 @@ {-# LANGUAGE OverloadedStrings #-} -module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where +module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP, cleanCacheHTTP) where import Network.HTTP.Client import Network.HTTP.Types.Status import Network.HTTP.Types.Header @@ -15,6 +15,7 @@ import Data.ByteString.Lazy as Lazy import System.IO as IO import System.FilePath import System.Directory +import qualified Data.Text as Txt import Data.Maybe import Data.Char (isSpace) @@ -70,7 +71,7 @@ cacheHTTP uri resp | shouldCacheHTTP resp = do | otherwise = [] cacheHTTP _ _ = return () -readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString), Maybe ResponseHeaders) +readCacheHTTP :: URI -> IO (Maybe (Txt.Text, Lazy.ByteString), Maybe ResponseHeaders) readCacheHTTP uri = do cached <- readKV $ uriToString' uri case cached of @@ -86,11 +87,25 @@ readCacheHTTP uri = do -- Cache entry has expired, delete. when (isJust headers') $ deleteKV $ uriToString' uri - return (Just (mime, body), headers') + return (Just (Txt.pack mime, body), headers') _ -> return (Nothing, Just []) +cleanCacheHTTP = void $ do + now <- getCurrentTime + let tombstone = now + dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl" + dirExists <- doesDirectoryExist (dir "http") + files <- if dirExists then listDirectory (dir "http") else return [] + forM files $ \file -> do + exists <- doesFileExist file + when exists $ IO.withFile file ReadMode $ \h -> do + (headers, _) <- parseHeaders h + let hasHeader h = isJust $ lookup h headers + validatable = hasHeader "modified" || hasHeader "etag" + expires = fromMaybe tombstone (readMaybe =<< lookup "expires" headers) + when (now >= expires && not validatable) $ removeFile file ------ --- Key-value storage @@ -113,17 +128,17 @@ openKV key mode act = do if exists then Just <$> IO.withFile path mode act else return Nothing readKV key = openKV key ReadMode parseHeaders - where - parseHeaders h = do - line <- IO.hGetLine h - case L.break isSpace $ strip' line of - ("", "") -> do - body <- Lazy.hGetContents h - return ([], body) - (key, value) -> do - (headers, body) <- parseHeaders h - return ((key, strip' value):headers, body) - strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace + +parseHeaders h = do + line <- IO.hGetLine h + case L.break isSpace $ strip' line of + ("", "") -> do + body <- Lazy.hGetContents h + return ([], body) + (key, value) -> do + (headers, body) <- parseHeaders h + return ((key, strip' value):headers, body) +strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace writeKV key (headers, body) = void $ openKV key WriteMode $ \h -> do forM headers $ \(key, value) -> do diff --git a/src/Network/URI/Fetch.hs b/src/Network/URI/Fetch.hs index 12fea99..b643b61 100644 --- a/src/Network/URI/Fetch.hs +++ b/src/Network/URI/Fetch.hs @@ -2,7 +2,7 @@ {-# LANGUAGE OverloadedStrings #-} -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. -module Network.URI.Fetch(Session(locale, aboutPages, redirectCount), newSession, +module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession, fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR, dispatchByMIME, saveDownload, downloadToURI, -- logging API @@ -44,6 +44,9 @@ import qualified Network.HTTP.Client.OpenSSL as TLS import qualified OpenSSL.Session as TLS import Network.HTTP.Types import Data.List (intercalate) +import Control.Concurrent (forkIO) + +import Network.URI.Cache #endif #ifdef WITH_RAW_CONNECTIONS @@ -89,7 +92,9 @@ data Session = Session { -- | Log of timestamped/profiled URL requests requestLog :: MVar [LogRecord], -- | How many redirects to follow for Gemini or HTTP(S) requests - redirectCount :: Int + redirectCount :: Int, + -- | Whether to cache network responses, avoiding sending requests + cachingEnabled :: Bool } data LogRecord = LogRecord { @@ -149,7 +154,8 @@ newSession' appname = do locale = ietfLocale, aboutPages = [], requestLog = log, - redirectCount = 5 + redirectCount = 5, + cachingEnabled = True } llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key] @@ -231,23 +237,48 @@ fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath #ifdef WITH_HTTP_URI fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do - request <- HTTP.requestFromURI uri - response <- HTTP.httpLbs request { - HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. - HTTP.requestHeaders = [ - ("Accept", C8.pack $ intercalate ", " accept), - ("Accept-Language", C8.pack $ intercalate ", " $ locale session) - ], - HTTP.redirectCount = redirectCount session - } $ managerHTTP session - return $ case ( - HTTP.responseBody response, - [val | ("content-type", val) <- HTTP.responseHeaders response] - ) of - ("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response) - (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype - in resolveCharset' uri (L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response - (response, []) -> (uri, defaultMIME, Right response) + cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing) + response <- case cached of + (Just (mime, body), Nothing) -> return $ Right (mime, body) + (cached, cachingHeaders) -> do + request <- HTTP.requestFromURI uri + response <- HTTP.httpLbs request { + HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. + HTTP.requestHeaders = [ + ("Accept", C8.pack $ intercalate ", " accept), + ("Accept-Language", C8.pack $ intercalate ", " $ locale session) + ] ++ fromMaybe [] cachingHeaders, + HTTP.redirectCount = 0 + } $ managerHTTP session + case ( + HTTP.responseStatus response, + HTTP.responseBody response, + [val | ("content-type", val) <- HTTP.responseHeaders response] + ) of + (Status 304 _, _, _) | Just cached'@(_, body) <- cached -> do + cacheHTTP uri $ response { HTTP.responseBody = body } + return $ Right cached' + -- Manually handle redirects so the caller & HTTP cache gets the correct URI. + (Status code _, _, _) | code > 300 && code < 400, + Just location <- lookup "location" $ HTTP.responseHeaders response, + Just uri' <- parseURIReference $ C8.unpack location -> + return $ Left $ relativeTo uri' uri + (Status _ msg, "", _) -> return $ Right (Txt.pack mimeERR, B.fromStrict msg) + (_, body, (mimetype:_)) -> do + cacheHTTP uri response + forkIO cleanCacheHTTP -- Try to keep diskspace down... + + let mime = Txt.toLower $ convertCharset "utf-8" mimetype + return $ Right (mime, body) + (_, response, []) -> return $ Right (Txt.pack defaultMIME, response) + + case response of + Left redirect -> + let session' = session { redirectCount = redirectCount session - 1 } + in fetchURL' session' accept redirect + Right (mime, body) -> + let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime + in return $ resolveCharset' uri mime' body `catches` [ Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e), Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg) -- 2.30.2