~alcinnz/hurl

ac4767de156fadaf2af635723a36ad6dce96077e — Adrian Cochrane 3 years ago 663fb7a
Integrate new HTTP cache.

And while I'm at it, implement HTTP redirects myself to report correct URIs to the caller & cache.
2 files changed, 80 insertions(+), 34 deletions(-)

M src/Network/URI/Cache.hs
M src/Network/URI/Fetch.hs
M src/Network/URI/Cache.hs => src/Network/URI/Cache.hs +29 -14
@@ 1,5 1,5 @@
{-# LANGUAGE OverloadedStrings #-}
module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where
module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP, cleanCacheHTTP) where
import Network.HTTP.Client
import Network.HTTP.Types.Status
import Network.HTTP.Types.Header


@@ 15,6 15,7 @@ import Data.ByteString.Lazy as Lazy
import System.IO as IO
import System.FilePath
import System.Directory
import qualified Data.Text as Txt

import Data.Maybe
import Data.Char (isSpace)


@@ 70,7 71,7 @@ cacheHTTP uri resp | shouldCacheHTTP resp = do
        | otherwise = []
cacheHTTP _ _ = return ()

readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString), Maybe ResponseHeaders)
readCacheHTTP :: URI -> IO (Maybe (Txt.Text, Lazy.ByteString), Maybe ResponseHeaders)
readCacheHTTP uri = do
    cached <- readKV $ uriToString' uri
    case cached of


@@ 86,11 87,25 @@ readCacheHTTP uri = do
            -- Cache entry has expired, delete.
            when (isJust headers') $ deleteKV $ uriToString' uri

            return (Just (mime, body), headers')
            return (Just (Txt.pack mime, body), headers')

        _ -> return (Nothing, Just [])

cleanCacheHTTP = void $ do
    now <- getCurrentTime
    let tombstone = now

    dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
    dirExists <- doesDirectoryExist (dir </> "http")
    files <- if dirExists then listDirectory (dir </> "http") else return []
    forM files $ \file -> do
        exists <- doesFileExist file
        when exists $ IO.withFile file ReadMode $ \h -> do
            (headers, _) <- parseHeaders h
            let hasHeader h = isJust $ lookup h headers
                validatable = hasHeader "modified" || hasHeader "etag"
                expires = fromMaybe tombstone (readMaybe =<< lookup "expires" headers)
            when (now >= expires && not validatable) $ removeFile file

------
--- Key-value storage


@@ 113,17 128,17 @@ openKV key mode act = do
    if exists then Just <$> IO.withFile path mode act else return Nothing

readKV key = openKV key ReadMode parseHeaders
  where
    parseHeaders h = do
        line <- IO.hGetLine h
        case L.break isSpace $ strip' line of
            ("", "") -> do
                body <- Lazy.hGetContents h
                return ([], body)
            (key, value) -> do
                (headers, body) <- parseHeaders h
                return ((key, strip' value):headers, body)
    strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace

parseHeaders h = do
    line <- IO.hGetLine h
    case L.break isSpace $ strip' line of
        ("", "") -> do
            body <- Lazy.hGetContents h
            return ([], body)
        (key, value) -> do
            (headers, body) <- parseHeaders h
            return ((key, strip' value):headers, body)
strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace

writeKV key (headers, body) = void $ openKV key WriteMode $ \h -> do
    forM headers $ \(key, value) -> do

M src/Network/URI/Fetch.hs => src/Network/URI/Fetch.hs +51 -20
@@ 2,7 2,7 @@
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session(locale, aboutPages, redirectCount), newSession,
module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession,
    fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR,
    dispatchByMIME, saveDownload, downloadToURI,
    -- logging API


@@ 44,6 44,9 @@ import qualified Network.HTTP.Client.OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import           Network.HTTP.Types
import           Data.List (intercalate)
import           Control.Concurrent (forkIO)

import           Network.URI.Cache
#endif

#ifdef WITH_RAW_CONNECTIONS


@@ 89,7 92,9 @@ data Session = Session {
    -- | Log of timestamped/profiled URL requests
    requestLog :: MVar [LogRecord],
    -- | How many redirects to follow for Gemini or HTTP(S) requests
    redirectCount :: Int
    redirectCount :: Int,
    -- | Whether to cache network responses, avoiding sending requests
    cachingEnabled :: Bool
}

data LogRecord = LogRecord {


@@ 149,7 154,8 @@ newSession' appname = do
        locale = ietfLocale,
        aboutPages = [],
        requestLog = log,
        redirectCount = 5
        redirectCount = 5,
        cachingEnabled = True
    }

llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]


@@ 231,23 237,48 @@ fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath 

#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
    request <- HTTP.requestFromURI uri
    response <- HTTP.httpLbs request {
            HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
            HTTP.requestHeaders = [
                ("Accept", C8.pack $ intercalate ", " accept),
                ("Accept-Language", C8.pack $ intercalate ", " $ locale session)
            ],
            HTTP.redirectCount = redirectCount session
        } $ managerHTTP session
    return $ case (
            HTTP.responseBody response,
            [val | ("content-type", val) <- HTTP.responseHeaders response]
      ) of
        ("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
        (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype
            in resolveCharset' uri (L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response
        (response, []) -> (uri, defaultMIME, Right response)
    cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing)
    response <- case cached of
        (Just (mime, body), Nothing) -> return $ Right (mime, body)
        (cached, cachingHeaders) -> do
            request <- HTTP.requestFromURI uri
            response <- HTTP.httpLbs request {
                    HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
                    HTTP.requestHeaders = [
                        ("Accept", C8.pack $ intercalate ", " accept),
                        ("Accept-Language", C8.pack $ intercalate ", " $ locale session)
                    ] ++ fromMaybe [] cachingHeaders,
                    HTTP.redirectCount = 0
                } $ managerHTTP session
            case (
                    HTTP.responseStatus response,
                    HTTP.responseBody response,
                    [val | ("content-type", val) <- HTTP.responseHeaders response]
              ) of
                (Status 304 _, _, _) | Just cached'@(_, body) <- cached -> do
                    cacheHTTP uri $ response { HTTP.responseBody = body }
                    return $ Right cached'
                -- Manually handle redirects so the caller & HTTP cache gets the correct URI.
                (Status code _, _, _) | code > 300 && code < 400,
                        Just location <- lookup "location" $ HTTP.responseHeaders response,
                        Just uri' <- parseURIReference $ C8.unpack location ->
                    return $ Left $ relativeTo uri' uri
                (Status _ msg, "", _) -> return $ Right (Txt.pack mimeERR, B.fromStrict msg)
                (_, body, (mimetype:_)) -> do
                    cacheHTTP uri response
                    forkIO cleanCacheHTTP -- Try to keep diskspace down...

                    let mime = Txt.toLower $ convertCharset "utf-8" mimetype
                    return $ Right (mime, body)
                (_, response, []) -> return $ Right (Txt.pack defaultMIME, response)

    case response of
        Left redirect ->
            let session' = session { redirectCount = redirectCount session - 1 }
            in fetchURL' session' accept redirect
        Right (mime, body) ->
            let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
            in return $ resolveCharset' uri mime' body
  `catches` [
    Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e),
    Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg)