{-# LANGUAGE OverloadedStrings #-} module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where import Network.HTTP.Client import Network.HTTP.Types.Status import Network.HTTP.Types.Header -- For escaping filepaths, since I already have this dependency import Network.URI (escapeURIString, isUnescapedInURIComponent, URI, uriToString) import Data.Time.Clock import Data.Time.Format import Data.ByteString as Strict import Data.ByteString.Char8 as C import Data.ByteString.Lazy as Lazy import System.IO as IO import System.FilePath import System.Directory import Data.Maybe import Data.Char (isSpace) import Data.List as L import Control.Monad (forM) import Text.Read (readMaybe) strip = C.dropWhile isSpace -- FIXME Upgrade bytestring dependency for a real strip function. httpCacheDirective :: Response b -> Strict.ByteString -> Maybe Strict.ByteString httpCacheDirective response key | Just header <- lookup hCacheControl $ responseHeaders response = let directives = Prelude.map strip $ C.split ',' header in if key `Prelude.elem` directives then Just "" else listToMaybe $ mapMaybe (C.stripPrefix $ C.snoc key '=') directives | otherwise = Nothing shouldCacheHTTP :: Response b -> Bool -- IETF RFC7234 Section 3 shouldCacheHTTP response = -- Assume GET statusCode (responseStatus response) `Prelude.elem` [200, 201, 404] && -- Supported response code isNothing (httpCacheDirective response "no-store") -- Honor no-store -- This is a private cache, don't check for Cache-Control: private -- Also, I'll cache anything for supported response codes, regardless of explicit expiry times. uriToString' uri = uriToString id uri "" parseHTTPTime str | ',' `L.elem` str = parseTimeM True defaultTimeLocale rfc822DateFormat str parseHTTPTime str = parseTimeM True defaultTimeLocale "%_d %b %Y %H:%M:%S %Z" str secondsFromNow i = do now <- getCurrentTime -- This ugliness required because regex depends on outdated version of time. return $ addUTCTime (fromRational $ toRational $ secondsToDiffTime i) now computeExpires :: Response a -> IO UTCTime computeExpires resp | Just header <- lookup hExpires $ responseHeaders resp, Just time <- parseHTTPTime $ C.unpack header = return time | Just pragma <- httpCacheDirective resp "max-age", Just seconds <- readMaybe $ C.unpack pragma = secondsFromNow seconds | otherwise = secondsFromNow (60*60*24) -- One day cacheHTTP :: URI -> Response Lazy.ByteString -> IO () cacheHTTP uri resp | shouldCacheHTTP resp = do expires <- computeExpires resp writeKV (uriToString' uri) ([ ("mime", C.unpack $ fromMaybe "application/octet-stream" $ lookup "content-type" $ responseHeaders resp), ("expires", show expires) ], responseBody resp) cacheHTTP _ _ = return () readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString)) readCacheHTTP uri = do (headers, body) <- readKV (uriToString' uri) let mime = fromMaybe "application/octet-stream" $ lookup "mime" headers case readMaybe =<< lookup "expires" headers of Nothing -> return Nothing Just expiry -> do now <- getCurrentTime if expiry <= now then return $ Just (mime, body) else do -- TODO validate via a HEAD request return Nothing ------ --- Key-value storage ------ readKV :: String -> IO ([(String, String)], Lazy.ByteString) writeKV :: String -> ([(String, String)], Lazy.ByteString) -> IO () openKV :: String -> IO.IOMode -> (Handle -> IO r) -> IO r openKV key mode act = do dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl" IO.withFile (dir "http" escapeURIString isUnescapedInURIComponent key) mode act readKV key = openKV key ReadMode parseHeaders where parseHeaders h = do line <- IO.hGetLine h case L.break isSpace $ strip' line of ("", "") -> do body <- Lazy.hGetContents h return ([], body) (key, value) -> do (headers, body) <- parseHeaders h return ((key, strip' value):headers, body) strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace writeKV key (headers, body) = openKV key WriteMode $ \h -> do forM headers $ \(key, value) -> do IO.hPutStrLn h (key++' ':value) IO.hPutStrLn h "" Lazy.hPut h body