{-# LANGUAGE OverloadedStrings #-}
module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where
import Network.HTTP.Client
import Network.HTTP.Types.Status
import Network.HTTP.Types.Header
-- For escaping filepaths, since I already have this dependency
import Network.URI (escapeURIString, isUnescapedInURIComponent, URI, uriToString)
import Data.Time.Clock
import Data.Time.Format
import Data.ByteString as Strict
import Data.ByteString.Char8 as C
import Data.ByteString.Lazy as Lazy
import System.IO as IO
import System.FilePath
import System.Directory
import Data.Maybe
import Data.Char (isSpace)
import Data.List as L
import Control.Monad (forM)
import Text.Read (readMaybe)
strip = C.dropWhile isSpace -- FIXME Upgrade bytestring dependency for a real strip function.
httpCacheDirective :: Response b -> Strict.ByteString -> Maybe Strict.ByteString
httpCacheDirective response key | Just header <- lookup hCacheControl $ responseHeaders response =
let directives = Prelude.map strip $ C.split ',' header
in if key `Prelude.elem` directives
then Just ""
else listToMaybe $ mapMaybe (C.stripPrefix $ C.snoc key '=') directives
| otherwise = Nothing
shouldCacheHTTP :: Response b -> Bool
-- IETF RFC7234 Section 3
shouldCacheHTTP response = -- Assume GET
statusCode (responseStatus response) `Prelude.elem` [200, 201, 404] && -- Supported response code
isNothing (httpCacheDirective response "no-store") -- Honor no-store
-- This is a private cache, don't check for Cache-Control: private
-- Also, I'll cache anything for supported response codes, regardless of explicit expiry times.
uriToString' uri = uriToString id uri ""
parseHTTPTime str | ',' `L.elem` str = parseTimeM True defaultTimeLocale rfc822DateFormat str
parseHTTPTime str = parseTimeM True defaultTimeLocale "%_d %b %Y %H:%M:%S %Z" str
secondsFromNow i = do
now <- getCurrentTime
-- This ugliness required because regex depends on outdated version of time.
return $ addUTCTime (fromRational $ toRational $ secondsToDiffTime i) now
computeExpires :: Response a -> IO UTCTime
computeExpires resp
| Just header <- lookup hExpires $ responseHeaders resp,
Just time <- parseHTTPTime $ C.unpack header = return time
| Just pragma <- httpCacheDirective resp "max-age",
Just seconds <- readMaybe $ C.unpack pragma = secondsFromNow seconds
| otherwise = secondsFromNow (60*60*24) -- One day
cacheHTTP :: URI -> Response Lazy.ByteString -> IO ()
cacheHTTP uri resp | shouldCacheHTTP resp = do
expires <- computeExpires resp
writeKV (uriToString' uri) ([
("mime", C.unpack $ fromMaybe "application/octet-stream" $
lookup "content-type" $ responseHeaders resp),
("expires", show expires)
], responseBody resp)
cacheHTTP _ _ = return ()
readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString))
readCacheHTTP uri = do
(headers, body) <- readKV (uriToString' uri)
let mime = fromMaybe "application/octet-stream" $ lookup "mime" headers
case readMaybe =<< lookup "expires" headers of
Nothing -> return Nothing
Just expiry -> do
now <- getCurrentTime
if expiry <= now then return $ Just (mime, body)
else do
-- TODO validate via a HEAD request
return Nothing
------
--- Key-value storage
------
readKV :: String -> IO ([(String, String)], Lazy.ByteString)
writeKV :: String -> ([(String, String)], Lazy.ByteString) -> IO ()
openKV :: String -> IO.IOMode -> (Handle -> IO r) -> IO r
openKV key mode act = do
dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
IO.withFile (dir </> "http" </> escapeURIString isUnescapedInURIComponent key) mode act
readKV key = openKV key ReadMode parseHeaders
where
parseHeaders h = do
line <- IO.hGetLine h
case L.break isSpace $ strip' line of
("", "") -> do
body <- Lazy.hGetContents h
return ([], body)
(key, value) -> do
(headers, body) <- parseHeaders h
return ((key, strip' value):headers, body)
strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace
writeKV key (headers, body) = openKV key WriteMode $ \h -> do
forM headers $ \(key, value) -> do
IO.hPutStrLn h (key++' ':value)
IO.hPutStrLn h ""
Lazy.hPut h body