~alcinnz/hurl

ref: 9faa5334d97b51c95bc9c31c7523140ad1417443 hurl/src/Network/URI/Cache.hs -rw-r--r-- 4.3 KiB
9faa5334 — Adrian Cochrane Utilize the cache HURL's been writing! 3 years ago
                                                                                
a4ffbfc2 Adrian Cochrane
9faa5334 Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
53ee542a Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
a4ffbfc2 Adrian Cochrane
9faa5334 Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
a4ffbfc2 Adrian Cochrane
14a0ac04 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
9faa5334 Adrian Cochrane
ff57d15d Adrian Cochrane
9faa5334 Adrian Cochrane
53ee542a Adrian Cochrane
9faa5334 Adrian Cochrane
53ee542a Adrian Cochrane
9faa5334 Adrian Cochrane
ff57d15d Adrian Cochrane
53ee542a Adrian Cochrane
9faa5334 Adrian Cochrane
53ee542a Adrian Cochrane
9faa5334 Adrian Cochrane
53ee542a Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
{-# LANGUAGE OverloadedStrings #-}
module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where
import Network.HTTP.Client
import Network.HTTP.Types.Status
import Network.HTTP.Types.Header

-- For escaping filepaths, since I already have this dependency
import Network.URI (escapeURIString, isUnescapedInURIComponent, URI, uriToString)
import Data.Time.Clock
import Data.Time.Format

import Data.ByteString as Strict
import Data.ByteString.Char8 as C
import Data.ByteString.Lazy as Lazy
import System.IO as IO
import System.FilePath
import System.Directory

import Data.Maybe
import Data.Char (isSpace)
import Data.List as L
import Control.Monad (forM)
import Text.Read (readMaybe)

strip = C.dropWhile isSpace -- FIXME Upgrade bytestring dependency for a real strip function.

httpCacheDirective :: Response b -> Strict.ByteString -> Maybe Strict.ByteString
httpCacheDirective response key | Just header <- lookup hCacheControl $ responseHeaders response =
        let directives = Prelude.map strip $ C.split ',' header
        in if key `Prelude.elem` directives
            then Just ""
            else listToMaybe $ mapMaybe (C.stripPrefix $ C.snoc key '=') directives
    | otherwise = Nothing

shouldCacheHTTP :: Response b -> Bool
-- IETF RFC7234 Section 3
shouldCacheHTTP response = -- Assume GET
    statusCode (responseStatus response) `Prelude.elem` [200, 201, 404] && -- Supported response code
        isNothing (httpCacheDirective response "no-store") -- Honor no-store
        -- This is a private cache, don't check for Cache-Control: private
        -- Also, I'll cache anything for supported response codes, regardless of explicit expiry times.

uriToString' uri = uriToString id uri ""
parseHTTPTime str | ',' `L.elem` str = parseTimeM True defaultTimeLocale rfc822DateFormat str
parseHTTPTime str = parseTimeM True defaultTimeLocale "%_d %b %Y %H:%M:%S %Z" str
secondsFromNow i = do
    now <- getCurrentTime
    -- This ugliness required because regex depends on outdated version of time.
    return $ addUTCTime (fromRational $ toRational $ secondsToDiffTime i) now

computeExpires :: Response a -> IO UTCTime
computeExpires resp
  | Just header <- lookup hExpires $ responseHeaders resp,
        Just time <- parseHTTPTime $ C.unpack header = return time
  | Just pragma <- httpCacheDirective resp "max-age",
        Just seconds <- readMaybe $ C.unpack pragma = secondsFromNow seconds
  | otherwise = secondsFromNow (60*60*24) -- One day

cacheHTTP :: URI -> Response Lazy.ByteString -> IO ()
cacheHTTP uri resp | shouldCacheHTTP resp = do
    expires <- computeExpires resp
    writeKV (uriToString' uri) ([
            ("mime", C.unpack $ fromMaybe "application/octet-stream" $ 
                    lookup "content-type" $ responseHeaders resp),
            ("expires", show expires)
        ], responseBody resp)
cacheHTTP _ _ = return ()

readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString))
readCacheHTTP uri = do
    (headers, body) <- readKV (uriToString' uri)
    let mime = fromMaybe "application/octet-stream" $ lookup "mime" headers
    case readMaybe =<< lookup "expires" headers of
        Nothing -> return Nothing
        Just expiry -> do
            now <- getCurrentTime
            if expiry <= now then return $ Just (mime, body)
            else do
                -- TODO validate via a HEAD request
                return Nothing

------
--- Key-value storage
------

readKV :: String -> IO ([(String, String)], Lazy.ByteString)
writeKV :: String -> ([(String, String)], Lazy.ByteString) -> IO ()
openKV :: String -> IO.IOMode -> (Handle -> IO r) -> IO r

openKV key mode act = do
    dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
    IO.withFile (dir </> "http" </> escapeURIString isUnescapedInURIComponent key) mode act

readKV key = openKV key ReadMode parseHeaders
  where
    parseHeaders h = do
        line <- IO.hGetLine h
        case L.break isSpace $ strip' line of
            ("", "") -> do
                body <- Lazy.hGetContents h
                return ([], body)
            (key, value) -> do
                (headers, body) <- parseHeaders h
                return ((key, strip' value):headers, body)
    strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace

writeKV key (headers, body) = openKV key WriteMode $ \h -> do
    forM headers $ \(key, value) -> do
        IO.hPutStrLn h (key++' ':value)
    IO.hPutStrLn h ""
    Lazy.hPut h body