~alcinnz/hurl

ref: 7d31eda45dc9d615cbfa084a2149b17c5e313a85 hurl/src/Network/URI/Cache.hs -rw-r--r-- 6.0 KiB
7d31eda4 — Adrian Cochrane Note charset optimization. 2 years ago
                                                                                
a4ffbfc2 Adrian Cochrane
ac4767de Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
53ee542a Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
ac4767de Adrian Cochrane
a4ffbfc2 Adrian Cochrane
9faa5334 Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
663fb7aa Adrian Cochrane
ff57d15d Adrian Cochrane
a4ffbfc2 Adrian Cochrane
1e101982 Adrian Cochrane
a4ffbfc2 Adrian Cochrane
53ee542a Adrian Cochrane
a4ffbfc2 Adrian Cochrane
1e101982 Adrian Cochrane
a4ffbfc2 Adrian Cochrane
14a0ac04 Adrian Cochrane
53ee542a Adrian Cochrane
ff57d15d Adrian Cochrane
663fb7aa Adrian Cochrane
ff57d15d Adrian Cochrane
663fb7aa Adrian Cochrane
ff57d15d Adrian Cochrane
ac4767de Adrian Cochrane
9faa5334 Adrian Cochrane
663fb7aa Adrian Cochrane
9faa5334 Adrian Cochrane
663fb7aa Adrian Cochrane
ac4767de Adrian Cochrane
663fb7aa Adrian Cochrane
ac4767de Adrian Cochrane
663fb7aa Adrian Cochrane
ac4767de Adrian Cochrane
53ee542a Adrian Cochrane
663fb7aa Adrian Cochrane
9faa5334 Adrian Cochrane
663fb7aa Adrian Cochrane
53ee542a Adrian Cochrane
663fb7aa Adrian Cochrane
ff57d15d Adrian Cochrane
663fb7aa Adrian Cochrane
53ee542a Adrian Cochrane
9faa5334 Adrian Cochrane
ac4767de Adrian Cochrane
53ee542a Adrian Cochrane
663fb7aa Adrian Cochrane
53ee542a Adrian Cochrane
663fb7aa Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
{-# LANGUAGE OverloadedStrings #-}
module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP, cleanCacheHTTP) where
import Network.HTTP.Client
import Network.HTTP.Types.Status
import Network.HTTP.Types.Header

-- For escaping filepaths, since I already have this dependency
import Network.URI (escapeURIString, isUnescapedInURIComponent, URI, uriToString)
import Data.Time.Clock
import Data.Time.Format

import Data.ByteString as Strict
import Data.ByteString.Char8 as C
import Data.ByteString.Lazy as Lazy
import System.IO as IO
import System.FilePath
import System.Directory
import qualified Data.Text as Txt

import Data.Maybe
import Data.Char (isSpace)
import Data.List as L
import Control.Monad (forM, void, when)
import Text.Read (readMaybe)

stripBS = C.dropWhile isSpace -- FIXME Upgrade bytestring dependency for a real strip function.

httpCacheDirective :: Response b -> Strict.ByteString -> Maybe Strict.ByteString
httpCacheDirective response key | Just header <- lookup hCacheControl $ responseHeaders response =
        let directives = Prelude.map stripBS $ C.split ',' header
        in if key `Prelude.elem` directives
            then Just ""
            else listToMaybe $ mapMaybe (C.stripPrefix $ C.snoc key '=') directives
    | otherwise = Nothing

shouldCacheHTTP :: Response b -> Bool
-- IETF RFC7234 Section 3
shouldCacheHTTP response = -- Assume GET
    statusCode (responseStatus response) `Prelude.elem` [200, 201, 404] && -- Supported response code
        isNothing (httpCacheDirective response "no-store") -- Honor no-store
        -- This is a private cache, don't check for Cache-Control: private
        -- Also, I'll cache anything for supported response codes, regardless of explicit expiry times.

uriToString' uri = uriToString id uri ""
parseHTTPTime :: String -> Maybe UTCTime
parseHTTPTime str | ',' `L.elem` str = parseTimeM True defaultTimeLocale rfc822DateFormat str
parseHTTPTime str = parseTimeM True defaultTimeLocale "%_d %b %Y %H:%M:%S %Z" str
secondsFromNow i = do
    now <- getCurrentTime
    -- This ugliness required because regex depends on outdated version of time.
    return $ addUTCTime (fromRational $ toRational $ secondsToDiffTime i) now

computeExpires :: Response a -> IO UTCTime
computeExpires resp
  | Just header <- lookup hExpires $ responseHeaders resp,
        Just time <- parseHTTPTime $ C.unpack header = return time
  | Just pragma <- httpCacheDirective resp "max-age",
        Just seconds <- readMaybe $ C.unpack pragma = secondsFromNow seconds
  | otherwise = secondsFromNow (60*60*24) -- One day

cacheHTTP :: URI -> Response Lazy.ByteString -> IO ()
cacheHTTP uri resp | shouldCacheHTTP resp = do
    expires <- computeExpires resp
    let headers = responseHeaders resp
    writeKV (uriToString' uri) (
        [("expires", show expires)] ++ getHeader "content-type" "mime" ++
            getHeader "ETag" "etag" ++ getHeader "Last-Modified" "modified",
        responseBody resp)
  where
    getHeader header key | Just value <- lookup header $ responseHeaders resp = [(key, C.unpack value)]
        | otherwise = []
cacheHTTP _ _ = return ()

readCacheHTTP :: URI -> IO (Maybe (Txt.Text, Lazy.ByteString), Maybe ResponseHeaders)
readCacheHTTP uri = do
    cached <- readKV $ uriToString' uri
    case cached of
        Just (headers, body) | Just expiry <- readMaybe =<< lookup "expires" headers -> do
            let mime = fromMaybe "application/octet-stream" $ lookup "mime" headers
            now <- getCurrentTime

            -- Headers for a validation request & whether should be sent.
            let headers' = if expiry <= now then Nothing else Just (
                    [("If-Modified-Since", C.pack val) | ("modified", val) <- headers,
                        isJust $ parseHTTPTime val] ++
                    [("If-None-Match", C.pack val) | ("etag", val) <- headers])
            -- Cache entry has expired, delete.
            when (isJust headers') $ deleteKV $ uriToString' uri

            return (Just (Txt.pack mime, body), headers')

        _ -> return (Nothing, Just [])

cleanCacheHTTP = void $ do
    now <- getCurrentTime
    let tombstone = now

    dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
    dirExists <- doesDirectoryExist (dir </> "http")
    files <- if dirExists then listDirectory (dir </> "http") else return []
    forM files $ \file -> do
        exists <- doesFileExist file
        when exists $ IO.withFile file ReadMode $ \h -> do
            (headers, _) <- parseHeaders h
            let hasHeader h = isJust $ lookup h headers
                validatable = hasHeader "modified" || hasHeader "etag"
                expires = fromMaybe tombstone (readMaybe =<< lookup "expires" headers)
            when (now >= expires && not validatable) $ removeFile file

------
--- Key-value storage
------

readKV :: String -> IO (Maybe ([(String, String)], Lazy.ByteString))
writeKV :: String -> ([(String, String)], Lazy.ByteString) -> IO ()
deleteKV :: String -> IO ()
openKV :: String -> IO.IOMode -> (Handle -> IO r) -> IO (Maybe r)
pathKV :: String -> IO FilePath

pathKV key = do
    dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
    createDirectoryIfMissing True (dir </> "http")
    return (dir </> "http" </> escapeURIString isUnescapedInURIComponent key)

openKV key mode act = do
    path <- pathKV key
    exists <- doesFileExist path
    if exists then Just <$> IO.withFile path mode act else return Nothing

readKV key = openKV key ReadMode parseHeaders

parseHeaders h = do
    line <- IO.hGetLine h
    case L.break isSpace $ strip' line of
        ("", "") -> do
            body <- Lazy.hGetContents h
            return ([], body)
        (key, value) -> do
            (headers, body) <- parseHeaders h
            return ((key, strip' value):headers, body)
strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace

writeKV key (headers, body) = void $ openKV key WriteMode $ \h -> do
    forM headers $ \(key, value) -> do
        IO.hPutStrLn h (key++' ':value)
    IO.hPutStrLn h ""
    Lazy.hPut h body

deleteKV key = pathKV key >>= removeFile