~alcinnz/hurl

ref: 29768fbfd764b9fd3c2f514a372aeaa14dc6f7f5 hurl/src/Network/URI/Fetch.hs -rw-r--r-- 9.6 KiB
29768fbf — Adrian Cochrane Upstream save function, with intelligent filenaming. 4 years ago
                                                                                
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
29768fbf Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
3cd22771 Adrian Cochrane
09c71046 Adrian Cochrane
26da82c4 Adrian Cochrane
a7eb27a9 Adrian Cochrane
3cd22771 Adrian Cochrane
2d5e8445 Adrian Cochrane
3cd22771 Adrian Cochrane
2d5e8445 Adrian Cochrane
29768fbf Adrian Cochrane
a7eb27a9 Adrian Cochrane
c72f07c9 Adrian Cochrane
a7eb27a9 Adrian Cochrane
fcc0da55 Adrian Cochrane
a7eb27a9 Adrian Cochrane
3cd22771 Adrian Cochrane
a7eb27a9 Adrian Cochrane
c72f07c9 Adrian Cochrane
47f9ef33 Adrian Cochrane
c72f07c9 Adrian Cochrane
8d239eb6 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
8d239eb6 Adrian Cochrane
3cd22771 Adrian Cochrane
8d239eb6 Adrian Cochrane
214ebf86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
8d239eb6 Adrian Cochrane
2d5e8445 Adrian Cochrane
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
c72f07c9 Adrian Cochrane
214ebf86 Adrian Cochrane
c72f07c9 Adrian Cochrane
3cd22771 Adrian Cochrane
8d239eb6 Adrian Cochrane
214ebf86 Adrian Cochrane
8d239eb6 Adrian Cochrane
c72f07c9 Adrian Cochrane
8d239eb6 Adrian Cochrane
c72f07c9 Adrian Cochrane
3cd22771 Adrian Cochrane
8d239eb6 Adrian Cochrane
2d5e8445 Adrian Cochrane
c72f07c9 Adrian Cochrane
2d5e8445 Adrian Cochrane
3cd22771 Adrian Cochrane
2d5e8445 Adrian Cochrane
e0988f7e Adrian Cochrane
a7c88c86 Adrian Cochrane
26da82c4 Adrian Cochrane
a7c88c86 Adrian Cochrane
2d5e8445 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
fcc0da55 Adrian Cochrane
c72f07c9 Adrian Cochrane
fcc0da55 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
09c71046 Adrian Cochrane
a7c88c86 Adrian Cochrane
09c71046 Adrian Cochrane
a7eb27a9 Adrian Cochrane
3cd22771 Adrian Cochrane
a7c88c86 Adrian Cochrane
3cd22771 Adrian Cochrane
a7c88c86 Adrian Cochrane
3cd22771 Adrian Cochrane
a7c88c86 Adrian Cochrane
3cd22771 Adrian Cochrane
a7c88c86 Adrian Cochrane
3cd22771 Adrian Cochrane
a7c88c86 Adrian Cochrane
3cd22771 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
09c71046 Adrian Cochrane
a7c88c86 Adrian Cochrane
09c71046 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
8d239eb6 Adrian Cochrane
a7c88c86 Adrian Cochrane
214ebf86 Adrian Cochrane
a7c88c86 Adrian Cochrane
214ebf86 Adrian Cochrane
a7c88c86 Adrian Cochrane
214ebf86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
8d239eb6 Adrian Cochrane
214ebf86 Adrian Cochrane
39af078c Adrian Cochrane
8d239eb6 Adrian Cochrane
39af078c Adrian Cochrane
8d239eb6 Adrian Cochrane
29768fbf Adrian Cochrane
3cd22771 Adrian Cochrane
a7eb27a9 Adrian Cochrane
3cd22771 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session, locale, newSession, fetchURL, dispatchByMIME, saveDownload) where

import qualified Data.Text as Txt
import           Data.Text (Text)
import           Network.URI
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Char8 as C8
import           Network.URI.Charset
import           Control.Exception
import           Control.Concurrent.Async (forConcurrently)

-- for about: URIs & port parsing, all standard lib
import Data.Maybe (fromMaybe, listToMaybe)
import Text.Read (readMaybe)

-- for saveDownload
import System.Directory
import System.FilePath

#ifdef WITH_HTTP_URI
import qualified Network.HTTP.Client as HTTP
import qualified Network.HTTP.Client.TLS as TLS
import           Network.HTTP.Types
import           Data.List (intercalate)
#endif

#ifdef WITH_RAW_CONNECTIONS
import Network.Connection
#endif

#ifdef WITH_DATA_URI
import qualified Data.ByteString.Base64 as B64
#endif

import Network.URI.Locale
import Network.URI.Messages

#ifdef WITH_XDG
import Network.URI.XDG
#endif

-- | Data shared accross multiple URI requests.
data Session = Session {
#ifdef WITH_HTTP_URI
    managerHTTP :: HTTP.Manager,
#endif
#ifdef WITH_RAW_CONNECTIONS
    connCtxt :: ConnectionContext,
#endif
#ifdef WITH_XDG
    apps :: XDGConfig,
#endif
    -- | The languages (RFC2616-encoded) to which responses should be localized.
    locale :: [String],
    -- | Additional files to serve from about: URIs.
    aboutPages :: [(FilePath, ByteString)]
}

-- | Initializes a default Session object to support HTTPS & Accept-Language
-- if HTTP is enabled.
newSession :: IO Session
newSession = do
    (ietfLocale, unixLocale) <- rfc2616Locale
#ifdef WITH_HTTP_URI
    managerHTTP' <- HTTP.newManager TLS.tlsManagerSettings
#endif
#ifdef WITH_RAW_CONNECTIONS
    connCtxt <- initConnectionContext
#endif
#ifdef WITH_XDG
    apps' <- loadXDGConfig unixLocale
#endif

    return Session {
#ifdef WITH_HTTP_URI
        managerHTTP = managerHTTP',
#endif
#ifdef WITH_RAW_CONNECTIONS
        connCtxt = connCtxt,
#endif
#ifdef WITH_XDG
        apps = apps',
#endif
        locale = ietfLocale,
        aboutPages = []
    }

llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]
parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port
parsePort fallback _ = fallback

-- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text.
fetchURL :: Session -- ^ The session of which this request is a part.
    -> [String] -- ^ The expected MIMEtypes in priority order.
    -> URI -- ^ The URL to retrieve
    -> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response.
fetchURL sess mimes uri = do
    (_, mime, resp) <- fetchURL' sess mimes uri
    return (mime, resp)

-- | Concurrently fetch given URLs.
fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)]
fetchURLs sess mimes uris cb =
    forConcurrently uris (\u -> fetchURL' sess mimes u >>= cb) >>= return . zip uris

-- | As per `fetchURL`, but also returns the redirected URI.
fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString)
fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) =
    fetchURL' session mimes $ uri {uriPath = "version"}
fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} =
    return (url,
        Txt.unpack $ convertCharset "utf-8" $ B.toStrict $
            llookup (path ++ ".mime") "text/html" pages,
        Right $ llookup path "" pages)

#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
    request <- HTTP.requestFromURI uri
    response <- HTTP.httpLbs request {
            HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
            HTTP.requestHeaders = [
                ("Accept", C8.pack $ intercalate ", " accept),
                ("Accept-Language", C8.pack $ intercalate ", " $ locale session)
            ]
        } $ managerHTTP session
    return $ case (
            HTTP.responseBody response,
            [val | ("content-type", val) <- HTTP.responseHeaders response]
      ) of
        ("", _) -> (uri, "text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
        (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype
            in resolveCharset' uri (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response
        (response, []) -> (uri, defaultMIME, Right response)
  `catches` [
    Handler $ \e -> do return (uri, "text/plain", Left $ Txt.pack $ trans (locale session) $ Http e),
    Handler $ \(ErrorCall msg) -> do return (uri, "text/plain", Left $ Txt.pack msg)
  ]
#endif

#ifdef WITH_GEMINI_URI
fetchURL' sess@Session {connCtxt = ctxt} mimes uri@URI {
        uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port)
    } = do
        conn <- connectTo ctxt $ ConnectionParams {
            connectionHostname = host,
            connectionPort = parsePort 1965 port,
            -- TODO implement Trust-On-First-Use, client certificates
            connectionUseSecure = Just $ TLSSettingsSimple False False False,
            connectionUseSocks = Nothing
        }
        connectionPut conn $ C8.pack $ uriToString id uri "\r\n"
        header <- connectionGetLine 1024 conn
        ret <- case parseHeader header of
            -- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms.
            ('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [
                    "<form><label for='input'>",
                    Txt.replace "<" "&lt;" $ Txt.replace "&" "&amp;" label,
                    "</label><input id='input' /></form>"
                ])
            ('2', _, mime) -> do
                chunks <- mWhile (connectionWaitForInput conn 60000) $ connectionGetChunk conn
                let mime' = map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
                return $ resolveCharset' uri mime' $ B.fromChunks chunks
            ('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect ->
                fetchURL' sess mimes $ relativeTo redirect' uri
            -- TODO Implement client certificates, once I have a way for the user/caller to select one.
            --      And once I figure out how to configure the TLS cryptography.
            (_, _, err) -> return (uri, "text/plain", Left err)
        connectionClose conn
        return ret
    where
        parseHeader header
            | Just (major, header') <- Txt.uncons $ convertCharset "utf-8" header,
                Just (minor, meta) <- Txt.uncons header' = (major, minor, Txt.strip meta)
            | otherwise = ('4', '1', "Invalid response!")
#endif

#ifdef WITH_FILE_URI
fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do
    response <- B.readFile $ uriPath uri
    return (uri, defaultMIME, Right response)
  `catch` \e -> do
    return (uri,
        "text/plain",
        Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException))
#endif

#ifdef WITH_DATA_URI
fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} =
    let request = uriPath uri ++ uriQuery uri ++ uriFragment uri
    in case breakOn ',' $ unEscapeString request of
        ("", response) -> return (uri, defaultMIME, Left $ Txt.pack response)
        (mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' ->
            return $ case B64.decode $ C8.pack response of
                Left str -> (uri, "text/plain", Left $ Txt.pack str)
                Right bytes -> (uri, reverse mime, Right $ B.fromStrict bytes)
        (mime, response) -> return (uri, mime, Left $ Txt.pack response)
#endif

#ifdef WITH_XDG
fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do
        app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s)
        return (uri, "text/html", Left $ Txt.pack $ trans l $ app)
#else
fetchURL' Session {locale = l} _ URI {uriScheme = scheme} =
    return (uri, "text/plain", Left $ Txt.pack $ trans l $ UnsupportedScheme scheme)
#endif

dispatchByMIME :: Session -> String -> URI -> IO (Maybe String)
#if WITH_XDG
dispatchByMIME Session {locale = l, apps = a} mime uri = do
    err <- dispatchURIByMIME a uri mime
    return $ case err of
        UnsupportedMIME _ -> Nothing
        _ -> Just $ trans l err
#else
dispatchByMIME _ _ _ = return Nothing
#endif

saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI
saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do
    dest <- unusedFilename (dir </> takeFileName path)
    case resp of
        Left txt -> writeFile dest $ Txt.unpack txt
        Right bytes -> B.writeFile dest bytes
    -- TODO set user.mime file attribute.
    return $ baseURI {uriPath = dest}

unusedFilename path = do
        exists <- doesFileExist path
        if exists then go 0 else return path
    where
        go n = do
            let path' = path ++ show n
            exists <- doesFileExist path'
            if exists then go (n+1) else return path'

-- Utils

#ifdef WITH_DATA_URI
breakOn c (a:as) | c == a = ([], as)
    | otherwise = let (x, y) = breakOn c as in (a:x, y)
breakOn _ [] = ([], [])
#endif

#ifdef WITH_GEMINI_URI
mWhile test body = do
    cond <- test
    if cond then do
        x <- body
        xs <- mWhile test body
        return (x:xs)
    else return []
#endif