{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession,
fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR,
dispatchByMIME, appsForMIME, Application(..), dispatchByApp,
saveDownload, downloadToURI,
-- logging API
LogRecord(..), enableLogging, retrieveLog, writeLog) where
import Network.URI.Types
import qualified Data.Text as Txt
import Data.Text (Text)
import Network.URI
import qualified Data.ByteString as Strict
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Char8 as C8
import Network.URI.Charset
import Control.Exception
import System.IO.Error (isEOFError)
import Control.Concurrent.Async (forConcurrently)
-- for about: URIs & port parsing, all standard lib
import Data.Maybe (fromMaybe, listToMaybe, isJust)
import Data.Either (isLeft)
import Text.Read (readMaybe)
-- for executable extensions, all standard lib
import Data.Char (isSpace)
import System.Exit (ExitCode(..))
-- for saveDownload
import System.Directory
import System.FilePath
-- for logging
import Control.Concurrent.MVar
import Data.Time.Clock
import System.IO
import Control.Monad
import Data.List as L
#ifdef WITH_HTTP_URI
import qualified Network.HTTP.Client as HTTP
import qualified Network.HTTP.Client.OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import Network.HTTP.Types
import Data.List (intercalate)
import Control.Concurrent (forkIO)
import Network.URI.Cache
#endif
#ifdef WITH_RAW_CONNECTIONS
import qualified OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import qualified System.IO.Streams.SSL as TLSConn
import System.IO.Streams
#endif
#ifdef WITH_DATA_URI
import qualified Data.ByteString.Base64.URL.Lazy as B64
#endif
import Network.URI.Locale
import Network.URI.Messages
#ifdef WITH_XDG
import Network.URI.XDG
#endif
#ifdef WITH_PLUGIN_REWRITES
import Network.URI.PlugIns.Rewriters
#endif
-- | Data shared accross multiple URI requests.
data Session = Session {
#ifdef WITH_HTTP_URI
managerHTTP :: HTTP.Manager,
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt :: TLS.SSLContext,
#endif
#ifdef WITH_XDG
apps :: XDGConfig,
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter :: Rewriter,
#endif
-- | The languages (RFC2616-encoded) to which responses should be localized.
locale :: [String],
-- | Additional files to serve from about: URIs.
aboutPages :: [(FilePath, ByteString)],
-- | Log of timestamped/profiled URL requests
requestLog :: Maybe (MVar [LogRecord]),
-- | How many redirects to follow for Gemini or HTTP(S) requests
redirectCount :: Int,
-- | Whether to cache network responses, avoiding sending requests
cachingEnabled :: Bool,
-- | App-specific config subdirectory to check
appName :: String
}
data LogRecord = LogRecord {
url :: URI,
accept :: [String],
redirected :: URI,
mimetype :: String,
response :: Either Text ByteString,
begin :: UTCTime,
end :: UTCTime
}
-- | Initializes a default Session object to support HTTPS & Accept-Language
-- if HTTP is enabled.
newSession :: IO Session
newSession = newSession' ""
-- | Variant of `newSession` which loads plugins for the named app.
newSession' :: String -> IO Session
newSession' appname = do
(ietfLocale, unixLocale) <- rfc2616Locale
#ifdef WITH_HTTP_URI
httpsCtxt <- TLS.context
TLS.contextSetDefaultCiphers httpsCtxt
TLS.contextSetCADirectory httpsCtxt "/etc/ssl/certs"
TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing
managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt <- TLS.context
TLS.contextSetDefaultCiphers connCtxt
TLS.contextSetCADirectory connCtxt "/etc/ssl/certs"
TLS.contextSetVerificationMode connCtxt $
TLS.VerifyPeer True True $ Just $ \valid _ -> return valid -- FIXME: Implement Trust-On-First-Use
#endif
#ifdef WITH_XDG
apps' <- loadXDGConfig unixLocale
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriters <- parseRewriters appname
#endif
return Session {
#ifdef WITH_HTTP_URI
managerHTTP = managerHTTP',
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt = connCtxt,
#endif
#ifdef WITH_XDG
apps = apps',
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter = rewriters,
#endif
locale = ietfLocale,
aboutPages = [],
requestLog = Nothing,
redirectCount = 5,
cachingEnabled = True,
appName = appname
}
llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]
parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port
parsePort fallback _ = fallback
-- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text.
fetchURL :: Session -- ^ The session of which this request is a part.
-> [String] -- ^ The expected MIMEtypes in priority order.
-> URI -- ^ The URL to retrieve
-> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response.
fetchURL sess mimes uri = do
(_, mime, resp) <- fetchURL' sess mimes uri
return (mime, resp)
fetchURLLogged log sess mimes uri = do
begin' <- getCurrentTime
res@(redirected', mimetype', response') <- fetchURL' sess mimes uri
end' <- getCurrentTime
modifyMVar_ log $ \log' -> return (
LogRecord uri mimes redirected' mimetype' response' begin' end' : log')
return res
-- | Concurrently fetch given URLs.
fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)]
fetchURLs sess mimes uris cb = do
let fetch = case requestLog sess of {Nothing -> fetchURL'; Just log -> fetchURLLogged log}
forConcurrently uris (\u -> fetch sess mimes u >>= cb) >>= return . L.zip uris
-- | Internal MIMEtypes for error reporting
mimeERR, htmlERR :: String
mimeERR = "txt/x-error\t"
htmlERR = "html/x-error\t"
-- | As per `fetchURL`, but also returns the redirected URI.
fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString)
fetchURL' Session {redirectCount = 0, locale = locale'} _ uri =
return (uri, mimeERR, Left $ Txt.pack $ trans locale' ExcessiveRedirects)
#ifdef WITH_PLUGIN_REWRITES
fetchURL' session mimes uri
| Just uri' <- applyRewriter (rewriter session) uri = fetchURL' session mimes uri'
#endif
fetchURL' session@Session { appName = appname, locale = l } mimes
uri@(URI "ext:" Nothing path query _) = do
dir <- getXdgDirectory XdgData "nz.geek.adrian.hurl"
sysdirs <- getXdgDirectoryList XdgDataDirs
let dirs = concat [[dir' </> appname, dir'] | dir' <- dir : sysdirs]
programs <- findExecutablesInDirectories dirs ("bin" </> path)
case programs of
[] -> return (uri, mimeERR, Left $ Txt.pack $ trans l $ ReadFailed "404")
program:_ -> do
let args = case query of {
'?':rest -> split (== '&') rest;
_ -> []
}
(exitcode, stdout, stderr) <- readProcessWithExitCode program args ""
let response = if isSuccess exitcode then stdout else stderr
let (header, body) = breakOn '\n' response
case strip header of
'm':'i':'m':'e':mimetype -> return (uri, strip mimetype, Left $ Txt.pack body)
'u':'r':'l':header' | Just uri' <- parseURIReference $ strip header' ->
fetchURL' (session {redirectCount = redirectCount session - 1}) mimes $
relativeTo uri' uri
_ | isSuccess exitcode -> return (uri, "text/html", Left $ Txt.pack response)
_ -> return (uri, mimeERR, Left $ Txt.pack response)
where
split p s = case dropWhile p s of
"" -> []
s' -> let (w, s'') = break p s' in w : split p s''
strip = dropWhile isSpace . dropWhileEnd isSpace
isSuccess ExitSuccess = True
isSuccess _ = False
fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) =
fetchURL' session mimes $ uri {uriPath = "version"}
fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} =
return (url,
Txt.unpack $ Txt.strip $ convertCharset "utf-8" $ B.toStrict $
llookup (path ++ ".mime") "text/html" pages,
Right $ llookup path "" pages)
#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing)
response <- case cached of
(Just (mime, body), Nothing) -> return $ Right (mime, body)
(cached, cachingHeaders) -> do
request <- HTTP.requestFromURI uri
response <- HTTP.httpLbs request {
HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
HTTP.requestHeaders = [
("Accept", C8.pack $ intercalate ", " accept),
("Accept-Language", C8.pack $ intercalate ", " $ locale session)
] ++ fromMaybe [] cachingHeaders,
HTTP.redirectCount = 0
} $ managerHTTP session
case (
HTTP.responseStatus response,
HTTP.responseBody response,
[val | ("content-type", val) <- HTTP.responseHeaders response]
) of
(Status 304 _, _, _) | Just cached'@(_, body) <- cached -> do
cacheHTTP uri $ response { HTTP.responseBody = body }
return $ Right cached'
-- Manually handle redirects so the caller & HTTP cache gets the correct URI.
(Status code _, _, _) | code > 300 && code < 400,
Just location <- lookup "location" $ HTTP.responseHeaders response,
Just uri' <- parseURIReference $ C8.unpack location ->
return $ Left $ relativeTo uri' uri
(Status _ msg, "", _) -> return $ Right (Txt.pack mimeERR, B.fromStrict msg)
(_, body, (mimetype:_)) -> do
cacheHTTP uri response
forkIO cleanCacheHTTP -- Try to keep diskspace down...
let mime = Txt.toLower $ convertCharset "utf-8" mimetype
return $ Right (mime, body)
(_, response, []) -> return $ Right (Txt.pack defaultMIME, response)
case response of
Left redirect ->
let session' = session { redirectCount = redirectCount session - 1 }
in fetchURL' session' accept redirect
Right (mime, body) ->
let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
in return $ resolveCharset' uri mime' body
`catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e)
#endif
#ifdef WITH_GEMINI_URI
fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI {
uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port)
} = TLSConn.withConnection ctxt host (parsePort 1965 port) $ \input output _ -> do
writeTo output $ Just $ C8.pack $ uriToString id uri "\r\n"
input' <- inputStreamToHandle input
header <- hGetLine input'
case parseHeader header of
-- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms.
('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [
"<form><label>",
Txt.replace "<" "<" $ Txt.replace "&" "&" label,
"<input /></label></form>"
])
('2', _, mime) -> do
body <- Strict.hGetContents input'
let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
return $ resolveCharset' uri mime' $ B.fromStrict body
('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect ->
fetchURL' sess {
redirectCount = redirectCount sess - 1
} mimes $ relativeTo redirect' uri
-- TODO Implement client certificates, once I have a way for the user/caller to select one.
-- And once I figure out how to configure the TLS cryptography.
(_, _, err) -> return (uri, mimeERR, Left err)
where
parseHeader :: String -> (Char, Char, Text)
parseHeader (major:minor:meta) = (major, minor, Txt.strip $ Txt.pack meta)
parseHeader _ = ('4', '1', Txt.pack $ trans l MalformedResponse)
handleIOErr :: IOError -> IO Strict.ByteString
handleIOErr _ = return ""
#endif
#ifdef WITH_FILE_URI
fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do
response <- B.readFile $ uriPath uri
return (uri, defaultMIME, Right response)
`catch` \e -> do
return (uri, mimeERR,
Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException))
#endif
#ifdef WITH_DATA_URI
fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} =
let request = uriPath uri ++ uriQuery uri ++ uriFragment uri
in case breakOn ',' $ unEscapeString request of
("", response) -> return (uri, defaultMIME, Left $ Txt.pack response)
(mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' ->
return $ case B64.decode $ B.fromStrict $ C8.pack response of
Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str)
Right bytes -> (uri, reverse mime, Right bytes)
(mime, response) -> return (uri, mime, Left $ Txt.pack response)
#endif
#ifdef WITH_XDG
fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do
app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s)
return (uri, htmlERR, Left $ Txt.pack $ trans l $ app)
#else
fetchURL' Session {locale = l} _ URI {uriScheme = scheme} =
return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme)
#endif
dispatchByMIME :: Session -> String -> URI -> IO (Maybe String)
#if WITH_XDG
dispatchByMIME Session {locale = l, apps = a} mime uri = do
err <- dispatchURIByMIME a uri mime
return $ case err of
UnsupportedMIME _ -> Nothing
_ -> Just $ trans l err
#else
dispatchByMIME _ _ _ = return Nothing
#endif
appsForMIME :: Session -> String -> IO [Application]
#if WITH_XDG
appsForMIME Session { apps = a, locale = l } = queryHandlers' a l
#else
appsForMIME _ _ = []
#endif
dispatchByApp :: Session -> Application -> String -> URI -> IO Bool
#if WITH_XDG
dispatchByApp session@Session { locale = l } Application { appId = app} mime uri = do
try1 <- launchApp' l uri app -- First try handing off the URL, feedreaders need this!
case try1 of
Left app -> return True
Right False -> return False
Right True -> do
-- Download as temp file to open locally, the app requires it...
temp <- canonicalizePath =<< getTemporaryDirectory
resp <- fetchURL' session [mime] uri
uri' <- saveDownload (URI "file:" Nothing "" "" "") temp resp
isLeft <$> launchApp' l uri' app
#else
dispatchByApp _ _ _ _ = return False
#endif
-- Downloads utilities
-- | write download to a file in the given directory.
saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI
saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do
dest <- unusedFilename (dir </> takeFileName' path)
case resp of
Left txt -> writeFile dest $ Txt.unpack txt
Right bytes -> B.writeFile dest bytes
-- TODO set user.mime file attribute.
return $ baseURI {uriPath = dest}
where
takeFileName' s = case takeFileName s of { "" -> "index"; f -> f}
unusedFilename path = do
exists <- doesFileExist path
if exists then go 0 else return path
where
go n = do
let path' = path ++ show n
exists <- doesFileExist path'
if exists then go (n+1) else return path'
-- | Convert a download into a data: URI
downloadToURI :: (URI, String, Either Text ByteString) -> URI
downloadToURI (_, mime, Left txt) = nullURI {
uriScheme = "data:",
uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt)
}
downloadToURI (_, mime, Right bytes) = nullURI {
uriScheme = "data:",
uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes)
}
-- Logging API
enableLogging :: Session -> IO Session
enableLogging session = do
log <- newMVar []
return session { requestLog = Just log }
retrieveLog :: Session -> IO [LogRecord]
retrieveLog session@Session { requestLog = Just log } = swapMVar log []
retrieveLog _ = return []
writeLog :: Handle -> Session -> IO ()
writeLog out session = do
writeRow ["URL", "Redirected", "Accept", "MIMEtype", "Size", "Begin", "End", "Duration"]
log <- retrieveLog session
forM log $ \record -> writeRow [
show $ url record, show $ redirected record,
show $ accept record, show $ mimetype record,
case response record of
Left txt -> show $ Txt.length txt
Right bs -> show $ B.length bs,
show $ begin record, show $ end record,
show (end record `diffUTCTime` end record)
]
return ()
where
writeRow = hPutStrLn out . L.intercalate "\t"
-- Utils
breakOn c (a:as) | c == a = ([], as)
| otherwise = let (x, y) = breakOn c as in (a:x, y)
breakOn _ [] = ([], [])