{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session(locale, aboutPages, redirectCount), newSession,
fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR,
dispatchByMIME, saveDownload, downloadToURI,
-- logging API
LogRecord(..), enableLogging, retrieveLog, writeLog) where
import qualified Data.Text as Txt
import Data.Text (Text)
import Network.URI
import qualified Data.ByteString as Strict
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Char8 as C8
import Network.URI.Charset
import Control.Exception
import System.IO.Error (isEOFError)
import Control.Concurrent.Async (forConcurrently)
-- for about: URIs & port parsing, all standard lib
import Data.Maybe (fromMaybe, listToMaybe)
import Text.Read (readMaybe)
-- for executable extensions, all standard lib
import Data.Char (isSpace)
import System.Exit (ExitCode(..))
-- for saveDownload
import System.Directory
import System.FilePath
-- for logging
import Control.Concurrent.MVar
import Data.Time.Clock
import System.IO
import Control.Monad
import Data.List as L
#ifdef WITH_HTTP_URI
import qualified Network.HTTP.Client as HTTP
import qualified Network.HTTP.Client.OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import Network.HTTP.Types
import Data.List (intercalate)
#endif
#ifdef WITH_RAW_CONNECTIONS
import qualified OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import qualified System.IO.Streams.SSL as TLSConn
import System.IO.Streams
#endif
#ifdef WITH_DATA_URI
import qualified Data.ByteString.Base64.URL.Lazy as B64
#endif
import Network.URI.Locale
import Network.URI.Messages
#ifdef WITH_XDG
import Network.URI.XDG
#endif
#ifdef WITH_PLUGIN_REWRITES
import Network.URI.PlugIns.Rewriters
#endif
-- | Data shared accross multiple URI requests.
data Session = Session {
#ifdef WITH_HTTP_URI
managerHTTP :: HTTP.Manager,
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt :: TLS.SSLContext,
#endif
#ifdef WITH_XDG
apps :: XDGConfig,
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter :: Rewriter,
#endif
-- | The languages (RFC2616-encoded) to which responses should be localized.
locale :: [String],
-- | Additional files to serve from about: URIs.
aboutPages :: [(FilePath, ByteString)],
-- | Log of timestamped/profiled URL requests
requestLog :: MVar [LogRecord],
-- | How many redirects to follow for Gemini or HTTP(S) requests
redirectCount :: Int
}
data LogRecord = LogRecord {
url :: URI,
accept :: [String],
redirected :: URI,
mimetype :: String,
response :: Either Text ByteString,
begin :: UTCTime,
end :: UTCTime
}
-- | Initializes a default Session object to support HTTPS & Accept-Language
-- if HTTP is enabled.
newSession :: IO Session
newSession = newSession' ""
-- | Variant of `newSession` which loads plugins for the named app.
newSession' :: String -> IO Session
newSession' appname = do
(ietfLocale, unixLocale) <- rfc2616Locale
#ifdef WITH_HTTP_URI
httpsCtxt <- TLS.context
TLS.contextSetDefaultCiphers httpsCtxt
TLS.contextSetCADirectory httpsCtxt "/etc/ssl/certs"
TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing
managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt <- TLS.context
TLS.contextSetDefaultCiphers connCtxt
TLS.contextSetCADirectory connCtxt "/etc/ssl/certs"
TLS.contextSetVerificationMode connCtxt $
TLS.VerifyPeer True True $ Just $ \valid _ -> return valid -- FIXME: Implement Trust-On-First-Use
#endif
#ifdef WITH_XDG
apps' <- loadXDGConfig unixLocale
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriters <- parseRewriters appname
#endif
log <- newEmptyMVar
return Session {
#ifdef WITH_HTTP_URI
managerHTTP = managerHTTP',
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt = connCtxt,
#endif
#ifdef WITH_XDG
apps = apps',
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter = rewriters,
#endif
locale = ietfLocale,
aboutPages = [],
requestLog = log,
redirectCount = 5
}
llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]
parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port
parsePort fallback _ = fallback
-- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text.
fetchURL :: Session -- ^ The session of which this request is a part.
-> [String] -- ^ The expected MIMEtypes in priority order.
-> URI -- ^ The URL to retrieve
-> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response.
fetchURL sess mimes uri = do
(_, mime, resp) <- fetchURL' sess mimes uri
return (mime, resp)
fetchURLLogged sess mimes uri = do
begin' <- getCurrentTime
res@(redirected', mimetype', response') <- fetchURL' sess mimes uri
end' <- getCurrentTime
modifyMVar_ (requestLog sess) $ \log -> return (
LogRecord uri mimes redirected' mimetype' response' begin' end' : log)
return res
-- | Concurrently fetch given URLs.
fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)]
fetchURLs sess mimes uris cb = do
shouldntLog <- isEmptyMVar $ requestLog sess
let fetch = if shouldntLog then fetchURL' else fetchURLLogged
forConcurrently uris (\u -> fetch sess mimes u >>= cb) >>= return . L.zip uris
-- | Internal MIMEtypes for error reporting
mimeERR, htmlERR :: String
mimeERR = "txt/x-error\t"
htmlERR = "html/x-error\t"
-- | As per `fetchURL`, but also returns the redirected URI.
fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString)
fetchURL' Session {redirectCount = 0, locale = locale'} _ uri =
return (uri, mimeERR, Left $ Txt.pack $ trans locale' ExcessiveRedirects)
#ifdef WITH_PLUGIN_REWRITES
fetchURL' session mimes uri
| Just uri' <- applyRewriter (rewriter session) uri = fetchURL' session mimes uri'
#endif
fetchURL' session mimes uri@(URI {uriScheme = "ext:", uriAuthority = Nothing,
uriPath = path, uriQuery = query}) = do
dir <- getXdgDirectory XdgData "nz.geek.adrian.hurl"
let program = dir </> "bin" </> path
let args = case query of {
'?':rest -> split (== '&') rest;
_ -> []
}
(exitcode, stdout, stderr) <- readProcessWithExitCode program args ""
let response = if isSuccess exitcode then stdout else stderr
let (header, body) = breakOn '\n' response
case strip header of
'm':'i':'m':'e':mimetype -> return (uri, strip mimetype, Left $ Txt.pack body)
'u':'r':'l':header' | Just uri' <- parseURIReference $ strip header' ->
fetchURL' (session {redirectCount = redirectCount session - 1}) mimes $
relativeTo uri' uri
_ | isSuccess exitcode -> return (uri, "text/html", Left $ Txt.pack response)
_ -> return (uri, mimeERR, Left $ Txt.pack response)
where
split p s = case dropWhile p s of
"" -> []
s' -> let (w, s'') = break p s' in w : split p s''
strip = dropWhile isSpace . dropWhileEnd isSpace
isSuccess ExitSuccess = True
isSuccess _ = False
fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) =
fetchURL' session mimes $ uri {uriPath = "version"}
fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} =
return (url,
Txt.unpack $ Txt.strip $ convertCharset "utf-8" $ B.toStrict $
llookup (path ++ ".mime") "text/html" pages,
Right $ llookup path "" pages)
#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
request <- HTTP.requestFromURI uri
response <- HTTP.httpLbs request {
HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
HTTP.requestHeaders = [
("Accept", C8.pack $ intercalate ", " accept),
("Accept-Language", C8.pack $ intercalate ", " $ locale session)
],
HTTP.redirectCount = redirectCount session
} $ managerHTTP session
return $ case (
HTTP.responseBody response,
[val | ("content-type", val) <- HTTP.responseHeaders response]
) of
("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
(response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype
in resolveCharset' uri (L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response
(response, []) -> (uri, defaultMIME, Right response)
`catches` [
Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e),
Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg)
]
#endif
#ifdef WITH_GEMINI_URI
fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI {
uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port)
} = TLSConn.withConnection ctxt host (parsePort 1965 port) $ \input output _ -> do
writeTo output $ Just $ C8.pack $ uriToString id uri "\r\n"
input' <- inputStreamToHandle input
header <- hGetLine input'
case parseHeader header of
-- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms.
('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [
"<form><label>",
Txt.replace "<" "<" $ Txt.replace "&" "&" label,
"<input /></label></form>"
])
('2', _, mime) -> do
body <- Strict.hGetContents input'
let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
return $ resolveCharset' uri mime' $ B.fromStrict body
('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect ->
fetchURL' sess {
redirectCount = redirectCount sess - 1
} mimes $ relativeTo redirect' uri
-- TODO Implement client certificates, once I have a way for the user/caller to select one.
-- And once I figure out how to configure the TLS cryptography.
(_, _, err) -> return (uri, mimeERR, Left err)
where
parseHeader :: String -> (Char, Char, Text)
parseHeader (major:minor:meta) = (major, minor, Txt.strip $ Txt.pack meta)
parseHeader _ = ('4', '1', Txt.pack $ trans l MalformedResponse)
handleIOErr :: IOError -> IO Strict.ByteString
handleIOErr _ = return ""
#endif
#ifdef WITH_FILE_URI
fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do
response <- B.readFile $ uriPath uri
return (uri, defaultMIME, Right response)
`catch` \e -> do
return (uri, mimeERR,
Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException))
#endif
#ifdef WITH_DATA_URI
fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} =
let request = uriPath uri ++ uriQuery uri ++ uriFragment uri
in case breakOn ',' $ unEscapeString request of
("", response) -> return (uri, defaultMIME, Left $ Txt.pack response)
(mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' ->
return $ case B64.decode $ B.fromStrict $ C8.pack response of
Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str)
Right bytes -> (uri, reverse mime, Right bytes)
(mime, response) -> return (uri, mime, Left $ Txt.pack response)
#endif
#ifdef WITH_XDG
fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do
app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s)
return (uri, htmlERR, Left $ Txt.pack $ trans l $ app)
#else
fetchURL' Session {locale = l} _ URI {uriScheme = scheme} =
return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme)
#endif
dispatchByMIME :: Session -> String -> URI -> IO (Maybe String)
#if WITH_XDG
dispatchByMIME Session {locale = l, apps = a} mime uri = do
err <- dispatchURIByMIME a uri mime
return $ case err of
UnsupportedMIME _ -> Nothing
_ -> Just $ trans l err
#else
dispatchByMIME _ _ _ = return Nothing
#endif
-- Downloads utilities
-- | write download to a file in the given directory.
saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI
saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do
dest <- unusedFilename (dir </> takeFileName' path)
case resp of
Left txt -> writeFile dest $ Txt.unpack txt
Right bytes -> B.writeFile dest bytes
-- TODO set user.mime file attribute.
return $ baseURI {uriPath = dest}
where
takeFileName' s = case takeFileName s of { "" -> "index"; f -> f}
unusedFilename path = do
exists <- doesFileExist path
if exists then go 0 else return path
where
go n = do
let path' = path ++ show n
exists <- doesFileExist path'
if exists then go (n+1) else return path'
-- | Convert a download into a data: URI
downloadToURI :: (URI, String, Either Text ByteString) -> URI
downloadToURI (_, mime, Left txt) = nullURI {
uriScheme = "data:",
uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt)
}
downloadToURI (_, mime, Right bytes) = nullURI {
uriScheme = "data:",
uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes)
}
-- Logging API
enableLogging :: Session -> IO ()
enableLogging session = do
logInactive <- isEmptyMVar $ requestLog session
if logInactive then putMVar (requestLog session) [] else return ()
retrieveLog :: Session -> IO [LogRecord]
retrieveLog session = do
logInactive <- isEmptyMVar $ requestLog session
if logInactive then return [] else takeMVar $ requestLog session
writeLog :: Handle -> Session -> IO ()
writeLog out session = do
writeRow ["URL", "Redirected", "Accept", "MIMEtype", "Size", "Begin", "End", "Duration"]
log <- retrieveLog session
forM log $ \record -> writeRow [
show $ url record, show $ redirected record,
show $ accept record, show $ mimetype record,
case response record of
Left txt -> show $ Txt.length txt
Right bs -> show $ B.length bs,
show $ begin record, show $ end record,
show (end record `diffUTCTime` end record)
]
return ()
where
writeRow = hPutStrLn out . L.intercalate "\t"
-- Utils
breakOn c (a:as) | c == a = ([], as)
| otherwise = let (x, y) = breakOn c as in (a:x, y)
breakOn _ [] = ([], [])