{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. module Network.URI.Fetch(Session(locale, aboutPages, redirectCount), newSession, fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR, dispatchByMIME, saveDownload, downloadToURI, -- logging API LogRecord(..), enableLogging, retrieveLog, writeLog) where import qualified Data.Text as Txt import Data.Text (Text) import Network.URI import qualified Data.ByteString as Strict import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Char8 as C8 import Network.URI.Charset import Control.Exception import System.IO.Error (isEOFError) import Control.Concurrent.Async (forConcurrently) -- for about: URIs & port parsing, all standard lib import Data.Maybe (fromMaybe, listToMaybe) import Text.Read (readMaybe) -- for executable extensions, all standard lib import Data.Char (isSpace) import System.Exit (ExitCode(..)) -- for saveDownload import System.Directory import System.FilePath -- for logging import Control.Concurrent.MVar import Data.Time.Clock import System.IO import Control.Monad import Data.List as L #ifdef WITH_HTTP_URI import qualified Network.HTTP.Client as HTTP import qualified Network.HTTP.Client.OpenSSL as TLS import qualified OpenSSL.Session as TLS import Network.HTTP.Types import Data.List (intercalate) #endif #ifdef WITH_RAW_CONNECTIONS import qualified OpenSSL as TLS import qualified OpenSSL.Session as TLS import qualified System.IO.Streams.SSL as TLSConn import System.IO.Streams #endif #ifdef WITH_DATA_URI import qualified Data.ByteString.Base64.URL.Lazy as B64 #endif import Network.URI.Locale import Network.URI.Messages #ifdef WITH_XDG import Network.URI.XDG #endif #ifdef WITH_PLUGIN_REWRITES import Network.URI.PlugIns.Rewriters #endif -- | Data shared accross multiple URI requests. data Session = Session { #ifdef WITH_HTTP_URI managerHTTP :: HTTP.Manager, #endif #ifdef WITH_RAW_CONNECTIONS connCtxt :: TLS.SSLContext, #endif #ifdef WITH_XDG apps :: XDGConfig, #endif #ifdef WITH_PLUGIN_REWRITES rewriter :: Rewriter, #endif -- | The languages (RFC2616-encoded) to which responses should be localized. locale :: [String], -- | Additional files to serve from about: URIs. aboutPages :: [(FilePath, ByteString)], -- | Log of timestamped/profiled URL requests requestLog :: MVar [LogRecord], -- | How many redirects to follow for Gemini or HTTP(S) requests redirectCount :: Int } data LogRecord = LogRecord { url :: URI, accept :: [String], redirected :: URI, mimetype :: String, response :: Either Text ByteString, begin :: UTCTime, end :: UTCTime } -- | Initializes a default Session object to support HTTPS & Accept-Language -- if HTTP is enabled. newSession :: IO Session newSession = newSession' "" -- | Variant of `newSession` which loads plugins for the named app. newSession' :: String -> IO Session newSession' appname = do (ietfLocale, unixLocale) <- rfc2616Locale #ifdef WITH_HTTP_URI httpsCtxt <- TLS.context TLS.contextSetDefaultCiphers httpsCtxt TLS.contextSetCADirectory httpsCtxt "/etc/ssl/certs" TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt #endif #ifdef WITH_RAW_CONNECTIONS connCtxt <- TLS.context TLS.contextSetDefaultCiphers connCtxt TLS.contextSetCADirectory connCtxt "/etc/ssl/certs" TLS.contextSetVerificationMode connCtxt $ TLS.VerifyPeer True True $ Just $ \valid _ -> return valid -- FIXME: Implement Trust-On-First-Use #endif #ifdef WITH_XDG apps' <- loadXDGConfig unixLocale #endif #ifdef WITH_PLUGIN_REWRITES rewriters <- parseRewriters appname #endif log <- newEmptyMVar return Session { #ifdef WITH_HTTP_URI managerHTTP = managerHTTP', #endif #ifdef WITH_RAW_CONNECTIONS connCtxt = connCtxt, #endif #ifdef WITH_XDG apps = apps', #endif #ifdef WITH_PLUGIN_REWRITES rewriter = rewriters, #endif locale = ietfLocale, aboutPages = [], requestLog = log, redirectCount = 5 } llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key] parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port parsePort fallback _ = fallback -- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text. fetchURL :: Session -- ^ The session of which this request is a part. -> [String] -- ^ The expected MIMEtypes in priority order. -> URI -- ^ The URL to retrieve -> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response. fetchURL sess mimes uri = do (_, mime, resp) <- fetchURL' sess mimes uri return (mime, resp) fetchURLLogged sess mimes uri = do begin' <- getCurrentTime res@(redirected', mimetype', response') <- fetchURL' sess mimes uri end' <- getCurrentTime modifyMVar_ (requestLog sess) $ \log -> return ( LogRecord uri mimes redirected' mimetype' response' begin' end' : log) return res -- | Concurrently fetch given URLs. fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)] fetchURLs sess mimes uris cb = do shouldntLog <- isEmptyMVar $ requestLog sess let fetch = if shouldntLog then fetchURL' else fetchURLLogged forConcurrently uris (\u -> fetch sess mimes u >>= cb) >>= return . L.zip uris -- | Internal MIMEtypes for error reporting mimeERR, htmlERR :: String mimeERR = "txt/x-error\t" htmlERR = "html/x-error\t" -- | As per `fetchURL`, but also returns the redirected URI. fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString) fetchURL' Session {redirectCount = 0, locale = locale'} _ uri = return (uri, mimeERR, Left $ Txt.pack $ trans locale' ExcessiveRedirects) #ifdef WITH_PLUGIN_REWRITES fetchURL' session mimes uri | Just uri' <- applyRewriter (rewriter session) uri = fetchURL' session mimes uri' #endif fetchURL' session mimes uri@(URI {uriScheme = "ext:", uriAuthority = Nothing, uriPath = path, uriQuery = query}) = do dir <- getXdgDirectory XdgData "nz.geek.adrian.hurl" let program = dir > "bin" > path let args = case query of { '?':rest -> split (== '&') rest; _ -> [] } (exitcode, stdout, stderr) <- readProcessWithExitCode program args "" let response = if isSuccess exitcode then stdout else stderr let (header, body) = breakOn '\n' response case strip header of 'm':'i':'m':'e':mimetype -> return (uri, strip mimetype, Left $ Txt.pack body) 'u':'r':'l':header' | Just uri' <- parseURIReference $ strip header' -> fetchURL' (session {redirectCount = redirectCount session - 1}) mimes $ relativeTo uri' uri _ | isSuccess exitcode -> return (uri, "text/html", Left $ Txt.pack response) _ -> return (uri, mimeERR, Left $ Txt.pack response) where split p s = case dropWhile p s of "" -> [] s' -> let (w, s'') = break p s' in w : split p s'' strip = dropWhile isSpace . dropWhileEnd isSpace isSuccess ExitSuccess = True isSuccess _ = False fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) = fetchURL' session mimes $ uri {uriPath = "version"} fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} = return (url, Txt.unpack $ Txt.strip $ convertCharset "utf-8" $ B.toStrict $ llookup (path ++ ".mime") "text/html" pages, Right $ llookup path "" pages) #ifdef WITH_HTTP_URI fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do request <- HTTP.requestFromURI uri response <- HTTP.httpLbs request { HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. HTTP.requestHeaders = [ ("Accept", C8.pack $ intercalate ", " accept), ("Accept-Language", C8.pack $ intercalate ", " $ locale session) ], HTTP.redirectCount = redirectCount session } $ managerHTTP session return $ case ( HTTP.responseBody response, [val | ("content-type", val) <- HTTP.responseHeaders response] ) of ("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response) (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype in resolveCharset' uri (L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response (response, []) -> (uri, defaultMIME, Right response) `catches` [ Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e), Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg) ] #endif #ifdef WITH_GEMINI_URI fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI { uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port) } = TLSConn.withConnection ctxt host (parsePort 1965 port) $ \input output _ -> do writeTo output $ Just $ C8.pack $ uriToString id uri "\r\n" input' <- inputStreamToHandle input header <- hGetLine input' case parseHeader header of -- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms. ('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [ "
" ]) ('2', _, mime) -> do body <- Strict.hGetContents input' let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime return $ resolveCharset' uri mime' $ B.fromStrict body ('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect -> fetchURL' sess { redirectCount = redirectCount sess - 1 } mimes $ relativeTo redirect' uri -- TODO Implement client certificates, once I have a way for the user/caller to select one. -- And once I figure out how to configure the TLS cryptography. (_, _, err) -> return (uri, mimeERR, Left err) where parseHeader :: String -> (Char, Char, Text) parseHeader (major:minor:meta) = (major, minor, Txt.strip $ Txt.pack meta) parseHeader _ = ('4', '1', Txt.pack $ trans l MalformedResponse) handleIOErr :: IOError -> IO Strict.ByteString handleIOErr _ = return "" #endif #ifdef WITH_FILE_URI fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do response <- B.readFile $ uriPath uri return (uri, defaultMIME, Right response) `catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException)) #endif #ifdef WITH_DATA_URI fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} = let request = uriPath uri ++ uriQuery uri ++ uriFragment uri in case breakOn ',' $ unEscapeString request of ("", response) -> return (uri, defaultMIME, Left $ Txt.pack response) (mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' -> return $ case B64.decode $ B.fromStrict $ C8.pack response of Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str) Right bytes -> (uri, reverse mime, Right bytes) (mime, response) -> return (uri, mime, Left $ Txt.pack response) #endif #ifdef WITH_XDG fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s) return (uri, htmlERR, Left $ Txt.pack $ trans l $ app) #else fetchURL' Session {locale = l} _ URI {uriScheme = scheme} = return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme) #endif dispatchByMIME :: Session -> String -> URI -> IO (Maybe String) #if WITH_XDG dispatchByMIME Session {locale = l, apps = a} mime uri = do err <- dispatchURIByMIME a uri mime return $ case err of UnsupportedMIME _ -> Nothing _ -> Just $ trans l err #else dispatchByMIME _ _ _ = return Nothing #endif -- Downloads utilities -- | write download to a file in the given directory. saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do dest <- unusedFilename (dir > takeFileName' path) case resp of Left txt -> writeFile dest $ Txt.unpack txt Right bytes -> B.writeFile dest bytes -- TODO set user.mime file attribute. return $ baseURI {uriPath = dest} where takeFileName' s = case takeFileName s of { "" -> "index"; f -> f} unusedFilename path = do exists <- doesFileExist path if exists then go 0 else return path where go n = do let path' = path ++ show n exists <- doesFileExist path' if exists then go (n+1) else return path' -- | Convert a download into a data: URI downloadToURI :: (URI, String, Either Text ByteString) -> URI downloadToURI (_, mime, Left txt) = nullURI { uriScheme = "data:", uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt) } downloadToURI (_, mime, Right bytes) = nullURI { uriScheme = "data:", uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes) } -- Logging API enableLogging :: Session -> IO () enableLogging session = do logInactive <- isEmptyMVar $ requestLog session if logInactive then putMVar (requestLog session) [] else return () retrieveLog :: Session -> IO [LogRecord] retrieveLog session = do logInactive <- isEmptyMVar $ requestLog session if logInactive then return [] else takeMVar $ requestLog session writeLog :: Handle -> Session -> IO () writeLog out session = do writeRow ["URL", "Redirected", "Accept", "MIMEtype", "Size", "Begin", "End", "Duration"] log <- retrieveLog session forM log $ \record -> writeRow [ show $ url record, show $ redirected record, show $ accept record, show $ mimetype record, case response record of Left txt -> show $ Txt.length txt Right bs -> show $ B.length bs, show $ begin record, show $ end record, show (end record `diffUTCTime` end record) ] return () where writeRow = hPutStrLn out . L.intercalate "\t" -- Utils breakOn c (a:as) | c == a = ([], as) | otherwise = let (x, y) = breakOn c as in (a:x, y) breakOn _ [] = ([], [])