{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession, fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR, dispatchByMIME, appsForMIME, Application(..), dispatchByApp, saveDownload, downloadToURI, -- logging API LogRecord(..), enableLogging, retrieveLog, writeLog) where import Network.URI.Types import qualified Data.Text as Txt import Data.Text (Text) import Network.URI import qualified Data.ByteString as Strict import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Char8 as C8 import Network.URI.Charset import Control.Exception import System.IO.Error (isEOFError) import Control.Concurrent.Async (forConcurrently) -- for about: URIs & port parsing, all standard lib import Data.Maybe (fromMaybe, listToMaybe, isJust) import Data.Either (isLeft) import Text.Read (readMaybe) -- for executable extensions, all standard lib import Data.Char (isSpace) import System.Exit (ExitCode(..)) -- for saveDownload import System.Directory import System.FilePath -- for logging import Control.Concurrent.MVar import Data.Time.Clock import System.IO import Control.Monad import Data.List as L #ifdef WITH_HTTP_URI import qualified Network.HTTP.Client as HTTP import qualified Network.HTTP.Client.OpenSSL as TLS import qualified OpenSSL.Session as TLS import Network.HTTP.Types import Data.List (intercalate) import Control.Concurrent (forkIO) import Network.URI.Cache #endif #ifdef WITH_RAW_CONNECTIONS import qualified OpenSSL as TLS import qualified OpenSSL.Session as TLS import qualified System.IO.Streams.SSL as TLSConn import System.IO.Streams #endif #ifdef WITH_DATA_URI import qualified Data.ByteString.Base64.URL.Lazy as B64 #endif import Network.URI.Locale import Network.URI.Messages #ifdef WITH_XDG import Network.URI.XDG #endif #ifdef WITH_PLUGIN_REWRITES import Network.URI.PlugIns.Rewriters #endif -- | Data shared accross multiple URI requests. data Session = Session { #ifdef WITH_HTTP_URI managerHTTP :: HTTP.Manager, #endif #ifdef WITH_RAW_CONNECTIONS connCtxt :: TLS.SSLContext, #endif #ifdef WITH_XDG apps :: XDGConfig, #endif #ifdef WITH_PLUGIN_REWRITES rewriter :: Rewriter, #endif -- | The languages (RFC2616-encoded) to which responses should be localized. locale :: [String], -- | Additional files to serve from about: URIs. aboutPages :: [(FilePath, ByteString)], -- | Log of timestamped/profiled URL requests requestLog :: Maybe (MVar [LogRecord]), -- | How many redirects to follow for Gemini or HTTP(S) requests redirectCount :: Int, -- | Whether to cache network responses, avoiding sending requests cachingEnabled :: Bool, -- | App-specific config subdirectory to check appName :: String } data LogRecord = LogRecord { url :: URI, accept :: [String], redirected :: URI, mimetype :: String, response :: Either Text ByteString, begin :: UTCTime, end :: UTCTime } -- | Initializes a default Session object to support HTTPS & Accept-Language -- if HTTP is enabled. newSession :: IO Session newSession = newSession' "" -- | Variant of `newSession` which loads plugins for the named app. newSession' :: String -> IO Session newSession' appname = do (ietfLocale, unixLocale) <- rfc2616Locale #ifdef WITH_HTTP_URI httpsCtxt <- TLS.context TLS.contextSetDefaultCiphers httpsCtxt TLS.contextSetCADirectory httpsCtxt "/etc/ssl/certs" TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt #endif #ifdef WITH_RAW_CONNECTIONS connCtxt <- TLS.context TLS.contextSetDefaultCiphers connCtxt TLS.contextSetCADirectory connCtxt "/etc/ssl/certs" TLS.contextSetVerificationMode connCtxt $ TLS.VerifyPeer True True $ Just $ \valid _ -> return valid -- FIXME: Implement Trust-On-First-Use #endif #ifdef WITH_XDG apps' <- loadXDGConfig unixLocale #endif #ifdef WITH_PLUGIN_REWRITES rewriters <- parseRewriters appname #endif return Session { #ifdef WITH_HTTP_URI managerHTTP = managerHTTP', #endif #ifdef WITH_RAW_CONNECTIONS connCtxt = connCtxt, #endif #ifdef WITH_XDG apps = apps', #endif #ifdef WITH_PLUGIN_REWRITES rewriter = rewriters, #endif locale = ietfLocale, aboutPages = [], requestLog = Nothing, redirectCount = 5, cachingEnabled = True, appName = appname } llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key] parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port parsePort fallback _ = fallback -- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text. fetchURL :: Session -- ^ The session of which this request is a part. -> [String] -- ^ The expected MIMEtypes in priority order. -> URI -- ^ The URL to retrieve -> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response. fetchURL sess mimes uri = do (_, mime, resp) <- fetchURL' sess mimes uri return (mime, resp) fetchURLLogged log sess mimes uri = do begin' <- getCurrentTime res@(redirected', mimetype', response') <- fetchURL' sess mimes uri end' <- getCurrentTime modifyMVar_ log $ \log' -> return ( LogRecord uri mimes redirected' mimetype' response' begin' end' : log') return res -- | Concurrently fetch given URLs. fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)] fetchURLs sess mimes uris cb = do let fetch = case requestLog sess of {Nothing -> fetchURL'; Just log -> fetchURLLogged log} forConcurrently uris (\u -> fetch sess mimes u >>= cb) >>= return . L.zip uris -- | Internal MIMEtypes for error reporting mimeERR, htmlERR :: String mimeERR = "txt/x-error\t" htmlERR = "html/x-error\t" -- | As per `fetchURL`, but also returns the redirected URI. fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString) fetchURL' Session {redirectCount = 0, locale = locale'} _ uri = return (uri, mimeERR, Left $ Txt.pack $ trans locale' ExcessiveRedirects) #ifdef WITH_PLUGIN_REWRITES fetchURL' session mimes uri | Just uri' <- applyRewriter (rewriter session) uri = fetchURL' session mimes uri' #endif #ifdef WITH_PLUGIN_EXEC fetchURL' session@Session { appName = appname, locale = l } mimes uri@(URI "ext:" Nothing path query _) = do dir <- getXdgDirectory XdgData "nz.geek.adrian.hurl" sysdirs <- getXdgDirectoryList XdgDataDirs let dirs = concat [[dir' > appname, dir'] | dir' <- dir : sysdirs] programs <- findExecutablesInDirectories dirs ("bin" > path) case programs of [] -> return (uri, mimeERR, Left $ Txt.pack $ trans l $ ReadFailed "404") program:_ -> do let args = case query of { '?':rest -> split (== '&') rest; _ -> [] } (exitcode, stdout, stderr) <- readProcessWithExitCode program args "" let response = if isSuccess exitcode then stdout else stderr let (header, body) = breakOn '\n' response case strip header of 'm':'i':'m':'e':mimetype -> return (uri, strip mimetype, Left $ Txt.pack body) 'u':'r':'l':header' | Just uri' <- parseURIReference $ strip header' -> fetchURL' (session {redirectCount = redirectCount session - 1}) mimes $ relativeTo uri' uri _ | isSuccess exitcode -> return (uri, "text/html", Left $ Txt.pack response) _ -> return (uri, mimeERR, Left $ Txt.pack response) where split p s = case dropWhile p s of "" -> [] s' -> let (w, s'') = break p s' in w : split p s'' strip = dropWhile isSpace . dropWhileEnd isSpace isSuccess ExitSuccess = True isSuccess _ = False #endif fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) = fetchURL' session mimes $ uri {uriPath = "version"} fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} = return (url, Txt.unpack $ Txt.strip $ convertCharset "utf-8" $ B.toStrict $ llookup (path ++ ".mime") "text/html" pages, Right $ llookup path "" pages) #ifdef WITH_HTTP_URI fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing) response <- case cached of (Just (mime, body), Nothing) -> return $ Right (mime, body) (cached, cachingHeaders) -> do request <- HTTP.requestFromURI uri response <- HTTP.httpLbs request { HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. HTTP.requestHeaders = [ ("Accept", C8.pack $ intercalate ", " accept), ("Accept-Language", C8.pack $ intercalate ", " $ locale session) ] ++ fromMaybe [] cachingHeaders, HTTP.redirectCount = 0 } $ managerHTTP session case ( HTTP.responseStatus response, HTTP.responseBody response, [val | ("content-type", val) <- HTTP.responseHeaders response] ) of (Status 304 _, _, _) | Just cached'@(_, body) <- cached -> do cacheHTTP uri $ response { HTTP.responseBody = body } return $ Right cached' -- Manually handle redirects so the caller & HTTP cache gets the correct URI. (Status code _, _, _) | code > 300 && code < 400, Just location <- lookup "location" $ HTTP.responseHeaders response, Just uri' <- parseURIReference $ C8.unpack location -> return $ Left $ relativeTo uri' uri (Status _ msg, "", _) -> return $ Right (Txt.pack mimeERR, B.fromStrict msg) (_, body, (mimetype:_)) -> do cacheHTTP uri response forkIO cleanCacheHTTP -- Try to keep diskspace down... let mime = Txt.toLower $ convertCharset "utf-8" mimetype return $ Right (mime, body) (_, response, []) -> return $ Right (Txt.pack defaultMIME, response) case response of Left redirect -> let session' = session { redirectCount = redirectCount session - 1 } in fetchURL' session' accept redirect Right (mime, body) -> let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime in return $ resolveCharset' uri mime' body `catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e) #endif #ifdef WITH_GEMINI_URI fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI { uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port) } = TLSConn.withConnection ctxt host (parsePort 1965 port) $ \input output _ -> do writeTo output $ Just $ C8.pack $ uriToString id uri "\r\n" input' <- inputStreamToHandle input header <- hGetLine input' case parseHeader header of -- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms. ('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [ "
" ]) ('2', _, mime) -> do body <- Strict.hGetContents input' let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime return $ resolveCharset' uri mime' $ B.fromStrict body ('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect -> fetchURL' sess { redirectCount = redirectCount sess - 1 } mimes $ relativeTo redirect' uri -- TODO Implement client certificates, once I have a way for the user/caller to select one. -- And once I figure out how to configure the TLS cryptography. (_, _, err) -> return (uri, mimeERR, Left err) where parseHeader :: String -> (Char, Char, Text) parseHeader (major:minor:meta) = (major, minor, Txt.strip $ Txt.pack meta) parseHeader _ = ('4', '1', Txt.pack $ trans l MalformedResponse) handleIOErr :: IOError -> IO Strict.ByteString handleIOErr _ = return "" #endif #ifdef WITH_FILE_URI fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do response <- B.readFile $ uriPath uri return (uri, defaultMIME, Right response) `catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException)) #endif #ifdef WITH_DATA_URI fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} = let request = uriPath uri ++ uriQuery uri ++ uriFragment uri in case breakOn ',' $ unEscapeString request of ("", response) -> return (uri, defaultMIME, Left $ Txt.pack response) (mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' -> return $ case B64.decode $ B.fromStrict $ C8.pack response of Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str) Right bytes -> (uri, reverse mime, Right bytes) (mime, response) -> return (uri, mime, Left $ Txt.pack response) #endif #ifdef WITH_XDG fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s) return (uri, htmlERR, Left $ Txt.pack $ trans l $ app) #else fetchURL' Session {locale = l} _ URI {uriScheme = scheme} = return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme) #endif dispatchByMIME :: Session -> String -> URI -> IO (Maybe String) #if WITH_XDG dispatchByMIME Session {locale = l, apps = a} mime uri = do err <- dispatchURIByMIME a uri mime return $ case err of UnsupportedMIME _ -> Nothing _ -> Just $ trans l err #else dispatchByMIME _ _ _ = return Nothing #endif appsForMIME :: Session -> String -> IO [Application] #if WITH_XDG appsForMIME Session { apps = a, locale = l } = queryHandlers' a l #else appsForMIME _ _ = [] #endif dispatchByApp :: Session -> Application -> String -> URI -> IO Bool #if WITH_XDG dispatchByApp session@Session { locale = l } Application { appId = app} mime uri = do try1 <- launchApp' l uri app -- First try handing off the URL, feedreaders need this! case try1 of Left app -> return True Right False -> return False Right True -> do -- Download as temp file to open locally, the app requires it... temp <- canonicalizePath =<< getTemporaryDirectory resp <- fetchURL' session [mime] uri uri' <- saveDownload (URI "file:" Nothing "" "" "") temp resp isLeft <$> launchApp' l uri' app #else dispatchByApp _ _ _ _ = return False #endif -- Downloads utilities -- | write download to a file in the given directory. saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do dest <- unusedFilename (dir > takeFileName' path) case resp of Left txt -> writeFile dest $ Txt.unpack txt Right bytes -> B.writeFile dest bytes -- TODO set user.mime file attribute. return $ baseURI {uriPath = dest} where takeFileName' s = case takeFileName s of { "" -> "index"; f -> f} unusedFilename path = do exists <- doesFileExist path if exists then go 0 else return path where go n = do let path' = path ++ show n exists <- doesFileExist path' if exists then go (n+1) else return path' -- | Convert a download into a data: URI downloadToURI :: (URI, String, Either Text ByteString) -> URI downloadToURI (_, mime, Left txt) = nullURI { uriScheme = "data:", uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt) } downloadToURI (_, mime, Right bytes) = nullURI { uriScheme = "data:", uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes) } -- Logging API enableLogging :: Session -> IO Session enableLogging session = do log <- newMVar [] return session { requestLog = Just log } retrieveLog :: Session -> IO [LogRecord] retrieveLog session@Session { requestLog = Just log } = swapMVar log [] retrieveLog _ = return [] writeLog :: Handle -> Session -> IO () writeLog out session = do writeRow ["URL", "Redirected", "Accept", "MIMEtype", "Size", "Begin", "End", "Duration"] log <- retrieveLog session forM log $ \record -> writeRow [ show $ url record, show $ redirected record, show $ accept record, show $ mimetype record, case response record of Left txt -> show $ Txt.length txt Right bs -> show $ B.length bs, show $ begin record, show $ end record, show (end record `diffUTCTime` end record) ] return () where writeRow = hPutStrLn out . L.intercalate "\t" -- Utils breakOn c (a:as) | c == a = ([], as) | otherwise = let (x, y) = breakOn c as in (a:x, y) breakOn _ [] = ([], [])