{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. module Network.URI.Fetch(Session, locale, newSession, fetchURL, dispatchByMIME, saveDownload) where import qualified Data.Text as Txt import Data.Text (Text) import Network.URI import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Char8 as C8 import Network.URI.Charset import Control.Exception import Control.Concurrent.Async (forConcurrently) -- for about: URIs & port parsing, all standard lib import Data.Maybe (fromMaybe, listToMaybe) import Text.Read (readMaybe) -- for saveDownload import System.Directory import System.FilePath #ifdef WITH_HTTP_URI import qualified Network.HTTP.Client as HTTP import qualified Network.HTTP.Client.TLS as TLS import Network.HTTP.Types import Data.List (intercalate) #endif #ifdef WITH_RAW_CONNECTIONS import Network.Connection #endif #ifdef WITH_DATA_URI import qualified Data.ByteString.Base64.URL.Lazy as B64 #endif import Network.URI.Locale import Network.URI.Messages #ifdef WITH_XDG import Network.URI.XDG #endif -- | Data shared accross multiple URI requests. data Session = Session { #ifdef WITH_HTTP_URI managerHTTP :: HTTP.Manager, #endif #ifdef WITH_RAW_CONNECTIONS connCtxt :: ConnectionContext, #endif #ifdef WITH_XDG apps :: XDGConfig, #endif -- | The languages (RFC2616-encoded) to which responses should be localized. locale :: [String], -- | Additional files to serve from about: URIs. aboutPages :: [(FilePath, ByteString)] } -- | Initializes a default Session object to support HTTPS & Accept-Language -- if HTTP is enabled. newSession :: IO Session newSession = do (ietfLocale, unixLocale) <- rfc2616Locale #ifdef WITH_HTTP_URI managerHTTP' <- HTTP.newManager TLS.tlsManagerSettings #endif #ifdef WITH_RAW_CONNECTIONS connCtxt <- initConnectionContext #endif #ifdef WITH_XDG apps' <- loadXDGConfig unixLocale #endif return Session { #ifdef WITH_HTTP_URI managerHTTP = managerHTTP', #endif #ifdef WITH_RAW_CONNECTIONS connCtxt = connCtxt, #endif #ifdef WITH_XDG apps = apps', #endif locale = ietfLocale, aboutPages = [] } llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key] parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port parsePort fallback _ = fallback -- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text. fetchURL :: Session -- ^ The session of which this request is a part. -> [String] -- ^ The expected MIMEtypes in priority order. -> URI -- ^ The URL to retrieve -> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response. fetchURL sess mimes uri = do (_, mime, resp) <- fetchURL' sess mimes uri return (mime, resp) -- | Concurrently fetch given URLs. fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)] fetchURLs sess mimes uris cb = forConcurrently uris (\u -> fetchURL' sess mimes u >>= cb) >>= return . zip uris -- | As per `fetchURL`, but also returns the redirected URI. fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString) fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) = fetchURL' session mimes $ uri {uriPath = "version"} fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} = return (url, Txt.unpack $ convertCharset "utf-8" $ B.toStrict $ llookup (path ++ ".mime") "text/html" pages, Right $ llookup path "" pages) #ifdef WITH_HTTP_URI fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do request <- HTTP.requestFromURI uri response <- HTTP.httpLbs request { HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. HTTP.requestHeaders = [ ("Accept", C8.pack $ intercalate ", " accept), ("Accept-Language", C8.pack $ intercalate ", " $ locale session) ] } $ managerHTTP session return $ case ( HTTP.responseBody response, [val | ("content-type", val) <- HTTP.responseHeaders response] ) of ("", _) -> (uri, "text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response) (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype in resolveCharset' uri (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response (response, []) -> (uri, defaultMIME, Right response) `catches` [ Handler $ \e -> do return (uri, "text/plain", Left $ Txt.pack $ trans (locale session) $ Http e), Handler $ \(ErrorCall msg) -> do return (uri, "text/plain", Left $ Txt.pack msg) ] #endif #ifdef WITH_GEMINI_URI fetchURL' sess@Session {connCtxt = ctxt} mimes uri@URI { uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port) } = do conn <- connectTo ctxt $ ConnectionParams { connectionHostname = host, connectionPort = parsePort 1965 port, -- TODO implement Trust-On-First-Use, client certificates connectionUseSecure = Just $ TLSSettingsSimple False False False, connectionUseSocks = Nothing } connectionPut conn $ C8.pack $ uriToString id uri "\r\n" header <- connectionGetLine 1024 conn ret <- case parseHeader header of -- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms. ('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [ "
" ]) ('2', _, mime) -> do chunks <- mWhile (connectionWaitForInput conn 60000) $ connectionGetChunk conn let mime' = map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime return $ resolveCharset' uri mime' $ B.fromChunks chunks ('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect -> fetchURL' sess mimes $ relativeTo redirect' uri -- TODO Implement client certificates, once I have a way for the user/caller to select one. -- And once I figure out how to configure the TLS cryptography. (_, _, err) -> return (uri, "text/plain", Left err) connectionClose conn return ret where parseHeader header | Just (major, header') <- Txt.uncons $ convertCharset "utf-8" header, Just (minor, meta) <- Txt.uncons header' = (major, minor, Txt.strip meta) | otherwise = ('4', '1', "Invalid response!") #endif #ifdef WITH_FILE_URI fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do response <- B.readFile $ uriPath uri return (uri, defaultMIME, Right response) `catch` \e -> do return (uri, "text/plain", Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException)) #endif #ifdef WITH_DATA_URI fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} = let request = uriPath uri ++ uriQuery uri ++ uriFragment uri in case breakOn ',' $ unEscapeString request of ("", response) -> return (uri, defaultMIME, Left $ Txt.pack response) (mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' -> return $ case B64.decode $ B.fromStrict $ C8.pack response of Left str -> (uri, "text/plain", Left $ Txt.pack $ unEscapeString str) Right bytes -> (uri, reverse mime, Right bytes) (mime, response) -> return (uri, mime, Left $ Txt.pack response) #endif #ifdef WITH_XDG fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s) return (uri, "text/html", Left $ Txt.pack $ trans l $ app) #else fetchURL' Session {locale = l} _ URI {uriScheme = scheme} = return (uri, "text/plain", Left $ Txt.pack $ trans l $ UnsupportedScheme scheme) #endif dispatchByMIME :: Session -> String -> URI -> IO (Maybe String) #if WITH_XDG dispatchByMIME Session {locale = l, apps = a} mime uri = do err <- dispatchURIByMIME a uri mime return $ case err of UnsupportedMIME _ -> Nothing _ -> Just $ trans l err #else dispatchByMIME _ _ _ = return Nothing #endif -- Downloads utilities -- | write download to a file in the given directory. saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do dest <- unusedFilename (dir > takeFileName path) case resp of Left txt -> writeFile dest $ Txt.unpack txt Right bytes -> B.writeFile dest bytes -- TODO set user.mime file attribute. return $ baseURI {uriPath = dest} unusedFilename path = do exists <- doesFileExist path if exists then go 0 else return path where go n = do let path' = path ++ show n exists <- doesFileExist path' if exists then go (n+1) else return path' -- | Convert a download into a data: URI downloadToURI :: (URI, String, Either Text ByteString) -> URI downloadToURI (_, mime, Left txt) = nullURI { uriScheme = "data:", uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt) } downloadToURI (_, mime, Right bytes) = nullURI { uriScheme = "data:", uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes) } -- Utils #ifdef WITH_DATA_URI breakOn c (a:as) | c == a = ([], as) | otherwise = let (x, y) = breakOn c as in (a:x, y) breakOn _ [] = ([], []) #endif #ifdef WITH_GEMINI_URI mWhile test body = do cond <- test if cond then do x <- body xs <- mWhile test body return (x:xs) else return [] #endif