{-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} -- | Retrieves documents for a URL, supporting multiple URL schemes that can be -- disabled at build-time for reduced dependencies. module Network.URI.Fetch(Session, locale, newSession, fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR, dispatchByMIME, saveDownload, downloadToURI) where import qualified Data.Text as Txt import Data.Text (Text) import Network.URI import qualified Data.ByteString as Strict import Data.ByteString.Lazy (ByteString) import qualified Data.ByteString.Lazy as B import qualified Data.ByteString.Char8 as C8 import Network.URI.Charset import Control.Exception import System.IO.Error (isEOFError) import Control.Concurrent.Async (forConcurrently) -- for about: URIs & port parsing, all standard lib import Data.Maybe (fromMaybe, listToMaybe) import Text.Read (readMaybe) -- for saveDownload import System.Directory import System.FilePath #ifdef WITH_HTTP_URI import qualified Network.HTTP.Client as HTTP import qualified Network.HTTP.Client.TLS as TLS import Network.HTTP.Types import Data.List (intercalate) #endif #ifdef WITH_RAW_CONNECTIONS import Network.Connection #endif #ifdef WITH_DATA_URI import qualified Data.ByteString.Base64.URL.Lazy as B64 #endif import Network.URI.Locale import Network.URI.Messages #ifdef WITH_XDG import Network.URI.XDG #endif -- | Data shared accross multiple URI requests. data Session = Session { #ifdef WITH_HTTP_URI managerHTTP :: HTTP.Manager, #endif #ifdef WITH_RAW_CONNECTIONS connCtxt :: ConnectionContext, #endif #ifdef WITH_XDG apps :: XDGConfig, #endif -- | The languages (RFC2616-encoded) to which responses should be localized. locale :: [String], -- | Additional files to serve from about: URIs. aboutPages :: [(FilePath, ByteString)] } -- | Initializes a default Session object to support HTTPS & Accept-Language -- if HTTP is enabled. newSession :: IO Session newSession = do (ietfLocale, unixLocale) <- rfc2616Locale #ifdef WITH_HTTP_URI managerHTTP' <- HTTP.newManager TLS.tlsManagerSettings #endif #ifdef WITH_RAW_CONNECTIONS connCtxt <- initConnectionContext #endif #ifdef WITH_XDG apps' <- loadXDGConfig unixLocale #endif return Session { #ifdef WITH_HTTP_URI managerHTTP = managerHTTP', #endif #ifdef WITH_RAW_CONNECTIONS connCtxt = connCtxt, #endif #ifdef WITH_XDG apps = apps', #endif locale = ietfLocale, aboutPages = [] } llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key] parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port parsePort fallback _ = fallback -- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text. fetchURL :: Session -- ^ The session of which this request is a part. -> [String] -- ^ The expected MIMEtypes in priority order. -> URI -- ^ The URL to retrieve -> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response. fetchURL sess mimes uri = do (_, mime, resp) <- fetchURL' sess mimes uri return (mime, resp) -- | Concurrently fetch given URLs. fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)] fetchURLs sess mimes uris cb = forConcurrently uris (\u -> fetchURL' sess mimes u >>= cb) >>= return . zip uris -- | Internal MIMEtypes for error reporting mimeERR, htmlERR :: String mimeERR = "txt/x-error\t" htmlERR = "html/x-error\t" -- | As per `fetchURL`, but also returns the redirected URI. fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString) fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) = fetchURL' session mimes $ uri {uriPath = "version"} fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} = return (url, Txt.unpack $ convertCharset "utf-8" $ B.toStrict $ llookup (path ++ ".mime") "text/html" pages, Right $ llookup path "" pages) #ifdef WITH_HTTP_URI fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do request <- HTTP.requestFromURI uri response <- HTTP.httpLbs request { HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form. HTTP.requestHeaders = [ ("Accept", C8.pack $ intercalate ", " accept), ("Accept-Language", C8.pack $ intercalate ", " $ locale session) ] } $ managerHTTP session return $ case ( HTTP.responseBody response, [val | ("content-type", val) <- HTTP.responseHeaders response] ) of ("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response) (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype in resolveCharset' uri (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response (response, []) -> (uri, defaultMIME, Right response) `catches` [ Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e), Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg) ] #endif #ifdef WITH_GEMINI_URI fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI { uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port) } = do conn <- connectTo ctxt $ ConnectionParams { connectionHostname = host, connectionPort = parsePort 1965 port, -- TODO implement Trust-On-First-Use, client certificates connectionUseSecure = Just $ TLSSettingsSimple False False False, connectionUseSocks = Nothing } connectionPut conn $ C8.pack $ uriToString id uri "\r\n" header <- connectionGetLine 1024 conn ret <- case parseHeader header of -- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms. ('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [ "
" ]) ('2', _, mime) -> do chunks <- mWhile (connectionWaitForInput conn 60000 `catch` (return . not . isEOFError)) $ (connectionGetChunk conn `catch` handleIOErr) let mime' = map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime return $ resolveCharset' uri mime' $ B.fromChunks chunks ('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect -> fetchURL' sess mimes $ relativeTo redirect' uri -- TODO Implement client certificates, once I have a way for the user/caller to select one. -- And once I figure out how to configure the TLS cryptography. (_, _, err) -> return (uri, mimeERR, Left err) connectionClose conn return ret where parseHeader header | Just (major, header') <- Txt.uncons $ convertCharset "utf-8" header, Just (minor, meta) <- Txt.uncons header' = (major, minor, Txt.strip meta) | otherwise = ('4', '1', Txt.pack $ trans l MalformedResponse) handleIOErr :: IOError -> IO Strict.ByteString handleIOErr _ = return "" #endif #ifdef WITH_FILE_URI fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do response <- B.readFile $ uriPath uri return (uri, defaultMIME, Right response) `catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException)) #endif #ifdef WITH_DATA_URI fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} = let request = uriPath uri ++ uriQuery uri ++ uriFragment uri in case breakOn ',' $ unEscapeString request of ("", response) -> return (uri, defaultMIME, Left $ Txt.pack response) (mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' -> return $ case B64.decode $ B.fromStrict $ C8.pack response of Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str) Right bytes -> (uri, reverse mime, Right bytes) (mime, response) -> return (uri, mime, Left $ Txt.pack response) #endif #ifdef WITH_XDG fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s) return (uri, htmlERR, Left $ Txt.pack $ trans l $ app) #else fetchURL' Session {locale = l} _ URI {uriScheme = scheme} = return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme) #endif dispatchByMIME :: Session -> String -> URI -> IO (Maybe String) #if WITH_XDG dispatchByMIME Session {locale = l, apps = a} mime uri = do err <- dispatchURIByMIME a uri mime return $ case err of UnsupportedMIME _ -> Nothing _ -> Just $ trans l err #else dispatchByMIME _ _ _ = return Nothing #endif -- Downloads utilities -- | write download to a file in the given directory. saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do dest <- unusedFilename (dir > takeFileName' path) case resp of Left txt -> writeFile dest $ Txt.unpack txt Right bytes -> B.writeFile dest bytes -- TODO set user.mime file attribute. return $ baseURI {uriPath = dest} where takeFileName' s = case takeFileName s of { "" -> "index"; f -> f} unusedFilename path = do exists <- doesFileExist path if exists then go 0 else return path where go n = do let path' = path ++ show n exists <- doesFileExist path' if exists then go (n+1) else return path' -- | Convert a download into a data: URI downloadToURI :: (URI, String, Either Text ByteString) -> URI downloadToURI (_, mime, Left txt) = nullURI { uriScheme = "data:", uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt) } downloadToURI (_, mime, Right bytes) = nullURI { uriScheme = "data:", uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes) } -- Utils #ifdef WITH_DATA_URI breakOn c (a:as) | c == a = ([], as) | otherwise = let (x, y) = breakOn c as in (a:x, y) breakOn _ [] = ([], []) #endif #ifdef WITH_GEMINI_URI mWhile test body = do cond <- test if cond then do x <- body xs <- mWhile test body return (x:xs) else return [] #endif