{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session(locale, aboutPages), newSession,
fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR,
dispatchByMIME, saveDownload, downloadToURI) where
import qualified Data.Text as Txt
import Data.Text (Text)
import Network.URI
import qualified Data.ByteString as Strict
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Char8 as C8
import Network.URI.Charset
import Control.Exception
import System.IO.Error (isEOFError)
import Control.Concurrent.Async (forConcurrently)
-- for about: URIs & port parsing, all standard lib
import Data.Maybe (fromMaybe, listToMaybe)
import Text.Read (readMaybe)
-- for saveDownload
import System.Directory
import System.FilePath
#ifdef WITH_HTTP_URI
import qualified Network.HTTP.Client as HTTP
import qualified Network.HTTP.Client.TLS as TLS
import Network.HTTP.Types
import Data.List (intercalate)
#endif
#ifdef WITH_RAW_CONNECTIONS
import Network.Connection
#endif
#ifdef WITH_DATA_URI
import qualified Data.ByteString.Base64.URL.Lazy as B64
#endif
import Network.URI.Locale
import Network.URI.Messages
#ifdef WITH_XDG
import Network.URI.XDG
#endif
#ifdef WITH_PLUGIN_REWRITES
import Network.URI.PlugIns.Rewriters
#endif
-- | Data shared accross multiple URI requests.
data Session = Session {
#ifdef WITH_HTTP_URI
managerHTTP :: HTTP.Manager,
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt :: ConnectionContext,
#endif
#ifdef WITH_XDG
apps :: XDGConfig,
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter :: Rewriter,
#endif
-- | The languages (RFC2616-encoded) to which responses should be localized.
locale :: [String],
-- | Additional files to serve from about: URIs.
aboutPages :: [(FilePath, ByteString)]
}
-- | Initializes a default Session object to support HTTPS & Accept-Language
-- if HTTP is enabled.
newSession :: IO Session
newSession = newSession' ""
-- | Variant of `newSession` which loads plugins for the named app.
newSession' :: String -> IO Session
newSession' appname = do
(ietfLocale, unixLocale) <- rfc2616Locale
#ifdef WITH_HTTP_URI
managerHTTP' <- HTTP.newManager TLS.tlsManagerSettings
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt <- initConnectionContext
#endif
#ifdef WITH_XDG
apps' <- loadXDGConfig unixLocale
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriters <- parseRewriters appname
#endif
return Session {
#ifdef WITH_HTTP_URI
managerHTTP = managerHTTP',
#endif
#ifdef WITH_RAW_CONNECTIONS
connCtxt = connCtxt,
#endif
#ifdef WITH_XDG
apps = apps',
#endif
#ifdef WITH_PLUGIN_REWRITES
rewriter = rewriters,
#endif
locale = ietfLocale,
aboutPages = []
}
llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]
parsePort fallback (':':port) = fallback `fromMaybe` readMaybe port
parsePort fallback _ = fallback
-- | Retrieves a URL-identified resource & it's MIMEtype, possibly decoding it's text.
fetchURL :: Session -- ^ The session of which this request is a part.
-> [String] -- ^ The expected MIMEtypes in priority order.
-> URI -- ^ The URL to retrieve
-> IO (String, Either Text ByteString) -- ^ The MIMEtype & possibly text-decoded response.
fetchURL sess mimes uri = do
(_, mime, resp) <- fetchURL' sess mimes uri
return (mime, resp)
-- | Concurrently fetch given URLs.
fetchURLs :: Session -> [String] -> [URI] -> ((URI, String, Either Text ByteString) -> IO a) -> IO [(URI, a)]
fetchURLs sess mimes uris cb =
forConcurrently uris (\u -> fetchURL' sess mimes u >>= cb) >>= return . zip uris
-- | Internal MIMEtypes for error reporting
mimeERR, htmlERR :: String
mimeERR = "txt/x-error\t"
htmlERR = "html/x-error\t"
-- | As per `fetchURL`, but also returns the redirected URI.
fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString)
#ifdef WITH_PLUGIN_REWRITES
fetchURL' session mimes uri
| Just uri' <- applyRewriter (rewriter session) uri = fetchURL' session mimes uri'
#endif
fetchURL' session mimes uri@(URI {uriScheme = "about:", uriPath = ""}) =
fetchURL' session mimes $ uri {uriPath = "version"}
fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath = path} =
return (url,
Txt.unpack $ Txt.strip $ convertCharset "utf-8" $ B.toStrict $
llookup (path ++ ".mime") "text/html" pages,
Right $ llookup path "" pages)
#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
request <- HTTP.requestFromURI uri
response <- HTTP.httpLbs request {
HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
HTTP.requestHeaders = [
("Accept", C8.pack $ intercalate ", " accept),
("Accept-Language", C8.pack $ intercalate ", " $ locale session)
]
} $ managerHTTP session
return $ case (
HTTP.responseBody response,
[val | ("content-type", val) <- HTTP.responseHeaders response]
) of
("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
(response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype
in resolveCharset' uri (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response
(response, []) -> (uri, defaultMIME, Right response)
`catches` [
Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e),
Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg)
]
#endif
#ifdef WITH_GEMINI_URI
fetchURL' sess@Session {connCtxt = ctxt, locale = l} mimes uri@URI {
uriScheme = "gemini:", uriAuthority = Just (URIAuth _ host port)
} = do
conn <- connectTo ctxt $ ConnectionParams {
connectionHostname = host,
connectionPort = parsePort 1965 port,
-- TODO implement Trust-On-First-Use, client certificates
connectionUseSecure = Just $ TLSSettingsSimple False False False,
connectionUseSocks = Nothing
}
connectionPut conn $ C8.pack $ uriToString id uri "\r\n"
header <- connectionGetLine 1024 conn
ret <- case parseHeader header of
-- NOTE: This case won't actually do anything until the caller (Rhapsode) implements forms.
('1', _, label) -> return (uri, "application/xhtml+xml", Left $ Txt.concat [
"<form><label for='input'>",
Txt.replace "<" "<" $ Txt.replace "&" "&" label,
"</label><input id='input' /></form>"
])
('2', _, mime) -> do
chunks <- mWhile (connectionWaitForInput conn 60000 `catch` (return . not . isEOFError)) $
(connectionGetChunk conn `catch` handleIOErr)
let mime' = map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
return $ resolveCharset' uri mime' $ B.fromChunks chunks
('3', _, redirect) | Just redirect' <- parseURIReference $ Txt.unpack redirect ->
fetchURL' sess mimes $ relativeTo redirect' uri
-- TODO Implement client certificates, once I have a way for the user/caller to select one.
-- And once I figure out how to configure the TLS cryptography.
(_, _, err) -> return (uri, mimeERR, Left err)
connectionClose conn
return ret
where
parseHeader header
| Just (major, header') <- Txt.uncons $ convertCharset "utf-8" header,
Just (minor, meta) <- Txt.uncons header' = (major, minor, Txt.strip meta)
| otherwise = ('4', '1', Txt.pack $ trans l MalformedResponse)
handleIOErr :: IOError -> IO Strict.ByteString
handleIOErr _ = return ""
#endif
#ifdef WITH_FILE_URI
fetchURL' Session {locale = l} (defaultMIME:_) uri@URI {uriScheme = "file:"} = do
response <- B.readFile $ uriPath uri
return (uri, defaultMIME, Right response)
`catch` \e -> do
return (uri, mimeERR,
Left $ Txt.pack $ trans l $ ReadFailed $ displayException (e :: IOException))
#endif
#ifdef WITH_DATA_URI
fetchURL' _ (defaultMIME:_) uri@URI {uriScheme = "data:"} =
let request = uriPath uri ++ uriQuery uri ++ uriFragment uri
in case breakOn ',' $ unEscapeString request of
("", response) -> return (uri, defaultMIME, Left $ Txt.pack response)
(mime', response) | '4':'6':'e':'s':'a':'b':';':mime <- reverse mime' ->
return $ case B64.decode $ B.fromStrict $ C8.pack response of
Left str -> (uri, mimeERR, Left $ Txt.pack $ unEscapeString str)
Right bytes -> (uri, reverse mime, Right bytes)
(mime, response) -> return (uri, mime, Left $ Txt.pack response)
#endif
#ifdef WITH_XDG
fetchURL' Session {locale = l, apps = a} _ uri@(URI {uriScheme = s}) = do
app <- dispatchURIByMIME a uri ("x-scheme-handler/" ++ init s)
return (uri, htmlERR, Left $ Txt.pack $ trans l $ app)
#else
fetchURL' Session {locale = l} _ URI {uriScheme = scheme} =
return (uri, mimeERR, Left $ Txt.pack $ trans l $ UnsupportedScheme scheme)
#endif
dispatchByMIME :: Session -> String -> URI -> IO (Maybe String)
#if WITH_XDG
dispatchByMIME Session {locale = l, apps = a} mime uri = do
err <- dispatchURIByMIME a uri mime
return $ case err of
UnsupportedMIME _ -> Nothing
_ -> Just $ trans l err
#else
dispatchByMIME _ _ _ = return Nothing
#endif
-- Downloads utilities
-- | write download to a file in the given directory.
saveDownload :: URI -> FilePath -> (URI, String, Either Text ByteString) -> IO URI
saveDownload baseURI dir (URI {uriPath = path}, mime, resp) = do
dest <- unusedFilename (dir </> takeFileName' path)
case resp of
Left txt -> writeFile dest $ Txt.unpack txt
Right bytes -> B.writeFile dest bytes
-- TODO set user.mime file attribute.
return $ baseURI {uriPath = dest}
where
takeFileName' s = case takeFileName s of { "" -> "index"; f -> f}
unusedFilename path = do
exists <- doesFileExist path
if exists then go 0 else return path
where
go n = do
let path' = path ++ show n
exists <- doesFileExist path'
if exists then go (n+1) else return path'
-- | Convert a download into a data: URI
downloadToURI :: (URI, String, Either Text ByteString) -> URI
downloadToURI (_, mime, Left txt) = nullURI {
uriScheme = "data:",
uriPath = mime ++ "," ++ escapeURIString isReserved (Txt.unpack txt)
}
downloadToURI (_, mime, Right bytes) = nullURI {
uriScheme = "data:",
uriPath = mime ++ ";base64," ++ C8.unpack (B.toStrict $ B64.encode bytes)
}
-- Utils
#ifdef WITH_DATA_URI
breakOn c (a:as) | c == a = ([], as)
| otherwise = let (x, y) = breakOn c as in (a:x, y)
breakOn _ [] = ([], [])
#endif
#ifdef WITH_GEMINI_URI
mWhile test body = do
cond <- test
if cond then do
x <- body
xs <- mWhile test body
return (x:xs)
else return []
#endif