From e41dd03174e0b03a30374f99f7424dcb0cee0ce6 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Thu, 16 Jan 2020 17:47:21 +1300 Subject: [PATCH] Extract URL resolver into 'hurl' hackage --- rhapsode.cabal | 5 ++-- src/Input.hs | 65 +++++++------------------------------------------- src/Main.hs | 37 +++++++++++++--------------- 3 files changed, 28 insertions(+), 79 deletions(-) diff --git a/rhapsode.cabal b/rhapsode.cabal index a1000f6..5639bc6 100644 --- a/rhapsode.cabal +++ b/rhapsode.cabal @@ -60,12 +60,11 @@ executable rhapsode -- other-extensions: -- Other library packages from which modules are imported. - build-depends: base >=4.9 && <=4.12, directory, - http-client, http-client-tls, bytestring, http-types, + build-depends: base >=4.9 && <=4.12, directory, bytestring, html-conduit, xml-conduit, text, containers, data-default-class, network-uri, stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific, - async + async, hurl -- Directories containing source files. hs-source-dirs: src diff --git a/src/Input.hs b/src/Input.hs index 5ea0f17..491712b 100644 --- a/src/Input.hs +++ b/src/Input.hs @@ -1,23 +1,18 @@ {-# LANGUAGE OverloadedStrings #-} -module Input(parseArgs, ProgramCtl(..), fetchURL, convertCharset, charsets) where +module Input(parseArgs, ProgramCtl(..)) where -import qualified Network.HTTP.Client as HTTP -import Network.HTTP.Types -import qualified Data.Text as Txt import Data.Text.Lazy (fromStrict) -import Data.Text (Text) +import qualified Data.Text as Txt import Data.Text.Encoding -import qualified Data.Text.IO as TxtIO import qualified Data.ByteString.Lazy as B -import Data.ByteString.Lazy (ByteString) import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML import Network.URI +import Network.URI.Fetch import qualified Data.Map as M import Control.Concurrent.Async - -import System.IO +import System.IO import System.Environment import System.Directory import Data.List @@ -29,7 +24,7 @@ data ProgramCtl = ProgramCtl { outSSML :: Maybe Handle, outLinks :: Maybe Handle } -parseArgs :: HTTP.Manager -> IO ProgramCtl +parseArgs :: Session -> IO ProgramCtl parseArgs http = do args <- getArgs let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args @@ -73,10 +68,10 @@ parseLinksOut (_:args) = parseLinksOut args parseLinksOut [] = return Nothing -evalInput http ("-h", url) = fetchDocument http "text/html" url -evalInput http ("-html", url) = fetchDocument http "text/html" url -evalInput http ("-x", url) = fetchDocument http "text/xml" url -evalInput http ("-xml", url) = fetchDocument http "text/xml" url +evalInput http ("-h", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url +evalInput http ("-html", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url +evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url +evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag) fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument @@ -97,45 +92,3 @@ docForText txt = XML.Document { }, XML.documentEpilogue = [] } - --------- - -fetchURL :: HTTP.Manager -> String -> URI -> IO (String, Either Text ByteString) -fetchURL http defaultMIME uri | uriScheme uri `elem` ["http:", "https:"] = do - request <- HTTP.requestFromURI uri - response <- HTTP.httpLbs request http - return $ case ( - HTTP.responseBody response, - [val | ("content-type", val) <- HTTP.responseHeaders response] - ) of - ("", _) -> ("text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response) - (response, (mimetype:_)) -> let mime = Txt.toLower $ decodeUtf8 mimetype - in resolveCharset (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" $ mime) response - (response, []) -> (defaultMIME, Right response) - -fetchURL _ defaultMIME uri@URI {uriScheme = "file:"} = do - response <- B.readFile $ uriPath uri - return (defaultMIME, Right response) - -fetchURL _ _ uri = return ("text/plain", Left $ Txt.concat ["Unsupported link type ", Txt.pack $ uriScheme uri]) - -resolveCharset :: [String] -> ByteString -> (String, Either Text ByteString) -resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response = - (mime, Left $ convertCharset charset $ B.toStrict response) -resolveCharset (mime:_:params) response = resolveCharset (mime:params) response -resolveCharset [mime] response = (mime, Right $ response) -resolveCharset [] response = ("text/plain", Left "Filetype unspecified") - -convertCharset "iso-8859-1" = decodeLatin1 -convertCharset "latin1" = decodeLatin1 -convertCharset "us-ascii" = decodeUtf8 -convertCharset "utf-8" = decodeUtf8 -convertCharset "utf-16be" = decodeUtf16BE -convertCharset "utf-16le" = decodeUtf16LE -convertCharset "utf-16" = decodeUtf16LE -convertCharset "utf-32be" = decodeUtf32BE -convertCharset "utf-32le" = decodeUtf32LE -convertCharset "utf-32" = decodeUtf32LE -convertCharset _ = \_ -> "Unsupported text encoding!" -charsets :: [Text] -charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"] diff --git a/src/Main.hs b/src/Main.hs index d228e4a..23eeaee 100644 --- a/src/Main.hs +++ b/src/Main.hs @@ -1,35 +1,32 @@ {-# LANGUAGE OverloadedStrings #-} module Main where -import System.Environment -import Data.Char (isSpace) - -import qualified Network.HTTP.Client as HTTP -import Network.HTTP.Client.Internal -import qualified Network.HTTP.Client.TLS as TLS -import Network.URI import qualified Data.ByteString.Lazy.Char8 as C8 import qualified Data.ByteString.Lazy as B -import qualified Text.HTML.DOM as HTML import qualified Text.XML as XML -import Data.Text as Txt (pack, unpack, Text(..), append, - words, unwords, head, last, stripStart, stripEnd, intercalate) +import Data.Text as Txt (pack, unpack, Text(..), intercalate) + +import qualified Data.Map as M +import System.Directory as Dir +import Control.Monad +import System.IO (stdout, hPutStrLn) +-- To handle text encoding errors, whilst trying them out +import System.IO.Unsafe (unsafePerformIO) +import Control.Exception (catch, evaluate) +import Data.Text.Encoding.Error (UnicodeException) + +--- External Rhapsode subcomponents import qualified Data.CSS.Syntax.StyleSheet as CSS import qualified Data.CSS.Style as Style import qualified Data.CSS.Syntax.Tokens as CSSTok import qualified Data.CSS.Preprocessor.Conditions as CSSCond import qualified Data.HTML2CSS as H2C +import Network.URI.Fetch +import Network.URI.Charset -import qualified Data.List as L -import qualified Data.Map as M -import Data.Scientific (toRealFloat) -import Data.Maybe (fromJust, fromMaybe) -import System.Directory as Dir -import Control.Monad -import System.IO (stdout, hPutStrLn) - +-- Internal Rhapsode Subcomponents import DefaultCSS import StyleTree import SSML @@ -38,7 +35,7 @@ import Links main :: IO () main = do - http <- HTTP.newManager TLS.tlsManagerSettings + http <- newSession ProgramCtl docs outSSML outLinks <- parseArgs http forM docs $ \(uri, doc) -> do let html = XML.documentRoot doc @@ -76,7 +73,7 @@ retreiveStyles uri html manager base = do authorStyle = H2C.html2css html base loadURL url = do - response <- fetchURL manager "text/css" url + response <- fetchURL manager ["text/css"] url return $ case response of ("text/css", Left text) -> text ("text/css", Right bytes) -> applyCSScharset charsets $ B.toStrict bytes -- 2.30.2