~alcinnz/rhapsode

e41dd03174e0b03a30374f99f7424dcb0cee0ce6 — Adrian Cochrane 4 years ago 36f3008
Extract URL resolver into 'hurl' hackage
3 files changed, 28 insertions(+), 79 deletions(-)

M rhapsode.cabal
M src/Input.hs
M src/Main.hs
M rhapsode.cabal => rhapsode.cabal +2 -3
@@ 60,12 60,11 @@ executable rhapsode
  -- other-extensions:    
  
  -- Other library packages from which modules are imported.
  build-depends:       base >=4.9 && <=4.12, directory,
        http-client, http-client-tls, bytestring, http-types,
  build-depends:       base >=4.9 && <=4.12, directory, bytestring,
        html-conduit, xml-conduit, text, containers, data-default-class,
        network-uri,
        stylist >= 1.1, css-syntax, xml-conduit-stylist, scientific,
        async
        async, hurl
  
  -- Directories containing source files.
  hs-source-dirs:      src

M src/Input.hs => src/Input.hs +9 -56
@@ 1,23 1,18 @@
{-# LANGUAGE OverloadedStrings #-}
module Input(parseArgs, ProgramCtl(..), fetchURL, convertCharset, charsets) where
module Input(parseArgs, ProgramCtl(..)) where

import qualified Network.HTTP.Client as HTTP
import           Network.HTTP.Types
import qualified Data.Text as Txt
import           Data.Text.Lazy (fromStrict)
import           Data.Text (Text)
import qualified Data.Text as Txt
import           Data.Text.Encoding
import qualified Data.Text.IO as TxtIO
import qualified Data.ByteString.Lazy as B
import           Data.ByteString.Lazy (ByteString)
import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Network.URI
import           Network.URI.Fetch
import qualified Data.Map as M

import Control.Concurrent.Async

import           System.IO
import System.IO
import System.Environment
import System.Directory
import Data.List


@@ 29,7 24,7 @@ data ProgramCtl = ProgramCtl {
    outSSML :: Maybe Handle,
    outLinks :: Maybe Handle
}
parseArgs :: HTTP.Manager -> IO ProgramCtl
parseArgs :: Session -> IO ProgramCtl
parseArgs http = do
    args <- getArgs
    let (inputs, outputs) = partition (isPrefixOf "-" . fst) $ preparseArgs args


@@ 73,10 68,10 @@ parseLinksOut (_:args) = parseLinksOut args
parseLinksOut [] = return Nothing


evalInput http ("-h", url) = fetchDocument http "text/html" url
evalInput http ("-html", url) = fetchDocument http "text/html" url
evalInput http ("-x", url) = fetchDocument http "text/xml" url
evalInput http ("-xml", url) = fetchDocument http "text/xml" url
evalInput http ("-h", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-html", url) = fetchDocument http ["text/html", "text/xml", "text/plain"] url
evalInput http ("-x", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput http ("-xml", url) = fetchDocument http ["text/xml", "text/html", "text/plain"] url
evalInput _ (flag, _) = error ("Unsupported input flag " ++ flag)

fetchDocument http mime uri = fetchURL http mime uri >>= parseDocument


@@ 97,45 92,3 @@ docForText txt = XML.Document {
        },
        XML.documentEpilogue = []
    }

--------

fetchURL :: HTTP.Manager -> String -> URI -> IO (String, Either Text ByteString)
fetchURL http defaultMIME uri | uriScheme uri `elem` ["http:", "https:"] = do
    request <- HTTP.requestFromURI uri
    response <- HTTP.httpLbs request http
    return $ case (
            HTTP.responseBody response,
            [val | ("content-type", val) <- HTTP.responseHeaders response]
      ) of
        ("", _) -> ("text/plain", Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
        (response, (mimetype:_)) -> let mime = Txt.toLower $ decodeUtf8 mimetype
            in resolveCharset (map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" $ mime) response
        (response, []) -> (defaultMIME, Right response)

fetchURL _ defaultMIME uri@URI {uriScheme = "file:"} = do
    response <- B.readFile $ uriPath uri
    return (defaultMIME, Right response)

fetchURL _ _ uri = return ("text/plain", Left $ Txt.concat ["Unsupported link type ", Txt.pack $ uriScheme uri])

resolveCharset :: [String] -> ByteString -> (String, Either Text ByteString)
resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response =
    (mime, Left $ convertCharset charset $ B.toStrict response)
resolveCharset (mime:_:params) response = resolveCharset (mime:params) response
resolveCharset [mime] response = (mime, Right $ response)
resolveCharset [] response = ("text/plain", Left "Filetype unspecified")

convertCharset "iso-8859-1" = decodeLatin1
convertCharset "latin1" = decodeLatin1
convertCharset "us-ascii" = decodeUtf8
convertCharset "utf-8" = decodeUtf8
convertCharset "utf-16be" = decodeUtf16BE
convertCharset "utf-16le" = decodeUtf16LE
convertCharset "utf-16" = decodeUtf16LE
convertCharset "utf-32be" = decodeUtf32BE
convertCharset "utf-32le" = decodeUtf32LE
convertCharset "utf-32" = decodeUtf32LE
convertCharset _ = \_ -> "Unsupported text encoding!"
charsets :: [Text]
charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]

M src/Main.hs => src/Main.hs +17 -20
@@ 1,35 1,32 @@
{-# LANGUAGE OverloadedStrings #-}
module Main where

import System.Environment
import Data.Char (isSpace)

import qualified Network.HTTP.Client as HTTP
import           Network.HTTP.Client.Internal
import qualified Network.HTTP.Client.TLS as TLS
import           Network.URI
import qualified Data.ByteString.Lazy.Char8 as C8
import qualified Data.ByteString.Lazy as B

import qualified Text.HTML.DOM as HTML
import qualified Text.XML as XML
import           Data.Text as Txt (pack, unpack, Text(..), append,
        words, unwords, head, last, stripStart, stripEnd, intercalate)
import           Data.Text as Txt (pack, unpack, Text(..), intercalate)

import qualified Data.Map as M
import System.Directory as Dir
import Control.Monad
import System.IO (stdout, hPutStrLn)

-- To handle text encoding errors, whilst trying them out
import System.IO.Unsafe (unsafePerformIO)
import Control.Exception (catch, evaluate)
import Data.Text.Encoding.Error (UnicodeException)

--- External Rhapsode subcomponents
import qualified Data.CSS.Syntax.StyleSheet as CSS
import qualified Data.CSS.Style as Style
import qualified Data.CSS.Syntax.Tokens as CSSTok
import qualified Data.CSS.Preprocessor.Conditions as CSSCond
import qualified Data.HTML2CSS as H2C
import           Network.URI.Fetch
import           Network.URI.Charset

import qualified Data.List as L
import qualified Data.Map as M
import Data.Scientific (toRealFloat)
import Data.Maybe (fromJust, fromMaybe)
import System.Directory as Dir
import Control.Monad
import System.IO (stdout, hPutStrLn)

-- Internal Rhapsode Subcomponents
import DefaultCSS
import StyleTree
import SSML


@@ 38,7 35,7 @@ import Links

main :: IO ()
main = do
    http <- HTTP.newManager TLS.tlsManagerSettings
    http <- newSession
    ProgramCtl docs outSSML outLinks <- parseArgs http
    forM docs $ \(uri, doc) -> do
        let html = XML.documentRoot doc


@@ 76,7 73,7 @@ retreiveStyles uri html manager base = do
    authorStyle = H2C.html2css html base

    loadURL url = do
        response <- fetchURL manager "text/css" url
        response <- fetchURL manager ["text/css"] url
        return $ case response of
            ("text/css", Left text) -> text
            ("text/css", Right bytes) -> applyCSScharset charsets $ B.toStrict bytes