~alcinnz/hurl: src/Network/URI/Charset.hs blame

a7eb27a9 Adrian Cochrane 
e0988f7e Adrian Cochrane 


a7eb27a9 Adrian Cochrane 



47f9ef33 Adrian Cochrane 
a7eb27a9 Adrian Cochrane 
e0988f7e Adrian Cochrane 



a7eb27a9 Adrian Cochrane 





e0988f7e Adrian Cochrane 
a7eb27a9 Adrian Cochrane 

47f9ef33 Adrian Cochrane 











e0988f7e Adrian Cochrane 

a7eb27a9 Adrian Cochrane

4 years ago
4 years ago


4 years ago



4 years ago
4 years ago
4 years ago



4 years ago





4 years ago
4 years ago

4 years ago











4 years ago

4 years ago

{-# LANGUAGE OverloadedStrings #-}

-- | Handles server-specified text decoding.
module Network.URI.Charset(resolveCharset, convertCharset, charsets) where
import           Data.Text (Text)
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import           Data.Text.Encoding
import           Debug.Trace (trace)

-- | If the MIMEtype specifies a charset parameter, apply it.
resolveCharset :: [String] -- ^ The MIMEtype, split by ';'
    -> ByteString -- ^ The bytes received from the server
    -> (String, Either Text ByteString) -- ^ The MIMEtype (minus parameters) & possibly decoded text, to be returned from protocol handlers.
resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):_) response =
    (mime, Left $ convertCharset charset $ B.toStrict response)
resolveCharset (mime:_:params) response = resolveCharset (mime:params) response
resolveCharset [mime] response = (mime, Right $ response)
resolveCharset [] response = ("text/plain", Left "Filetype unspecified")

-- | Decodes bytes according to a charset identified by it's IANA-assigned name(s).
convertCharset "iso-8859-1" = decodeLatin1
convertCharset "latin1" = decodeLatin1
convertCharset "us-ascii" = decodeUtf8With replaceChar
convertCharset "utf-8" = decodeUtf8With replaceChar
convertCharset "utf-16be" = decodeUtf16BEWith replaceChar
convertCharset "utf-16le" = decodeUtf16LEWith replaceChar
convertCharset "utf-16" = decodeUtf16LEWith replaceChar
convertCharset "utf-32be" = decodeUtf32BEWith replaceChar
convertCharset "utf-32le" = decodeUtf32LEWith replaceChar
convertCharset "utf-32" = decodeUtf32LEWith replaceChar
convertCharset charset = -- FIXME Is this the best fallback for unsupported charsets?
    trace ("Unsupported text encoding" ++ charset) $ decodeUtf8With replaceChar

replaceChar error _ = trace error $ Just '�'

-- | Lists all charsets supported by convertCharset
charsets :: [Text]
charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]