~alcinnz/hurl

ref: e0988f7ea38ddfac188780118a50b5cf16e0b0f0 hurl/src/Network/URI/Charset.hs -rw-r--r-- 1.7 KiB
e0988f7e — Adrian Cochrane Add documentation, minor exported API adjustments. 4 years ago
                                                                                
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
{-# LANGUAGE OverloadedStrings #-}

-- | Handles server-specified text decoding.
module Network.URI.Charset(resolveCharset, convertCharset, charsets) where
import qualified Data.ByteString as Strict
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import           Data.Char (isSpace, toLower)
import           Data.String (IsString)
import           Data.Text (Text)
import           Data.Text.Encoding
import           Data.Text.Encoding.Error (lenientDecode)

-- | If the MIMEtype specifies a charset parameter, apply it.
--
-- The first parameter whose name is @charset@ wins. The parameter name and its
-- value are compared case-insensitively, with surrounding whitespace and any
-- double quotes removed, so a server sending an uppercase or quoted charset
-- selects the same decoder as the plain lowercase spelling.
-- If no charset parameter is present the bytes are returned undecoded, and an
-- empty MIMEtype list yields a @text/plain@ error message.
resolveCharset :: [String] -- ^ The MIMEtype, split by ';'
    -> ByteString -- ^ The bytes received from the server
    -> (String, Either Text ByteString) -- ^ The MIMEtype (minus parameters) & possibly decoded text, to be returned from protocol handlers.
resolveCharset [] _ = ("text/plain", Left "Filetype unspecified")
resolveCharset (mime:params) response =
    case [charset | Just charset <- map charsetParam params] of
        (charset:_) -> (mime, Left $ convertCharset charset $ B.toStrict response)
        [] -> (mime, Right response)
  where
    -- Yields the normalized (lowercase, unquoted, trimmed) value if the
    -- parameter is named "charset", otherwise Nothing.
    charsetParam :: String -> Maybe String
    charsetParam param = case break (== '=') param of
        (key, '=':value) | map toLower (trim key) == "charset" ->
            Just $ map toLower $ trim $ filter (/= '"') value
        _ -> Nothing
    -- Strips leading & trailing whitespace.
    trim :: String -> String
    trim = reverse . dropWhile isSpace . reverse . dropWhile isSpace

-- | Decodes bytes according to a charset identified by its IANA-assigned name(s).
--
-- Names are matched exactly as spelled below (all lowercase); any other name
-- yields the text @Unsupported text encoding!@ instead of decoded content.
-- Malformed input is decoded leniently: invalid sequences are replaced with
-- U+FFFD (see 'lenientDecode') rather than raising an exception from pure code.
--
-- NOTE(review): @utf-16@ & @utf-32@ are always read as little-endian here; no
-- byte order mark is inspected.
convertCharset :: (Eq a, IsString a)
    => a -- ^ The charset name
    -> Strict.ByteString -- ^ The bytes to decode
    -> Text -- ^ The decoded text, or an error message for unknown charsets
convertCharset "iso-8859-1" = decodeLatin1
convertCharset "latin1" = decodeLatin1
-- ASCII is a strict subset of UTF-8, so the UTF-8 decoder handles it.
convertCharset "us-ascii" = decodeUtf8With lenientDecode
convertCharset "utf-8" = decodeUtf8With lenientDecode
convertCharset "utf-16be" = decodeUtf16BEWith lenientDecode
convertCharset "utf-16le" = decodeUtf16LEWith lenientDecode
convertCharset "utf-16" = decodeUtf16LEWith lenientDecode
convertCharset "utf-32be" = decodeUtf32BEWith lenientDecode
convertCharset "utf-32le" = decodeUtf32LEWith lenientDecode
convertCharset "utf-32" = decodeUtf32LEWith lenientDecode
convertCharset _ = \_ -> "Unsupported text encoding!" -- TODO localize? Should I?

-- | Every charset name that 'convertCharset' is able to decode.
charsets :: [Text]
charsets =
    [ "iso-8859-1"
    , "latin1"
    , "us-ascii"
    , "utf-8"
    , "utf-16be"
    , "utf-16le"
    , "utf-16"
    , "utf-32be"
    , "utf-32le"
    , "utf-32"
    ]