~alcinnz/hurl

ref: 53212960af41818f0cc0ec936e14533f6eebf77b hurl/src/Network/URI/Charset.hs -rw-r--r-- 2.6 KiB
53212960 — Adrian Cochrane Add new action: URI, don't refetch page upon local anchor links. 2 years ago
                                                                                
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
a7c88c86 Adrian Cochrane
a7eb27a9 Adrian Cochrane
47f9ef33 Adrian Cochrane
42aa3ea2 Adrian Cochrane
a7eb27a9 Adrian Cochrane
e0988f7e Adrian Cochrane
42aa3ea2 Adrian Cochrane
a7eb27a9 Adrian Cochrane
984867b7 Adrian Cochrane
a7eb27a9 Adrian Cochrane
42aa3ea2 Adrian Cochrane
a7c88c86 Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
47f9ef33 Adrian Cochrane
5116153e Adrian Cochrane
e0988f7e Adrian Cochrane
a7eb27a9 Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
{-# LANGUAGE OverloadedStrings #-}

-- | Handles server-specified text decoding.
module Network.URI.Charset(resolveCharset, resolveCharset', convertCharset, charsets) where
import           Data.Text (Text)
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import           Data.Text.Encoding
import           Debug.Trace (trace)
import           Data.List (intercalate)

-- | If the MIMEtype specifies a charset parameter, apply it.
resolveCharset :: [String] -- ^ The MIMEtype, split by ';'
    -> ByteString -- ^ The bytes received from the server
    -> (String, Either Text ByteString) -- ^ The MIMEtype (minus parameters) & possibly decoded text, to be returned from protocol handlers.
resolveCharset (mime:('c':'h':'a':'r':'s':'e':'t':'=':charset):params) response =
    (parameterizedMIME mime params, Left $ convertCharset charset $ B.toStrict response)
resolveCharset (mime:param:params) response =
    resolveCharset (parameterizedMIME mime [param]:params) response
resolveCharset [mime] response = (mime, Right $ response)
-- NOTE I can't localize this error string because resolveCharset doesn't know the locale.
--      I don't think this is worth fixing, because hitting this indicates the server is badly misbehaving.
resolveCharset [] response = ("text/x-error\t", Left "Filetype unspecified")

parameterizedMIME mime params = mime ++ ";" ++ intercalate ";" params

-- | As per `resolveCharset`, but also returns given URI (or other type).
resolveCharset' :: a -> [String] -> ByteString -> (a, String, Either Text ByteString)
resolveCharset' a mimes resp = let (mime, resp') = resolveCharset mimes resp in (a, mime, resp')

-- | Decodes bytes according to a charset identified by it's IANA-assigned name(s).
convertCharset "iso-8859-1" = decodeLatin1
convertCharset "latin1" = decodeLatin1
convertCharset "us-ascii" = decodeUtf8With replaceChar
convertCharset "utf-8" = decodeUtf8With replaceChar
convertCharset "utf-16be" = decodeUtf16BEWith replaceChar
convertCharset "utf-16le" = decodeUtf16LEWith replaceChar
convertCharset "utf-16" = decodeUtf16LEWith replaceChar
convertCharset "utf-32be" = decodeUtf32BEWith replaceChar
convertCharset "utf-32le" = decodeUtf32LEWith replaceChar
convertCharset "utf-32" = decodeUtf32LEWith replaceChar
convertCharset charset = -- FIXME Is this the best fallback for unsupported charsets?
    trace ("Unsupported text encoding" ++ charset) $ decodeUtf8With replaceChar

replaceChar _ _ = Just '�'

-- | Lists all charsets supported by convertCharset
charsets :: [Text]
charsets = ["iso-8859-1", "latin1", "us-ascii", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32be", "utf-32le", "utf-32"]