~alcinnz/rhapsode

2d6a43f4853f32149d5e751dc7af7cbc6c2e7122 — Adrian Cochrane 4 years ago e41dd03
Fix CSS charset detection.

Previously I didn't handle cases where the charset was unspecified,
or where it wasn't valid text in the charsets I trialled.
1 file changed, 8 insertions(+), 2 deletions(-)

M src/Main.hs
M src/Main.hs => src/Main.hs +8 -2
@@ 84,9 84,15 @@ retreiveStyles uri html manager base = do
    lowerVars _ = CSSCond.B False
    lowerToks _ = CSSCond.B False

-- Decode a stylesheet's raw bytes by trial: each candidate charset is used in
-- turn to decode `bytes`, the result is tokenized, and the candidate is
-- accepted when the charset the stylesheet declares for itself (as reported
-- by `cssCharset`) names that same candidate. Candidates are tried in list
-- order; when none is accepted the bytes are decoded as "utf-8".
--
-- A decode that raises `UnicodeException` is treated as the empty text, so
-- that candidate yields no declared charset and is rejected.
-- NOTE(review): the final "utf-8" fallback is NOT guarded the same way, so a
-- decode error there can still propagate to the caller - confirm that is
-- acceptable.
applyCSScharset (charset:charsets) bytes
        -- Return the guarded text': it is the value the check above validated,
        -- so no decode error can escape from the accepted branch.
        | cssCharset (CSSTok.tokenize text') == charset = text'
        | otherwise = applyCSScharset charsets bytes
    where
        text = convertCharset charset bytes
        -- I don't know how better to handle these errors in the APIs I'm using...
        -- unsafePerformIO is used only to catch the exception raised while
        -- forcing the pure `text`; `evaluate` forces it to WHNF.
        -- NOTE(review): assumes WHNF is enough to surface every decode error
        -- (true for strict Text) - TODO confirm which Text type is in use.
        text' = unsafePerformIO $ catch (evaluate text) handleDecodeError
        handleDecodeError :: UnicodeException -> IO Text -- Type signature REQUIRED
        handleDecodeError _ = return ""
applyCSScharset _ bytes = convertCharset "utf-8" bytes
-- Report the charset a token stream declares for itself: after
-- `skipCSSspace`, the stream must start with the at-keyword "charset",
-- followed (again after `skipCSSspace`) by a string token whose contents are
-- returned. Any other shape yields the empty string.
-- NOTE(review): `skipCSSspace` is defined elsewhere; presumably it drops
-- leading whitespace tokens - verify against its definition.
cssCharset tokens = case skipCSSspace tokens of
    CSSTok.AtKeyword "charset" : afterKeyword
        | CSSTok.String declared : _ <- skipCSSspace afterKeyword -> declared
    _ -> ""