From 2d6a43f4853f32149d5e751dc7af7cbc6c2e7122 Mon Sep 17 00:00:00 2001
From: Adrian Cochrane
Date: Thu, 16 Jan 2020 17:48:25 +1300
Subject: [PATCH] Fix CSS charset detection.

Previously I didn't handle cases where the charset was unspecified, or where
it wasn't valid text in the charsets I trialled.
---
 src/Main.hs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/Main.hs b/src/Main.hs
index 23eeaee..f528f43 100644
--- a/src/Main.hs
+++ b/src/Main.hs
@@ -84,9 +84,15 @@ retreiveStyles uri html manager base = do
         lowerVars _ = CSSCond.B False
         lowerToks _ = CSSCond.B False
 
-applyCSScharset (charset:charsets) bytes | cssCharset (CSSTok.tokenize text) == charset = text
+applyCSScharset (charset:charsets) bytes | cssCharset (CSSTok.tokenize text') == charset = text
         | otherwise = applyCSScharset charsets bytes
-    where text = convertCharset charset bytes
+    where
+        text = convertCharset charset bytes
+        -- I don't know how better to handle these errors in the APIs I'm using...
+        text' = unsafePerformIO $ catch (evaluate text) handleDecodeError
+        handleDecodeError :: UnicodeException -> IO Text -- Type signature REQUIRED
+        handleDecodeError _ = return ""
+applyCSScharset _ bytes = convertCharset "utf-8" bytes
 cssCharset toks | (CSSTok.AtKeyword "charset":toks') <- skipCSSspace toks,
         (CSSTok.String charset:_) <- skipCSSspace toks' = charset
     | otherwise = ""
-- 
2.30.2