module Graphics.Text.Font.Choose.CharSet where import Data.IntSet (IntSet, union) import qualified Data.IntSet as IntSet import Data.Char (isHexDigit) import Numeric (readHex) import Data.MessagePack (MessagePack(..)) -- | An FcCharSet is a set of Unicode characters. type CharSet = IntSet parseChar :: String -> Int parseChar str | ((x, _):_) <- readHex str = toEnum x | otherwise = 0 replaceWild :: Char -> String -> String replaceWild ch ('?':rest) = ch:replaceWild ch rest replaceWild ch (c:cs) = c:replaceWild ch cs replaceWild _ "" = "" parseWild :: Char -> String -> Int parseWild ch str = parseChar $ replaceWild ch str -- | Utility for parsing "unicode-range" @font-face property. parseCharSet :: String -> Maybe CharSet parseCharSet ('U':rest) = parseCharSet ('u':rest) -- lowercase initial "u" parseCharSet ('u':'+':cs) | (start@(_:_), '-':ends) <- span isHexDigit cs, (end@(_:_), rest) <- span isHexDigit ends, Just set <- parseCharSet' rest = Just $ union set $ IntSet.fromList [parseChar start..parseChar end] | (codepoint@(_:_), rest) <- span isHexDigit cs, Just set <- parseCharSet' rest = Just $ flip IntSet.insert set $ parseChar codepoint | (codepoint@(_:_), rest) <- span (\c -> isHexDigit c || c == '?') cs, Just set <- parseCharSet' rest = Just $ IntSet.union set $ IntSet.fromList [ parseWild '0' codepoint..parseWild 'f' codepoint] parseCharSet _ = Nothing parseCharSet' :: String -> Maybe CharSet parseCharSet' (',':rest) = parseCharSet rest parseCharSet' "" = Just IntSet.empty parseCharSet' _ = Nothing -- NOTE: Serial already provides IntSet a CBOR codec, but its quite naive. -- I suspect that CharSets are typically quite dense, -- So a diff-compression pass should play well with diffCompress :: Int -> [Int] -> [Int] diffCompress prev (x:xs) = x - prev:diffCompress x xs diffCompress _ [] = [] diffDecompress :: Int -> [Int] -> [Int] diffDecompress prev (x:xs) = let y = prev + x in y:diffDecompress y xs diffDecompress _ [] = [] newtype CharSet' = CharSet' { unCharSet :: CharSet } instance MessagePack CharSet' where toObject = toObject . diffCompress 0 . IntSet.toAscList . unCharSet fromObject msg = CharSet' <$> IntSet.fromAscList <$> diffDecompress 0 <$> fromObject msg