module Graphics.Text.Font.Choose.CharSet( CharSet, ord, chr, module IntSet, parseCharSet, CharSet'(..)) where import Data.IntSet (IntSet, union) import qualified Data.IntSet as IntSet import Data.Char (isHexDigit, ord, chr) import Numeric (readHex) import Data.MessagePack (MessagePack(..)) import Test.QuickCheck (Arbitrary(..)) -- | An FcCharSet is a set of Unicode characters. type CharSet = IntSet parseChar :: String -> Int parseChar str | ((x, _):_) <- readHex str = toEnum x | otherwise = 0 replaceWild :: Char -> String -> String replaceWild ch ('?':rest) = ch:replaceWild ch rest replaceWild ch (c:cs) = c:replaceWild ch cs replaceWild _ "" = "" parseWild :: Char -> String -> Int parseWild ch str = parseChar $ replaceWild ch str -- | Utility for parsing "unicode-range" @font-face property. parseCharSet :: String -> Maybe CharSet parseCharSet ('U':rest) = parseCharSet ('u':rest) -- lowercase initial "u" parseCharSet ('u':'+':cs) | (start@(_:_), '-':ends) <- span isHexDigit cs, (end@(_:_), rest) <- span isHexDigit ends, Just set <- parseCharSet' rest = Just $ union set $ IntSet.fromList [parseChar start..parseChar end] | (codepoint@(_:_), rest) <- span isHexDigit cs, Just set <- parseCharSet' rest = Just $ flip IntSet.insert set $ parseChar codepoint | (codepoint@(_:_), rest) <- span (\c -> isHexDigit c || c == '?') cs, Just set <- parseCharSet' rest = Just $ IntSet.union set $ IntSet.fromList [ parseWild '0' codepoint..parseWild 'f' codepoint] parseCharSet _ = Nothing parseCharSet' :: String -> Maybe CharSet parseCharSet' (',':rest) = parseCharSet rest parseCharSet' "" = Just IntSet.empty parseCharSet' _ = Nothing -- NOTE: Serial already provides IntSet a CBOR codec, but its quite naive. -- I suspect that CharSets are typically quite dense, -- So a diff-compression pass should play well with diffCompress :: Int -> [Int] -> [Int] diffCompress prev (x:xs) = x - prev:diffCompress x xs diffCompress _ [] = [] diffDecompress :: Int -> [Int] -> [Int] diffDecompress prev (x:xs) = let y = prev + x in y:diffDecompress y xs diffDecompress _ [] = [] newtype CharSet' = CharSet' { unCharSet :: CharSet } deriving (Eq, Read, Show) instance MessagePack CharSet' where toObject x = toObject $ diffCompress 0 $ IntSet.toAscList $ unCharSet x fromObject msg = CharSet' <$> IntSet.fromAscList <$> diffDecompress 0 <$> fromObject msg instance Arbitrary CharSet' where arbitrary = do x <- arbitrary -- Ensure its non-empty, known failure! xs <- arbitrary return $ CharSet' $ IntSet.insert x xs