module Graphics.Text.Font.Choose.CharSet where
import Data.IntSet (IntSet, union)
import qualified Data.IntSet as IntSet
import Data.Char (isHexDigit)
import Numeric (readHex)
import Data.MessagePack (MessagePack(..))
-- | An FcCharSet is a set of Unicode characters.
type CharSet = IntSet
parseChar :: String -> Int
parseChar str | ((x, _):_) <- readHex str = toEnum x
| otherwise = 0
replaceWild :: Char -> String -> String
replaceWild ch ('?':rest) = ch:replaceWild ch rest
replaceWild ch (c:cs) = c:replaceWild ch cs
replaceWild _ "" = ""
parseWild :: Char -> String -> Int
parseWild ch str = parseChar $ replaceWild ch str
-- | Utility for parsing "unicode-range" @font-face property.
parseCharSet :: String -> Maybe CharSet
parseCharSet ('U':rest) = parseCharSet ('u':rest) -- lowercase initial "u"
parseCharSet ('u':'+':cs)
| (start@(_:_), '-':ends) <- span isHexDigit cs,
(end@(_:_), rest) <- span isHexDigit ends, Just set <- parseCharSet' rest =
Just $ union set $ IntSet.fromList [parseChar start..parseChar end]
| (codepoint@(_:_), rest) <- span isHexDigit cs, Just set <- parseCharSet' rest =
Just $ flip IntSet.insert set $ parseChar codepoint
| (codepoint@(_:_), rest) <- span (\c -> isHexDigit c || c == '?') cs,
Just set <- parseCharSet' rest =
Just $ IntSet.union set $ IntSet.fromList [
parseWild '0' codepoint..parseWild 'f' codepoint]
parseCharSet _ = Nothing
parseCharSet' :: String -> Maybe CharSet
parseCharSet' (',':rest) = parseCharSet rest
parseCharSet' "" = Just IntSet.empty
parseCharSet' _ = Nothing
-- NOTE: Serial already provides IntSet a CBOR codec, but its quite naive.
-- I suspect that CharSets are typically quite dense,
-- So a diff-compression pass should play well with
diffCompress :: Int -> [Int] -> [Int]
diffCompress prev (x:xs) = x - prev:diffCompress x xs
diffCompress _ [] = []
diffDecompress :: Int -> [Int] -> [Int]
diffDecompress prev (x:xs) = prev + x:diffDecompress x xs
diffDecompress _ [] = []
newtype CharSet' = CharSet' { unCharSet :: CharSet }
instance MessagePack CharSet' where
toObject = toObject . diffCompress 0 . IntSet.toAscList . unCharSet
fromObject msg =
CharSet' <$> IntSet.fromAscList <$> diffDecompress 0 <$> fromObject msg