~alcinnz/fontconfig-pure

ref: b52f3294e5b7747d088a1d7da359aac85ca69683 fontconfig-pure/lib/Graphics/Text/Font/Choose/CharSet.hs -rw-r--r-- 2.9 KiB
b52f3294 — Adrian Cochrane Test CSS property parsing! 5 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
-- | Process sets of unicode characters, possibly parsed from CSS.
module Graphics.Text.Font.Choose.CharSet(
        CharSet, ord, chr, module IntSet, parseCharSet, CharSet'(..), validCharSet'
    ) where

import Data.IntSet as IntSet

import Data.Char (isHexDigit, isSpace, ord, chr)
import Numeric (readHex)

import Data.MessagePack (MessagePack(..), Object(..))
import Test.QuickCheck (Arbitrary(..))

-- | An FcCharSet is a set of Unicode characters.
type CharSet = IntSet

parseChar :: String -> Int
parseChar str | ((x, _):_) <- readHex str = toEnum x
    | otherwise = 0
replaceWild :: Char -> String -> String
replaceWild ch ('?':rest) = ch:replaceWild ch rest
replaceWild ch (c:cs) = c:replaceWild ch cs
replaceWild _ "" = ""
parseWild :: Char -> String -> Int
parseWild ch str = parseChar $ replaceWild ch str
-- | Utility for parsing "unicode-range" @font-face property.
parseCharSet :: String -> Maybe CharSet
parseCharSet (c:rest) | isSpace c = parseCharSet rest -- Irrelevant in Stylist integration.
parseCharSet ('U':rest) = parseCharSet ('u':rest) -- lowercase initial "u"
parseCharSet ('u':'+':cs)
    | (start@(_:_), '-':ends) <- span isHexDigit cs,
        (end@(_:_), rest) <- span isHexDigit ends, Just set <- parseCharSet' rest =
            Just $ union set $ IntSet.fromList [parseChar start..parseChar end]
    | (codepoint@(_:_), rest) <- span isHexDigit cs, Just set <- parseCharSet' rest =
        Just $ flip IntSet.insert set $ parseChar codepoint
    | (codepoint@(_:_), rest) <- span (\c -> isHexDigit c || c == '?') cs,
        Just set <- parseCharSet' rest =
            Just $ IntSet.union set $ IntSet.fromList [
                parseWild '0' codepoint..parseWild 'f' codepoint]
parseCharSet _ = Nothing
parseCharSet' :: String -> Maybe CharSet
parseCharSet' (',':rest) = parseCharSet rest
parseCharSet' "" = Just IntSet.empty
parseCharSet' _ = Nothing

-- NOTE: Serial already provides IntSet a CBOR codec, but its quite naive.
-- I suspect that CharSets are typically quite dense,
-- So a diff-compression pass should play well with 

diffCompress :: Int -> [Int] -> [Int]
diffCompress prev (x:xs) = x - prev:diffCompress x xs
diffCompress _ [] = []
diffDecompress :: Int -> [Int] -> [Int]
diffDecompress prev (x:xs) = let y = prev + x in y:diffDecompress y xs
diffDecompress _ [] = []

newtype CharSet' = CharSet' { unCharSet :: CharSet } deriving (Eq, Read, Show)
instance MessagePack CharSet' where
    toObject = toObject . diffCompress 0 . IntSet.toAscList . unCharSet
    fromObject (ObjectExt 0x63 _) = Just $ CharSet' IntSet.empty
    fromObject msg =
        CharSet' <$> IntSet.fromAscList <$> diffDecompress 0 <$> fromObject msg
instance Arbitrary CharSet' where
    arbitrary = CharSet' <$> IntSet.fromList <$> Prelude.map (succ . abs) <$> arbitrary

-- | Can this charset be processed by FontConfig?
validCharSet' :: CharSet' -> Bool
validCharSet' (CharSet' self) =
    not (IntSet.null self) && all (> 0) (IntSet.toList self)