~alcinnz/fontconfig-pure

ref: 484b1482a58e27db9fc806dd1d95b2e880b06a03 fontconfig-pure/lib/Graphics/Text/Font/Choose/CharSet.hs -rw-r--r-- 2.8 KiB
484b1482 — Adrian Cochrane Test & fix langset comparisons. 5 months ago
                                                                                
dbefdc06 Adrian Cochrane
1abac8a1 Adrian Cochrane
58463bff Adrian Cochrane
4da6f787 Adrian Cochrane
a7c384b2 Adrian Cochrane
4da6f787 Adrian Cochrane
1abac8a1 Adrian Cochrane
4da6f787 Adrian Cochrane
c406e201 Adrian Cochrane
40a431c7 Adrian Cochrane
4da6f787 Adrian Cochrane
35586a37 Adrian Cochrane
4da6f787 Adrian Cochrane
40a431c7 Adrian Cochrane
4da6f787 Adrian Cochrane
c406e201 Adrian Cochrane
4da6f787 Adrian Cochrane
40a431c7 Adrian Cochrane
a7c384b2 Adrian Cochrane
58463bff Adrian Cochrane
94860b2e Adrian Cochrane
58463bff Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
-- | Process sets of unicode characters, possibly parsed from CSS.
module Graphics.Text.Font.Choose.CharSet(
        CharSet, ord, chr, module IntSet, parseCharSet, CharSet'(..), validCharSet'
    ) where

import Data.IntSet as IntSet

import Data.Char (isHexDigit, ord, chr)
import Numeric (readHex)

import Data.MessagePack (MessagePack(..), Object(..))
import Test.QuickCheck (Arbitrary(..))

-- | An FcCharSet is a set of Unicode characters.
type CharSet = IntSet

parseChar :: String -> Int
parseChar str | ((x, _):_) <- readHex str = toEnum x
    | otherwise = 0
replaceWild :: Char -> String -> String
replaceWild ch ('?':rest) = ch:replaceWild ch rest
replaceWild ch (c:cs) = c:replaceWild ch cs
replaceWild _ "" = ""
parseWild :: Char -> String -> Int
parseWild ch str = parseChar $ replaceWild ch str
-- | Utility for parsing "unicode-range" @font-face property.
parseCharSet :: String -> Maybe CharSet
parseCharSet ('U':rest) = parseCharSet ('u':rest) -- lowercase initial "u"
parseCharSet ('u':'+':cs)
    | (start@(_:_), '-':ends) <- span isHexDigit cs,
        (end@(_:_), rest) <- span isHexDigit ends, Just set <- parseCharSet' rest =
            Just $ union set $ IntSet.fromList [parseChar start..parseChar end]
    | (codepoint@(_:_), rest) <- span isHexDigit cs, Just set <- parseCharSet' rest =
        Just $ flip IntSet.insert set $ parseChar codepoint
    | (codepoint@(_:_), rest) <- span (\c -> isHexDigit c || c == '?') cs,
        Just set <- parseCharSet' rest =
            Just $ IntSet.union set $ IntSet.fromList [
                parseWild '0' codepoint..parseWild 'f' codepoint]
parseCharSet _ = Nothing
parseCharSet' :: String -> Maybe CharSet
parseCharSet' (',':rest) = parseCharSet rest
parseCharSet' "" = Just IntSet.empty
parseCharSet' _ = Nothing

-- NOTE: Serial already provides IntSet a CBOR codec, but its quite naive.
-- I suspect that CharSets are typically quite dense,
-- So a diff-compression pass should play well with 

diffCompress :: Int -> [Int] -> [Int]
diffCompress prev (x:xs) = x - prev:diffCompress x xs
diffCompress _ [] = []
diffDecompress :: Int -> [Int] -> [Int]
diffDecompress prev (x:xs) = let y = prev + x in y:diffDecompress y xs
diffDecompress _ [] = []

newtype CharSet' = CharSet' { unCharSet :: CharSet } deriving (Eq, Read, Show)
instance MessagePack CharSet' where
    toObject = toObject . diffCompress 0 . IntSet.toAscList . unCharSet
    fromObject (ObjectExt 0x63 _) = Just $ CharSet' IntSet.empty
    fromObject msg =
        CharSet' <$> IntSet.fromAscList <$> diffDecompress 0 <$> fromObject msg
instance Arbitrary CharSet' where
    arbitrary = CharSet' <$> IntSet.fromList <$> Prelude.map (succ . abs) <$> arbitrary

-- | Can this charset be processed by FontConfig?
validCharSet' :: CharSet' -> Bool
validCharSet' (CharSet' self) =
    not (IntSet.null self) && all (> 0) (IntSet.toList self)