~alcinnz/harfbuzz-pure

ref: 27013b25cfe0eadb5661d7fbb910792d797e5e5c harfbuzz-pure/Data/Text/Glyphize/Buffer.hs -rw-r--r-- 17.9 KiB
27013b25 — Adrian Cochrane Correctly decode Harfbuzz output structs. 1 year, 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
{-# LANGUAGE MagicHash, UnliftedFFITypes, DeriveGeneric #-}
module Data.Text.Glyphize.Buffer where

import qualified Data.Text.Internal.Lazy as Lazy
import qualified Data.Text.Lazy as Lazy
import qualified Data.Text.Internal as Txt
import Data.Char (toUpper, toLower)
import Control.Monad (forM)

--- To fill computed text property.
import Data.Text.Encoding (decodeUtf8Lenient)
import Codec.Binary.UTF8.Light (encodeUTF8, w2c, c2w)

import qualified Data.Text.Array as A
import GHC.Exts (ByteArray#, sizeofByteArray#, Int#)
import Data.Word (Word32)
import Data.Int (Int32)
import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit)

import System.IO.Unsafe (unsafePerformIO)
import Foreign.Marshal.Alloc (alloca, allocaBytes)
import Foreign.Ptr
import Foreign.C.String (CString, withCString, peekCString)
import Foreign.Storable (Storable(..))
import GHC.Generics (Generic(..))
import Foreign.Storable.Generic (GStorable(..))

------
--- Public Datastructures
------

data Buffer = Buffer {
    text :: Lazy.Text,
    -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs.
    contentType :: Maybe ContentType,
    -- ^ What the bytes of the ByteString contents represents,
    -- namely unicode characters (before shaping) or glyphs (result of shaping).
    -- Typically callers should leave this as `Just ContentTypeUnicode`.
    direction :: Maybe Direction,
    -- ^ The text flow direction of the buffer.
    -- No shaping can happen without setting buffer direction, and it controls
    -- the visual direction for the output glyphs; for RTL direction the glyphs
    -- will be reversed. Many layout features depend on the proper setting of
    -- the direction, for example, reversing RTL text before shaping,
    -- then shaping with LTR direction is not the same as keeping the text in
    -- logical order and shaping with RTL direction.
    script :: Maybe String,
    -- ^ Script is crucial for choosing the proper shaping behaviour for scripts
    -- that require it (e.g. Arabic) and the which OpenType features defined in
    -- the font to be applied.
    language :: Maybe String,
    -- ^ Languages are crucial for selecting which OpenType feature to apply to
    -- the buffer which can result in applying language-specific behaviour.
    -- Languages are orthogonal to the scripts, and though they are related,
    -- they are different concepts and should not be confused with each other.
    beginsText :: Bool,
    -- ^ special handling of the beginning of text paragraph can be applied to
    -- this buffer. Should usually be set, unless you are passing to the buffer
    -- only part of the text without the full context.
    endsText :: Bool,
    -- ^ special handling of the end of text paragraph can be applied to this buffer.
    preserveDefaultIgnorables :: Bool,
    -- ^ character with Default_Ignorable Unicode property should use the
    -- corresponding glyph from the font, instead of hiding them (done by
    -- replacing them with the space glyph and zeroing the advance width.)
    -- Takes precedance over `removeDefaultIgnorables`.
    removeDefaultIgnorables :: Bool,
    -- ^ character with Default_Ignorable Unicode property should be removed
    -- from glyph string instead of hiding them (done by replacing them with
    -- the space glyph and zeroing the advance width.)
    don'tInsertDottedCircle :: Bool,
    -- ^ a dotted circle should not be inserted in the rendering of incorrect
    -- character sequences (such as <0905 093E>).
    clusterLevel :: ClusterLevel,
    -- ^ dictates one aspect of how HarfBuzz will treat non-base characters
    -- during shaping.
    invisibleGlyph :: Char,
    -- ^ The glyph number that replaces invisible characters in the
    -- shaping result. If set to zero (default), the glyph for the U+0020
    -- SPACE character is used. Otherwise, this value is used verbatim.
    replacementCodepoint :: Char,
    -- ^ the glyph number that replaces invalid entries for a given encoding
    -- when adding text to buffer.
    notFoundGlyph :: Char
    -- ^ the glyph number that replaces replaces characters not found in the font.
  } deriving (Eq, Show, Read, Ord)

data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show, Read, Ord)
data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars
    deriving (Eq, Show, Read, Ord)

-- | An empty buffer with sensible default properties.
defaultBuffer = Buffer {
        text = Lazy.empty,
        contentType = Just ContentTypeUnicode,
        direction = Nothing,
        script = Nothing,
        language = Nothing,
        beginsText = True,
        endsText = True,
        preserveDefaultIgnorables = False,
        removeDefaultIgnorables = False,
        don'tInsertDottedCircle = False,
        clusterLevel = ClusterMonotoneGraphemes,
        invisibleGlyph = ' ',
        replacementCodepoint = '\xFFFD',
        notFoundGlyph = '\0'
    }

------
--- Directions
------

data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show, Read, Ord)
dirFromStr ('L':_) = DirLTR
dirFromStr ('l':_) = DirLTR
dirFromStr ('R':_) = DirRTL
dirFromStr ('r':_) = DirRTL
dirFromStr ('T':_) = DirTTB
dirFromStr ('t':_) = DirTTB
dirFromStr ('B':_) = DirBTT
dirFromStr ('b':_) = DirBTT
dirToStr DirLTR = "ltr"
dirToStr DirRTL = "rtl"
dirToStr DirTTB = "ttb"
dirToStr DirBTT = "btt"
dirReverse DirLTR = DirRTL
dirReverse DirRTL = DirLTR
dirReverse DirTTB = DirBTT
dirReverse DirBTT = DirTTB
dirBackward dir = dir `Prelude.elem` [DirRTL, DirBTT]
dirForward dir = dir `Prelude.elem` [DirLTR, DirTTB]
dirHorizontal dir = dir `Prelude.elem` [DirLTR, DirRTL]
dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT]

dir2int Nothing = 0
dir2int (Just DirLTR) = 4
dir2int (Just DirRTL) = 5
dir2int (Just DirTTB) = 6
dir2int (Just DirBTT) = 7
foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction
    :: Buffer' -> Int -> IO ()

int2dir 4 = Just DirLTR
int2dir 5 = Just DirRTL
int2dir 6 = Just DirTTB
int2dir 7 = Just DirBTT
int2dir _ = Nothing

scriptHorizontalDir :: String -> Maybe Direction
scriptHorizontalDir = int2dir . hb_script_get_horizontal_direction . script_from_string
foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizontal_direction
    :: Word32 -> Int

------
--- Locales
------
data Language'
type Language = Ptr Language'
languageDefault :: IO String
languageDefault = hb_language_get_default >>= hb_language_to_string >>= peekCString
foreign import ccall "hb_language_to_string" hb_language_to_string :: Language -> IO CString
foreign import ccall "hb_language_get_default" hb_language_get_default :: IO Language

foreign import ccall "hb_language_from_string" hb_language_from_string'
    :: CString -> Int -> IO Language
hb_language_from_string :: String -> IO Language
hb_language_from_string str =
    withCString str $ \str' -> hb_language_from_string' str' (-1)

{-languageMatches :: String -> String -> Bool
languageMatches lang specific = unsafePerformIO $ do
    lang' <- hb_language_from_string lang
    specific' <- hb_language_from_string specific
    hb_language_matches lang' specific'
foreign import ccall "hb_language_matches" hb_language_matches :: Language -> Language -> IO Bool-}

------
--- FFI Support
------

data Buffer''
type Buffer' = Ptr Buffer''

withNewBuffer :: (Buffer' -> IO a) -> IO a
withNewBuffer cb = do
    buf <- hb_buffer_create
    ret <- cb buf
    hb_buffer_destroy buf
    return ret
foreign import ccall "hb_buffer_create" hb_buffer_create :: IO Buffer'
foreign import ccall "hb_buffer_destroy" hb_buffer_destroy :: Buffer' -> IO ()

{-txt2primitiveArray (A.ByteArray arr') = ByteArray arr'
withByteArrayLen :: ByteArray -> (CString -> Int -> IO a) -> IO a
withByteArrayLen arr cb = let len = sizeofByteArray arr in allocaBytes len $ \arr' -> do
    copyByteArrayToPtr arr' arr 0 len
    cb arr' len-}
bufferWithText _ Lazy.Empty cb = cb
bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length) txts) cb = do
    hb_buffer_add_utf8 buffer arr (sizeofByteArray# arr) (toEnum offset) length
    bufferWithText buffer txts cb
foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8
    :: Buffer' -> ByteArray# -> Int# -> Word -> Int -> IO ()

titlecase :: String -> String
titlecase "" = ""
titlecase (c:cs) = toUpper c : Prelude.map toLower cs
script_from_string str = tag_from_string $ case titlecase str of
    'Q':'a':'a':'i':_ -> "Zinh"
    'Q':'a':'a':'c':_ -> "Copt"

    'A':'r':'a':'n':_ -> "Arab"
    'C':'y':'r':'s':_ -> "Cyrl"
    'G':'e':'o':'k':_ -> "Geor"
    'H':'a':'n':'s':_ -> "Hani"
    'H':'a':'n':'t':_ -> "Hani"
    'J':'a':'m':'o':_ -> "Hang"
    'L':'a':'t':'f':_ -> "Latn"
    'L':'a':'t':'g':_ -> "Latn"
    'S':'y':'r':'e':_ -> "Syrc"
    'S':'y':'r':'j':_ -> "Syrc"
    'S':'y':'r':'n':_ -> "Syrc"
    x -> x
tag_from_string :: String -> Word32
tag_from_string str = case str ++ Prelude.repeat '\0' of
    c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [
        shiftL (c2w c1 .&. 0x7) 24,
        shiftL (c2w c2 .&. 0x7) 16,
        shiftL (c2w c3 .&. 0x7) 8,
        shiftL (c2w c4 .&. 0x7) 0
      ]
    _ -> 0
tag_to_string :: Word32 -> String
tag_to_string tag = [
    w2c (shiftR tag 24 .&. 0x7),
    w2c (shiftR tag 16 .&. 0x7),
    w2c (shiftR tag 8 .&. 0x7),
    w2c (shiftR tag 0 .&. 0x7)
  ]

------
--- Haskell-to-C conversion
------

withBuffer :: Buffer -> (Buffer' -> IO a) -> IO a
withBuffer buf cb = withNewBuffer $ \buf' -> bufferWithText buf' (text buf) $ do
    hb_buffer_set_content_type buf' $ case contentType buf of
        Nothing -> 0
        Just ContentTypeUnicode -> 1
        Just ContentTypeGlyphs -> 2
    hb_buffer_set_direction buf' $ dir2int $ direction buf
    case script buf of
        Just script' -> hb_buffer_set_script buf' $ script_from_string script'
        Nothing -> return ()
    case language buf of
        Just lang' -> hb_buffer_set_language buf' =<< hb_language_from_string lang'
        Nothing -> return ()
    hb_buffer_set_flags buf' $ Prelude.foldl (.|.) 0 [
        if beginsText buf then 1 else 0,
        if endsText buf then 2 else 0,
        if preserveDefaultIgnorables buf then 4 else 0,
        if removeDefaultIgnorables buf then 8 else 0,
        if don'tInsertDottedCircle buf then 16 else 0
      ]
    hb_buffer_set_cluster_level buf' $ case clusterLevel buf of
        ClusterMonotoneGraphemes -> 0
        ClusterMonotoneChars -> 1
        ClusterChars -> 2
    hb_buffer_set_invisible_glyph buf' $ c2w $ invisibleGlyph buf
    hb_buffer_set_replacement_codepoint buf' $ c2w $ replacementCodepoint buf
--    hb_buffer_set_not_found_glyph buf' $ c2w $ notFoundGlyph buf
    case (contentType buf, direction buf, script buf, language buf) of
        (Just ContentTypeUnicode, Nothing, _, _) -> hb_buffer_guess_segment_properties buf'
        (Just ContentTypeUnicode, _, Nothing, _) -> hb_buffer_guess_segment_properties buf'
        (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buf'
    cb buf'
foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type
    :: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_script" hb_buffer_set_script
    :: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_language" hb_buffer_set_language
    :: Buffer' -> Language -> IO ()
foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level
    :: Buffer' -> Int -> IO ()
foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph
    :: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint
    :: Buffer' -> Word32 -> IO ()
--foreign import ccall "hb_buffer_set_not_found_glyph" hb_buffer_set_not_found_glyph
--    :: Buffer' -> Word32 -> IO ()
foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties
    :: Buffer' -> IO ()

------
--- C-to-Haskell conversion
------
data GlyphInfo = GlyphInfo {
    codepoint :: Word32,
    -- ^ Glyph index (or unicode codepoint)
    cluster :: Word32,
    -- ^ The index of the character in the original text that corresponds to
    -- this `GlyphInfo`. More than one `GlyphInfo` may have the same `cluster`
    -- value if they resulted from the same character, & when more than one
    -- character gets merged into the same glyph `GlyphInfo` will have the
    -- smallest cluster value of them.
    -- By default some characters are merged into the same cluster even when
    -- they are seperate glyphs, `Buffer`'s `clusterLevel` property allows
    -- selecting more fine grained cluster handling.
    unsafeToBreak :: Bool,
    unsafeToConcat :: Bool,
    safeToInsertTatweel :: Bool
} deriving (Show, Read, Eq)
instance Storable GlyphInfo where
    sizeOf _ = sizeOf (undefined :: Word32) * 5
    alignment _ = alignment (undefined :: Word32)
    peek ptr = do
        let ptr' :: Ptr Word32
            ptr' = castPtr ptr
        -- Ignore private fields.
        codepoint' <- ptr' `peekElemOff` 0
        mask <- ptr' `peekElemOff` 1
        cluster' <- ptr' `peekElemOff` 2
        return $ GlyphInfo codepoint' cluster' (mask `testBit` 1) (mask `testBit` 2) (mask `testBit` 3)
    poke ptr (GlyphInfo codepoint' cluster' flag1 flag2 flag3) = do
        -- Zero private fields.
        let ptr' :: Ptr Word32
            ptr' = castPtr ptr
        pokeElemOff ptr' 0 codepoint'
        pokeElemOff ptr' 1 $ Prelude.foldl (.|.) 0 [
            if flag1 then 1 else 0,
            if flag2 then 2 else 0,
            if flag3 then 4 else 0
          ]
        pokeElemOff ptr' 2 cluster'
        pokeElemOff ptr' 3 0
        pokeElemOff ptr' 4 0
glyphInfos buf' = do
    arr <- hb_buffer_get_glyph_infos buf' nullPtr
    length <- hb_buffer_get_length buf'
    if length == 0 || arr == nullPtr
    then return []
    else forM [0..fromEnum length - 1] $ peekElemOff arr
foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos
    :: Buffer' -> Ptr Word -> IO (Ptr GlyphInfo)
foreign import ccall "hb_buffer_get_length" hb_buffer_get_length :: Buffer' -> IO Word
-- NOTE: The array returned from FFI is valid as long as the buffer is.

data GlyphPos = GlyphPos {
    x_advance :: Int32,
    -- ^ How much the line advances after drawing this glyph when setting text
    -- in horizontal direction.
    y_advance :: Int32,
    -- ^ How much the line advances after drawing this glyph when setting text
    -- in vertical direction.
    x_offset :: Int32,
    -- ^ How much the glyph moves on the X-axis before drawing it, this should
    -- not effect how much the line advances.
    y_offset :: Int32
    -- ^ How much the glyph moves on the Y-axis before drawing it, this should
    -- not effect how much the line advances.
} deriving (Show, Read, Eq, Generic)
instance Storable GlyphPos where
    sizeOf _ = sizeOf (undefined :: Int32) * 5
    alignment _ = alignment (undefined :: Int32)
    peek ptr = let ptr' = castPtr ptr in do
        x_advance' <- ptr' `peekElemOff` 0
        y_advance' <- ptr' `peekElemOff` 1
        x_offset' <- ptr' `peekElemOff` 2
        y_offset' <- ptr' `peekElemOff` 3
        return $ GlyphPos x_advance' y_advance' x_offset' y_offset'
    poke ptr (GlyphPos x_advance' y_advance' x_offset' y_offset') = do
        let ptr' :: Ptr Int32
            ptr' = castPtr ptr
        pokeElemOff ptr' 0 x_advance'
        pokeElemOff ptr' 1 y_advance'
        pokeElemOff ptr' 2 x_offset'
        pokeElemOff ptr' 3 y_offset'
        pokeElemOff ptr' 4 0 -- Zero private field.
glyphsPos buf' = do
    arr <- hb_buffer_get_glyph_positions buf' nullPtr
    length <- hb_buffer_get_length buf'
    if length == 0 || arr == nullPtr
    then return []
    else forM [0..fromEnum length - 1] $ peekElemOff arr
foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions
    :: Buffer' -> Ptr Word -> IO (Ptr GlyphPos)
-- NOTE: The array returned from FFI is valid as long as the buffer is.

thawBuffer :: Buffer' -> IO Buffer
thawBuffer buf' = do
    glyphInfos' <- glyphInfos buf'
    contentType' <- hb_buffer_get_content_type buf'
    direction' <- hb_buffer_get_direction buf'
    script' <- hb_buffer_get_script buf'
    language'' <- hb_buffer_get_language buf'
    language' <- peekCString language''
    flags' <- hb_buffer_get_flags buf'
    clusterLevel' <- hb_buffer_get_cluster_level buf'
    invisibleGlyph' <- hb_buffer_get_invisible_glyph buf'
    replacementCodepoint' <- hb_buffer_get_replacement_codepoint buf'
    return defaultBuffer {
        text = Lazy.pack $ Prelude.map (w2c . codepoint) glyphInfos',
        contentType = case contentType' of
            1 -> Just ContentTypeUnicode
            2 -> Just ContentTypeGlyphs
            _ -> Nothing,
        direction = int2dir direction',
        script = Just $ tag_to_string script',
        language = Just language',
        beginsText = testBit flags' 0, endsText = testBit flags' 1,
        preserveDefaultIgnorables = testBit flags' 2,
        removeDefaultIgnorables = testBit flags' 3,
        don'tInsertDottedCircle = testBit flags' 4,
        clusterLevel = case clusterLevel' of
            1 -> ClusterMonotoneChars
            2 -> ClusterChars
            _ -> ClusterMonotoneGraphemes,
        invisibleGlyph = w2c invisibleGlyph',
        replacementCodepoint = w2c replacementCodepoint'
    }
foreign import ccall "hb_buffer_get_content_type" hb_buffer_get_content_type
    :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_direction" hb_buffer_get_direction :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_script" hb_buffer_get_script :: Buffer' -> IO Word32
foreign import ccall "hb_buffer_get_language" hb_buffer_get_language :: Buffer' -> IO CString
foreign import ccall "hb_buffer_get_flags" hb_buffer_get_flags :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_cluster_level" hb_buffer_get_cluster_level
    :: Buffer' -> IO Int
foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_glyph
    :: Buffer' -> IO Word32
foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint
    :: Buffer' -> IO Word32