~alcinnz/harfbuzz-pure

ref: 7aae895f30d192648e4257d7c695e541b07a1686 harfbuzz-pure/Data/Text/Glyphize/Buffer.hs -rw-r--r-- 9.5 KiB
7aae895f — Adrian Cochrane Integrate guessing chars, document properties, expose default property values. 2 years ago
                                                                                
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
f2772de8 Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
7aae895f Adrian Cochrane
2e079971 Adrian Cochrane
f2772de8 Adrian Cochrane
7aae895f Adrian Cochrane
f2772de8 Adrian Cochrane
7aae895f Adrian Cochrane
f2772de8 Adrian Cochrane
7aae895f Adrian Cochrane
f2772de8 Adrian Cochrane
7aae895f Adrian Cochrane
f2772de8 Adrian Cochrane
7aae895f Adrian Cochrane
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
module Data.Text.Glyphize.Buffer where

import Data.Text.Lazy as Lazy
import Data.ByteString.Lazy as Lazy
import Data.ByteString.Lazy as LBS
import Data.Text.Short

import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.C.Types
import Data.Word
import System.IO.Unsafe (unsafePerformIO)

import Data.Text.Lazy.Encoding
import Data.ByteString.Lazy.Internal as Lazy
import Data.ByteString.Internal as Strict
import Data.ByteString.Short.Internal as Strict
import Data.Bits ((.|.))
import Data.Char (ord)

-- | The text to shape together with every property that influences how
-- HarfBuzz shapes it.
data Buffer = Buffer
    { text :: Either Lazy.Text Lazy.ByteString
    -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs.
    , contentType :: Maybe ContentType
    -- ^ What the bytes of the ByteString contents represent: either Unicode
    -- characters (before shaping) or glyphs (the result of shaping).
    -- Callers should typically leave this as @Just ContentTypeUnicode@.
    , direction :: Maybe Direction
    -- ^ The text flow direction of the buffer.
    -- No shaping can happen without a buffer direction, and it controls the
    -- visual direction of the output glyphs; for RTL direction the glyphs
    -- will be reversed. Many layout features depend on this being set
    -- properly. For example, reversing RTL text before shaping and then
    -- shaping with LTR direction is not the same as keeping the text in
    -- logical order and shaping with RTL direction.
    , script :: Maybe ShortText
    -- ^ Script is crucial for choosing the proper shaping behaviour for
    -- scripts that require it (e.g. Arabic), and for choosing which OpenType
    -- features defined in the font get applied.
    , language :: Maybe ShortText
    -- ^ Languages are crucial for selecting which OpenType features to apply
    -- to the buffer, which can result in language-specific behaviour.
    -- Languages are orthogonal to scripts: though related, they are
    -- different concepts and should not be confused with each other.
    , beginsText :: Bool
    -- ^ Special handling of the beginning of a text paragraph can be applied
    -- to this buffer. Should usually be set, unless you are passing only part
    -- of the text to the buffer without the full context.
    , endsText :: Bool
    -- ^ Special handling of the end of a text paragraph can be applied to
    -- this buffer.
    , preserveDefaultIgnorables :: Bool
    -- ^ Characters with the Default_Ignorable Unicode property should use the
    -- corresponding glyph from the font, instead of being hidden (done by
    -- replacing them with the space glyph and zeroing the advance width).
    -- Takes precedence over `removeDefaultIgnorables`.
    , removeDefaultIgnorables :: Bool
    -- ^ Characters with the Default_Ignorable Unicode property should be
    -- removed from the glyph string instead of being hidden (done by
    -- replacing them with the space glyph and zeroing the advance width).
    , don'tInsertDottedCircle :: Bool
    -- ^ A dotted circle should not be inserted when rendering incorrect
    -- character sequences (such as <0905 093E>).
    , clusterLevel :: ClusterLevel
    -- ^ Dictates one aspect of how HarfBuzz will treat non-base characters
    -- during shaping.
    , invisibleGlyph :: Int
    -- ^ The glyph number that replaces invisible characters in the shaping
    -- result. If set to zero (the default), the glyph for the U+0020 SPACE
    -- character is used. Otherwise, this value is used verbatim.
    , notFoundGlyph :: Int
    -- ^ The glyph number that replaces characters not found in the font
    -- during shaping. The not-found glyph defaults to zero, sometimes known
    -- as the ".notdef" glyph. This API allows for differentiating the two.
    , replacementCodepoint :: Char
    -- ^ The codepoint (@hb_codepoint_t@) that replaces invalid entries for a
    -- given encoding when adding text to the buffer.
    }

-- | A buffer holding no text, with every property at a sensible default:
-- Unicode content, no direction\/script\/language (left for HarfBuzz to
-- guess), both paragraph-boundary flags set and all other flags cleared.
defaultBuffer :: Buffer
defaultBuffer = Buffer
    { text = Right LBS.empty
    , contentType = Just ContentTypeUnicode
    , direction = Nothing
    , script = Nothing
    , language = Nothing
    , beginsText = True
    , endsText = True
    , preserveDefaultIgnorables = False
    , removeDefaultIgnorables = False
    , don'tInsertDottedCircle = False
    , clusterLevel = ClusterMonotoneGraphemes
    , invisibleGlyph = 0
    , notFoundGlyph = 0
    , replacementCodepoint = '\xFFFD'
    }

-- | What the contents of a buffer represent.
data ContentType
    = ContentTypeUnicode -- ^ Unicode characters (before shaping).
    | ContentTypeGlyphs -- ^ Glyphs (the result of shaping).
    deriving (Eq, Show)
-- | Text flow direction of a buffer.
data Direction
    = DirLTR -- ^ Left-to-right.
    | DirRTL -- ^ Right-to-left.
    | DirTTB -- ^ Top-to-bottom.
    | DirBTT -- ^ Bottom-to-top.
    deriving (Eq, Show)
-- | Selects how HarfBuzz treats non-base characters during shaping.
data ClusterLevel
    = ClusterMonotoneGraphemes
    | ClusterMonotoneChars
    | ClusterChars
    deriving (Eq, Show)

-- | Identification of a single glyph.
-- NOTE(review): presumably intended to mirror HarfBuzz's @hb_glyph_info_t@;
-- nothing in this module populates it yet — confirm once shaping is wired up.
data GlyphInfo = GlyphInfo
    { codepoint :: Int
    , cluster :: Int
    }
-- | Advance and offset of a single glyph along both axes.
-- NOTE(review): presumably intended to mirror HarfBuzz's
-- @hb_glyph_position_t@; nothing in this module populates it yet — confirm.
data GlyphPos = GlyphPos
    { x_advance :: Int
    , y_advance :: Int
    , x_offset :: Int
    , y_offset :: Int
    }

-- guessSegmentProperties :: Buffer -> Buffer
-- glyphInfo & glyphPositions to be zipped & return from shape function
-- scriptHorizontalDir :: ShortText -> Direction

-- | Swap a direction for its opposite along the same axis.
dirReverse :: Direction -> Direction
dirReverse dir = case dir of
    DirLTR -> DirRTL
    DirRTL -> DirLTR
    DirTTB -> DirBTT
    DirBTT -> DirTTB
-- | True for the right-to-left and bottom-to-top directions.
dirBackward :: Direction -> Bool
dirBackward DirRTL = True
dirBackward DirBTT = True
dirBackward _ = False

-- | True for the left-to-right and top-to-bottom directions.
dirForward :: Direction -> Bool
dirForward DirLTR = True
dirForward DirTTB = True
dirForward _ = False

-- | True for the left-to-right and right-to-left directions.
dirHorizontal :: Direction -> Bool
dirHorizontal DirLTR = True
dirHorizontal DirRTL = True
dirHorizontal _ = False

-- | True for the top-to-bottom and bottom-to-top directions.
dirVertical :: Direction -> Bool
dirVertical DirTTB = True
dirVertical DirBTT = True
dirVertical _ = False

---

-- | Managed handle to a native buffer, as returned by @freeze'@ (which
-- attaches @hb_buffer_destroy@ as its finalizer).
type Buffer' = ForeignPtr Buffer''
-- | Constructor-less tag type standing in for the C-side buffer struct;
-- only ever used as the target of a 'Ptr' or 'ForeignPtr'.
data Buffer''

-- | Pure wrapper around @freeze'@: builds the native buffer for the given
-- 'Buffer' by running the IO action through 'unsafePerformIO'.
-- NOTE(review): this is only sound if the resulting native buffer is never
-- mutated afterwards — confirm against callers.
freeze :: Buffer -> Buffer'
freeze buf = unsafePerformIO (freeze' buf)
-- | Allocate a native HarfBuzz buffer and copy every property of the given
-- 'Buffer' into it. The result is wrapped in a 'ForeignPtr' whose finalizer
-- is @hb_buffer_destroy@.
--
-- When the content type is Unicode and any of direction, script or language
-- is unset, HarfBuzz is asked to guess the missing segment properties from
-- the text. In every other case the properties are left exactly as given.
freeze' :: Buffer -> IO Buffer'
freeze' buf = do
    buffer <- hb_buffer_create
    case text buf of
        Right bs -> hb_buffer_add_bytestring buffer bs
        -- Convert text to bytestring for now due to the text 2.0 UTF-8 transition.
        -- Unfortunately this may prevent HarfBuzz from reading opening context,
        -- so for correctness we'll eventually want to depend on text>2.0
        Left txt -> hb_buffer_add_bytestring buffer $ encodeUtf8 txt
    -- The integer codes below are the values handed to the C API;
    -- 0 is sent when the property is unset.
    hb_buffer_set_content_type buffer $ case contentType buf of
        Nothing -> 0
        Just ContentTypeUnicode -> 1
        Just ContentTypeGlyphs -> 2
    hb_buffer_set_direction buffer $ case direction buf of
        Nothing -> 0
        Just DirLTR -> 4
        Just DirRTL -> 5
        Just DirTTB -> 6
        Just DirBTT -> 7
    case script buf of
        Just script' -> hb_buffer_set_script buffer =<< hb_script_from_txt script'
        Nothing -> return ()
    case language buf of
        Just lang' -> hb_buffer_set_language buffer =<< hb_language_from_txt lang'
        Nothing -> return ()
    -- Each boolean field contributes one bit to the flags word.
    hb_buffer_set_flags buffer $ Prelude.foldl (.|.) 0 [
        if beginsText buf then 1 else 0,
        if endsText buf then 2 else 0,
        if preserveDefaultIgnorables buf then 4 else 0,
        if removeDefaultIgnorables buf then 8 else 0,
        if don'tInsertDottedCircle buf then 16 else 0
      ]
    hb_buffer_set_cluster_level buffer $ case clusterLevel buf of
        ClusterMonotoneGraphemes -> 0
        ClusterMonotoneChars -> 1
        ClusterChars -> 2
    hb_buffer_set_invisible_glyph buffer $ invisibleGlyph buf
    hb_buffer_set_not_found_glyph buffer $ notFoundGlyph buf
    hb_buffer_set_replacement_codepoint buffer $ ord $ replacementCodepoint buf
    case (contentType buf, direction buf, script buf, language buf) of
        (Just ContentTypeUnicode, Nothing, _, _) -> hb_buffer_guess_segment_properties buffer
        (Just ContentTypeUnicode, _, Nothing, _) -> hb_buffer_guess_segment_properties buffer
        (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buffer
        -- Fully specified Unicode buffers and non-Unicode buffers need no
        -- guessing. Without this alternative the match is non-exhaustive and
        -- throws at runtime for exactly those buffers.
        _ -> return ()
    newForeignPtr hb_buffer_destroy buffer

-- | Placeholder for the inverse of @freeze@: currently ignores its argument
-- and returns unit.
thaw :: a -> ()
thaw _ = ()

-- | Binding to the C function @hb_buffer_create@, yielding a raw pointer to
-- a native buffer. The caller is responsible for arranging its destruction
-- (@freeze'@ does so via 'newForeignPtr').
foreign import ccall "hb_buffer_create" hb_buffer_create :: IO (Ptr Buffer'')
-- | Address of the C function @hb_buffer_destroy@, imported as a 'FunPtr' so
-- that it can be installed as a 'ForeignPtr' finalizer.
foreign import ccall "&hb_buffer_destroy" hb_buffer_destroy :: FunPtr (Ptr Buffer'' -> IO ())
-- | Binding to the C function @hb_buffer_add_utf8@. Per the HarfBuzz API the
-- arguments after the buffer are: text pointer, total text length, offset of
-- the item within the text, and length of the item.
-- NOTE(review): HarfBuzz declares the lengths as C @int@ and the offset as
-- C @unsigned int@, whereas this import uses Haskell 'Int' — consider
-- @CInt@\/@CUInt@.
foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8
    :: Ptr Buffer'' -> Ptr Word8 -> Int -> Int -> Int -> IO ()
-- | Append every strict chunk of a lazy UTF-8 'Lazy.ByteString' to the
-- native buffer, in order.
--
-- A chunk's payload occupies bytes @[off, off + len)@ relative to its base
-- pointer. The text handed to HarfBuzz therefore spans @off + len@ bytes, and
-- the item to add starts at @off@ and is @len@ bytes long.
--
-- NOTE(review): each chunk is added by a separate call, so a multi-byte
-- character split across a chunk boundary would not decode as one
-- character — confirm whether inputs can be chunked that way.
hb_buffer_add_bytestring :: Ptr Buffer'' -> Lazy.ByteString -> IO ()
hb_buffer_add_bytestring _ Lazy.Empty = return ()
hb_buffer_add_bytestring buf (Lazy.Chunk (Strict.PS base off len) rest) = do
    withForeignPtr base $ \base' -> hb_buffer_add_utf8 buf base' (off + len) off len
    hb_buffer_add_bytestring buf rest
-- | Binding to the C function @hb_buffer_set_content_type@. @freeze'@ passes
-- 0 when no content type is set, 1 for Unicode and 2 for glyphs.
foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_direction@. @freeze'@ passes
-- 0 when no direction is set and 4 through 7 for LTR, RTL, TTB and BTT.
foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_script_from_string@ (string pointer and
-- byte length in, script code out). Imported as a pure function, so callers
-- must make sure it is evaluated while the pointer is still alive.
foreign import ccall "hb_script_from_string" hb_script_from_string
    :: Ptr Word8 -> Int -> Int
-- | Look up the HarfBuzz script code for a script tag given as 'ShortText'.
--
-- The result is forced with '$!' so that the foreign call runs inside
-- 'withForeignPtr'. A lazily returned thunk could be evaluated after the
-- underlying bytes have been released. The chunk offset is applied to the
-- pointer so that a non-zero offset is honoured.
hb_script_from_txt :: ShortText -> IO Int
hb_script_from_txt txt = let Strict.PS ptr offset size = toByteString txt
    in withForeignPtr ptr $ \ptr' ->
        return $! hb_script_from_string (ptr' `plusPtr` offset) size
-- | Binding to the C function @hb_buffer_set_script@. @freeze'@ feeds it the
-- script code obtained from @hb_script_from_txt@.
foreign import ccall "hb_buffer_set_script" hb_buffer_set_script
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_language_from_string@ (string pointer and
-- byte length in, language handle out). Imported as a pure function, so
-- callers must make sure it is evaluated while the pointer is still alive.
-- NOTE(review): HarfBuzz's @hb_language_t@ is an opaque pointer; holding it
-- in an 'Int' assumes 'Int' is pointer-sized on the target — confirm.
foreign import ccall "hb_language_from_string" hb_language_from_string
    :: Ptr Word8 -> Int -> Int
-- | Look up the HarfBuzz language handle for a language tag given as
-- 'ShortText'.
--
-- This must call @hb_language_from_string@; it previously called the script
-- lookup by mistake. The result is forced with '$!' so that the foreign call
-- runs inside 'withForeignPtr', not later through an escaped thunk. The chunk
-- offset is applied to the pointer so that a non-zero offset is honoured.
hb_language_from_txt :: ShortText -> IO Int
hb_language_from_txt txt = let Strict.PS ptr offset size = toByteString txt
    in withForeignPtr ptr $ \ptr' ->
        return $! hb_language_from_string (ptr' `plusPtr` offset) size
-- | Binding to the C function @hb_buffer_set_language@. @freeze'@ feeds it
-- the value obtained from @hb_language_from_txt@.
foreign import ccall "hb_buffer_set_language" hb_buffer_set_language
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_flags@. @freeze'@ passes the
-- bitwise OR of 1, 2, 4, 8 and 16 for the five boolean 'Buffer' fields.
foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_cluster_level@. @freeze'@
-- passes 0, 1 or 2 for the three 'ClusterLevel' constructors in
-- declaration order.
foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_invisible_glyph@; receives the
-- 'invisibleGlyph' field unchanged.
foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_not_found_glyph@; receives the
-- 'notFoundGlyph' field unchanged.
foreign import ccall "hb_buffer_set_not_found_glyph" hb_buffer_set_not_found_glyph
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_set_replacement_codepoint@;
-- receives the 'ord' of the 'replacementCodepoint' field.
foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint
    :: Ptr Buffer'' -> Int -> IO ()
-- | Binding to the C function @hb_buffer_guess_segment_properties@. @freeze'@
-- calls it only for Unicode buffers lacking a direction, script or language.
foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties
    :: Ptr Buffer'' -> IO ()