~jaro/balkon

ref: 0cf536ef34fcaf0c4f02e9dd4dc47523a47f631f balkon/src/Data/Text/ParagraphLayout/Internal/Tree.hs -rw-r--r-- 6.5 KiB
0cf536efJaro Add internal structure for vertical offsets. 1 year, 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, with one box always present as the root.
-- Each box may contain any combination of text sequences and other boxes.
--
-- The type parameter @t@ refers to the way in which text is represented in the
-- tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
--   leaf node specifies only the number of bytes belonging to its text.
--
-- The `glue` function can be used to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ defines the type of user data to be associated with
-- each non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
    ( RootNode (..)
    , InnerNode (..)
    , Box (..)
    , Leaf (..)
    , EmptyText
    , flatten
    , glue
    , strut
    )
where

import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (lengthWord8)
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)

import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions

-- | Root of the paragraph tree.
data RootNode t d

    = RootBox
    -- ^ The root inline box. Always present in a paragraph.
    --
    -- Cannot be styled directly, but can still set options for formatting
    -- text sequences directly descending from this box.
        (Box t d)
        -- ^ Contents of the box.

-- | Non-root node of the paragraph tree.
data InnerNode t d

    = InlineBox
    -- ^ An inline box, nested in another box.
        d
        -- ^ User-defined data associated with the box.
        (Box t d)
        -- ^ Contents of the box.
        BoxOptions
        -- ^ Style options to apply to the inline box.

    | TextSequence
    -- ^ A leaf node containing text.
        d
        -- ^ User-defined data associated with the text node.
        t
        -- ^ Representation of the text contained by the text node.

-- | A box with content and a defined format. Corresponds to a DOM element.
data Box t d = Box

    [InnerNode t d]
    -- ^ Text nodes and other boxes contained by this box.
    --
    -- Please note that boxes which are not the ancestors of any leaf nodes
    -- will have no effect on the output.
    --
    -- To ensure that all boxes are represented in the output, as would be
    -- expected for DOM elements in HTML, an empty text sequence must be added
    -- to otherwise empty boxes. This can be done by using the `strut` function.

    TextOptions
    -- ^ Style options to apply to text sequences directly contained
    -- by this box.


type BoxPath d = [ResolvedBox d]

-- | Representation of a leaf node of the tree after flattening.
data Leaf t d = TextLeaf

    d
    -- ^ User-defined data associated with the text node.

    t
    -- ^ Representation of the text contained by the text node.

    TextOptions
    -- ^ Style options to apply to this text sequence.

    (BoxPath d)
    -- ^ Inline boxes found on the path from this text sequence to the root
    -- of the original tree.

-- | Data types that can represent an empty text sequence in a tree.
class EmptyText t where
    empty :: t

instance EmptyText Int where
    empty = 0

instance EmptyText Text where
    empty = Text.empty

-- | Insert an empty text sequence with the given user data into each empty box.
strut :: EmptyText t => d -> RootNode t d -> RootNode t d
strut d (RootBox (Box ns textOpts)) = RootBox $ Box (strutNodes d ns) textOpts

strutNodes :: EmptyText t => d -> [InnerNode t d] -> [InnerNode t d]
strutNodes d ns = map (strutNode d) ns

strutNode :: EmptyText t => d -> InnerNode t d -> InnerNode t d
strutNode _ x@(TextSequence _ _) = x
strutNode d (InlineBox boxData (Box [] textOpts) boxOpts) =
    InlineBox boxData (Box [TextSequence d empty] textOpts) boxOpts
strutNode d (InlineBox boxData (Box ns textOpts) boxOpts) =
    InlineBox boxData (Box (strutNodes d ns) textOpts) boxOpts

-- | Convert the tree to a flat list of its leaf nodes (text sequences).
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box ns textOpts)) = snd $ flattenNodes 0 [] textOpts ns

flattenNodes :: Int -> BoxPath d -> TextOptions -> [InnerNode t d] ->
    (Int, [Leaf t d])
flattenNodes idx _ _ [] = (idx, [])
flattenNodes idx path textOpts (n : ns) = (idx'', flat1 ++ flat2)
    where
        (idx', flat1) = flattenNode idx path textOpts n
        (idx'', flat2) = flattenNodes idx' path textOpts ns

flattenNode :: Int -> BoxPath d -> TextOptions -> InnerNode t d ->
    (Int, [Leaf t d])
flattenNode idx path textOpts (TextSequence d t) =
    (idx, [TextLeaf d t textOpts path])
flattenNode idx path _ (InlineBox d (Box ns textOpts) boxOpts) =
    flattenNodes (idx + 1) (ResolvedBox d idx boxOpts dir : path) textOpts ns
    where
        dir = textDirection textOpts

-- | Concatenate all pieces of text from a `Text` tree, plus an optional prefix
-- and suffix, into a contiguous `Text` and build an `Int` tree with
-- corresponding byte lengths.
--
-- Using a contiguous `Text` lets HarfBuzz take neighbouring characters into
-- consideration when making shaping decisions, so that for example kerning
-- does not stop working on node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix = (txt, initialOffset, root')
    where
        txt = Lazy.toStrict $ Lazy.chunk prefix txtTail
        initialOffset = lengthWord8 prefix
        (txtTail, root') = glueRoot (Lazy.chunk suffix Lazy.empty) root

glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot t (RootBox b) = (t', RootBox b')
    where
        (t', b') = glueBox t b

glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox t (Box nodes opts) = (t', Box nodes' opts)
    where
        (t', nodes') = glueNodes t nodes

glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes t [] = (t, [])
glueNodes t (node : nodes) = (t'', node' : nodes')
    where
        (t'', node') = glueNode t' node
        (t', nodes') = glueNodes t nodes

glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode suffix (TextSequence d txt) = (combinedText, node)
    where
        combinedText = Lazy.chunk txt suffix
        node = TextSequence d (lengthWord8 txt)
glueNode suffix (InlineBox d b o) = (combinedText, InlineBox d node o)
    where
        (combinedText, node) = glueBox suffix b