~jaro/balkon

ref: 88a5b6c714c5543b01d63d836ad403b5755d8d76 balkon/src/Data/Text/ParagraphLayout/Internal/Tree.hs -rw-r--r-- 5.2 KiB
88a5b6c7Jaro Test shaping across intra-word breaks. 1 year, 5 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, with one box always present as the root.
-- Each box may contain any combination of text sequences and other boxes.
--
-- The type parameter @t@ refers to the way in which text is represented in the
-- tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
--   leaf node specifies only the number of bytes belonging to its text.
--
-- The `glue` function can be used to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ defines the type of user data to be associated with
-- each non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
    ( RootNode (..)
    , InnerNode (..)
    , Box (..)
    , Leaf (..)
    , flatten
    , glue
    )
where

import Data.Text (Text)
import Data.Text.Foreign (lengthWord8)
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)

import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions

-- | Root of the paragraph tree.
data RootNode t d

    = RootBox
    -- ^ The root inline box. Always present in a paragraph.
    --
    -- Cannot be styled directly, but can still set options for formatting
    -- text sequences directly descending from this box.
        (Box t d)
        -- ^ Contents of the box.

-- | Non-root node of the paragraph tree.
data InnerNode t d

    = InlineBox
    -- ^ An inline box, nested in another box.
        d
        -- ^ User-defined data associated with the box.
        (Box t d)
        -- ^ Contents of the box.
        BoxOptions
        -- ^ Style options to apply to the inline box.

    | TextSequence
    -- ^ A leaf node containing text.
        d
        -- ^ User-defined data associated with the text node.
        t
        -- ^ Representation of the text contained by the text node.

-- | A box with content and a defined format. Corresponds to a DOM element.
data Box t d = Box

    [InnerNode t d]
    -- ^ Text nodes and other boxes contained by this box.

    TextOptions
    -- ^ Style options to apply to text sequences directly contained
    -- by this box.


type BoxPath d = [ResolvedBox d]

-- | Representation of a leaf node of the tree after flattening.
data Leaf t d = TextLeaf

    d
    -- ^ User-defined data associated with the text node.

    t
    -- ^ Representation of the text contained by the text node.

    TextOptions
    -- ^ Style options to apply to this text sequence.

    (BoxPath d)
    -- ^ Inline boxes found on the path from this text sequence to the root
    -- of the original tree.

-- | Convert the tree to a flat list of its leaf nodes (text sequences).
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box ns textOpts)) = snd $ flattenNodes 0 [] textOpts ns

flattenNodes :: Int -> BoxPath d -> TextOptions -> [InnerNode t d] ->
    (Int, [Leaf t d])
flattenNodes idx _ _ [] = (idx, [])
flattenNodes idx path textOpts (n : ns) = (idx'', flat1 ++ flat2)
    where
        (idx', flat1) = flattenNode idx path textOpts n
        (idx'', flat2) = flattenNodes idx' path textOpts ns

flattenNode :: Int -> BoxPath d -> TextOptions -> InnerNode t d ->
    (Int, [Leaf t d])
flattenNode idx path textOpts (TextSequence d t) =
    (idx, [TextLeaf d t textOpts path])
flattenNode idx path _ (InlineBox d (Box ns textOpts) boxOpts) =
    flattenNodes (idx + 1) (ResolvedBox d idx boxOpts dir : path) textOpts ns
    where
        dir = textDirection textOpts

-- | Concatenate all pieces of text from a `Text` tree, plus an optional prefix
-- and suffix, into a contiguous `Text` and build an `Int` tree with
-- corresponding byte lengths.
--
-- Using a contiguous `Text` lets HarfBuzz take neighbouring characters into
-- consideration when making shaping decisions, so that for example kerning
-- does not stop working on node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix = (txt, initialOffset, root')
    where
        txt = Lazy.toStrict $ Lazy.chunk prefix txtTail
        initialOffset = lengthWord8 prefix
        (txtTail, root') = glueRoot (Lazy.chunk suffix Lazy.empty) root

glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot t (RootBox b) = (t', RootBox b')
    where
        (t', b') = glueBox t b

glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox t (Box nodes opts) = (t', Box nodes' opts)
    where
        (t', nodes') = glueNodes t nodes

glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes t [] = (t, [])
glueNodes t (node : nodes) = (t'', node' : nodes')
    where
        (t'', node') = glueNode t' node
        (t', nodes') = glueNodes t nodes

glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode suffix (TextSequence d txt) = (combinedText, node)
    where
        combinedText = Lazy.chunk txt suffix
        node = TextSequence d (lengthWord8 txt)
glueNode suffix (InlineBox d b o) = (combinedText, InlineBox d node o)
    where
        (combinedText, node) = glueBox suffix b