-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, with one box always present as the root.
-- Each box may contain any combination of text sequences and other boxes.
--
-- The type parameter @t@ refers to the way in which text is represented in the
-- tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
-- leaf node specifies only the number of bytes belonging to its text.
--
-- The `glue` function can be used to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ defines the type of user data to be associated with
-- each non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
( RootNode (..)
, InnerNode (..)
, Box (..)
, Leaf (..)
, flatten
, glue
)
where
import Data.Text (Text)
import Data.Text.Foreign (lengthWord8)
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions
-- | Root of the paragraph tree.
--
-- The type parameters @t@ (text representation) and @d@ (user data) are
-- passed on unchanged to the contained `Box`.
data RootNode t d
    = RootBox
        -- ^ The root inline box. Always present in a paragraph.
        --
        -- Cannot be styled directly, but can still set options for formatting
        -- text sequences directly descending from this box.
        (Box t d)
        -- ^ Contents of the box.
-- | Non-root node of the paragraph tree.
--
-- A node is either an inline box, which nests a further `Box` of child nodes,
-- or a text sequence, which is a leaf holding text in the representation @t@.
-- Both kinds of node carry a value of the user data type @d@.
data InnerNode t d
    = InlineBox
        -- ^ An inline box, nested in another box.
        d
        -- ^ User-defined data associated with the box.
        (Box t d)
        -- ^ Contents of the box.
        BoxOptions
        -- ^ Style options to apply to the inline box.
    | TextSequence
        -- ^ A leaf node containing text.
        d
        -- ^ User-defined data associated with the text node.
        t
        -- ^ Representation of the text contained by the text node.
-- | A box with content and a defined format. Corresponds to a DOM element.
--
-- Used both as the content of the root node and as the content of every
-- nested inline box.
data Box t d = Box
    [InnerNode t d]
    -- ^ Text nodes and other boxes contained by this box.
    TextOptions
    -- ^ Style options to apply to text sequences directly contained
    -- by this box.
-- | Inline boxes enclosing a leaf, ordered from the innermost box outwards.
--
-- The root box is not part of the path: flattening starts with an empty path
-- and prepends one `ResolvedBox` for each inline box it descends into.
type BoxPath d = [ResolvedBox d]
-- | Representation of a leaf node of the tree after flattening.
--
-- Unlike a `TextSequence` inside the tree, a leaf is self-contained: it
-- carries the text options and the enclosing boxes that applied to it at its
-- original position in the tree.
data Leaf t d = TextLeaf
    d
    -- ^ User-defined data associated with the text node.
    t
    -- ^ Representation of the text contained by the text node.
    TextOptions
    -- ^ Style options to apply to this text sequence.
    (BoxPath d)
    -- ^ Inline boxes found on the path from this text sequence to the root
    -- of the original tree.
-- | Collect all text sequences of the tree into a flat list of leaves,
-- keeping the order in which they appear in the tree.
--
-- Traversal starts at the children of the root box with the box counter at
-- zero and an empty box path; the final counter value is discarded.
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box children rootOpts)) = leaves
    where
        (_, leaves) = flattenNodes 0 [] rootOpts children
-- | Flatten a list of sibling nodes that share the same box path and text
-- options.
--
-- The box counter is threaded through the siblings from left to right.
-- Returns the counter value after the last sibling together with the
-- leaves of all siblings, concatenated in order.
flattenNodes :: Int -> BoxPath d -> TextOptions -> [InnerNode t d] ->
    (Int, [Leaf t d])
flattenNodes startIdx path textOpts siblings = case siblings of
    [] -> (startIdx, [])
    node : rest ->
        let (afterNode, nodeLeaves) = flattenNode startIdx path textOpts node
            (afterRest, restLeaves) = flattenNodes afterNode path textOpts rest
        in (afterRest, nodeLeaves ++ restLeaves)
-- | Flatten a single node.
--
-- A text sequence becomes exactly one leaf, annotated with the text options
-- and box path passed in, and leaves the box counter unchanged.
--
-- An inline box takes the current counter value for its `ResolvedBox` and
-- advances the counter by one. Its children are then flattened with the new
-- `ResolvedBox` prepended to the path and with the box's own text options,
-- which also supply the text direction stored in the `ResolvedBox`. The text
-- options passed in from the parent are not used in this case.
flattenNode :: Int -> BoxPath d -> TextOptions -> InnerNode t d ->
    (Int, [Leaf t d])
flattenNode idx path parentOpts node = case node of
    TextSequence userData txt ->
        (idx, [TextLeaf userData txt parentOpts path])
    InlineBox userData (Box children innerOpts) boxOpts ->
        let resolved =
                ResolvedBox userData idx boxOpts (textDirection innerOpts)
        in flattenNodes (idx + 1) (resolved : path) innerOpts children
-- | Join the prefix, every piece of text in a `Text` tree, and the suffix
-- into one contiguous `Text`, and build the matching `Int` tree in which each
-- text node holds the byte length of its original text.
--
-- The result is a triple of:
--
-- 1. the combined text,
-- 2. the byte length of the prefix, i.e. the offset at which the text of the
--    tree begins within the combined text,
-- 3. the converted tree.
--
-- Using a contiguous `Text` lets HarfBuzz take neighbouring characters into
-- consideration when making shaping decisions, so that for example kerning
-- does not stop working on node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix =
    let -- The tree is glued onto the suffix first; the prefix goes in front.
        (body, lengthTree) = glueRoot (Lazy.chunk suffix Lazy.empty) root
        wholeText = Lazy.toStrict (Lazy.chunk prefix body)
    in (wholeText, lengthWord8 prefix, lengthTree)
-- | Prepend all text of the tree to the given lazy text and convert the root
-- node to its byte-length form by converting the box it wraps.
glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot following (RootBox box) =
    let (combined, box') = glueBox following box
    in (combined, RootBox box')
-- | Prepend all text contained in the box to the given lazy text and convert
-- the box's children to their byte-length form. The box's text options are
-- carried over unchanged.
glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox following (Box children textOpts) =
    let (combined, children') = glueNodes following children
    in (combined, Box children' textOpts)
-- | Prepend the text of a list of sibling nodes to the given lazy text and
-- convert each node to its byte-length form.
--
-- The siblings are folded from the right, so that the text of each node ends
-- up in front of the text of all nodes that follow it. The list of converted
-- nodes keeps the original order.
glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes suffix = foldr prepend (suffix, [])
    where
        -- The lazy pattern keeps the result pair available without first
        -- forcing the fold over the remaining siblings.
        prepend node ~(following, converted) =
            let (combined, node') = glueNode following node
            in (combined, node' : converted)
-- | Prepend the text of a single node to the given lazy text and convert the
-- node to its byte-length form.
--
-- A text sequence contributes its text as one chunk and is replaced by a
-- node holding that text's length in bytes. An inline box contributes the
-- text of its contents; its user data and box options are kept as they are.
glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode following node = case node of
    TextSequence userData txt ->
        (Lazy.chunk txt following, TextSequence userData (lengthWord8 txt))
    InlineBox userData box boxOpts ->
        let (combined, box') = glueBox following box
        in (combined, InlineBox userData box' boxOpts)