-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, with one box always present as the root.
-- Each box may contain any combination of text sequences and other boxes.
--
-- The type parameter @t@ refers to the way in which text is represented in the
-- tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
-- leaf node specifies only the number of bytes belonging to its text.
--
-- The `glue` function can be used to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ defines the type of user data to be associated with
-- each non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
( RootNode (..)
, InnerNode (..)
, Box (..)
, Leaf (..)
, EmptyText
, flatten
, glue
, strut
)
where
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (lengthWord8)
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions
-- | The topmost node of a paragraph tree.
data RootNode t d
    = RootBox
        -- ^ The root inline box, which every paragraph has.
        --
        -- It cannot be styled directly, but the text options of its box
        -- still apply to text sequences that are its direct children.
        (Box t d)
        -- ^ What the root box contains.
-- | Any node of the paragraph tree other than the root.
data InnerNode t d
    = InlineBox
        -- ^ An inline box that lives inside another box.
        d
        -- ^ User data attached to this box.
        (Box t d)
        -- ^ What the box contains.
        BoxOptions
        -- ^ Style options for the inline box itself.
    | TextSequence
        -- ^ A leaf of the tree, holding text.
        d
        -- ^ User data attached to this text node.
        t
        -- ^ The text of this node, in the representation chosen by @t@.
-- | Contents of a box together with the formatting of its text. Plays the
-- role of a DOM element.
data Box t d = Box
    [InnerNode t d]
    -- ^ Child nodes of this box: text sequences and nested boxes.
    --
    -- Be aware that a box with no leaf node among its descendants does not
    -- affect the output in any way.
    --
    -- If every box should show up in the output, the way DOM elements in
    -- HTML would, each otherwise empty box needs an empty text sequence.
    -- The `strut` function can be used to add them.
    TextOptions
    -- ^ Style options for the text sequences that are direct children
    -- of this box.
-- | Resolved inline boxes enclosing a leaf. `flattenNode` builds the list by
-- prepending each box as it descends, so the innermost box comes first.
type BoxPath d = [ResolvedBox d]
-- | A leaf node as it appears in the flattened tree.
data Leaf t d = TextLeaf
    d
    -- ^ User data attached to the text node.
    t
    -- ^ The text of the node, in the representation chosen by @t@.
    TextOptions
    -- ^ Style options in effect for this text sequence.
    (BoxPath d)
    -- ^ The inline boxes encountered when walking from this text sequence
    -- up to the root of the original tree.
-- | Text representations that have a value standing for an empty text
-- sequence in a tree.
class EmptyText t where
    -- | The value representing an empty text sequence.
    empty :: t

-- | A length of zero stands for no text.
instance EmptyText Int where
    empty = 0

-- | The empty `Text` stands for no text.
instance EmptyText Text where
    empty = Text.empty
-- | Fill every empty inline box in the tree with a single empty text sequence
-- carrying the given user data.
--
-- Only inline boxes are filled. The root box keeps its text options, and a
-- root box without any children is returned without children.
strut :: EmptyText t => d -> RootNode t d -> RootNode t d
strut userData (RootBox (Box children rootOpts)) =
    RootBox (Box (strutNodes userData children) rootOpts)
-- | Apply `strutNode` with the given user data to each node in a list of
-- siblings, keeping their order.
strutNodes :: EmptyText t => d -> [InnerNode t d] -> [InnerNode t d]
strutNodes userData = fmap (strutNode userData)
-- | Fill empty inline boxes within a single node.
--
-- Text sequences are returned as they are. An inline box without children
-- receives one empty text sequence tagged with the given user data. Any other
-- inline box has its children processed recursively.
strutNode :: EmptyText t => d -> InnerNode t d -> InnerNode t d
strutNode userData node = case node of
    TextSequence _ _ ->
        node
    InlineBox boxData (Box [] textOpts) boxOpts ->
        rebox boxData [TextSequence userData empty] textOpts boxOpts
    InlineBox boxData (Box children textOpts) boxOpts ->
        rebox boxData (strutNodes userData children) textOpts boxOpts
    where
        -- Reassemble an inline box around a new list of children.
        rebox payload kids txtOpts bxOpts =
            InlineBox payload (Box kids txtOpts) bxOpts
-- | Collect the leaf nodes (text sequences) of the tree into a flat list,
-- in the order in which they appear in the tree.
--
-- Box numbering starts at zero and the box path starts out empty, since the
-- root box itself is not recorded in the path.
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box children rootOpts)) = leaves
    where
        (_, leaves) = flattenNodes 0 [] rootOpts children
-- | Flatten a list of sibling nodes from left to right.
--
-- Takes the next unused box index, the path of enclosing boxes, and the text
-- options of the parent box. Returns the next unused box index after all
-- siblings have been processed, along with the leaves found in them.
flattenNodes :: Int -> BoxPath d -> TextOptions -> [InnerNode t d] ->
    (Int, [Leaf t d])
flattenNodes nextIdx path textOpts nodes = case nodes of
    [] ->
        (nextIdx, [])
    first : rest ->
        let (afterFirst, firstLeaves) =
                flattenNode nextIdx path textOpts first
            -- Siblings continue numbering where the first node stopped.
            (afterRest, restLeaves) =
                flattenNodes afterFirst path textOpts rest
        in (afterRest, firstLeaves ++ restLeaves)
-- | Flatten a single node.
--
-- A text sequence becomes one leaf that carries the parent's text options and
-- the current box path; the box index is passed through unchanged.
--
-- An inline box takes the current index for itself and is prepended to the
-- path. Its children are then flattened with the box's own text options,
-- starting from the following index.
flattenNode :: Int -> BoxPath d -> TextOptions -> InnerNode t d ->
    (Int, [Leaf t d])
flattenNode idx path textOpts node = case node of
    TextSequence d t ->
        (idx, [TextLeaf d t textOpts path])
    InlineBox d (Box children boxTextOpts) boxOpts ->
        let resolved = ResolvedBox d idx boxOpts (textDirection boxTextOpts)
        in flattenNodes (idx + 1) (resolved : path) boxTextOpts children
-- | Join the given prefix, every piece of text from a `Text` tree, and the
-- given suffix into one contiguous `Text`, and build an `Int` tree of the
-- same structure whose leaves hold the lengths of their original texts as
-- measured by `lengthWord8`.
--
-- The result consists of the joined text, the length of the prefix (that is,
-- the offset at which the text of the tree begins), and the `Int` tree.
--
-- Working with one contiguous `Text` allows HarfBuzz to look at neighbouring
-- characters when shaping, so that for example kerning keeps working across
-- node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix =
    let lazySuffix = Lazy.chunk suffix Lazy.empty
        -- The text is assembled back to front, starting from the suffix.
        (lazyBody, lengthTree) = glueRoot lazySuffix root
        fullText = Lazy.toStrict (Lazy.chunk prefix lazyBody)
    in (fullText, lengthWord8 prefix, lengthTree)
-- | Glue the contents of the root box in front of the given trailing text,
-- returning the combined text and the root of the corresponding `Int` tree.
glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot trailing (RootBox box) =
    let (combined, box') = glueBox trailing box
    in (combined, RootBox box')
-- | Glue the text of all nodes in a box in front of the given trailing text.
-- The text options of the box are carried over to the resulting box as is.
glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox trailing (Box children opts) =
    let (combined, children') = glueNodes trailing children
    in (combined, Box children' opts)
-- | Glue the text of a list of sibling nodes in front of the given trailing
-- text.
--
-- Nodes are processed from right to left, so that each node prepends its text
-- to the text already gathered from the nodes that follow it.
glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes trailing = foldr step (trailing, [])
    where
        -- The lazy pattern avoids forcing the result for the following nodes
        -- before the pair for this node is returned.
        step node ~(following, done) =
            let (combined, node') = glueNode following node
            in (combined, node' : done)
-- | Glue the text of a single node in front of the given trailing text.
--
-- A text sequence prepends its own text as one chunk and is replaced by a
-- node holding the `lengthWord8` of that text. An inline box delegates to
-- `glueBox` for its contents and keeps its user data and box options.
glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode trailing node = case node of
    TextSequence d txt ->
        (Lazy.chunk txt trailing, TextSequence d (lengthWord8 txt))
    InlineBox d box opts ->
        let (combined, box') = glueBox trailing box
        in (combined, InlineBox d box' opts)