-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, and one box is always present as the
-- root. Any box may hold an arbitrary mix of text sequences and nested boxes.
--
-- The type parameter @t@ selects how text is represented in the tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
--   leaf node specifies only the number of bytes belonging to its text.
--
-- Use the `glue` function to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ is the type of user data associated with each
-- non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
    ( RootNode (..)
    , InnerNode (..)
    , Box (..)
    , Leaf (..)
    , flatten
    , glue
    )
where

import Data.Text (Text)
import Data.Text.Foreign (lengthWord8)
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)

import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions

-- | Root of the paragraph tree.
data RootNode t d

    = RootBox
    -- ^ The root inline box. Always present in a paragraph.
    --
    -- Cannot be styled directly, but can still set options for formatting
    -- text sequences directly descending from this box.

        (Box t d)
        -- ^ Contents of the box.

-- | Non-root node of the paragraph tree.
data InnerNode t d

    = InlineBox
    -- ^ An inline box, nested in another box.

        d
        -- ^ User-defined data associated with the box.

        (Box t d)
        -- ^ Contents of the box.

        BoxOptions
        -- ^ Style options to apply to the inline box.

    | TextSequence
    -- ^ A leaf node containing text.

        d
        -- ^ User-defined data associated with the text node.

        t
        -- ^ Representation of the text contained by the text node.

-- | A box with content and a defined format. Corresponds to a DOM element.
data Box t d = Box

    [InnerNode t d]
    -- ^ Text nodes and other boxes contained by this box.

    TextOptions
    -- ^ Style options to apply to text sequences directly contained
    -- by this box.

-- | Inline boxes enclosing a text sequence. `flattenNode` conses each box
-- onto the front, so the innermost box comes first.
type BoxPath d = [ResolvedBox d]

-- | Representation of a leaf node of the tree after flattening.
data Leaf t d = TextLeaf

    d
    -- ^ User-defined data associated with the text node.

    t
    -- ^ Representation of the text contained by the text node.

    TextOptions
    -- ^ Style options to apply to this text sequence.

    (BoxPath d)
    -- ^ Inline boxes found on the path from this text sequence to the root
    -- of the original tree.

-- | Convert the tree to a flat list of its leaf nodes (text sequences).
--
-- Leaves are produced in the order in which they appear in the tree.
-- Inline boxes are numbered from 0 in the order in which they are entered;
-- the root box itself is not numbered and never appears in a `BoxPath`.
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box ns textOpts)) = leaves
    where
        -- The final box counter is of no interest to the caller.
        (_, leaves) = flattenNodes 0 [] textOpts ns

-- | Flatten a list of sibling nodes.
--
-- Takes the next unused box index, the path of boxes enclosing the siblings,
-- and the text options of their parent box. Returns the next unused box
-- index after processing all siblings, together with their leaves.
flattenNodes
    :: Int
    -> BoxPath d
    -> TextOptions
    -> [InnerNode t d]
    -> (Int, [Leaf t d])
flattenNodes nextIdx path textOpts nodes = case nodes of
    [] ->
        (nextIdx, [])
    node : rest ->
        let
            -- Thread the counter through the first node, then through the
            -- remaining siblings, so that indices follow document order.
            (afterNode, nodeLeaves) = flattenNode nextIdx path textOpts node
            (afterRest, restLeaves) = flattenNodes afterNode path textOpts rest
        in
            (afterRest, nodeLeaves ++ restLeaves)

-- | Flatten a single node.
--
-- A text sequence becomes one leaf carrying the parent's text options and
-- the current path; it does not consume a box index.
--
-- An inline box consumes the current index, is pushed onto the front of the
-- path, and its children are flattened using the box's own text options.
flattenNode
    :: Int
    -> BoxPath d
    -> TextOptions
    -> InnerNode t d
    -> (Int, [Leaf t d])
flattenNode nextIdx path parentTextOpts node = case node of
    TextSequence d t ->
        (nextIdx, [TextLeaf d t parentTextOpts path])
    InlineBox d (Box children boxTextOpts) boxOpts ->
        let
            -- The box's direction is taken from its own text options,
            -- not from those of the enclosing box.
            resolved =
                ResolvedBox d nextIdx boxOpts (textDirection boxTextOpts)
        in
            flattenNodes (nextIdx + 1) (resolved : path) boxTextOpts children

-- | Concatenate all pieces of text from a `Text` tree, plus an optional prefix
-- and suffix, into a contiguous `Text` and build an `Int` tree with
-- corresponding byte lengths.
--
-- Using a contiguous `Text` lets HarfBuzz take neighbouring characters into
-- consideration when making shaping decisions, so that for example kerning
-- does not stop working on node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix =
    let
        -- Start from the suffix and let the tree prepend its own text to it.
        (bodyAndSuffix, intRoot) =
            glueRoot (Lazy.chunk suffix Lazy.empty) root
        -- The prefix goes in front of everything else; the lazy chunks are
        -- only merged into one strict `Text` at this point.
        wholeText = Lazy.toStrict (Lazy.chunk prefix bodyAndSuffix)
    in
        -- The middle component is the size of the prefix as reported by
        -- `lengthWord8`, i.e. the offset at which the tree's text starts.
        (wholeText, lengthWord8 prefix, intRoot)

-- | Prepend the text of the whole tree to the given lazy text and convert the
-- root node to its `Int` form.
glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot following (RootBox box) =
    let (combined, box') = glueBox following box
    in (combined, RootBox box')

-- | Prepend the text of all nodes in the box to the given lazy text and
-- convert the box to its `Int` form. The box's text options are kept as is.
glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox following (Box nodes opts) =
    let (combined, nodes') = glueNodes following nodes
    in (combined, Box nodes' opts)

-- | Prepend the text of a list of sibling nodes to the given lazy text and
-- convert each node to its `Int` form.
--
-- Text is threaded from right to left: the last node is glued onto the given
-- text first, and each earlier node is glued onto the result of its
-- successors, so the combined text comes out in document order.
glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes following = foldr step (following, [])
    where
        -- The lazy pattern keeps the result tuple available without first
        -- forcing the fold over the remaining siblings.
        step node ~(laterText, laterNodes) =
            let (combined, node') = glueNode laterText node
            in (combined, node' : laterNodes)

-- | Prepend the text of a single node to the given lazy text and convert the
-- node to its `Int` form.
glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode following node = case node of
    TextSequence d txt ->
        -- A text node contributes its own text as one chunk and remembers
        -- only the size of that text as reported by `lengthWord8`.
        (Lazy.chunk txt following, TextSequence d (lengthWord8 txt))
    InlineBox d box opts ->
        -- A box contributes whatever its contents contribute.
        let (combined, box') = glueBox following box
        in (combined, InlineBox d box' opts)