-- | Represents the contents of a paragraph as a tree.
--
-- The tree is a hierarchy of boxes, with one box always present as the root.
-- Each box may contain any combination of text sequences and other boxes.
--
-- The type parameter @t@ refers to the way in which text is represented in the
-- tree:
--
-- - `Text` means that each leaf node directly contains its own text.
-- - `Int` means that the text comes from a contiguous byte array, and each
--   leaf node specifies only the number of bytes belonging to its text.
--
-- The `glue` function can be used to convert a `Text` tree into an `Int` tree.
--
-- The type parameter @d@ defines the type of user data to be associated with
-- each non-root node.
module Data.Text.ParagraphLayout.Internal.Tree
    ( RootNode (..)
    , InnerNode (..)
    , Box (..)
    , Leaf (..)
    -- The class is exported without its method.
    , EmptyText
    , flatten
    , glue
    , strut
    )
where

import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (lengthWord8)
-- Both lazy-text modules share the @Lazy@ qualifier.
import qualified Data.Text.Lazy as Lazy (toStrict)
import qualified Data.Text.Internal.Lazy as Lazy (Text, chunk, empty)

import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextOptions

-- | Root of the paragraph tree.
data RootNode t d

    = RootBox
    -- ^ The root inline box. Always present in a paragraph.
    --
    -- Cannot be styled directly, but can still set options for formatting
    -- text sequences directly descending from this box.

        (Box t d)
        -- ^ Contents of the box.

-- | Non-root node of the paragraph tree.
data InnerNode t d

    = InlineBox
    -- ^ An inline box, nested in another box.

        d
        -- ^ User-defined data associated with the box.

        (Box t d)
        -- ^ Contents of the box.

        BoxOptions
        -- ^ Style options to apply to the inline box.

    | TextSequence
    -- ^ A leaf node containing text.

        d
        -- ^ User-defined data associated with the text node.

        t
        -- ^ Representation of the text contained by the text node.

-- | A box with content and a defined format.
-- Corresponds to a DOM element.
data Box t d = Box

    [InnerNode t d]
    -- ^ Text nodes and other boxes contained by this box.
    --
    -- Please note that boxes which are not the ancestors of any leaf nodes
    -- will have no effect on the output.
    --
    -- To ensure that all boxes are represented in the output, as would be
    -- expected for DOM elements in HTML, an empty text sequence must be added
    -- to otherwise empty boxes. This can be done by using the `strut` function.

    TextOptions
    -- ^ Style options to apply to text sequences directly contained
    -- by this box.

-- Inline boxes enclosing a leaf, as stored in `TextLeaf`.
type BoxPath d = [ResolvedBox d]

-- | Representation of a leaf node of the tree after flattening.
data Leaf t d = TextLeaf

    d
    -- ^ User-defined data associated with the text node.

    t
    -- ^ Representation of the text contained by the text node.

    TextOptions
    -- ^ Style options to apply to this text sequence.

    (BoxPath d)
    -- ^ Inline boxes found on the path from this text sequence to the root
    -- of the original tree.

-- | Data types that can represent an empty text sequence in a tree.
class EmptyText t where
    -- | The value that stands for a text sequence without any content.
    empty :: t

-- An `Int` tree stores byte lengths, so empty text has length zero.
instance EmptyText Int where
    empty = 0

instance EmptyText Text where
    empty = Text.empty

-- | Insert an empty text sequence with the given user data into each empty box.
--
-- Only inline boxes are filled; a root box without children is returned
-- with its (empty) list of children unchanged.
strut :: EmptyText t => d -> RootNode t d -> RootNode t d
strut filler (RootBox (Box children rootOpts)) =
    RootBox (Box (strutNodes filler children) rootOpts)

-- Apply `strutNode` to every node of a list, preserving order.
strutNodes :: EmptyText t => d -> [InnerNode t d] -> [InnerNode t d]
strutNodes filler children = [strutNode filler child | child <- children]

-- Text sequences are returned untouched. An inline box without children
-- receives a single empty text sequence carrying the given user data; any
-- other inline box has its children processed recursively.
strutNode :: EmptyText t => d -> InnerNode t d -> InnerNode t d
strutNode filler node = case node of
    TextSequence _ _ ->
        node
    InlineBox boxData (Box [] opts) boxOpts ->
        rebuild boxData [TextSequence filler empty] opts boxOpts
    InlineBox boxData (Box children opts) boxOpts ->
        rebuild boxData (strutNodes filler children) opts boxOpts
  where
    rebuild boxData children opts boxOpts =
        InlineBox boxData (Box children opts) boxOpts

-- | Convert the tree to a flat list of its leaf nodes (text sequences).
flatten :: RootNode t d -> [Leaf t d]
flatten (RootBox (Box children rootOpts)) = leaves
  where
    -- Box numbering starts at zero and the root contributes no path entry.
    (_, leaves) = flattenNodes 0 [] rootOpts children

-- Flatten a list of sibling nodes from left to right.
--
-- The `Int` argument is the index to give to the next inline box that is
-- encountered; the `Int` in the result is the index to continue with after
-- all of the given nodes (and their descendants) have been visited.
flattenNodes
    :: Int
    -> BoxPath d
    -> TextOptions
    -> [InnerNode t d]
    -> (Int, [Leaf t d])
flattenNodes nextIdx path opts nodes = case nodes of
    [] ->
        (nextIdx, [])
    (first : rest) ->
        let (idxAfterFirst, firstLeaves) = flattenNode nextIdx path opts first
            (idxAfterRest, restLeaves) = flattenNodes idxAfterFirst path opts rest
        in (idxAfterRest, firstLeaves ++ restLeaves)

-- Flatten a single node.
--
-- A text sequence becomes one leaf that takes the text options of its
-- parent box and the path collected so far, without consuming an index.
-- An inline box consumes the current index, is put at the front of the path,
-- and its children are flattened using the box's own text options.
flattenNode
    :: Int
    -> BoxPath d
    -> TextOptions
    -> InnerNode t d
    -> (Int, [Leaf t d])
flattenNode nextIdx path opts node = case node of
    TextSequence userData txt ->
        (nextIdx, [TextLeaf userData txt opts path])
    InlineBox userData (Box children innerOpts) boxOpts ->
        let direction = textDirection innerOpts
            resolved = ResolvedBox userData nextIdx boxOpts direction
        in flattenNodes (nextIdx + 1) (resolved : path) innerOpts children

-- | Concatenate all pieces of text from a `Text` tree, plus an optional prefix
-- and suffix, into a contiguous `Text` and build an `Int` tree with
-- corresponding byte lengths.
--
-- Using a contiguous `Text` lets HarfBuzz take neighbouring characters into
-- consideration when making shaping decisions, so that for example kerning
-- does not stop working on node boundaries.
glue :: Text -> RootNode Text d -> Text -> (Text, Int, RootNode Int d)
glue prefix root suffix =
    let -- The lazy text is assembled back to front, starting from the suffix.
        (afterPrefix, lengthTree) = glueRoot (Lazy.chunk suffix Lazy.empty) root
        wholeText = Lazy.toStrict (Lazy.chunk prefix afterPrefix)
    in -- The middle component is the length of the prefix, i.e. the offset
       -- at which the text of the tree begins within the combined text.
       (wholeText, lengthWord8 prefix, lengthTree)

-- Glue the contents of the root box in front of the given trailing text.
glueRoot :: Lazy.Text -> RootNode Text d -> (Lazy.Text, RootNode Int d)
glueRoot trailing (RootBox box) =
    let (combined, box') = glueBox trailing box
    in (combined, RootBox box')

-- Glue the children of a box in front of the given trailing text,
-- keeping the text options of the box as they are.
glueBox :: Lazy.Text -> Box Text d -> (Lazy.Text, Box Int d)
glueBox trailing (Box children opts) =
    let (combined, children') = glueNodes trailing children
    in (combined, Box children' opts)

-- Glue a list of sibling nodes in front of the given trailing text.
--
-- The list is folded from the right, so that each node is prepended to the
-- text already produced by the nodes that follow it.
glueNodes :: Lazy.Text -> [InnerNode Text d] -> (Lazy.Text, [InnerNode Int d])
glueNodes trailing nodes = foldr prepend (trailing, []) nodes
  where
    prepend node rest =
        let (following, laterNodes) = rest
            (combined, node') = glueNode following node
        in (combined, node' : laterNodes)

-- Glue a single node in front of the given trailing text.
--
-- A text sequence contributes its text as a new chunk and is replaced by
-- a node holding the length of that text as reported by `lengthWord8`.
-- An inline box keeps its user data and box options; only its contents
-- are converted.
glueNode :: Lazy.Text -> InnerNode Text d -> (Lazy.Text, InnerNode Int d)
glueNode following node = case node of
    TextSequence userData txt ->
        (Lazy.chunk txt following, TextSequence userData (lengthWord8 txt))
    InlineBox userData box boxOpts ->
        let (combined, box') = glueBox following box
        in (combined, InlineBox userData box' boxOpts)