~jaro/balkon: src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs

module Data.Text.ParagraphLayout.Internal.Rich.Paragraph
    ( Paragraph (..)
    , constructParagraph
    , paragraphSpanBounds
    , paragraphSpanTexts
    , paragraphText
    )
where

import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text.Array (Array)
import Data.Text.Internal (Text (Text))

import Data.Text.ParagraphLayout.Internal.ParagraphOptions
import Data.Text.ParagraphLayout.Internal.Tree

-- | Text to be laid out as a single paragraph.
--
-- May be divided into a hierarchy of boxes and spans.
--
-- The input text must be encoded as UTF-8 in a contiguous byte array.
--
-- You may need to use "Data.Text.Internal" in order to determine the byte
-- array and the necessary offsets to construct the paragraph without copying
-- data.
data Paragraph d = Paragraph

    Array
    -- ^ A byte array containing the whole text to be laid out, in UTF-8.
    --
    -- This array will be passed to "Data.Text.Glyphize", which passes it to
    -- [@hb_buffer_add_utf8()@]
    -- (https://harfbuzz.github.io/harfbuzz-hb-buffer.html#hb-buffer-add-utf8).
    --
    -- In the output, `Data.Text.Glyphize.cluster` will be a byte offset of
    -- the corresponding input character from this array.

    Int
    -- ^ Byte offset of the first text node from the start of the byte array.
    -- Any characters preceding this offset will not be shaped, but may still
    -- be used to influence the shape of neighbouring characters.

    (RootNode Int d)
    -- ^ Parts of the text to be laid out, represented as a tree.
    -- The in-order walk of this tree corresponds to the logical order
    -- of the text.

    ParagraphOptions
    -- ^ Options applying to the paragraph as a whole.

constructParagraph :: Text -> RootNode Text d -> Text -> ParagraphOptions ->
    Paragraph d
constructParagraph prefix root suffix = Paragraph arr afterPrefix root'
    where
        (Text arr beforePrefix _) = txt
        afterPrefix = beforePrefix + prefixLen
        (txt, prefixLen, root') = glue prefix root suffix

-- | Calculate the offsets into the `Paragraph`'s underlying `Data.Text.Array`
-- where each text node starts and ends, in ascending order. The resulting list
-- will be one larger than the number of text nodes in the input.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanBounds :: Paragraph d -> NonEmpty Int
paragraphSpanBounds (Paragraph _ initialOffset root _) =
    -- TODO: Consider adding checks for array bounds.
    NonEmpty.scanl (+) initialOffset $ map len $ flatten root
    where
        len (TextLeaf _ l _ _) = l

-- | Turn each text node from the input `Paragraph` into a `Text`.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanTexts :: Paragraph d -> [Text]
paragraphSpanTexts p@(Paragraph arr _ _ _) = zipWith toText sStarts sEnds
    where
        toText start end = Text arr start (end - start)
        sStarts = NonEmpty.init sBounds
        sEnds = NonEmpty.tail sBounds
        sBounds = paragraphSpanBounds p

-- | Turn all text nodes from the input `Paragraph` into one combined `Text`.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphText :: Paragraph d -> Text
paragraphText p@(Paragraph arr _ _ _) = Text arr start (end - start)
    where
        start = NonEmpty.head sBounds
        end = NonEmpty.last sBounds
        sBounds = paragraphSpanBounds p