~jaro/balkon: src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs

module Data.Text.ParagraphLayout.Internal.Rich.Paragraph
    ( Paragraph (..)
    , constructParagraph
    , paragraphPrefix
    , paragraphSpanBounds
    , paragraphSpanTexts
    , paragraphText
    )
where

import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text.Array (Array)
import Data.Text.Internal (Text (Text))

import Data.Text.ParagraphLayout.Internal.ParagraphOptions
import Data.Text.ParagraphLayout.Internal.Tree

-- | Text to be laid out as a single paragraph.
--
-- May be divided into a hierarchy of boxes and spans.
--
-- The input text must be encoded as UTF-8 in a contiguous byte array.
--
-- You may need to use "Data.Text.Internal" in order to determine the byte
-- array and the necessary offsets to construct the paragraph without copying
-- data.
--
-- Alternatively, you can use `constructParagraph` with piecewise text.
--
-- The constructor fields are positional, in this order: the byte array, the
-- offset of the first text node, the tree of text nodes, and the options for
-- the whole paragraph.
data Paragraph d = Paragraph

    Array
    -- ^ A byte array containing the whole text to be laid out, in UTF-8.
    --
    -- This array will be passed to "Data.Text.Glyphize", which passes it to
    -- [@hb_buffer_add_utf8()@]
    -- (https://harfbuzz.github.io/harfbuzz-hb-buffer.html#hb-buffer-add-utf8).
    --
    -- In the output, `Data.Text.Glyphize.cluster` will be a byte offset of
    -- the corresponding input character from this array.

    Int
    -- ^ Byte offset of the first text node from the start of the byte array.
    -- Any characters preceding this offset will not be shaped, but may still
    -- be used to influence the shape of neighbouring characters.
    --
    -- `paragraphSpanBounds` uses this value as the first bound and derives
    -- every following bound by adding the lengths of the text nodes to it.

    (RootNode Int d)
    -- ^ Parts of the text to be laid out, represented as a tree.
    -- The in-order walk of this tree corresponds to the logical order
    -- of the text.
    --
    -- The `Int` stored in each text leaf is treated as the length of that
    -- leaf's text: `paragraphSpanBounds` sums these lengths, starting from
    -- the offset above, to find where each leaf starts and ends in the array.
    -- Consecutive leaves are therefore taken to be adjacent in the array.

    ParagraphOptions
    -- ^ Options applying to the paragraph as a whole.

-- | Build a `Paragraph` out of separate pieces of UTF-8 text: a tree of text
-- nodes, optionally surrounded by a prefix and a suffix.
--
-- All of the input text is concatenated into one contiguous byte array
-- internally, which means the data is copied.
--
-- To avoid the copy, use the `Paragraph` constructor directly instead.
constructParagraph
    :: Text
    -- ^ Optional prefix. It is not laid out, but may affect the output.
    -> RootNode Text d
    -- ^ The pieces of text to lay out, arranged as a tree.
    -> Text
    -- ^ Optional suffix. It is not laid out, but may affect the output.
    -> ParagraphOptions
    -- ^ Options that apply to the whole paragraph.
    -> Paragraph d
    -- ^ The resulting paragraph, ready to be given to
    -- `Data.Text.ParagraphLayout.Rich.layoutRich`.
constructParagraph prefix root suffix =
    -- The `ParagraphOptions` argument is not named here; it is applied
    -- directly as the last field of the constructor.
    Paragraph bytes (glueStart + prefixLen) offsetRoot
    where
        -- The glued text may itself start at a non-zero offset within its
        -- array, so the first text node begins one prefix length after that.
        (Text bytes glueStart _, prefixLen, offsetRoot) =
            glue prefix root suffix

-- | List, in ascending order, the offsets into the `Paragraph`'s underlying
-- `Data.Text.Array` at which its text nodes begin and end.
--
-- The first element is the start of the first text node; each further element
-- is the end of one text node (and the start of the next one, if any). The
-- result therefore has exactly one more element than there are text nodes.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanBounds :: Paragraph d -> NonEmpty Int
paragraphSpanBounds (Paragraph _ firstOffset tree _) =
    -- TODO: Consider adding checks for array bounds.
    NonEmpty.scanl advance firstOffset (flatten tree)
    where
        -- Move the running offset past one text leaf.
        advance offset (TextLeaf _ leafLen _ _) = offset + leafLen

-- | Produce one `Text` per text node of the input `Paragraph`, each being the
-- slice of the underlying array between that node's start and end bounds.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanTexts :: Paragraph d -> [Text]
paragraphSpanTexts p@(Paragraph arr _ _ _) =
    -- Pair every bound with its successor. `zipWith` stops at the shorter
    -- list, so the final bound is only ever used as an end offset.
    zipWith slice (NonEmpty.toList bounds) (NonEmpty.tail bounds)
    where
        bounds = paragraphSpanBounds p
        slice lo hi = Text arr lo (hi - lo)

-- | Produce a single `Text` spanning every text node of the input
-- `Paragraph`, from the start of the first node to the end of the last one.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphText :: Paragraph d -> Text
paragraphText p@(Paragraph arr _ _ _) =
    let bounds = paragraphSpanBounds p
        firstOffset = NonEmpty.head bounds
        lastOffset = NonEmpty.last bounds
    in Text arr firstOffset (lastOffset - firstOffset)

-- | Produce a single `Text` that begins at the start of the first text node
-- of the input `Paragraph` and stops at the given end offset.
--
-- The end offset is measured from the start of the underlying array, not from
-- the start of the first text node. No bounds checking is performed!
paragraphPrefix :: Paragraph d -> Int -> Text
paragraphPrefix p@(Paragraph arr _ _ _) end = Text arr textStart prefixLen
    where
        textStart = NonEmpty.head (paragraphSpanBounds p)
        prefixLen = end - textStart