~jaro/balkon

ref: 88a5b6c714c5543b01d63d836ad403b5755d8d76 balkon/src/Data/Text/ParagraphLayout/Internal/Plain/Paragraph.hs -rw-r--r-- 3.4 KiB
88a5b6c7Jaro Test shaping across intra-word breaks. 1 year, 5 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
module Data.Text.ParagraphLayout.Internal.Plain.Paragraph
    ( Paragraph (..)
    , paragraphSpanBounds
    , paragraphSpanTexts
    , paragraphText
    )
where

import Data.List.NonEmpty (NonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text.Array (Array)
import Data.Text.Internal (Text (Text))

import Data.Text.ParagraphLayout.Internal.ParagraphOptions
import Data.Text.ParagraphLayout.Internal.Span

-- | Text to be laid out as a single paragraph.
--
-- May be divided into any number of neighbouring spans, each of which will
-- be represented as a separate `SpanLayout` in the resulting layout.
--
-- The input text must be encoded as UTF-8 in a contiguous byte array.
--
-- You may need to use "Data.Text.Internal" in order to determine the byte
-- array and the necessary offsets to construct the paragraph without copying
-- data.
--
-- For simple use cases, it may be sufficient to construct paragraphs using
-- [ParagraphConstruction]("Data.Text.ParagraphLayout.ParagraphConstruction").
data Paragraph d = Paragraph

    Array
    -- ^ A byte array containing the whole text to be laid out, in UTF-8.
    --
    -- This array will be passed to "Data.Text.Glyphize", which passes it to
    -- [@hb_buffer_add_utf8()@]
    -- (https://harfbuzz.github.io/harfbuzz-hb-buffer.html#hb-buffer-add-utf8).
    --
    -- In the output, `Data.Text.Glyphize.cluster` will be a byte offset of
    -- the corresponding input character from this array.

    Int
    -- ^ Byte offset of the first span from the start of the byte array.
    -- Any characters preceding this offset will not be shaped, but may still
    -- be used to influence the shape of neighbouring characters.

    [Span d]
    -- ^ Parts of the text to be laid out, in logical order.
    -- The initial offset plus total length of all spans must not exceed
    -- the bounds of the byte array.
    -- Any characters following the last span will not be shaped, but may still
    -- be used to influence the shape of neighbouring characters.

    ParagraphOptions
    -- ^ Options applying to the paragraph as a whole.

-- | Calculate the offsets into the `Paragraph`'s underlying `Data.Text.Array`
-- where each span starts and ends, in ascending order. The resulting list
-- will be one larger than the list of input spans.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanBounds :: Paragraph d -> NonEmpty Int
paragraphSpanBounds (Paragraph _ initialOffset spans _) =
    -- TODO: Consider adding checks for array bounds.
    NonEmpty.scanl (+) initialOffset (map spanLength spans)

-- | Turn each span of the input `Paragraph` into a `Text`.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphSpanTexts :: Paragraph d -> [Text]
paragraphSpanTexts p@(Paragraph arr _ _ _) = zipWith toText sStarts sEnds
    where
        toText start end = Text arr start (end - start)
        sStarts = NonEmpty.init sBounds
        sEnds = NonEmpty.tail sBounds
        sBounds = paragraphSpanBounds p

-- | Turn all spans of the input `Paragraph` into one combined `Text`.
--
-- You can use this function to verify that Balkón will slice the input text
-- correctly.
paragraphText :: Paragraph d -> Text
paragraphText p@(Paragraph arr _ _ _) = Text arr start (end - start)
    where
        start = NonEmpty.head sBounds
        end = NonEmpty.last sBounds
        sBounds = paragraphSpanBounds p