~jaro/balkon

764fa6ebba367c344dae178816fc9566c203a17c — Jaro 1 year, 5 months ago b464ad9
Internally support text breaks in ascending order.

Intended as a workaround for `breaksRight` not including the status of
the end-of-text break.
1 files changed, 22 insertions(+), 0 deletions(-)

M src/Data/Text/ParagraphLayout/Internal/Break.hs
M src/Data/Text/ParagraphLayout/Internal/Break.hs => src/Data/Text/ParagraphLayout/Internal/Break.hs +22 -0
@@ 8,6 8,7 @@
module Data.Text.ParagraphLayout.Internal.Break
    ( LineBreak (..)
    , locale
    , breaksAsc
    , breaksDesc
    , subOffsetsDesc
    )


@@ 19,7 20,9 @@ import Data.Text.ICU
    ( Break
    , Breaker
    , LocaleName (Locale)
    , breaks
    , breaksRight
    , brkBreak
    , brkPrefix
    , brkStatus
    )


@@ 49,6 52,16 @@ locale lang lb = Locale $ (clean lang) ++ (lbKeyword lb)
        -- This filter is here just to stop syntactically incorrect input.
        clean = takeWhile (`elem` ['A' .. 'Z'] ++ ['a' .. 'z'] ++ "_-")

-- | Enumerate every break in the given text, ordered by ascending offset,
-- pairing each offset with the status of the break where one applies.
--
-- The start of the text (offset 0) is not reported.
--
-- The end of the text (offset equal to the text length) is reported
-- as the final item of the list.
breaksAsc :: Breaker a -> Text -> [(Int, a)]
breaksAsc breaker input =
    [ brkEndOffsetStatus brk | brk <- breaks breaker input ]

-- | List of all breaks in the given text, with offsets in descending order,
-- including the status of the break if applicable.
--


@@ 61,12 74,21 @@ breaksDesc breaker input = map brkStartOffsetStatus $ breaksRight breaker input
-- | Pair the start offset of a break (see 'brkStartOffset') with the status
-- that ICU attached to that break.
brkStartOffsetStatus :: Break a -> (Int, a)
brkStartOffsetStatus brk = (offset, status)
    where
        offset = brkStartOffset brk
        status = brkStatus brk

-- | Pair the end offset of a break (see 'brkEndOffset') with the status
-- that ICU attached to that break.
brkEndOffsetStatus :: Break a -> (Int, a)
brkEndOffsetStatus brk = (offset, status)
    where
        offset = brkEndOffset brk
        status = brkStatus brk

-- | ICU reports each "break" as a slice of text delimited by two boundaries.
-- This yields the distance from the start of the text to the nearer of those
-- two boundaries, computed as the 'lengthWord8' of the break's prefix.
brkStartOffset :: Break a -> Int
brkStartOffset = lengthWord8 . brkPrefix

-- | ICU reports each "break" as a slice of text delimited by two boundaries.
-- This yields the distance from the start of the text to the farther of those
-- two boundaries, computed as the 'lengthWord8' of the break's prefix plus
-- the 'lengthWord8' of the break slice itself.
brkEndOffset :: Break a -> Int
brkEndOffset brk = prefixLength + breakLength
    where
        prefixLength = lengthWord8 (brkPrefix brk)
        breakLength = lengthWord8 (brkBreak brk)

-- | Given a list of offsets into a text in descending order, produce a list of
-- corresponding offsets into a slice of the text starting at a given offset.
subOffsetsDesc :: Int -> [(Int, a)] -> [(Int, a)]