From 764fa6ebba367c344dae178816fc9566c203a17c Mon Sep 17 00:00:00 2001
From: Jaro
Date: Sun, 11 Jun 2023 05:38:10 +0200
Subject: [PATCH] Internally support text breaks in ascending order.

Intended as a workaround to `breaksRight` not including the status of
the end-of-text break.
---
 .../Text/ParagraphLayout/Internal/Break.hs | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/src/Data/Text/ParagraphLayout/Internal/Break.hs b/src/Data/Text/ParagraphLayout/Internal/Break.hs
index c03285d..49d9934 100644
--- a/src/Data/Text/ParagraphLayout/Internal/Break.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/Break.hs
@@ -8,6 +8,7 @@
 module Data.Text.ParagraphLayout.Internal.Break
     ( LineBreak (..)
     , locale
+    , breaksAsc
     , breaksDesc
     , subOffsetsDesc
     )
@@ -19,7 +20,9 @@ import Data.Text.ICU
     ( Break
     , Breaker
     , LocaleName (Locale)
+    , breaks
     , breaksRight
+    , brkBreak
     , brkPrefix
     , brkStatus
     )
@@ -49,6 +52,16 @@ locale lang lb = Locale $ (clean lang) ++ (lbKeyword lb)
         -- This filter is here just to stop syntactically incorrect input.
         clean = takeWhile (`elem` ['A' .. 'Z'] ++ ['a' .. 'z'] ++ "_-")
 
+-- | List of all breaks in the given text, with offsets in ascending order,
+-- including the status of the break if applicable.
+--
+-- Excludes the start of the text (with offset 0).
+--
+-- Includes the end of the text (with offset equal to the text length)
+-- as the last list item.
+breaksAsc :: Breaker a -> Text -> [(Int, a)]
+breaksAsc breaker input = map brkEndOffsetStatus $ breaks breaker input
+
 -- | List of all breaks in the given text, with offsets in descending order,
 -- including the status of the break if applicable.
 --
@@ -61,12 +74,21 @@ breaksDesc breaker input = map brkStartOffsetStatus $ breaksRight breaker input
 brkStartOffsetStatus :: Break a -> (Int, a)
 brkStartOffsetStatus brk = (brkStartOffset brk, brkStatus brk)
 
+brkEndOffsetStatus :: Break a -> (Int, a)
+brkEndOffsetStatus brk = (brkEndOffset brk, brkStatus brk)
+
 -- | The ICU library returns "breaks" as slices of text with two boundaries.
 -- This gives the smaller of the two distances from the start of the text
 -- to the boundaries of the break.
 brkStartOffset :: Break a -> Int
 brkStartOffset brk = lengthWord8 (brkPrefix brk)
 
+-- | The ICU library returns "breaks" as slices of text with two boundaries.
+-- This gives the larger of the two distances from the start of the text
+-- to the boundaries of the break.
+brkEndOffset :: Break a -> Int
+brkEndOffset brk = lengthWord8 (brkPrefix brk) + lengthWord8 (brkBreak brk)
+
 -- | Given a list of offsets into a text in descending order, produce a list of
 -- corresponding offsets into a slice of the text starting at a given offset.
 subOffsetsDesc :: Int -> [(Int, a)] -> [(Int, a)]
-- 
2.30.2