From b7db6d117bdaa5459bd162331d9ab2d887b365f6 Mon Sep 17 00:00:00 2001
From: Jaro
Date: Thu, 23 Mar 2023 13:42:23 +0100
Subject: [PATCH] Remove collapsible spaces at beginning of lines.

---
 CHANGELOG.md                                  |  2 +
 .../Text/ParagraphLayout/Internal/Plain.hs    | 12 +++++-
 .../ParagraphLayout/Internal/ResolvedSpan.hs  |  1 +
 src/Data/Text/ParagraphLayout/Internal/Run.hs |  8 ++++
 .../ParagraphLayout/Internal/TextContainer.hs | 40 ++++++++++++++++++-
 .../Internal/TextContainerSpec.hs             |  6 +++
 6 files changed, 65 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6656515..9ceda19 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,8 @@
 
 * Added support for forced (hard) line breaks in the input text.
 
+* Now also trimming white space at the beginning of lines.
+
 * Internally, language tags will be cut at the first invalid character before
   being passed to ICU.
 
diff --git a/src/Data/Text/ParagraphLayout/Internal/Plain.hs b/src/Data/Text/ParagraphLayout/Internal/Plain.hs
index 3488e1d..28cf0a9 100644
--- a/src/Data/Text/ParagraphLayout/Internal/Plain.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/Plain.hs
@@ -179,7 +179,10 @@ hardSplit :: [WithSpan Run] -> ([WithSpan Run], [WithSpan Run])
 hardSplit runs = trimFst $ NonEmpty.last $ splits
     where
         trimFst (runs1, runs2) = (trim runs1, runs2)
-        trim = trimTextsEndPreserve isEndSpace . trimTextsEndPreserve isNewline
+        trim
+            = trimTextsStartPreserve isStartSpace
+            . trimTextsEndPreserve isEndSpace
+            . trimTextsEndPreserve isNewline
         -- TODO: Consider optimising.
         -- We do not need to look for any line breaks further than the
         -- shortest hard break.
@@ -205,7 +208,7 @@ softSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])]
 softSplits runs = map trimFst splits
     where
         trimFst (runs1, runs2) = (trim runs1, runs2)
-        trim = trimTextsEnd isEndSpace
+        trim = trimTextsStart isStartSpace . trimTextsEnd isEndSpace
         splits = lSplits ++ cSplits
         lSplits = splitTextsBy (map fst . runLineBreaks) runs
         -- TODO: Consider optimising.
@@ -315,6 +318,11 @@ runBreaksFromSpan run spanBreaks =
         valid (off, _) = off < runLength
         runLength = lengthWord8 $ getText run
 
+-- | Predicate for characters that can be potentially removed from the
+-- beginning of a line according to the CSS Text Module.
+isStartSpace :: Char -> Bool
+isStartSpace c = c `elem` [' ', '\t']
+
 -- | Predicate for characters that can be potentially removed from the end of
 -- a line according to the CSS Text Module.
 isEndSpace :: Char -> Bool
diff --git a/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs b/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs
index 48531a8..74d07d4 100644
--- a/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs
@@ -45,6 +45,7 @@ instance TextContainer a => TextContainer (WithSpan a) where
 instance SeparableTextContainer a => SeparableTextContainer (WithSpan a) where
     splitTextAt8 n (WithSpan rs c) = (WithSpan rs c1, WithSpan rs c2)
         where (c1, c2) = splitTextAt8 n c
+    dropWhileStart p (WithSpan rs c) = WithSpan rs (dropWhileStart p c)
     dropWhileEnd p (WithSpan rs c) = WithSpan rs (dropWhileEnd p c)
 
 splitBySpanIndex :: [WithSpan a] -> [[a]]
diff --git a/src/Data/Text/ParagraphLayout/Internal/Run.hs b/src/Data/Text/ParagraphLayout/Internal/Run.hs
index 4448cf1..106ea91 100644
--- a/src/Data/Text/ParagraphLayout/Internal/Run.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/Run.hs
@@ -40,6 +40,14 @@ instance SeparableTextContainer Run where
             t1 = takeWord8 (fromIntegral n) t
             t2 = dropWord8 (fromIntegral n) t
             t = getText r
+    dropWhileStart p r = r { runText = t', runOffsetInSpan = o' }
+        where
+            t = runText r
+            t' = Text.dropWhile p t
+            l = lengthWord8 t
+            l' = lengthWord8 t'
+            o = runOffsetInSpan r
+            o' = o + l - l'
     dropWhileEnd p r = r { runText = Text.dropWhileEnd p (runText r) }
 
 type ProtoRun = (Zipper, Maybe Direction, ScriptCode)
diff --git a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
index d0ef485..e1ec600 100644
--- a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
@@ -2,11 +2,14 @@ module Data.Text.ParagraphLayout.Internal.TextContainer
     (SeparableTextContainer
     ,TextContainer
     ,dropWhileEnd
+    ,dropWhileStart
     ,getText
     ,splitTextAt8
     ,splitTextsBy
     ,trimTextsEnd
     ,trimTextsEndPreserve
+    ,trimTextsStart
+    ,trimTextsStartPreserve
     )
 where
 
@@ -33,8 +36,12 @@ class TextContainer a => SeparableTextContainer a where
     -- constraints the instance requires.
     splitTextAt8 :: Int -> a -> (a, a)
 
-    -- | Return the prefix remaining after dropping characters that satisfy
-    -- the given predicate from the end of the given `SeparableTextContainer`.
+    -- | Return the suffix remaining after dropping characters that satisfy the
+    -- given predicate from the beginning of the given `SeparableTextContainer`.
+    dropWhileStart :: (Char -> Bool) -> a -> a
+
+    -- | Return the prefix remaining after dropping characters that satisfy the
+    -- given predicate from the end of the given `SeparableTextContainer`.
     dropWhileEnd :: (Char -> Bool) -> a -> a
 
 -- | As a trivial instance, each `Text` can be split directly.
@@ -43,6 +50,7 @@ instance SeparableTextContainer Text where
         where
             t1 = takeWord8 (fromIntegral n) t
             t2 = dropWord8 (fromIntegral n) t
+    dropWhileStart = Text.dropWhile
     dropWhileEnd = Text.dropWhileEnd
 
 -- | Treat a list of text containers as a contiguous sequence,
@@ -76,6 +84,26 @@ collapse (tc :| tcs)
     | Text.null (getText tc) = tcs
     | otherwise = tc:tcs
 
+-- | Treat a list of text containers as a contiguous sequence,
+-- and remove a prefix of characters that match the given predicate.
+--
+-- Empty text containers are removed from the output, so the result may
+-- potentially be an empty list.
+trimTextsStart :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
+trimTextsStart p tcs = trimTextsStart' p tcs
+
+-- | Treat a list of text containers as a contiguous sequence,
+-- and remove a prefix of characters that match the given predicate.
+--
+-- Empty text containers are removed from the output except the first one,
+-- which is instead truncated to zero length.
+trimTextsStartPreserve ::
+    SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
+trimTextsStartPreserve _ [] = []
+trimTextsStartPreserve p ins@(in1:_) = case trimTextsStart' p ins of
+    [] -> [truncateText in1]
+    out -> out
+
 -- | Treat a list of text containers as a contiguous sequence,
 -- and remove a suffix of characters that match the given predicate.
 --
@@ -96,6 +124,14 @@ trimTextsEndPreserve p ins@(in1:_) = case trimTextsEnd' p (reverse ins) of
     [] -> [truncateText in1]
     out -> out
 
+trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
+trimTextsStart' _ [] = []
+trimTextsStart' p (tc:tcs)
+    | Text.null (getText trimmed) = trimTextsStart' p tcs
+    | otherwise = trimmed:tcs
+    where
+        trimmed = dropWhileStart p tc
+
 trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
 trimTextsEnd' _ [] = []
 trimTextsEnd' p (tc:tcs)
diff --git a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
index b7a852a..9d1953d 100644
--- a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
+++ b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
@@ -21,6 +21,12 @@ instance SeparableTextContainer ExampleContainer where
             (t1, t2) = splitTextAt8 n t
             o1 = o
             o2 = o + lengthWord8 t1
+    dropWhileStart p (Contain t o) = Contain t' o'
+        where
+            l = lengthWord8 t
+            t' = dropWhileStart p t
+            l' = lengthWord8 t'
+            o' = o + l - l'
     dropWhileEnd p (Contain t o) = Contain (dropWhileEnd p t) o
 
 exampleContainers :: [ExampleContainer]
-- 
2.30.2