From 2462c039c364112d9015b036fe9217368d7bf2a5 Mon Sep 17 00:00:00 2001 From: Jaro Date: Sat, 11 Mar 2023 14:33:04 +0100 Subject: [PATCH] Refactor line breaking. --- .../Text/ParagraphLayout/Internal/Plain.hs | 90 ++-------- .../ParagraphLayout/Internal/TextContainer.hs | 45 ++--- .../Internal/TextContainerSpec.hs | 164 +++++++++--------- 3 files changed, 126 insertions(+), 173 deletions(-) diff --git a/src/Data/Text/ParagraphLayout/Internal/Plain.hs b/src/Data/Text/ParagraphLayout/Internal/Plain.hs index 86e0cdd..ad09ba1 100644 --- a/src/Data/Text/ParagraphLayout/Internal/Plain.hs +++ b/src/Data/Text/ParagraphLayout/Internal/Plain.hs @@ -24,7 +24,7 @@ import Data.Text.Glyphize ,fontExtentsForDir ,shape ) -import Data.Text.ICU (LocaleName(Locale), breakCharacter, breakLine) +import Data.Text.ICU (Breaker, LocaleName(Locale), breakCharacter, breakLine) import qualified Data.Text.ICU as BreakStatus (Line) import Data.Text.Internal (Text(Text)) import qualified Data.Text.Lazy as Lazy @@ -162,9 +162,9 @@ layoutAndWrapRunsH maxWidth runs = NonEmpty.head $ validLayouts layout runs1 = layoutRunsH $ trimTextsEnd isEndSpace runs1 splits = noSplit :| (lSplits ++ cSplits) noSplit = (runs, []) - lSplits = filter hasContent $ lineSplits runs - cSplits = filter hasContent $ characterSplits runs - hasContent = not . null . fst + -- TODO: Use Soft/Hard line break distinction. + lSplits = splitTextsBy (map fst . runLineBreaks) runs + cSplits = splitTextsBy (map fst . runCharacterBreaks) runs -- | The suffix remaining after removing the longest prefix of the list for -- which the predicate holds, except always including at least the last element @@ -185,42 +185,6 @@ layoutRunsH runs = map layoutRunH runs totalAdvances :: [WithSpan PF.ProtoFragment] -> Int32 totalAdvances pfs = sum $ map (\(WithSpan _ pf) -> PF.advance pf) pfs --- | Recursive function for finding all possible ways to split a list of runs --- into two on a valid line-breaking boundary, including the start of the first --- run and excluding the end of the last run. --- --- The results in the form (prefix, suffix) will be ordered from the longest --- prefix to shortest. -lineSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])] -lineSplits xs = lineSplits' [] (reverse xs) - -lineSplits' :: [WithSpan Run] -> [WithSpan Run] -> - [([WithSpan Run], [WithSpan Run])] -lineSplits' _ [] = [] -lineSplits' closed (x:xs) = splits ++ lineSplits' (x:closed) xs - where - splits = map mapFunc $ runLineSplits x - mapFunc ((x1, x2), _) = - (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed) - --- | Recursive function for finding all possible ways to split a list of runs --- into two on a character boundary, including the start of the first run and --- excluding the end of the last run. --- --- The results in the form (prefix, suffix) will be ordered from the longest --- prefix to shortest. -characterSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])] -characterSplits xs = characterSplits' [] (reverse xs) - -characterSplits' :: [WithSpan Run] -> [WithSpan Run] -> - [([WithSpan Run], [WithSpan Run])] -characterSplits' _ [] = [] -characterSplits' closed (x:xs) = splits ++ characterSplits' (x:closed) xs - where - splits = map mapFunc $ runCharacterSplits x - mapFunc ((x1, x2), _) = - (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed) - -- | Calculate layout for the given horizontal run and attach extra information. layoutRunH :: WithSpan Run -> WithSpan PF.ProtoFragment layoutRunH (WithSpan rs run) = WithSpan rs pf @@ -257,8 +221,8 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do <*> ZipList sStarts <*> ZipList sLengths let lang = spanLanguage $ spanOptions s - let lBreaks = paragraphLineBreaks p pEnd lang - let cBreaks = paragraphCharacterBreaks p pEnd lang + let lBreaks = paragraphBreaks breakLine p pEnd lang + let cBreaks = paragraphBreaks breakCharacter p pEnd lang return RS.ResolvedSpan { RS.spanIndex = i , RS.spanOffsetInParagraph = sStart - pStart @@ -271,41 +235,25 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks } -paragraphLineBreaks :: Paragraph -> Int -> String -> [(Int, BreakStatus.Line)] -paragraphLineBreaks (Paragraph arr off _ _) end lang = - breaksDesc (breakLine (localeFromLanguage lang)) paragraphText +paragraphBreaks :: (LocaleName -> Breaker a) -> Paragraph -> Int -> String -> + [(Int, a)] +paragraphBreaks breakFunc (Paragraph arr off _ _) end lang = + breaksDesc (breakFunc (localeFromLanguage lang)) paragraphText where paragraphText = Text arr off (end - off) -paragraphCharacterBreaks :: Paragraph -> Int -> String -> [(Int, ())] -paragraphCharacterBreaks (Paragraph arr off _ _) end lang = - breaksDesc (breakCharacter (localeFromLanguage lang)) paragraphText - where - paragraphText = Text arr off (end - off) - --- | Split the given run at every valid line break position. -runLineSplits :: WithSpan Run -> - [((WithSpan Run, WithSpan Run), BreakStatus.Line)] -runLineSplits r = map split $ runLineBreaks r - where - split (i, status) = (splitTextAt8 i r, status) - runLineBreaks :: WithSpan Run -> [(Int, BreakStatus.Line)] -runLineBreaks (WithSpan rs run) = dropWhile (not . valid) $ - subOffsetsDesc (runOffsetInSpan run) $ RS.spanLineBreaks rs - where - valid (off, _) = off < runLength - runLength = lengthWord8 $ getText run - --- | Split the given run before every character. -runCharacterSplits :: WithSpan Run -> [((WithSpan Run, WithSpan Run), ())] -runCharacterSplits r = map split $ runCharacterBreaks r - where - split (i, ()) = (splitTextAt8 i r, ()) +runLineBreaks (WithSpan rs run) = + runBreaksFromSpan run $ RS.spanLineBreaks rs runCharacterBreaks :: WithSpan Run -> [(Int, ())] -runCharacterBreaks (WithSpan rs run) = dropWhile (not . valid) $ - subOffsetsDesc (runOffsetInSpan run) $ RS.spanCharacterBreaks rs +runCharacterBreaks (WithSpan rs run) = + runBreaksFromSpan run $ RS.spanCharacterBreaks rs + +-- | Constrain span breaks to a selected run and adjust offsets. +runBreaksFromSpan :: Run -> [(Int, a)] -> [(Int, a)] +runBreaksFromSpan run spanBreaks = + dropWhile (not . valid) $ subOffsetsDesc (runOffsetInSpan run) spanBreaks where valid (off, _) = off < runLength runLength = lengthWord8 $ getText run diff --git a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs index 5ae4299..2cc4737 100644 --- a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs +++ b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs @@ -1,11 +1,10 @@ module Data.Text.ParagraphLayout.Internal.TextContainer (SeparableTextContainer ,TextContainer - ,collapse ,dropWhileEnd ,getText ,splitTextAt8 - ,splitTextsAt8 + ,splitTextsBy ,trimTextsEnd ) where @@ -13,7 +12,7 @@ where import Data.List.NonEmpty (NonEmpty((:|))) import Data.Text (Text) import qualified Data.Text as Text -import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8) +import Data.Text.Foreign (dropWord8, takeWord8) -- | Class of data types containing `Text` that can be accessed. class TextContainer a where @@ -46,31 +45,35 @@ instance SeparableTextContainer Text where dropWhileEnd = Text.dropWhileEnd -- | Treat a list of text containers as a contiguous sequence, --- and make a split at the given number of `Data.Word.Word8` from the beginning --- of this sequence. +-- and find all possible ways to split them into two non-empty lists, +-- using the given function to find valit split offsets in `Data.Word.Word8` +-- units from the beginning of each container. -- --- If @n@ falls on a container boundary, the total number of output containers --- will equal the number of input containers; otherwise, it will be one larger. -splitTextsAt8 :: SeparableTextContainer a => Int -> [a] -> ([a], [a]) -splitTextsAt8 n rs = (pre, post) +-- The results in the form (prefix, suffix) will be ordered from the longest +-- prefix to shortest. +splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])] +splitTextsBy breakFunc tcs = + filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs) where - pre = reverse rpre - (rpre, post) = splitTextsAt8' n [] rs + notEmpty (prefix, suffix) = not (null prefix || null suffix) -splitTextsAt8' :: SeparableTextContainer a => Int -> [a] -> [a] -> ([a], [a]) -splitTextsAt8' _ rpre [] = (rpre, []) -splitTextsAt8' n rpre (r:rs) - | n <= 0 = (rpre, r:rs) - | n >= l = splitTextsAt8' (n - l) (r:rpre) (rs) - | otherwise = let (r1, r2) = splitTextAt8 n r in (r1:rpre, r2:rs) +splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] -> + [([a], [a])] +splitTextsBy' _ _ [] = [] +splitTextsBy' breakFunc closed (tc:tcs) = + fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs where - l = lengthWord8 $ getText r + fullSplits = map mergeWithRest tcSplits + mergeWithRest (x1, x2) = + (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed) + tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets + tcBreakOffsets = breakFunc tc -- | If the first container in the list is empty, remove it. collapse :: SeparableTextContainer a => NonEmpty a -> [a] -collapse (x :| xs) - | Text.null (getText x) = xs - | otherwise = x:xs +collapse (tc :| tcs) + | Text.null (getText tc) = tcs + | otherwise = tc:tcs -- | Treat a list of text containers as a contiguous sequence, -- and remove a suffix of characters that match the given predicate. diff --git a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs index 6d3fecc..b7a852a 100644 --- a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs +++ b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs @@ -1,96 +1,98 @@ module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where import Data.Text (Text, empty, pack) -import Data.Text.Glyphize (Direction(..)) +import Data.Text.Foreign (lengthWord8) import Test.Hspec -import Data.Text.ParagraphLayout.Internal.Run import Data.Text.ParagraphLayout.Internal.TextContainer -isSpace :: Char -> Bool -isSpace = (==' ') +data ExampleContainer = Contain { cText :: Text, cOffset :: Int } + deriving (Show, Eq) + +contain :: String -> Int -> ExampleContainer +contain s o = Contain (pack s) o + +instance TextContainer ExampleContainer where + getText = cText + +instance SeparableTextContainer ExampleContainer where + splitTextAt8 n (Contain t o) = (Contain t1 o1, Contain t2 o2) + where + (t1, t2) = splitTextAt8 n t + o1 = o + o2 = o + lengthWord8 t1 + dropWhileEnd p (Contain t o) = Contain (dropWhileEnd p t) o -inputRuns :: [Run] -inputRuns = - [ Run - -- TODO: We might want both parentheses in the same run. - { runOffsetInSpan = 0 - , runText = pack "Vikipedija (" - , runDirection = Just DirLTR - , runScript = Just "Latn" - } - , Run - { runOffsetInSpan = 12 - , runText = pack "Википедија)" - , runDirection = Just DirLTR - , runScript = Just "Cyrl" - } +exampleContainers :: [ExampleContainer] +exampleContainers = [c1, c2] + where + (c1, c2) = splitTextAt8 11 $ contain "Vikipedija (Википедија)" 10 + +exampleBreaks :: [Int] +exampleBreaks = + [ + -- Out of bounds. Should not generate any splits. + 999, 50, + -- End of last text. Should not generate a split. + 43, + -- Word and syllable bounds in the second text, + -- similar to hyphenation rules. Each should generate a corresponding split. + 38, 34, 30, 26, + -- The exact edge between the two texts. + -- Should generate a split, but not any empty containers. + 21, + -- Word and syllable bounds in the first text. + -- Each should generate a corresponding split. + 18, 16, 14, 12, + -- Start of first text. Should not generate a split. + 10, + -- Out of bounds. Should not generate any splits. + 5, 0, -1 ] +exampleBreakPoints :: ExampleContainer -> [Int] +exampleBreakPoints c = + dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks + where + l = lengthWord8 $ cText c + d = cOffset c + +isSpace :: Char -> Bool +isSpace = (==' ') + spec :: Spec spec = do - describe "splitTextsAt8" $ do - it "negative value splits at beginning of first run" $ do - splitTextsAt8 (-1) inputRuns `shouldBe` ([], inputRuns) - it "zero splits at beginning of first run" $ do - splitTextsAt8 0 inputRuns `shouldBe` ([], inputRuns) - it "splits in first run" $ do - splitTextsAt8 11 inputRuns `shouldBe` - ( - [ Run - { runOffsetInSpan = 0 - , runText = pack "Vikipedija " - , runDirection = Just DirLTR - , runScript = Just "Latn" - } - ] - , - [ Run - { runOffsetInSpan = 11 - , runText = pack "(" - , runDirection = Just DirLTR - , runScript = Just "Latn" - } - , Run - { runOffsetInSpan = 12 - , runText = pack "Википедија)" - , runDirection = Just DirLTR - , runScript = Just "Cyrl" - } - ] - ) - it "split at run edges does not generate extra run" $ do - splitTextsAt8 12 inputRuns `shouldBe` - (take 1 inputRuns, drop 1 inputRuns) - it "splits in second run" $ do - splitTextsAt8 20 inputRuns `shouldBe` - ( - [ Run - { runOffsetInSpan = 0 - , runText = pack "Vikipedija (" - , runDirection = Just DirLTR - , runScript = Just "Latn" - } - , Run - { runOffsetInSpan = 12 - , runText = pack "Вики" - , runDirection = Just DirLTR - , runScript = Just "Cyrl" - } - ] - , - [ Run - { runOffsetInSpan = 20 - , runText = pack "педија)" - , runDirection = Just DirLTR - , runScript = Just "Cyrl" - } - ] - ) - it "split at end does not generate extra run" $ do - splitTextsAt8 33 inputRuns `shouldBe` (inputRuns, []) - it "large value splits at end of last run" $ do - splitTextsAt8 999 inputRuns `shouldBe` (inputRuns, []) + describe "splitTextsBy" $ do + it "splits example text containers" $ do + splitTextsBy exampleBreakPoints exampleContainers `shouldBe` + [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ] + , [ contain "ја)" 38 ] + ) + , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ] + , [ contain "дија)" 34 ] + ) + , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ] + , [ contain "педија)" 30 ] + ) + , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ] + , [ contain "кипедија)" 26 ] + ) + , ( [ contain "Vikipedija " 10 ] + , [ contain "(Википедија)" 21 ] + ) + , ( [ contain "Vikipedi" 10 ] + , [ contain "ja " 18, contain "(Википедија)" 21 ] + ) + , ( [ contain "Vikipe" 10 ] + , [ contain "dija " 16, contain "(Википедија)" 21 ] + ) + , ( [ contain "Viki" 10 ] + , [ contain "pedija " 14, contain "(Википедија)" 21 ] + ) + , ( [ contain "Vi" 10 ] + , [ contain "kipedija " 12, contain "(Википедија)" 21 ] + ) + ] describe "trimTextsEnd" $ do describe "isSpace" $ do it "does nothing on an empty list" $ do -- 2.30.2