From 92075f52e26577a9ba3c18ee0b8911214137021b Mon Sep 17 00:00:00 2001
From: Jaro <jaro@argonaut-constellation.org>
Date: Sun, 11 Jun 2023 21:14:19 +0200
Subject: [PATCH] Internally allow splitting texts at endpoints.

This is required in order to preserve a hard line break at the end of
the input.
---
 .../Text/ParagraphLayout/Internal/Layout.hs   |  11 +-
 .../ParagraphLayout/Internal/TextContainer.hs |   8 +-
 .../Internal/TextContainerSpec.hs             | 107 +++++++++++++-----
 3 files changed, 89 insertions(+), 37 deletions(-)

diff --git a/src/Data/Text/ParagraphLayout/Internal/Layout.hs b/src/Data/Text/ParagraphLayout/Internal/Layout.hs
index a1e5674..10b0e4b 100644
--- a/src/Data/Text/ParagraphLayout/Internal/Layout.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/Layout.hs
@@ -259,9 +259,10 @@ hardSplit runs = allowFstEmpty $ trimFst $ NonEmpty.last $ splits
         -- TODO: Consider optimising.
         --       We do not need to look for any line breaks further than the
         --       shortest hard break.
-        splits = noSplit :| map allowSndEmpty hSplits
+        splits = noSplit :| hSplits
         noSplit = (runs, [])
-        hSplits = -- from longest to shortest
+        hSplits = nonEmptyFsts $
+            -- from longest to shortest
             splitTextsBy (map fst . filter isHard . runLineBreaks) runs
         isHard (_, status) = status == BreakStatus.Hard
 
@@ -284,11 +285,13 @@ softSplits runs = map (allowSndEmpty . trimFst) splits
         trimFst (runs1, runs2) = (trim runs1, runs2)
         trim = trimTextsStart isStartSpace . trimTextsEnd isEndSpace
         splits = lSplits ++ cSplits
-        lSplits = splitTextsBy (map fst . runLineBreaks) runs
+        lSplits = nonEmptyPairs $
+            splitTextsBy (map fst . runLineBreaks) runs
         -- TODO: Consider optimising.
         --       We do not need to look for character breaks further than the
         --       shortest line break.
-        cSplits = splitTextsBy (map fst . runCharacterBreaks) runs
+        cSplits = nonEmptyPairs $
+            splitTextsBy (map fst . runCharacterBreaks) runs
 
 -- | The suffix remaining after removing the longest prefix of the list for
 -- which the predicate holds, except always including at least the last element
diff --git a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
index 42e5827..f926c9e 100644
--- a/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
+++ b/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
@@ -20,8 +20,6 @@ import Data.Text (Text)
 import qualified Data.Text as Text
 import Data.Text.Foreign (dropWord8, takeWord8)
 
-import Data.Text.ParagraphLayout.Internal.SplitList
-
 -- | Class of data types containing `Text` that can be accessed.
 class TextContainer a where
     -- | Extract a `Text` from its container.
@@ -58,16 +56,16 @@ instance SeparableTextContainer Text where
     dropWhileEnd = Text.dropWhileEnd
 
 -- | Treat a list of text containers as a contiguous sequence,
--- and find all possible ways to split them into two non-empty lists,
+-- and find all possible ways to split them into two lists,
 -- using the given function to find valid split offsets in `Data.Word.Word8`
 -- units from the beginning of each container.
 --
 -- The results in the form (prefix, suffix) will be ordered from the longest
 -- prefix to shortest.
 splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
-    (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
+    (a -> [Int]) -> f a -> [([a], [a])]
 splitTextsBy breakFunc tcs =
-    nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs
+    splitTextsBy' breakFunc [] $ reverse $ toList tcs
 
 splitTextsBy' :: SeparableTextContainer a =>
     (a -> [Int]) -> [a] -> [a] -> [([a], [a])]
diff --git a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
index a9555db..393c0ad 100644
--- a/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
+++ b/test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs
@@ -1,15 +1,11 @@
 module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where
 
-import qualified Data.List.NonEmpty
 import Data.Text (Text, empty, pack)
 import Data.Text.Foreign (lengthWord8)
 
 import Test.Hspec
 import Data.Text.ParagraphLayout.Internal.TextContainer
 
-ne :: [a] -> Data.List.NonEmpty.NonEmpty a
-ne = Data.List.NonEmpty.fromList
-
 data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
     deriving (Show, Eq)
 
@@ -43,7 +39,8 @@ exampleBreaks =
     [
     -- Out of bounds. Should not generate any splits.
     999, 50,
-    -- End of last text. Should not generate a split.
+    -- End of last text.
+    -- Should only generate a split for end-biased breaks.
     43,
     -- Word and syllable bounds in the second text,
     -- similar to hyphenation rules. Each should generate a corresponding split.
@@ -54,53 +51,107 @@ exampleBreaks =
     -- Word and syllable bounds in the first text.
     -- Each should generate a corresponding split.
     18, 16, 14, 12,
-    -- Start of first text. Should not generate a split.
+    -- Start of first text.
+    -- Should only generate a split for start-biased breaks.
     10,
     -- Out of bounds. Should not generate any splits.
     5, 0, -1
     ]
 
-exampleBreakPoints :: ExampleContainer -> [Int]
-exampleBreakPoints c =
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- including start of text but excluding end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+startBiasedBreakPoints :: ExampleContainer -> [Int]
+startBiasedBreakPoints c =
     dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
     where
         l = lengthWord8 $ cText c
         d = cOffset c
 
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- excluding start of text but including end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+endBiasedBreakPoints :: ExampleContainer -> [Int]
+endBiasedBreakPoints c =
+    dropWhile (> l) $ takeWhile (> 0) $ map (subtract d) $ exampleBreaks
+    where
+        l = lengthWord8 $ cText c
+        d = cOffset c
+
 isSpace :: Char -> Bool
 isSpace = (== ' ')
 
 spec :: Spec
 spec = do
     describe "splitTextsBy" $ do
-        it "splits example text containers" $ do
-            splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
-                [ ( ne [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
-                  , ne [ contain "ја)" 38 ]
+        it "splits example text containers (start bias)" $ do
+            splitTextsBy startBiasedBreakPoints exampleContainers `shouldBe`
+                [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+                  , [ contain "ја)" 38 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+                  , [ contain "дија)" 34 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+                  , [ contain "педија)" 30 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+                  , [ contain "кипедија)" 26 ]
+                  )
+                , ( [ contain "Vikipedija " 10 ]
+                  , [ contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vikipedi" 10 ]
+                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vikipe" 10 ]
+                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Viki" 10 ]
+                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vi" 10 ]
+                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+                  )
+                , ( []
+                  , [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+                  )
+                ]
+        it "splits example text containers (end bias)" $ do
+            splitTextsBy endBiasedBreakPoints exampleContainers `shouldBe`
+                [ ( [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+                  , []
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+                  , [ contain "ја)" 38 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Википе" 21 ]
-                  , ne [ contain "дија)" 34 ]
+                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+                  , [ contain "дија)" 34 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Вики" 21 ]
-                  , ne [ contain "педија)" 30 ]
+                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+                  , [ contain "педија)" 30 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Ви" 21 ]
-                  , ne [ contain "кипедија)" 26 ]
+                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+                  , [ contain "кипедија)" 26 ]
                   )
-                , ( ne [ contain "Vikipedija " 10 ]
-                  , ne [ contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipedija " 10 ]
+                  , [ contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vikipedi" 10 ]
-                  , ne [ contain "ja " 18, contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipedi" 10 ]
+                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vikipe" 10 ]
-                  , ne [ contain "dija " 16, contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipe" 10 ]
+                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Viki" 10 ]
-                  , ne [ contain "pedija " 14, contain "(Википедија)" 21 ]
+                , ( [ contain "Viki" 10 ]
+                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vi" 10 ]
-                  , ne [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+                , ( [ contain "Vi" 10 ]
+                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                   )
                 ]
     describe "trimTextsEnd" $ do
-- 
2.30.2