~jaro/balkon: Internally allow splitting texts at endpoints.

3 files changed, 89 insertions(+), 37 deletions(-)

M src/Data/Text/ParagraphLayout/Internal/Layout.hs
M src/Data/Text/ParagraphLayout/Internal/TextContainer.hs
M test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs

M src/Data/Text/ParagraphLayout/Internal/Layout.hs => src/Data/Text/ParagraphLayout/Internal/Layout.hs +7 -4

@@ 259,9 259,10 @@ hardSplit runs = allowFstEmpty $ trimFst $ NonEmpty.last $ splits
         -- TODO: Consider optimising.
         --       We do not need to look for any line breaks further than the
         --       shortest hard break.
-        splits = noSplit :| map allowSndEmpty hSplits
+        splits = noSplit :| hSplits
         noSplit = (runs, [])
-        hSplits = -- from longest to shortest
+        hSplits = nonEmptyFsts $
+            -- from longest to shortest
             splitTextsBy (map fst . filter isHard . runLineBreaks) runs
         isHard (_, status) = status == BreakStatus.Hard
 


@@ 284,11 285,13 @@ softSplits runs = map (allowSndEmpty . trimFst) splits
         trimFst (runs1, runs2) = (trim runs1, runs2)
         trim = trimTextsStart isStartSpace . trimTextsEnd isEndSpace
         splits = lSplits ++ cSplits
-        lSplits = splitTextsBy (map fst . runLineBreaks) runs
+        lSplits = nonEmptyPairs $
+            splitTextsBy (map fst . runLineBreaks) runs
         -- TODO: Consider optimising.
         --       We do not need to look for character breaks further than the
         --       shortest line break.
-        cSplits = splitTextsBy (map fst . runCharacterBreaks) runs
+        cSplits = nonEmptyPairs $
+            splitTextsBy (map fst . runCharacterBreaks) runs
 
 -- | The suffix remaining after removing the longest prefix of the list for
 -- which the predicate holds, except always including at least the last element

M src/Data/Text/ParagraphLayout/Internal/TextContainer.hs => src/Data/Text/ParagraphLayout/Internal/TextContainer.hs +3 -5

@@ 20,8 20,6 @@ import Data.Text (Text)
 import qualified Data.Text as Text
 import Data.Text.Foreign (dropWord8, takeWord8)
 
-import Data.Text.ParagraphLayout.Internal.SplitList
-
 -- | Class of data types containing `Text` that can be accessed.
 class TextContainer a where
     -- | Extract a `Text` from its container.


@@ 58,16 56,16 @@ instance SeparableTextContainer Text where
     dropWhileEnd = Text.dropWhileEnd
 
 -- | Treat a list of text containers as a contiguous sequence,
--- and find all possible ways to split them into two non-empty lists,
+-- and find all possible ways to split them into two lists,
 -- using the given function to find valid split offsets in `Data.Word.Word8`
 -- units from the beginning of each container.
 --
 -- The results in the form (prefix, suffix) will be ordered from the longest
 -- prefix to shortest.
 splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
-    (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
+    (a -> [Int]) -> f a -> [([a], [a])]
 splitTextsBy breakFunc tcs =
-    nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs
+    splitTextsBy' breakFunc [] $ reverse $ toList tcs
 
 splitTextsBy' :: SeparableTextContainer a =>
     (a -> [Int]) -> [a] -> [a] -> [([a], [a])]

M test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs => test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs +79 -28

@@ 1,15 1,11 @@
 module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where
 
-import qualified Data.List.NonEmpty
 import Data.Text (Text, empty, pack)
 import Data.Text.Foreign (lengthWord8)
 
 import Test.Hspec
 import Data.Text.ParagraphLayout.Internal.TextContainer
 
-ne :: [a] -> Data.List.NonEmpty.NonEmpty a
-ne = Data.List.NonEmpty.fromList
-
 data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
     deriving (Show, Eq)
 


@@ 43,7 39,8 @@ exampleBreaks =
     [
     -- Out of bounds. Should not generate any splits.
     999, 50,
-    -- End of last text. Should not generate a split.
+    -- End of last text.
+    -- Should only generate a split for end-biased breaks.
     43,
     -- Word and syllable bounds in the second text,
     -- similar to hyphenation rules. Each should generate a corresponding split.


@@ 54,53 51,107 @@ exampleBreaks =
     -- Word and syllable bounds in the first text.
     -- Each should generate a corresponding split.
     18, 16, 14, 12,
-    -- Start of first text. Should not generate a split.
+    -- Start of first text.
+    -- Should only generate a split for start-biased breaks.
     10,
     -- Out of bounds. Should not generate any splits.
     5, 0, -1
     ]
 
-exampleBreakPoints :: ExampleContainer -> [Int]
-exampleBreakPoints c =
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- including start of text but excluding end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+startBiasedBreakPoints :: ExampleContainer -> [Int]
+startBiasedBreakPoints c =
     dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
     where
         l = lengthWord8 $ cText c
         d = cOffset c
 
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- excluding start of text but including end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+endBiasedBreakPoints :: ExampleContainer -> [Int]
+endBiasedBreakPoints c =
+    dropWhile (> l) $ takeWhile (> 0) $ map (subtract d) $ exampleBreaks
+    where
+        l = lengthWord8 $ cText c
+        d = cOffset c
+
 isSpace :: Char -> Bool
 isSpace = (== ' ')
 
 spec :: Spec
 spec = do
     describe "splitTextsBy" $ do
-        it "splits example text containers" $ do
-            splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
-                [ ( ne [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
-                  , ne [ contain "ја)" 38 ]
+        it "splits example text containers (start bias)" $ do
+            splitTextsBy startBiasedBreakPoints exampleContainers `shouldBe`
+                [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+                  , [ contain "ја)" 38 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+                  , [ contain "дија)" 34 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+                  , [ contain "педија)" 30 ]
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+                  , [ contain "кипедија)" 26 ]
+                  )
+                , ( [ contain "Vikipedija " 10 ]
+                  , [ contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vikipedi" 10 ]
+                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vikipe" 10 ]
+                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Viki" 10 ]
+                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
+                  )
+                , ( [ contain "Vi" 10 ]
+                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+                  )
+                , ( []
+                  , [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+                  )
+                ]
+        it "splits example text containers (end bias)" $ do
+            splitTextsBy endBiasedBreakPoints exampleContainers `shouldBe`
+                [ ( [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+                  , []
+                  )
+                , ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+                  , [ contain "ја)" 38 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Википе" 21 ]
-                  , ne [ contain "дија)" 34 ]
+                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+                  , [ contain "дија)" 34 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Вики" 21 ]
-                  , ne [ contain "педија)" 30 ]
+                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+                  , [ contain "педија)" 30 ]
                   )
-                , ( ne [ contain "Vikipedija " 10, contain "(Ви" 21 ]
-                  , ne [ contain "кипедија)" 26 ]
+                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+                  , [ contain "кипедија)" 26 ]
                   )
-                , ( ne [ contain "Vikipedija " 10 ]
-                  , ne [ contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipedija " 10 ]
+                  , [ contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vikipedi" 10 ]
-                  , ne [ contain "ja " 18, contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipedi" 10 ]
+                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vikipe" 10 ]
-                  , ne [ contain "dija " 16, contain "(Википедија)" 21 ]
+                , ( [ contain "Vikipe" 10 ]
+                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Viki" 10 ]
-                  , ne [ contain "pedija " 14, contain "(Википедија)" 21 ]
+                , ( [ contain "Viki" 10 ]
+                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
                   )
-                , ( ne [ contain "Vi" 10 ]
-                  , ne [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+                , ( [ contain "Vi" 10 ]
+                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                   )
                 ]
     describe "trimTextsEnd" $ do