@@ 20,8 20,6 @@ import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)
-import Data.Text.ParagraphLayout.Internal.SplitList
-
-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
-- | Extract a `Text` from its container.
@@ 58,16 56,16 @@ instance SeparableTextContainer Text where
dropWhileEnd = Text.dropWhileEnd
-- | Treat a list of text containers as a contiguous sequence,
--- and find all possible ways to split them into two non-empty lists,
+-- and find all possible ways to split them into two lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
- (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
+ (a -> [Int]) -> f a -> [([a], [a])]
splitTextsBy breakFunc tcs =
- nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs
+ splitTextsBy' breakFunc [] $ reverse $ toList tcs
splitTextsBy' :: SeparableTextContainer a =>
(a -> [Int]) -> [a] -> [a] -> [([a], [a])]
@@ 1,15 1,11 @@
module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where
-import qualified Data.List.NonEmpty
import Data.Text (Text, empty, pack)
import Data.Text.Foreign (lengthWord8)
import Test.Hspec
import Data.Text.ParagraphLayout.Internal.TextContainer
-ne :: [a] -> Data.List.NonEmpty.NonEmpty a
-ne = Data.List.NonEmpty.fromList
-
data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
deriving (Show, Eq)
@@ 43,7 39,8 @@ exampleBreaks =
[
-- Out of bounds. Should not generate any splits.
999, 50,
- -- End of last text. Should not generate a split.
+ -- End of last text.
+ -- Should only generate a split for end-biased breaks.
43,
-- Word and syllable bounds in the second text,
-- similar to hyphenation rules. Each should generate a corresponding split.
@@ 54,53 51,107 @@ exampleBreaks =
-- Word and syllable bounds in the first text.
-- Each should generate a corresponding split.
18, 16, 14, 12,
- -- Start of first text. Should not generate a split.
+ -- Start of first text.
+ -- Should only generate a split for start-biased breaks.
10,
-- Out of bounds. Should not generate any splits.
5, 0, -1
]
-exampleBreakPoints :: ExampleContainer -> [Int]
-exampleBreakPoints c =
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- including start of text but excluding end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+startBiasedBreakPoints :: ExampleContainer -> [Int]
+startBiasedBreakPoints c =
dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
where
l = lengthWord8 $ cText c
d = cOffset c
+-- | Bound `exampleBreaks` to the given text container and adjust offsets,
+-- excluding start of text but including end of text.
+--
+-- Container boundaries should therefore only generate one split,
+-- and all splits should have a non-empty prefix.
+endBiasedBreakPoints :: ExampleContainer -> [Int]
+endBiasedBreakPoints c =
+ dropWhile (> l) $ takeWhile (> 0) $ map (subtract d) $ exampleBreaks
+ where
+ l = lengthWord8 $ cText c
+ d = cOffset c
+
isSpace :: Char -> Bool
isSpace = (== ' ')
spec :: Spec
spec = do
describe "splitTextsBy" $ do
- it "splits example text containers" $ do
- splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
- [ ( ne [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
- , ne [ contain "ја)" 38 ]
+ it "splits example text containers (start bias)" $ do
+ splitTextsBy startBiasedBreakPoints exampleContainers `shouldBe`
+ [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+ , [ contain "ја)" 38 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+ , [ contain "дија)" 34 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+ , [ contain "педија)" 30 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+ , [ contain "кипедија)" 26 ]
+ )
+ , ( [ contain "Vikipedija " 10 ]
+ , [ contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vikipedi" 10 ]
+ , [ contain "ja " 18, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vikipe" 10 ]
+ , [ contain "dija " 16, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Viki" 10 ]
+ , [ contain "pedija " 14, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vi" 10 ]
+ , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+ )
+ , ( []
+ , [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+ )
+ ]
+ it "splits example text containers (end bias)" $ do
+ splitTextsBy endBiasedBreakPoints exampleContainers `shouldBe`
+ [ ( [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
+ , []
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+ , [ contain "ја)" 38 ]
)
- , ( ne [ contain "Vikipedija " 10, contain "(Википе" 21 ]
- , ne [ contain "дија)" 34 ]
+ , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+ , [ contain "дија)" 34 ]
)
- , ( ne [ contain "Vikipedija " 10, contain "(Вики" 21 ]
- , ne [ contain "педија)" 30 ]
+ , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+ , [ contain "педија)" 30 ]
)
- , ( ne [ contain "Vikipedija " 10, contain "(Ви" 21 ]
- , ne [ contain "кипедија)" 26 ]
+ , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+ , [ contain "кипедија)" 26 ]
)
- , ( ne [ contain "Vikipedija " 10 ]
- , ne [ contain "(Википедија)" 21 ]
+ , ( [ contain "Vikipedija " 10 ]
+ , [ contain "(Википедија)" 21 ]
)
- , ( ne [ contain "Vikipedi" 10 ]
- , ne [ contain "ja " 18, contain "(Википедија)" 21 ]
+ , ( [ contain "Vikipedi" 10 ]
+ , [ contain "ja " 18, contain "(Википедија)" 21 ]
)
- , ( ne [ contain "Vikipe" 10 ]
- , ne [ contain "dija " 16, contain "(Википедија)" 21 ]
+ , ( [ contain "Vikipe" 10 ]
+ , [ contain "dija " 16, contain "(Википедија)" 21 ]
)
- , ( ne [ contain "Viki" 10 ]
- , ne [ contain "pedija " 14, contain "(Википедија)" 21 ]
+ , ( [ contain "Viki" 10 ]
+ , [ contain "pedija " 14, contain "(Википедија)" 21 ]
)
- , ( ne [ contain "Vi" 10 ]
- , ne [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+ , ( [ contain "Vi" 10 ]
+ , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
)
]
describe "trimTextsEnd" $ do