~jaro/balkon

92075f52e26577a9ba3c18ee0b8911214137021b — Jaro 1 year, 7 months ago 764fa6e
Internally allow splitting texts at endpoints.

This is required in order to preserve a hard line break at the end of
the input.
M src/Data/Text/ParagraphLayout/Internal/Layout.hs => src/Data/Text/ParagraphLayout/Internal/Layout.hs +7 -4
@@ 259,9 259,10 @@ hardSplit runs = allowFstEmpty $ trimFst $ NonEmpty.last $ splits
        -- TODO: Consider optimising.
        --       We do not need to look for any line breaks further than the
        --       shortest hard break.
        splits = noSplit :| map allowSndEmpty hSplits
        splits = noSplit :| hSplits
        noSplit = (runs, [])
        hSplits = -- from longest to shortest
        hSplits = nonEmptyFsts $
            -- from longest to shortest
            splitTextsBy (map fst . filter isHard . runLineBreaks) runs
        isHard (_, status) = status == BreakStatus.Hard



@@ 284,11 285,13 @@ softSplits runs = map (allowSndEmpty . trimFst) splits
        trimFst (runs1, runs2) = (trim runs1, runs2)
        trim = trimTextsStart isStartSpace . trimTextsEnd isEndSpace
        splits = lSplits ++ cSplits
        lSplits = splitTextsBy (map fst . runLineBreaks) runs
        lSplits = nonEmptyPairs $
            splitTextsBy (map fst . runLineBreaks) runs
        -- TODO: Consider optimising.
        --       We do not need to look for character breaks further than the
        --       shortest line break.
        cSplits = splitTextsBy (map fst . runCharacterBreaks) runs
        cSplits = nonEmptyPairs $
            splitTextsBy (map fst . runCharacterBreaks) runs

-- | The suffix remaining after removing the longest prefix of the list for
-- which the predicate holds, except always including at least the last element

M src/Data/Text/ParagraphLayout/Internal/TextContainer.hs => src/Data/Text/ParagraphLayout/Internal/TextContainer.hs +3 -5
@@ 20,8 20,6 @@ import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)

import Data.Text.ParagraphLayout.Internal.SplitList

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
    -- | Extract a `Text` from its container.


@@ 58,16 56,16 @@ instance SeparableTextContainer Text where
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- and find all possible ways to split them into two lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
    (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
    (a -> [Int]) -> f a -> [([a], [a])]
splitTextsBy breakFunc tcs =
    nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs
    splitTextsBy' breakFunc [] $ reverse $ toList tcs

splitTextsBy' :: SeparableTextContainer a =>
    (a -> [Int]) -> [a] -> [a] -> [([a], [a])]

M test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs => test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs +79 -28
@@ 1,15 1,11 @@
module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where

import qualified Data.List.NonEmpty
import Data.Text (Text, empty, pack)
import Data.Text.Foreign (lengthWord8)

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.TextContainer

ne :: [a] -> Data.List.NonEmpty.NonEmpty a
ne = Data.List.NonEmpty.fromList

data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
    deriving (Show, Eq)



@@ 43,7 39,8 @@ exampleBreaks =
    [
    -- Out of bounds. Should not generate any splits.
    999, 50,
    -- End of last text. Should not generate a split.
    -- End of last text.
    -- Should only generate a split for end-biased breaks.
    43,
    -- Word and syllable bounds in the second text,
    -- similar to hyphenation rules. Each should generate a corresponding split.


@@ 54,53 51,107 @@ exampleBreaks =
    -- Word and syllable bounds in the first text.
    -- Each should generate a corresponding split.
    18, 16, 14, 12,
    -- Start of first text. Should not generate a split.
    -- Start of first text.
    -- Should only generate a split for start-biased breaks.
    10,
    -- Out of bounds. Should not generate any splits.
    5, 0, -1
    ]

exampleBreakPoints :: ExampleContainer -> [Int]
exampleBreakPoints c =
-- | Bound `exampleBreaks` to the given text container and adjust offsets,
-- including start of text but excluding end of text.
--
-- Container boundaries should therefore only generate one split,
-- and all splits should have a non-empty prefix.
startBiasedBreakPoints :: ExampleContainer -> [Int]
startBiasedBreakPoints c =
    dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
    where
        l = lengthWord8 $ cText c
        d = cOffset c

-- | Bound `exampleBreaks` to the given text container and adjust offsets,
-- excluding start of text but including end of text.
--
-- Container boundaries should therefore only generate one split,
-- and all splits should have a non-empty prefix.
endBiasedBreakPoints :: ExampleContainer -> [Int]
endBiasedBreakPoints c =
    dropWhile (> l) $ takeWhile (> 0) $ map (subtract d) $ exampleBreaks
    where
        l = lengthWord8 $ cText c
        d = cOffset c

isSpace :: Char -> Bool
isSpace = (== ' ')

spec :: Spec
spec = do
    describe "splitTextsBy" $ do
        it "splits example text containers" $ do
            splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
                [ ( ne [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
                  , ne [ contain "ја)" 38 ]
        it "splits example text containers (start bias)" $ do
            splitTextsBy startBiasedBreakPoints exampleContainers `shouldBe`
                [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
                  , [ contain "ја)" 38 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
                  , [ contain "дија)" 34 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
                  , [ contain "педија)" 30 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
                  , [ contain "кипедија)" 26 ]
                  )
                , ( [ contain "Vikipedija " 10 ]
                  , [ contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vikipedi" 10 ]
                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vikipe" 10 ]
                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Viki" 10 ]
                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vi" 10 ]
                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                  )
                , ( []
                  , [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
                  )
                ]
        it "splits example text containers (end bias)" $ do
            splitTextsBy endBiasedBreakPoints exampleContainers `shouldBe`
                [ ( [ contain "Vikipedija " 10, contain "(Википедија)" 21 ]
                  , []
                  )
                , ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
                  , [ contain "ја)" 38 ]
                  )
                , ( ne [ contain "Vikipedija " 10, contain "(Википе" 21 ]
                  , ne [ contain "дија)" 34 ]
                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
                  , [ contain "дија)" 34 ]
                  )
                , ( ne [ contain "Vikipedija " 10, contain "(Вики" 21 ]
                  , ne [ contain "педија)" 30 ]
                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
                  , [ contain "педија)" 30 ]
                  )
                , ( ne [ contain "Vikipedija " 10, contain "(Ви" 21 ]
                  , ne [ contain "кипедија)" 26 ]
                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
                  , [ contain "кипедија)" 26 ]
                  )
                , ( ne [ contain "Vikipedija " 10 ]
                  , ne [ contain "(Википедија)" 21 ]
                , ( [ contain "Vikipedija " 10 ]
                  , [ contain "(Википедија)" 21 ]
                  )
                , ( ne [ contain "Vikipedi" 10 ]
                  , ne [ contain "ja " 18, contain "(Википедија)" 21 ]
                , ( [ contain "Vikipedi" 10 ]
                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
                  )
                , ( ne [ contain "Vikipe" 10 ]
                  , ne [ contain "dija " 16, contain "(Википедија)" 21 ]
                , ( [ contain "Vikipe" 10 ]
                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
                  )
                , ( ne [ contain "Viki" 10 ]
                  , ne [ contain "pedija " 14, contain "(Википедија)" 21 ]
                , ( [ contain "Viki" 10 ]
                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
                  )
                , ( ne [ contain "Vi" 10 ]
                  , ne [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                , ( [ contain "Vi" 10 ]
                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                  )
                ]
    describe "trimTextsEnd" $ do