~jaro/balkon

2462c039c364112d9015b036fe9217368d7bf2a5 — Jaro 1 year, 9 months ago 7f7bfdd
Refactor line breaking.
M src/Data/Text/ParagraphLayout/Internal/Plain.hs => src/Data/Text/ParagraphLayout/Internal/Plain.hs +19 -71
@@ 24,7 24,7 @@ import Data.Text.Glyphize
    ,fontExtentsForDir
    ,shape
    )
import Data.Text.ICU (LocaleName(Locale), breakCharacter, breakLine)
import Data.Text.ICU (Breaker, LocaleName(Locale), breakCharacter, breakLine)
import qualified Data.Text.ICU as BreakStatus (Line)
import Data.Text.Internal (Text(Text))
import qualified Data.Text.Lazy as Lazy


@@ 162,9 162,9 @@ layoutAndWrapRunsH maxWidth runs = NonEmpty.head $ validLayouts
        layout runs1 = layoutRunsH $ trimTextsEnd isEndSpace runs1
        splits = noSplit :| (lSplits ++ cSplits)
        noSplit = (runs, [])
        lSplits = filter hasContent $ lineSplits runs
        cSplits = filter hasContent $ characterSplits runs
        hasContent = not . null . fst
        -- TODO: Use Soft/Hard line break distinction.
        lSplits = splitTextsBy (map fst . runLineBreaks) runs
        cSplits = splitTextsBy (map fst . runCharacterBreaks) runs

-- | The suffix remaining after removing the longest prefix of the list for
-- which the predicate holds, except always including at least the last element


@@ 185,42 185,6 @@ layoutRunsH runs = map layoutRunH runs
totalAdvances :: [WithSpan PF.ProtoFragment] -> Int32
totalAdvances pfs = sum $ map (\(WithSpan _ pf) -> PF.advance pf) pfs

-- | Recursive function for finding all possible ways to split a list of runs
-- into two on a valid line-breaking boundary, including the start of the first
-- run and excluding the end of the last run.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
lineSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])]
lineSplits xs = lineSplits' [] (reverse xs)

lineSplits' :: [WithSpan Run] -> [WithSpan Run] ->
    [([WithSpan Run], [WithSpan Run])]
lineSplits' _ [] = []
lineSplits' closed (x:xs) = splits ++ lineSplits' (x:closed) xs
    where
        splits = map mapFunc $ runLineSplits x
        mapFunc ((x1, x2), _) =
            (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed)

-- | Recursive function for finding all possible ways to split a list of runs
-- into two on a character boundary, including the start of the first run and
-- excluding the end of the last run.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
characterSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])]
characterSplits xs = characterSplits' [] (reverse xs)

characterSplits' :: [WithSpan Run] -> [WithSpan Run] ->
    [([WithSpan Run], [WithSpan Run])]
characterSplits' _ [] = []
characterSplits' closed (x:xs) = splits ++ characterSplits' (x:closed) xs
    where
        splits = map mapFunc $ runCharacterSplits x
        mapFunc ((x1, x2), _) =
            (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed)

-- | Calculate layout for the given horizontal run and attach extra information.
layoutRunH :: WithSpan Run -> WithSpan PF.ProtoFragment
layoutRunH (WithSpan rs run) = WithSpan rs pf


@@ 257,8 221,8 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do
        <*> ZipList sStarts
        <*> ZipList sLengths
    let lang = spanLanguage $ spanOptions s
    let lBreaks = paragraphLineBreaks p pEnd lang
    let cBreaks = paragraphCharacterBreaks p pEnd lang
    let lBreaks = paragraphBreaks breakLine p pEnd lang
    let cBreaks = paragraphBreaks breakCharacter p pEnd lang
    return RS.ResolvedSpan
        { RS.spanIndex = i
        , RS.spanOffsetInParagraph = sStart - pStart


@@ 271,41 235,25 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do
        , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks
        }

paragraphLineBreaks :: Paragraph -> Int -> String -> [(Int, BreakStatus.Line)]
paragraphLineBreaks (Paragraph arr off _ _) end lang =
    breaksDesc (breakLine (localeFromLanguage lang)) paragraphText
paragraphBreaks :: (LocaleName -> Breaker a) -> Paragraph -> Int -> String ->
    [(Int, a)]
paragraphBreaks breakFunc (Paragraph arr off _ _) end lang =
    breaksDesc (breakFunc (localeFromLanguage lang)) paragraphText
    where
        paragraphText = Text arr off (end - off)

paragraphCharacterBreaks :: Paragraph -> Int -> String -> [(Int, ())]
paragraphCharacterBreaks (Paragraph arr off _ _) end lang =
    breaksDesc (breakCharacter (localeFromLanguage lang)) paragraphText
    where
        paragraphText = Text arr off (end - off)

-- | Split the given run at every valid line break position.
runLineSplits :: WithSpan Run ->
    [((WithSpan Run, WithSpan Run), BreakStatus.Line)]
runLineSplits r = map split $ runLineBreaks r
    where
        split (i, status) = (splitTextAt8 i r, status)

runLineBreaks :: WithSpan Run -> [(Int, BreakStatus.Line)]
runLineBreaks (WithSpan rs run) = dropWhile (not . valid) $
    subOffsetsDesc (runOffsetInSpan run) $ RS.spanLineBreaks rs
    where
        valid (off, _) = off < runLength
        runLength = lengthWord8 $ getText run

-- | Split the given run before every character.
runCharacterSplits :: WithSpan Run -> [((WithSpan Run, WithSpan Run), ())]
runCharacterSplits r = map split $ runCharacterBreaks r
    where
        split (i, ()) = (splitTextAt8 i r, ())
runLineBreaks (WithSpan rs run) =
    runBreaksFromSpan run $ RS.spanLineBreaks rs

runCharacterBreaks :: WithSpan Run -> [(Int, ())]
runCharacterBreaks (WithSpan rs run) = dropWhile (not . valid) $
    subOffsetsDesc (runOffsetInSpan run) $ RS.spanCharacterBreaks rs
runCharacterBreaks (WithSpan rs run) =
    runBreaksFromSpan run $ RS.spanCharacterBreaks rs

-- | Constrain span breaks to a selected run and adjust offsets.
runBreaksFromSpan :: Run -> [(Int, a)] -> [(Int, a)]
runBreaksFromSpan run spanBreaks =
    dropWhile (not . valid) $ subOffsetsDesc (runOffsetInSpan run) spanBreaks
    where
        valid (off, _) = off < runLength
        runLength = lengthWord8 $ getText run

M src/Data/Text/ParagraphLayout/Internal/TextContainer.hs => src/Data/Text/ParagraphLayout/Internal/TextContainer.hs +24 -21
@@ 1,11 1,10 @@
module Data.Text.ParagraphLayout.Internal.TextContainer
    (SeparableTextContainer
    ,TextContainer
    ,collapse
    ,dropWhileEnd
    ,getText
    ,splitTextAt8
    ,splitTextsAt8
    ,splitTextsBy
    ,trimTextsEnd
    )
where


@@ 13,7 12,7 @@ where
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
import Data.Text.Foreign (dropWord8, takeWord8)

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where


@@ 46,31 45,35 @@ instance SeparableTextContainer Text where
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and make a split at the given number of `Data.Word.Word8` from the beginning
-- of this sequence.
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valit split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- If @n@ falls on a container boundary, the total number of output containers
-- will equal the number of input containers; otherwise, it will be one larger.
splitTextsAt8 :: SeparableTextContainer a => Int -> [a] -> ([a], [a])
splitTextsAt8 n rs = (pre, post)
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])]
splitTextsBy breakFunc tcs =
    filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs)
    where
        pre = reverse rpre
        (rpre, post) = splitTextsAt8' n [] rs
        notEmpty (prefix, suffix) = not (null prefix || null suffix)

splitTextsAt8' :: SeparableTextContainer a => Int -> [a] -> [a] -> ([a], [a])
splitTextsAt8' _ rpre [] = (rpre, [])
splitTextsAt8' n rpre (r:rs)
    | n <= 0 = (rpre, r:rs)
    | n >= l = splitTextsAt8' (n - l) (r:rpre) (rs)
    | otherwise = let (r1, r2) = splitTextAt8 n r in (r1:rpre, r2:rs)
splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] ->
    [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc:tcs) =
    fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
    where
        l = lengthWord8 $ getText r
        fullSplits = map mergeWithRest tcSplits
        mergeWithRest (x1, x2) =
            (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
        tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
        tcBreakOffsets = breakFunc tc

-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (x :| xs)
    | Text.null (getText x) = xs
    | otherwise = x:xs
collapse (tc :| tcs)
    | Text.null (getText tc) = tcs
    | otherwise = tc:tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.

M test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs => test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs +83 -81
@@ 1,96 1,98 @@
module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where

import Data.Text (Text, empty, pack)
import Data.Text.Glyphize (Direction(..))
import Data.Text.Foreign (lengthWord8)

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.Run
import Data.Text.ParagraphLayout.Internal.TextContainer

isSpace :: Char -> Bool
isSpace = (==' ')
data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
    deriving (Show, Eq)

contain :: String -> Int -> ExampleContainer
contain s o = Contain (pack s) o

instance TextContainer ExampleContainer where
    getText = cText

instance SeparableTextContainer ExampleContainer where
    splitTextAt8 n (Contain t o) = (Contain t1 o1, Contain t2 o2)
        where
            (t1, t2) = splitTextAt8 n t
            o1 = o
            o2 = o + lengthWord8 t1
    dropWhileEnd p (Contain t o) = Contain (dropWhileEnd p t) o

inputRuns :: [Run]
inputRuns =
    [ Run
        -- TODO: We might want both parentheses in the same run.
        { runOffsetInSpan = 0
        , runText = pack "Vikipedija ("
        , runDirection = Just DirLTR
        , runScript = Just "Latn"
        }
    , Run
        { runOffsetInSpan = 12
        , runText = pack "Википедија)"
        , runDirection = Just DirLTR
        , runScript = Just "Cyrl"
        }
exampleContainers :: [ExampleContainer]
exampleContainers = [c1, c2]
    where
        (c1, c2) = splitTextAt8 11 $ contain "Vikipedija (Википедија)" 10

exampleBreaks :: [Int]
exampleBreaks =
    [
    -- Out of bounds. Should not generate any splits.
    999, 50,
    -- End of last text. Should not generate a split.
    43,
    -- Word and syllable bounds in the second text,
    -- similar to hyphenation rules. Each should generate a corresponding split.
    38, 34, 30, 26,
    -- The exact edge between the two texts.
    -- Should generate a split, but not any empty containers.
    21,
    -- Word and syllable bounds in the first text.
    -- Each should generate a corresponding split.
    18, 16, 14, 12,
    -- Start of first text. Should not generate a split.
    10,
    -- Out of bounds. Should not generate any splits.
    5, 0, -1
    ]

exampleBreakPoints :: ExampleContainer -> [Int]
exampleBreakPoints c =
    dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
    where
        l = lengthWord8 $ cText c
        d = cOffset c

isSpace :: Char -> Bool
isSpace = (==' ')

spec :: Spec
spec = do
    describe "splitTextsAt8" $ do
        it "negative value splits at beginning of first run" $ do
            splitTextsAt8 (-1) inputRuns `shouldBe` ([], inputRuns)
        it "zero splits at beginning of first run" $ do
            splitTextsAt8 0 inputRuns `shouldBe` ([], inputRuns)
        it "splits in first run" $ do
            splitTextsAt8 11 inputRuns `shouldBe`
                (
                    [ Run
                        { runOffsetInSpan = 0
                        , runText = pack "Vikipedija "
                        , runDirection = Just DirLTR
                        , runScript = Just "Latn"
                        }
                    ]
                ,
                    [ Run
                        { runOffsetInSpan = 11
                        , runText = pack "("
                        , runDirection = Just DirLTR
                        , runScript = Just "Latn"
                        }
                    , Run
                        { runOffsetInSpan = 12
                        , runText = pack "Википедија)"
                        , runDirection = Just DirLTR
                        , runScript = Just "Cyrl"
                        }
                    ]
                )
        it "split at run edges does not generate extra run" $ do
            splitTextsAt8 12 inputRuns `shouldBe`
                (take 1 inputRuns, drop 1 inputRuns)
        it "splits in second run" $ do
            splitTextsAt8 20 inputRuns `shouldBe`
                (
                    [ Run
                        { runOffsetInSpan = 0
                        , runText = pack "Vikipedija ("
                        , runDirection = Just DirLTR
                        , runScript = Just "Latn"
                        }
                    , Run
                        { runOffsetInSpan = 12
                        , runText = pack "Вики"
                        , runDirection = Just DirLTR
                        , runScript = Just "Cyrl"
                        }
                    ]
                ,
                    [ Run
                        { runOffsetInSpan = 20
                        , runText = pack "педија)"
                        , runDirection = Just DirLTR
                        , runScript = Just "Cyrl"
                        }
                    ]
                )
        it "split at end does not generate extra run" $ do
            splitTextsAt8 33 inputRuns `shouldBe` (inputRuns, [])
        it "large value splits at end of last run" $ do
            splitTextsAt8 999 inputRuns `shouldBe` (inputRuns, [])
    describe "splitTextsBy" $ do
        it "splits example text containers" $ do
            splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
                [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
                  , [ contain "ја)" 38 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
                  , [ contain "дија)" 34 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
                  , [ contain "педија)" 30 ]
                  )
                , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
                  , [ contain "кипедија)" 26 ]
                  )
                , ( [ contain "Vikipedija " 10 ]
                  , [ contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vikipedi" 10 ]
                  , [ contain "ja " 18, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vikipe" 10 ]
                  , [ contain "dija " 16, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Viki" 10 ]
                  , [ contain "pedija " 14, contain "(Википедија)" 21 ]
                  )
                , ( [ contain "Vi" 10 ]
                  , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
                  )
                ]
    describe "trimTextsEnd" $ do
        describe "isSpace" $ do
            it "does nothing on an empty list" $ do