M src/Data/Text/ParagraphLayout/Internal/Plain.hs => src/Data/Text/ParagraphLayout/Internal/Plain.hs +19 -71
@@ 24,7 24,7 @@ import Data.Text.Glyphize
,fontExtentsForDir
,shape
)
-import Data.Text.ICU (LocaleName(Locale), breakCharacter, breakLine)
+import Data.Text.ICU (Breaker, LocaleName(Locale), breakCharacter, breakLine)
import qualified Data.Text.ICU as BreakStatus (Line)
import Data.Text.Internal (Text(Text))
import qualified Data.Text.Lazy as Lazy
@@ 162,9 162,9 @@ layoutAndWrapRunsH maxWidth runs = NonEmpty.head $ validLayouts
layout runs1 = layoutRunsH $ trimTextsEnd isEndSpace runs1
splits = noSplit :| (lSplits ++ cSplits)
noSplit = (runs, [])
- lSplits = filter hasContent $ lineSplits runs
- cSplits = filter hasContent $ characterSplits runs
- hasContent = not . null . fst
+ -- TODO: Use Soft/Hard line break distinction.
+ lSplits = splitTextsBy (map fst . runLineBreaks) runs
+ cSplits = splitTextsBy (map fst . runCharacterBreaks) runs
-- | The suffix remaining after removing the longest prefix of the list for
-- which the predicate holds, except always including at least the last element
@@ 185,42 185,6 @@ layoutRunsH runs = map layoutRunH runs
totalAdvances :: [WithSpan PF.ProtoFragment] -> Int32
totalAdvances pfs = sum $ map (\(WithSpan _ pf) -> PF.advance pf) pfs
--- | Recursive function for finding all possible ways to split a list of runs
--- into two on a valid line-breaking boundary, including the start of the first
--- run and excluding the end of the last run.
---
--- The results in the form (prefix, suffix) will be ordered from the longest
--- prefix to shortest.
-lineSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])]
-lineSplits xs = lineSplits' [] (reverse xs)
-
-lineSplits' :: [WithSpan Run] -> [WithSpan Run] ->
- [([WithSpan Run], [WithSpan Run])]
-lineSplits' _ [] = []
-lineSplits' closed (x:xs) = splits ++ lineSplits' (x:closed) xs
- where
- splits = map mapFunc $ runLineSplits x
- mapFunc ((x1, x2), _) =
- (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed)
-
--- | Recursive function for finding all possible ways to split a list of runs
--- into two on a character boundary, including the start of the first run and
--- excluding the end of the last run.
---
--- The results in the form (prefix, suffix) will be ordered from the longest
--- prefix to shortest.
-characterSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])]
-characterSplits xs = characterSplits' [] (reverse xs)
-
-characterSplits' :: [WithSpan Run] -> [WithSpan Run] ->
- [([WithSpan Run], [WithSpan Run])]
-characterSplits' _ [] = []
-characterSplits' closed (x:xs) = splits ++ characterSplits' (x:closed) xs
- where
- splits = map mapFunc $ runCharacterSplits x
- mapFunc ((x1, x2), _) =
- (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed)
-
-- | Calculate layout for the given horizontal run and attach extra information.
layoutRunH :: WithSpan Run -> WithSpan PF.ProtoFragment
layoutRunH (WithSpan rs run) = WithSpan rs pf
@@ 257,8 221,8 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do
<*> ZipList sStarts
<*> ZipList sLengths
let lang = spanLanguage $ spanOptions s
- let lBreaks = paragraphLineBreaks p pEnd lang
- let cBreaks = paragraphCharacterBreaks p pEnd lang
+ let lBreaks = paragraphBreaks breakLine p pEnd lang
+ let cBreaks = paragraphBreaks breakCharacter p pEnd lang
return RS.ResolvedSpan
{ RS.spanIndex = i
, RS.spanOffsetInParagraph = sStart - pStart
@@ 271,41 235,25 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do
, RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks
}
-paragraphLineBreaks :: Paragraph -> Int -> String -> [(Int, BreakStatus.Line)]
-paragraphLineBreaks (Paragraph arr off _ _) end lang =
- breaksDesc (breakLine (localeFromLanguage lang)) paragraphText
+paragraphBreaks :: (LocaleName -> Breaker a) -> Paragraph -> Int -> String ->
+ [(Int, a)]
+paragraphBreaks breakFunc (Paragraph arr off _ _) end lang =
+ breaksDesc (breakFunc (localeFromLanguage lang)) paragraphText
where
paragraphText = Text arr off (end - off)
-paragraphCharacterBreaks :: Paragraph -> Int -> String -> [(Int, ())]
-paragraphCharacterBreaks (Paragraph arr off _ _) end lang =
- breaksDesc (breakCharacter (localeFromLanguage lang)) paragraphText
- where
- paragraphText = Text arr off (end - off)
-
--- | Split the given run at every valid line break position.
-runLineSplits :: WithSpan Run ->
- [((WithSpan Run, WithSpan Run), BreakStatus.Line)]
-runLineSplits r = map split $ runLineBreaks r
- where
- split (i, status) = (splitTextAt8 i r, status)
-
runLineBreaks :: WithSpan Run -> [(Int, BreakStatus.Line)]
-runLineBreaks (WithSpan rs run) = dropWhile (not . valid) $
- subOffsetsDesc (runOffsetInSpan run) $ RS.spanLineBreaks rs
- where
- valid (off, _) = off < runLength
- runLength = lengthWord8 $ getText run
-
--- | Split the given run before every character.
-runCharacterSplits :: WithSpan Run -> [((WithSpan Run, WithSpan Run), ())]
-runCharacterSplits r = map split $ runCharacterBreaks r
- where
- split (i, ()) = (splitTextAt8 i r, ())
+runLineBreaks (WithSpan rs run) =
+ runBreaksFromSpan run $ RS.spanLineBreaks rs
runCharacterBreaks :: WithSpan Run -> [(Int, ())]
-runCharacterBreaks (WithSpan rs run) = dropWhile (not . valid) $
- subOffsetsDesc (runOffsetInSpan run) $ RS.spanCharacterBreaks rs
+runCharacterBreaks (WithSpan rs run) =
+ runBreaksFromSpan run $ RS.spanCharacterBreaks rs
+
+-- | Constrain span breaks to a selected run and adjust offsets.
+runBreaksFromSpan :: Run -> [(Int, a)] -> [(Int, a)]
+runBreaksFromSpan run spanBreaks =
+ dropWhile (not . valid) $ subOffsetsDesc (runOffsetInSpan run) spanBreaks
where
valid (off, _) = off < runLength
runLength = lengthWord8 $ getText run
M src/Data/Text/ParagraphLayout/Internal/TextContainer.hs => src/Data/Text/ParagraphLayout/Internal/TextContainer.hs +24 -21
@@ 1,11 1,10 @@
module Data.Text.ParagraphLayout.Internal.TextContainer
(SeparableTextContainer
,TextContainer
- ,collapse
,dropWhileEnd
,getText
,splitTextAt8
- ,splitTextsAt8
+ ,splitTextsBy
,trimTextsEnd
)
where
@@ 13,7 12,7 @@ where
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
-import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
+import Data.Text.Foreign (dropWord8, takeWord8)
-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
@@ 46,31 45,35 @@ instance SeparableTextContainer Text where
dropWhileEnd = Text.dropWhileEnd
-- | Treat a list of text containers as a contiguous sequence,
--- and make a split at the given number of `Data.Word.Word8` from the beginning
--- of this sequence.
+-- and find all possible ways to split them into two non-empty lists,
+-- using the given function to find valit split offsets in `Data.Word.Word8`
+-- units from the beginning of each container.
--
--- If @n@ falls on a container boundary, the total number of output containers
--- will equal the number of input containers; otherwise, it will be one larger.
-splitTextsAt8 :: SeparableTextContainer a => Int -> [a] -> ([a], [a])
-splitTextsAt8 n rs = (pre, post)
+-- The results in the form (prefix, suffix) will be ordered from the longest
+-- prefix to shortest.
+splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])]
+splitTextsBy breakFunc tcs =
+ filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs)
where
- pre = reverse rpre
- (rpre, post) = splitTextsAt8' n [] rs
+ notEmpty (prefix, suffix) = not (null prefix || null suffix)
-splitTextsAt8' :: SeparableTextContainer a => Int -> [a] -> [a] -> ([a], [a])
-splitTextsAt8' _ rpre [] = (rpre, [])
-splitTextsAt8' n rpre (r:rs)
- | n <= 0 = (rpre, r:rs)
- | n >= l = splitTextsAt8' (n - l) (r:rpre) (rs)
- | otherwise = let (r1, r2) = splitTextAt8 n r in (r1:rpre, r2:rs)
+splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] ->
+ [([a], [a])]
+splitTextsBy' _ _ [] = []
+splitTextsBy' breakFunc closed (tc:tcs) =
+ fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
where
- l = lengthWord8 $ getText r
+ fullSplits = map mergeWithRest tcSplits
+ mergeWithRest (x1, x2) =
+ (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
+ tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
+ tcBreakOffsets = breakFunc tc
-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
-collapse (x :| xs)
- | Text.null (getText x) = xs
- | otherwise = x:xs
+collapse (tc :| tcs)
+ | Text.null (getText tc) = tcs
+ | otherwise = tc:tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
M test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs => test/Data/Text/ParagraphLayout/Internal/TextContainerSpec.hs +83 -81
@@ 1,96 1,98 @@
module Data.Text.ParagraphLayout.Internal.TextContainerSpec (spec) where
import Data.Text (Text, empty, pack)
-import Data.Text.Glyphize (Direction(..))
+import Data.Text.Foreign (lengthWord8)
import Test.Hspec
-import Data.Text.ParagraphLayout.Internal.Run
import Data.Text.ParagraphLayout.Internal.TextContainer
-isSpace :: Char -> Bool
-isSpace = (==' ')
+data ExampleContainer = Contain { cText :: Text, cOffset :: Int }
+ deriving (Show, Eq)
+
+contain :: String -> Int -> ExampleContainer
+contain s o = Contain (pack s) o
+
+instance TextContainer ExampleContainer where
+ getText = cText
+
+instance SeparableTextContainer ExampleContainer where
+ splitTextAt8 n (Contain t o) = (Contain t1 o1, Contain t2 o2)
+ where
+ (t1, t2) = splitTextAt8 n t
+ o1 = o
+ o2 = o + lengthWord8 t1
+ dropWhileEnd p (Contain t o) = Contain (dropWhileEnd p t) o
-inputRuns :: [Run]
-inputRuns =
- [ Run
- -- TODO: We might want both parentheses in the same run.
- { runOffsetInSpan = 0
- , runText = pack "Vikipedija ("
- , runDirection = Just DirLTR
- , runScript = Just "Latn"
- }
- , Run
- { runOffsetInSpan = 12
- , runText = pack "Википедија)"
- , runDirection = Just DirLTR
- , runScript = Just "Cyrl"
- }
+exampleContainers :: [ExampleContainer]
+exampleContainers = [c1, c2]
+ where
+ (c1, c2) = splitTextAt8 11 $ contain "Vikipedija (Википедија)" 10
+
+exampleBreaks :: [Int]
+exampleBreaks =
+ [
+ -- Out of bounds. Should not generate any splits.
+ 999, 50,
+ -- End of last text. Should not generate a split.
+ 43,
+ -- Word and syllable bounds in the second text,
+ -- similar to hyphenation rules. Each should generate a corresponding split.
+ 38, 34, 30, 26,
+ -- The exact edge between the two texts.
+ -- Should generate a split, but not any empty containers.
+ 21,
+ -- Word and syllable bounds in the first text.
+ -- Each should generate a corresponding split.
+ 18, 16, 14, 12,
+ -- Start of first text. Should not generate a split.
+ 10,
+ -- Out of bounds. Should not generate any splits.
+ 5, 0, -1
]
+exampleBreakPoints :: ExampleContainer -> [Int]
+exampleBreakPoints c =
+ dropWhile (>= l) $ takeWhile (>= 0) $ map (subtract d) $ exampleBreaks
+ where
+ l = lengthWord8 $ cText c
+ d = cOffset c
+
+isSpace :: Char -> Bool
+isSpace = (==' ')
+
spec :: Spec
spec = do
- describe "splitTextsAt8" $ do
- it "negative value splits at beginning of first run" $ do
- splitTextsAt8 (-1) inputRuns `shouldBe` ([], inputRuns)
- it "zero splits at beginning of first run" $ do
- splitTextsAt8 0 inputRuns `shouldBe` ([], inputRuns)
- it "splits in first run" $ do
- splitTextsAt8 11 inputRuns `shouldBe`
- (
- [ Run
- { runOffsetInSpan = 0
- , runText = pack "Vikipedija "
- , runDirection = Just DirLTR
- , runScript = Just "Latn"
- }
- ]
- ,
- [ Run
- { runOffsetInSpan = 11
- , runText = pack "("
- , runDirection = Just DirLTR
- , runScript = Just "Latn"
- }
- , Run
- { runOffsetInSpan = 12
- , runText = pack "Википедија)"
- , runDirection = Just DirLTR
- , runScript = Just "Cyrl"
- }
- ]
- )
- it "split at run edges does not generate extra run" $ do
- splitTextsAt8 12 inputRuns `shouldBe`
- (take 1 inputRuns, drop 1 inputRuns)
- it "splits in second run" $ do
- splitTextsAt8 20 inputRuns `shouldBe`
- (
- [ Run
- { runOffsetInSpan = 0
- , runText = pack "Vikipedija ("
- , runDirection = Just DirLTR
- , runScript = Just "Latn"
- }
- , Run
- { runOffsetInSpan = 12
- , runText = pack "Вики"
- , runDirection = Just DirLTR
- , runScript = Just "Cyrl"
- }
- ]
- ,
- [ Run
- { runOffsetInSpan = 20
- , runText = pack "педија)"
- , runDirection = Just DirLTR
- , runScript = Just "Cyrl"
- }
- ]
- )
- it "split at end does not generate extra run" $ do
- splitTextsAt8 33 inputRuns `shouldBe` (inputRuns, [])
- it "large value splits at end of last run" $ do
- splitTextsAt8 999 inputRuns `shouldBe` (inputRuns, [])
+ describe "splitTextsBy" $ do
+ it "splits example text containers" $ do
+ splitTextsBy exampleBreakPoints exampleContainers `shouldBe`
+ [ ( [ contain "Vikipedija " 10, contain "(Википеди" 21 ]
+ , [ contain "ја)" 38 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Википе" 21 ]
+ , [ contain "дија)" 34 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Вики" 21 ]
+ , [ contain "педија)" 30 ]
+ )
+ , ( [ contain "Vikipedija " 10, contain "(Ви" 21 ]
+ , [ contain "кипедија)" 26 ]
+ )
+ , ( [ contain "Vikipedija " 10 ]
+ , [ contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vikipedi" 10 ]
+ , [ contain "ja " 18, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vikipe" 10 ]
+ , [ contain "dija " 16, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Viki" 10 ]
+ , [ contain "pedija " 14, contain "(Википедија)" 21 ]
+ )
+ , ( [ contain "Vi" 10 ]
+ , [ contain "kipedija " 12, contain "(Википедија)" 21 ]
+ )
+ ]
describe "trimTextsEnd" $ do
describe "isSpace" $ do
it "does nothing on an empty list" $ do