From cb8b67f9026a8a1237a02bf6f1991d1a36a53649 Mon Sep 17 00:00:00 2001 From: Jaro Date: Sat, 11 Mar 2023 01:11:53 +0100 Subject: [PATCH] Allow breaking at character bounds. --- .golden/czechHelloParagraphNarrow/golden | 19 ++++++ .../Text/ParagraphLayout/Internal/Plain.hs | 67 ++++++++++++++----- .../ParagraphLayout/Internal/ResolvedSpan.hs | 2 + .../ParagraphLayout/Internal/BreakSpec.hs | 4 +- test/Data/Text/ParagraphLayoutSpec.hs | 7 ++ 5 files changed, 81 insertions(+), 18 deletions(-) create mode 100644 .golden/czechHelloParagraphNarrow/golden diff --git a/.golden/czechHelloParagraphNarrow/golden b/.golden/czechHelloParagraphNarrow/golden new file mode 100644 index 0000000..b282aa0 --- /dev/null +++ b/.golden/czechHelloParagraphNarrow/golden @@ -0,0 +1,19 @@ +ParagraphLayout {paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 1234, y_size = -5605}, spanLayouts = [ + SpanLayout [Fragment {fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 1234, y_size = -1121}, fragmentPen = (0,-932), fragmentGlyphs = + [(GlyphInfo {codepoint = 36, cluster = 5, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 663, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 75, cluster = 6, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 571, y_advance = 0, x_offset = 0, y_offset = 0})] + }, Fragment {fragmentRect = Rect {x_origin = 0, y_origin = -1121, x_size = 1089, y_size = -1121}, fragmentPen = (0,-932), fragmentGlyphs = + [(GlyphInfo {codepoint = 82, cluster = 7, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 590, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 77, cluster = 8, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 253, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 15, cluster = 9, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 246, y_advance = 0, x_offset = 0, y_offset = 0})] + }, Fragment {fragmentRect = Rect {x_origin = 0, y_origin = -2242, x_size = 948, y_size = -1121}, fragmentPen = (0,-932), fragmentGlyphs = + [(GlyphInfo {codepoint = 86, cluster = 11, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 446, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 89, cluster = 12, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 502, y_advance = 0, x_offset = 0, y_offset = 0})] + }, Fragment {fragmentRect = Rect {x_origin = 0, y_origin = -3363, x_size = 961, y_size = -1121}, fragmentPen = (0,-932), fragmentGlyphs = + [(GlyphInfo {codepoint = 246, cluster = 13, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 559, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 87, cluster = 15, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 402, y_advance = 0, x_offset = 0, y_offset = 0})] + }, Fragment {fragmentRect = Rect {x_origin = 0, y_origin = -4484, x_size = 835, y_size = -1121}, fragmentPen = (0,-932), fragmentGlyphs = + [(GlyphInfo {codepoint = 72, cluster = 16, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 559, y_advance = 0, x_offset = 0, y_offset = 0}), + (GlyphInfo {codepoint = 4, cluster = 17, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False},GlyphPos {x_advance = 276, y_advance = 0, x_offset = 0, y_offset = 0})] + }] +]} diff --git a/src/Data/Text/ParagraphLayout/Internal/Plain.hs b/src/Data/Text/ParagraphLayout/Internal/Plain.hs index 53ce413..86e0cdd 100644 --- a/src/Data/Text/ParagraphLayout/Internal/Plain.hs +++ b/src/Data/Text/ParagraphLayout/Internal/Plain.hs @@ -24,7 +24,7 @@ import Data.Text.Glyphize ,fontExtentsForDir ,shape ) -import Data.Text.ICU (LocaleName(Locale), breakLine) +import Data.Text.ICU (LocaleName(Locale), breakCharacter, breakLine) import qualified Data.Text.ICU as BreakStatus (Line) import Data.Text.Internal (Text(Text)) import qualified Data.Text.Lazy as Lazy @@ -160,10 +160,10 @@ layoutAndWrapRunsH maxWidth runs = NonEmpty.head $ validLayouts layouts = NonEmpty.map layoutFst splits layoutFst (runs1, runs2) = (layout runs1, runs2) layout runs1 = layoutRunsH $ trimTextsEnd isEndSpace runs1 - splits = noSplit :| (wordSplits ++ [lastResortSplit]) + splits = noSplit :| (lSplits ++ cSplits) noSplit = (runs, []) - wordSplits = (filter hasContent $ breakSplits [] (reverse runs)) - lastResortSplit = splitTextsAt8 1 runs + lSplits = filter hasContent $ lineSplits runs + cSplits = filter hasContent $ characterSplits runs hasContent = not . null . fst -- | The suffix remaining after removing the longest prefix of the list for @@ -189,24 +189,38 @@ totalAdvances pfs = sum $ map (\(WithSpan _ pf) -> PF.advance pf) pfs -- into two on a valid line-breaking boundary, including the start of the first -- run and excluding the end of the last run. -- --- The first input list is the suffix consisting of runs that have already been --- considered for breaking. These will be appended to the output suffix as they --- are. --- --- The second input list is the prefix consisting of runs to be considered for --- breaking, in reverse order. --- -- The results in the form (prefix, suffix) will be ordered from the longest -- prefix to shortest. -breakSplits :: [WithSpan Run] -> [WithSpan Run] -> +lineSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])] +lineSplits xs = lineSplits' [] (reverse xs) + +lineSplits' :: [WithSpan Run] -> [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])] -breakSplits _ [] = [] -breakSplits closed (x:xs) = splits ++ breakSplits (x:closed) xs +lineSplits' _ [] = [] +lineSplits' closed (x:xs) = splits ++ lineSplits' (x:closed) xs where splits = map mapFunc $ runLineSplits x mapFunc ((x1, x2), _) = (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed) +-- | Recursive function for finding all possible ways to split a list of runs +-- into two on a character boundary, including the start of the first run and +-- excluding the end of the last run. +-- +-- The results in the form (prefix, suffix) will be ordered from the longest +-- prefix to shortest. +characterSplits :: [WithSpan Run] -> [([WithSpan Run], [WithSpan Run])] +characterSplits xs = characterSplits' [] (reverse xs) + +characterSplits' :: [WithSpan Run] -> [WithSpan Run] -> + [([WithSpan Run], [WithSpan Run])] +characterSplits' _ [] = [] +characterSplits' closed (x:xs) = splits ++ characterSplits' (x:closed) xs + where + splits = map mapFunc $ runCharacterSplits x + mapFunc ((x1, x2), _) = + (reverse $ collapse $ x1 :| xs, collapse $ x2 :| closed) + -- | Calculate layout for the given horizontal run and attach extra information. layoutRunH :: WithSpan Run -> WithSpan PF.ProtoFragment layoutRunH (WithSpan rs run) = WithSpan rs pf @@ -243,7 +257,8 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do <*> ZipList sStarts <*> ZipList sLengths let lang = spanLanguage $ spanOptions s - let breaks = paragraphLineBreaks p pEnd lang + let lBreaks = paragraphLineBreaks p pEnd lang + let cBreaks = paragraphCharacterBreaks p pEnd lang return RS.ResolvedSpan { RS.spanIndex = i , RS.spanOffsetInParagraph = sStart - pStart @@ -252,7 +267,8 @@ resolveSpans p@(Paragraph arr pStart spans pOpts) = do , RS.spanFont = paragraphFont pOpts , RS.spanLineHeight = paragraphLineHeight pOpts , RS.spanLanguage = lang - , RS.spanLineBreaks = subOffsetsDesc (sStart - pStart) breaks + , RS.spanLineBreaks = subOffsetsDesc (sStart - pStart) lBreaks + , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks } paragraphLineBreaks :: Paragraph -> Int -> String -> [(Int, BreakStatus.Line)] @@ -261,6 +277,12 @@ paragraphLineBreaks (Paragraph arr off _ _) end lang = where paragraphText = Text arr off (end - off) +paragraphCharacterBreaks :: Paragraph -> Int -> String -> [(Int, ())] +paragraphCharacterBreaks (Paragraph arr off _ _) end lang = + breaksDesc (breakCharacter (localeFromLanguage lang)) paragraphText + where + paragraphText = Text arr off (end - off) + -- | Split the given run at every valid line break position. runLineSplits :: WithSpan Run -> [((WithSpan Run, WithSpan Run), BreakStatus.Line)] @@ -275,6 +297,19 @@ runLineBreaks (WithSpan rs run) = dropWhile (not . valid) $ valid (off, _) = off < runLength runLength = lengthWord8 $ getText run +-- | Split the given run before every character. +runCharacterSplits :: WithSpan Run -> [((WithSpan Run, WithSpan Run), ())] +runCharacterSplits r = map split $ runCharacterBreaks r + where + split (i, ()) = (splitTextAt8 i r, ()) + +runCharacterBreaks :: WithSpan Run -> [(Int, ())] +runCharacterBreaks (WithSpan rs run) = dropWhile (not . valid) $ + subOffsetsDesc (runOffsetInSpan run) $ RS.spanCharacterBreaks rs + where + valid (off, _) = off < runLength + runLength = lengthWord8 $ getText run + -- | Predicate for characters that can be potentially removed from the end of -- a line according to the CSS Text Module. isEndSpace :: Char -> Bool diff --git a/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs b/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs index 26e3f1d..48531a8 100644 --- a/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs +++ b/src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs @@ -22,6 +22,8 @@ data ResolvedSpan = ResolvedSpan , spanLineHeight :: LineHeight , spanLanguage :: String , spanLineBreaks :: [(Int, BreakStatus.Line)] + -- TODO: Can be optimised by starting with the shortest line break. + , spanCharacterBreaks :: [(Int, ())] } deriving (Show) diff --git a/test/Data/Text/ParagraphLayout/Internal/BreakSpec.hs b/test/Data/Text/ParagraphLayout/Internal/BreakSpec.hs index d37289b..544a03f 100644 --- a/test/Data/Text/ParagraphLayout/Internal/BreakSpec.hs +++ b/test/Data/Text/ParagraphLayout/Internal/BreakSpec.hs @@ -98,8 +98,8 @@ spec = do ,(0, BreakStatus.Uncategorized) ] - -- Probably not useful for Balkón; - -- HarfBuzz takes care of identifying character clusters for us. + -- Useful for breaking inside words for narrow output. + -- This can result in breaking ligatures. describe "breakCharacter" $ do let b lang = breaksDesc $ breakCharacter (Locale lang) diff --git a/test/Data/Text/ParagraphLayoutSpec.hs b/test/Data/Text/ParagraphLayoutSpec.hs index 2ef5a24..3d36034 100644 --- a/test/Data/Text/ParagraphLayoutSpec.hs +++ b/test/Data/Text/ParagraphLayoutSpec.hs @@ -134,6 +134,13 @@ spec = do (opts font) { paragraphLineHeight = Absolute 599 } result `shouldBeGolden` "lineHeightSmaller" + it "wraps mid-word when line is narrow" $ \font -> do + let + result = layoutPlain $ czechHelloParagraph $ + (opts font) + { paragraphMaxWidth = 1300 } + result `shouldBeGolden` "czechHelloParagraphNarrow" + -- TODO test breaking ligatures it "wraps by characters when line is ultra narrow" $ \font -> do let result = layoutPlain $ czechHelloParagraph $ -- 2.30.2