From 3d78355411f15c6d8ebf001921a5167657992712 Mon Sep 17 00:00:00 2001 From: Jaro Date: Tue, 28 Feb 2023 16:33:16 +0100 Subject: [PATCH] Minimise working with Data.Text.Internal. --- src/Data/Text/ParagraphLayout/Plain.hs | 41 +++++++++++-------- src/Data/Text/ParagraphLayout/ResolvedSpan.hs | 3 +- src/Data/Text/ParagraphLayout/Run.hs | 31 ++++++++++---- .../Text/ParagraphLayout/TextContainer.hs | 17 ++++---- test/Data/Text/ParagraphLayout/RunSpec.hs | 9 ++-- test/Data/Text/ParagraphLayout/SpanData.hs | 3 ++ .../Text/ParagraphLayout/TextContainerSpec.hs | 24 +++++++---- 7 files changed, 80 insertions(+), 48 deletions(-) diff --git a/src/Data/Text/ParagraphLayout/Plain.hs b/src/Data/Text/ParagraphLayout/Plain.hs index 7689530..1fab9c7 100644 --- a/src/Data/Text/ParagraphLayout/Plain.hs +++ b/src/Data/Text/ParagraphLayout/Plain.hs @@ -101,7 +101,8 @@ instance Functor WithSpan where instance TextContainer a => TextContainer (WithSpan a) where getText (WithSpan _ c) = getText c - setText t (WithSpan rs c) = WithSpan rs (setText t c) + splitTextAt8 n (WithSpan rs c) = (WithSpan rs c1, WithSpan rs c2) + where (c1, c2) = splitTextAt8 n c splitBySpanIndex :: [WithSpan a] -> [[a]] splitBySpanIndex xs = [getBySpanIndex i xs | i <- [0..]] @@ -112,6 +113,10 @@ getBySpanIndex idx xs = map contents $ filter matchingIndex $ xs matchingIndex (WithSpan rs _) = (RS.spanIndex rs) == idx contents (WithSpan _ x) = x +runOffsetInParagraph :: WithSpan Run -> I8 +runOffsetInParagraph (WithSpan rs run) = + runOffsetInSpan run + RS.spanOffsetInParagraph rs + spanRects :: SpanLayout -> [Rect Int32] spanRects (SpanLayout frags) = map fragmentRect frags @@ -258,8 +263,7 @@ nextBreakPoint :: Int -> [WithSpan Run] -> Int nextBreakPoint _ [] = 0 nextBreakPoint limit runs@(headRun:_) = fromMaybe 0 $ listToMaybe points where - -- TODO: Try to limit deconstruction of texts for offsets. - (Text _ firstRunOffset _) = getText headRun + firstRunOffset = fromIntegral $ runOffsetInParagraph headRun points = dropWhile (>= limit) $ breakPoints firstRunOffset $ reverse runs @@ -267,8 +271,7 @@ breakPoints :: Int -> [WithSpan Run] -> [Int] breakPoints _ [] = [] breakPoints firstRunOffset (x:xs) = offsets ++ rest where - -- TODO: Try to limit deconstruction of texts for offsets. - (Text _ thisRunOffset _) = getText x + thisRunOffset = fromIntegral $ runOffsetInParagraph x offsets = map (correctOffset . fst) (runLineBreaks x) correctOffset = (+ (thisRunOffset - firstRunOffset)) rest = breakPoints firstRunOffset xs @@ -314,22 +317,20 @@ shapeRun (WithSpan rs run) = shape font buffer features resolveSpans :: Paragraph -> [RS.ResolvedSpan] resolveSpans p@(Paragraph arr off spans opts) = do - let (end, texts) = cuts arr off spans + let (end, textsAndMarks) = cutsAndMarks arr off spans let indexes = [0..] - let paragraphStart = fromIntegral off - (s, t, i) <- zip3 spans texts indexes + (s, (o, t), i) <- zip3 spans textsAndMarks indexes let lang = spanLanguage s let breaks = paragraphLineBreaks p end lang - -- TODO: Try to limit deconstruction of texts for offsets. - let (Text _ spanStart _) = t return RS.ResolvedSpan { RS.spanIndex = i + , RS.spanOffsetInParagraph = o - off , RS.spanText = t , RS.spanFont = paragraphFont opts , RS.spanLineHeight = paragraphLineHeight opts , RS.spanLanguage = lang - , RS.spanLineBreaks = subOffsetsDesc (spanStart - paragraphStart) breaks + , RS.spanLineBreaks = subOffsetsDesc (fromIntegral $ o - off) breaks } paragraphLineBreaks :: Paragraph -> I8 -> String -> [(Int, BreakStatus.Line)] @@ -340,13 +341,10 @@ paragraphLineBreaks (Paragraph arr off _ _) end lang = runLineBreaks :: WithSpan Run -> [(Int, BreakStatus.Line)] runLineBreaks (WithSpan rs run) = dropWhile (not . valid) $ - subOffsetsDesc (runStart - spanStart) $ RS.spanLineBreaks rs + subOffsetsDesc (fromIntegral $ runOffsetInSpan run) $ RS.spanLineBreaks rs where valid (off, _) = off < runLength runLength = lengthWord8 $ getText run - -- TODO: Try to limit deconstruction of texts for offsets. - (Text _ runStart _) = getText run - (Text _ spanStart _) = getText rs -- TODO: Identify and correct for differences between the two. localeFromLanguage :: String -> LocaleName @@ -354,11 +352,18 @@ localeFromLanguage x = Locale x -- | Given an underlying `Array`, an initial offset, and a list of consecutive -- `Span`s, produce a list of `Text`s corresponding to the given spans, as well --- as the offset of the end of the last `Text`. +-- as the offset of the start of each `Text` and the end of the last `Text`. -- -- TODO: Consider adding checks for array bounds. -cuts :: Array -> I8 -> [Span] -> (I8, [Text]) -cuts arr initialOffset spans = mapAccumL (cut arr) initialOffset spans +cutsAndMarks :: Array -> I8 -> [Span] -> (I8, [(I8, Text)]) +cutsAndMarks arr initialOffset spans = + mapAccumL (cutAndMark arr) initialOffset spans + +-- | Like `cut`, but also include the starting offset in the output. +cutAndMark :: Array -> I8 -> Span -> (I8, (I8, Text)) +cutAndMark arr off s = (end, (off, t)) + where + (end, t) = cut arr off s -- | Produce a `Text`, defined by an initial offset and a `Span`, out of the -- underlying `Array`. diff --git a/src/Data/Text/ParagraphLayout/ResolvedSpan.hs b/src/Data/Text/ParagraphLayout/ResolvedSpan.hs index 445f729..23ac14d 100644 --- a/src/Data/Text/ParagraphLayout/ResolvedSpan.hs +++ b/src/Data/Text/ParagraphLayout/ResolvedSpan.hs @@ -2,6 +2,7 @@ module Data.Text.ParagraphLayout.ResolvedSpan (ResolvedSpan(..)) where import Data.Text (Text) +import Data.Text.Foreign (I8) import Data.Text.Glyphize (Font) import qualified Data.Text.ICU as BreakStatus (Line) @@ -12,6 +13,7 @@ import Data.Text.ParagraphLayout.TextContainer -- other spans across the paragraph. data ResolvedSpan = ResolvedSpan { spanIndex :: Int + , spanOffsetInParagraph :: I8 , spanText :: Text , spanFont :: Font , spanLineHeight :: LineHeight @@ -25,4 +27,3 @@ instance Eq ResolvedSpan where instance TextContainer ResolvedSpan where getText = spanText - setText t s = s { spanText = t } diff --git a/src/Data/Text/ParagraphLayout/Run.hs b/src/Data/Text/ParagraphLayout/Run.hs index ef7f445..ad14e8a 100644 --- a/src/Data/Text/ParagraphLayout/Run.hs +++ b/src/Data/Text/ParagraphLayout/Run.hs @@ -1,8 +1,10 @@ module Data.Text.ParagraphLayout.Run (Run(..), spanToRuns) where +import Data.List (mapAccumL) import Data.List.NonEmpty (NonEmpty((:|))) import Data.Text (Text) +import Data.Text.Foreign (I8, dropWord8, lengthWord8, takeWord8) import Data.Text.Glyphize (Direction(..)) import qualified Data.Text.ICU.Char as ICUChar import Data.Text.Script (charScript) @@ -19,7 +21,8 @@ type ScriptCode = String -- Each run could have a different script, language, or direction. -- data Run = Run - { runText :: Text + { runOffsetInSpan :: I8 + , runText :: Text , runDirection :: Maybe Direction , runScript :: Maybe ScriptCode } @@ -27,7 +30,15 @@ data Run = Run instance TextContainer Run where getText = runText - setText t r = r { runText = t } + splitTextAt8 n r = + ( r { runText = t1 } + , r { runText = t2, runOffsetInSpan = runOffsetInSpan r + l1 } + ) + where + l1 = fromIntegral (lengthWord8 t1) + t1 = takeWord8 n t + t2 = dropWord8 n t + t = getText r type ProtoRun = (Zipper, Maybe Direction, ScriptCode) @@ -54,14 +65,18 @@ considerNext z = case next z of data Merged a = Incompatible | Merged a spanToRuns :: ResolvedSpan -> [Run] -spanToRuns s = map run $ protoRuns zipper +spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper where zipper = start $ spanText s - run (z, d, sc) = Run - { runText = preceding z - , runDirection = d - , runScript = Just sc - } + run acc (z, d, sc) = let t = preceding z in + ( acc + lengthWord8 t + , Run + { runOffsetInSpan = fromIntegral acc + , runText = t + , runDirection = d + , runScript = Just sc + } + ) protoRuns :: Zipper -> [ProtoRun] protoRuns z = reverse $ protoRuns' z [] diff --git a/src/Data/Text/ParagraphLayout/TextContainer.hs b/src/Data/Text/ParagraphLayout/TextContainer.hs index 65e49d4..171274b 100644 --- a/src/Data/Text/ParagraphLayout/TextContainer.hs +++ b/src/Data/Text/ParagraphLayout/TextContainer.hs @@ -1,26 +1,23 @@ module Data.Text.ParagraphLayout.TextContainer (TextContainer ,getText - ,setText ,splitTextAt8 ,splitTextsAt8 ) where import Data.Text (Text) -import Data.Text.Foreign (I8, dropWord8, lengthWord8, takeWord8) +import Data.Text.Foreign (I8, lengthWord8) class TextContainer a where + + -- | Unwrap text from the container. getText :: a -> Text - setText :: Text -> a -> a --- | Split a text container at the given number of `Word8` units --- from its beginning. -splitTextAt8 :: TextContainer a => I8 -> a -> (a, a) -splitTextAt8 n r = (setText text1 r, setText text2 r) - where - text1 = takeWord8 n $ getText r - text2 = dropWord8 n $ getText r + -- | Split a text container at the given number of `Word8` units + -- from its beginning. + splitTextAt8 :: I8 -> a -> (a, a) + splitTextAt8 _ _ = error "container cannot be split" -- | Treat a list of text containers as a contiguous sequence, -- and make a split at the given number of `Word8` from the beginning diff --git a/test/Data/Text/ParagraphLayout/RunSpec.hs b/test/Data/Text/ParagraphLayout/RunSpec.hs index dd7d361..a8752a3 100644 --- a/test/Data/Text/ParagraphLayout/RunSpec.hs +++ b/test/Data/Text/ParagraphLayout/RunSpec.hs @@ -19,7 +19,8 @@ spec = do let runs = spanToRuns inputSpan runs `shouldBe` [ Run - { runText = spanText inputSpan + { runOffsetInSpan = 0 + , runText = spanText inputSpan , runDirection = Just DirLTR , runScript = Just "Latn" } @@ -30,12 +31,14 @@ spec = do runs `shouldBe` [ Run -- TODO: We might want both parentheses in the same run. - { runText = pack "Vikipedija (" + { runOffsetInSpan = 0 + , runText = pack "Vikipedija (" , runDirection = Just DirLTR , runScript = Just "Latn" } , Run - { runText = pack "Википедија)" + { runOffsetInSpan = 12 + , runText = pack "Википедија)" , runDirection = Just DirLTR , runScript = Just "Cyrl" } diff --git a/test/Data/Text/ParagraphLayout/SpanData.hs b/test/Data/Text/ParagraphLayout/SpanData.hs index fc1a825..47fb163 100644 --- a/test/Data/Text/ParagraphLayout/SpanData.hs +++ b/test/Data/Text/ParagraphLayout/SpanData.hs @@ -13,6 +13,7 @@ import Data.Text.ParagraphLayout.ResolvedSpan (ResolvedSpan(..)) emptySpan :: Font -> ResolvedSpan emptySpan font = ResolvedSpan { spanIndex = 0 + , spanOffsetInParagraph = 0 , spanText = pack "" , spanFont = font , spanLineHeight = Normal @@ -23,6 +24,7 @@ emptySpan font = ResolvedSpan czechHello :: Font -> ResolvedSpan czechHello font = ResolvedSpan { spanIndex = 0 + , spanOffsetInParagraph = 0 , spanText = pack "Ahoj, světe!" , spanFont = font , spanLineHeight = Normal @@ -33,6 +35,7 @@ czechHello font = ResolvedSpan serbianMixedScript :: Font -> ResolvedSpan serbianMixedScript font = ResolvedSpan { spanIndex = 0 + , spanOffsetInParagraph = 0 , spanText = pack "Vikipedija (Википедија)" , spanFont = font , spanLineHeight = Normal diff --git a/test/Data/Text/ParagraphLayout/TextContainerSpec.hs b/test/Data/Text/ParagraphLayout/TextContainerSpec.hs index b4cdeb8..17a866b 100644 --- a/test/Data/Text/ParagraphLayout/TextContainerSpec.hs +++ b/test/Data/Text/ParagraphLayout/TextContainerSpec.hs @@ -11,12 +11,14 @@ inputRuns :: [Run] inputRuns = [ Run -- TODO: We might want both parentheses in the same run. - { runText = pack "Vikipedija (" + { runOffsetInSpan = 0 + , runText = pack "Vikipedija (" , runDirection = Just DirLTR , runScript = Just "Latn" } , Run - { runText = pack "Википедија)" + { runOffsetInSpan = 12 + , runText = pack "Википедија)" , runDirection = Just DirLTR , runScript = Just "Cyrl" } @@ -33,19 +35,22 @@ spec = do splitTextsAt8 11 inputRuns `shouldBe` ( [ Run - { runText = pack "Vikipedija " + { runOffsetInSpan = 0 + , runText = pack "Vikipedija " , runDirection = Just DirLTR , runScript = Just "Latn" } ] , [ Run - { runText = pack "(" + { runOffsetInSpan = 11 + , runText = pack "(" , runDirection = Just DirLTR , runScript = Just "Latn" } , Run - { runText = pack "Википедија)" + { runOffsetInSpan = 12 + , runText = pack "Википедија)" , runDirection = Just DirLTR , runScript = Just "Cyrl" } @@ -58,19 +63,22 @@ spec = do splitTextsAt8 20 inputRuns `shouldBe` ( [ Run - { runText = pack "Vikipedija (" + { runOffsetInSpan = 0 + , runText = pack "Vikipedija (" , runDirection = Just DirLTR , runScript = Just "Latn" } , Run - { runText = pack "Вики" + { runOffsetInSpan = 12 + , runText = pack "Вики" , runDirection = Just DirLTR , runScript = Just "Cyrl" } ] , [ Run - { runText = pack "педија)" + { runOffsetInSpan = 20 + , runText = pack "педија)" , runDirection = Just DirLTR , runScript = Just "Cyrl" } -- 2.30.2