~jaro/balkon: Integrate BiDi levels with spanToRuns algorithm.

10 files changed, 129 insertions(+), 60 deletions(-)

M .golden/paragraphLayout/devanagariAccent.golden
M .golden/paragraphLayout/devanagariPrefixedAccent.golden
M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs
M src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs
M src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs
M src/Data/Text/ParagraphLayout/Internal/Rich.hs
M src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs
M src/Data/Text/ParagraphLayout/Internal/Run.hs
M test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs
M test/Data/Text/ParagraphLayout/Internal/RunSpec.hs

M .golden/paragraphLayout/devanagariAccent.golden => .golden/paragraphLayout/devanagariAccent.golden +3 -3

@@ 1,5 1,5 @@
 ParagraphLayout
-    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
+    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
     , spanLayouts = [
         SpanLayout
         [ Fragment


@@ 8,8 8,8 @@ ParagraphLayout
             , fragmentAncestorBoxes =
                 [ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
                 ]
-            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
-            , fragmentPen = (0, -500)
+            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
+            , fragmentPen = (0, -1171)
             , fragmentGlyphs =
                 [ (GlyphInfo {codepoint = 529, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 645, y_advance = 0, x_offset = 0, y_offset = 0})
                 , (GlyphInfo {codepoint = 424, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})

M .golden/paragraphLayout/devanagariPrefixedAccent.golden => .golden/paragraphLayout/devanagariPrefixedAccent.golden +3 -3

@@ 1,5 1,5 @@
 ParagraphLayout
-    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
+    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
     , spanLayouts = [
         SpanLayout
         [ Fragment


@@ 8,8 8,8 @@ ParagraphLayout
             , fragmentAncestorBoxes =
                 [ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
                 ]
-            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
-            , fragmentPen = (0, -500)
+            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
+            , fragmentPen = (0, -1171)
             , fragmentGlyphs =
                 [ (GlyphInfo {codepoint = 424, cluster = 1, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})
                 ]

M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs => src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +19 -0

@@ 2,7 2,11 @@ module Data.Text.ParagraphLayout.Internal.BiDiLevels
     ( Level
     , TextLevels (TextLevels)
     , WithLevel
+    , dropLevels
+    , headLevel
     , level
+    , levelDirectionH
+    , tailLevels
     , textLevels
     )
 where


@@ 29,6 33,15 @@ class WithLevel a where
 newtype TextLevels = TextLevels [Level]
     deriving (Eq, Show)
 
+headLevel :: TextLevels -> Level
+headLevel (TextLevels xs) = head xs
+
+tailLevels :: TextLevels -> TextLevels
+tailLevels (TextLevels xs) = TextLevels (tail xs)
+
+dropLevels :: Int -> TextLevels -> TextLevels
+dropLevels n (TextLevels xs) = TextLevels (drop n xs)
+
 -- | Determine the BiDi level of each character in the input text.
 --
 -- TODO: Use Haskell bindings to the ICU BiDi implementation once available,


@@ 81,6 94,12 @@ strongDirection c = case ICUChar.direction c of
 firstJust :: [Maybe a] -> Maybe a
 firstJust = listToMaybe . catMaybes
 
+-- | Convert embedding level to horizontal text direction.
+levelDirectionH :: Level -> Direction
+levelDirectionH lvl
+    | even lvl = DirLTR
+    | otherwise = DirRTL
+
 -- | Convert text direction to the smallest corresponding embedding level,
 -- but no smaller than the given minimum.
 directionLevel :: Level -> Direction -> Level

M src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs => src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs +3 -3

@@ 1,8 1,7 @@
 module Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (..))
 where
 
-import Data.Text.Glyphize (Direction)
-
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.Script
 import Data.Text.ParagraphLayout.Internal.Zipper
 


@@ 10,6 9,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
 -- `Data.Text.ParagraphLayout.Internal.Run.Run` values.
 data ProtoRun = ProtoRun
     { zipper :: Zipper
-    , direction :: Maybe Direction
+    , followingLevels :: TextLevels
+    , level :: Level
     , script :: ScriptCode
     }

M src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs => src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs +2 -0

@@ 4,6 4,7 @@ where
 import Data.Text (Text)
 import qualified Data.Text.ICU as BreakStatus (Line)
 
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.ResolvedBox
 import Data.Text.ParagraphLayout.Internal.TextContainer
 import Data.Text.ParagraphLayout.Internal.TextOptions


@@ 17,6 18,7 @@ data ResolvedSpan d = ResolvedSpan
     , spanText :: Text
     , spanTextOptions :: TextOptions
     , spanBoxes :: [ResolvedBox d]
+    , spanBiDiLevels :: TextLevels
     , spanLineBreaks :: [(Int, BreakStatus.Line)]
     -- TODO: Can be optimised by starting with the shortest line break.
     , spanCharacterBreaks :: [(Int, ())]

M src/Data/Text/ParagraphLayout/Internal/Rich.hs => src/Data/Text/ParagraphLayout/Internal/Rich.hs +12 -1

@@ 5,8 5,10 @@ import Control.Applicative (ZipList (ZipList), getZipList)
 import Data.List.NonEmpty (nonEmpty)
 import qualified Data.List.NonEmpty as NonEmpty
 import Data.Text (Text)
+import qualified Data.Text as Text
 import Data.Text.ICU (Breaker, LocaleName, breakCharacter, breakLine)
 
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.Break
 import Data.Text.ParagraphLayout.Internal.Fragment
 import Data.Text.ParagraphLayout.Internal.Layout


@@ 43,7 45,9 @@ spanToRunsWrapped :: RS.ResolvedSpan d -> [WithSpan d Run]
 spanToRunsWrapped s = map (WithSpan s) (spanToRuns s)
 
 resolveSpans :: Paragraph d -> [RS.ResolvedSpan d]
-resolveSpans p@(Paragraph _ pStart root _) = do
+resolveSpans p = do
+    let Paragraph _ pStart root _ = p
+    let RootBox (Box _ rootTextOpts) = root
     let leaves = flatten root
     let sTexts = paragraphSpanTexts p
     let sBounds = paragraphSpanBounds p


@@ 57,8 61,14 @@ resolveSpans p@(Paragraph _ pStart root _) = do
         <*> ZipList sTexts
     let (TextLeaf userData _ textOpts boxes) = leaf
     let lang = textLanguage textOpts
+    -- TODO: Allow BiDi embedding/isolation for inner nodes.
+    let pLevels = textLevels (textDirection rootTextOpts) pText
     let lBreaks = paragraphBreaks breakLine pText lang
     let cBreaks = paragraphBreaks breakCharacter pText lang
+    -- TODO: Optimise. This has time complexity O(n*s), where n is number of
+    --       characters and s is number of resolved spans.
+    --       Maybe include byte offsets in the TextLevels data structure?
+    let pPrefixLen = Text.length $ paragraphPrefix p sStart
     return RS.ResolvedSpan
         { RS.spanUserData = userData
         , RS.spanIndex = i


@@ 67,6 77,7 @@ resolveSpans p@(Paragraph _ pStart root _) = do
         , RS.spanText = sText
         , RS.spanTextOptions = textOpts
         , RS.spanBoxes = boxes
+        , RS.spanBiDiLevels = dropLevels pPrefixLen pLevels
         , RS.spanLineBreaks = subOffsetsDesc (sStart - pStart) lBreaks
         , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks
         }

M src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs => src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs +9 -0

@@ 1,6 1,7 @@
 module Data.Text.ParagraphLayout.Internal.Rich.Paragraph
     ( Paragraph (..)
     , constructParagraph
+    , paragraphPrefix
     , paragraphSpanBounds
     , paragraphSpanTexts
     , paragraphText


@@ 111,3 112,11 @@ paragraphText p@(Paragraph arr _ _ _) = Text arr start (end - start)
         start = NonEmpty.head sBounds
         end = NonEmpty.last sBounds
         sBounds = paragraphSpanBounds p
+
+-- | Turn all text nodes from the input `Paragraph` up to the given end offset
+-- into one combined `Text`. No bounds checking is performed!
+paragraphPrefix :: Paragraph d -> Int -> Text
+paragraphPrefix p@(Paragraph arr _ _ _) end = Text arr start (end - start)
+    where
+        start = NonEmpty.head sBounds
+        sBounds = paragraphSpanBounds p

M src/Data/Text/ParagraphLayout/Internal/Run.hs => src/Data/Text/ParagraphLayout/Internal/Run.hs +29 -34

@@ 7,8 7,8 @@ import Data.Text (Text)
 import qualified Data.Text as Text
 import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
 import Data.Text.Glyphize (Direction (..))
-import qualified Data.Text.ICU.Char as ICUChar
 
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (ProtoRun))
 import qualified Data.Text.ParagraphLayout.Internal.ProtoRun as PR
 import Data.Text.ParagraphLayout.Internal.ResolvedSpan


@@ 22,6 22,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
 data Run = Run
     { runOffsetInSpan :: Int
     , runText :: Text
+    , runLevel :: Level
     , runDirection :: Maybe Direction
     , runScript :: Maybe ScriptCode
     }


@@ 73,56 74,58 @@ considerNext z = case next z of
 data Merged a = Incompatible | Merged a
 
 spanToRuns :: ResolvedSpan d -> [Run]
-spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper
+spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper levels
     where
         wholeText = spanText s
         zipper = start wholeText
+        levels = spanBiDiLevels s
         run acc pr = let t = preceding (PR.zipper pr) in
             ( acc + lengthWord8 t
             , Run
                 { runOffsetInSpan = acc
                 , runText = t
-                , runDirection = PR.direction pr
+                , runLevel = PR.level pr
+                , runDirection = Just $ levelDirectionH $ PR.level pr
                 , runScript = Just $ PR.script pr
                 }
             )
 
-protoRuns :: Zipper -> [ProtoRun]
-protoRuns z = reverse $ protoRuns' z []
+protoRuns :: Zipper -> TextLevels -> [ProtoRun]
+protoRuns z bl = reverse $ protoRuns' z bl []
 
-protoRuns' :: Zipper -> [ProtoRun] -> [ProtoRun]
-protoRuns' curZipper curRuns = case considerNext curZipper of
+protoRuns' :: Zipper -> TextLevels -> [ProtoRun] -> [ProtoRun]
+protoRuns' curZipper curLevels curRuns = case considerNext curZipper of
     Nothing -> curRuns
     Just choice ->
-        let headRun :| tailRuns = foldRun choice curRuns
-        in protoRuns' (PR.zipper headRun) (headRun : tailRuns)
+        let headRun :| tailRuns = foldRun (choice, curLevels) curRuns
+        in protoRuns' (PR.zipper headRun) curLevels (headRun : tailRuns)
 
-foldRun :: ZipperChoice -> [ProtoRun] -> NonEmpty ProtoRun
+foldRun :: (ZipperChoice, TextLevels) -> [ProtoRun] -> NonEmpty ProtoRun
 
 -- If there are no runs, create a new run with a single character.
-foldRun x [] = ProtoRun (continuingRun x) d s :| []
+foldRun (x, bl) [] = ProtoRun (continuingRun x) lvls lvl s :| []
     where
-        d = charDirection (nextChar x)
+        lvl = headLevel bl
+        lvls = tailLevels bl
         s = charScript (nextChar x)
 
-foldRun x (previousRun : tailRuns) =
-    case (mergeDirections d1 d2, mergeScripts s1 s2) of
-        (Merged d, Merged s) -> ProtoRun (continuingRun x) d s :| tailRuns
-        _ -> ProtoRun (startingNewRun x) d2 s2 :| previousRun : tailRuns
+foldRun (x, _) (previousRun : tailRuns) =
+    case (mergeLevels l1 l2, mergeScripts s1 s2) of
+        (Merged l, Merged s) ->
+            ProtoRun (continuingRun x) bl' l s :| tailRuns
+        _ ->
+            ProtoRun (startingNewRun x) bl' l2 s2 :| previousRun : tailRuns
     where
-        d1 = PR.direction previousRun
+        bl = PR.followingLevels previousRun
+        l1 = PR.level previousRun
         s1 = PR.script previousRun
-        d2 = charDirection (nextChar x)
+        l2 = headLevel bl
         s2 = charScript (nextChar x)
+        bl' = tailLevels bl
 
--- | Simplified detection of text direction for unidirectional text.
-mergeDirections :: Maybe Direction -> Maybe Direction ->
-    Merged (Maybe Direction)
-mergeDirections Nothing Nothing = Merged Nothing
-mergeDirections (Just d1) Nothing = Merged (Just d1)
-mergeDirections Nothing (Just d2) = Merged (Just d2)
-mergeDirections (Just d1) (Just d2)
-    | d1 == d2 = Merged (Just d1)
+mergeLevels :: Level -> Level -> Merged Level
+mergeLevels l1 l2
+    | l1 == l2 = Merged l1
     | otherwise = Incompatible
 
 -- TODO: Implement proper inheritance rules.


@@ 133,11 136,3 @@ mergeScripts s1 "Zinh" = Merged s1
 mergeScripts s1 s2
     | s1 == s2 = Merged s1
     | otherwise = Incompatible
-
--- TODO: Use the BiDi algorithm to support bidirectional text.
-charDirection :: Char -> Maybe Direction
-charDirection c = case ICUChar.direction c of
-    ICUChar.LeftToRight -> Just DirLTR
-    ICUChar.RightToLeft -> Just DirRTL
-    ICUChar.RightToLeftArabic -> Just DirRTL
-    _ -> Nothing

M test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs => test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs +2 -0

@@ 6,6 6,7 @@ import Data.Text.Glyphize (Direction (DirLTR, DirRTL))
 
 import Test.Hspec
 import Data.Text.ParagraphLayout.Internal.ApplyBoxes
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.BoxOptions
 import Data.Text.ParagraphLayout.Internal.ResolvedBox
 import Data.Text.ParagraphLayout.Internal.ResolvedSpan


@@ 23,6 24,7 @@ trivialSpan d i dir bs = ResolvedSpan
     , spanText = empty
     , spanTextOptions = defaultTextOptions dir
     , spanBoxes = bs
+    , spanBiDiLevels = TextLevels []
     , spanLineBreaks = []
     , spanCharacterBreaks = []
     }

M test/Data/Text/ParagraphLayout/Internal/RunSpec.hs => test/Data/Text/ParagraphLayout/Internal/RunSpec.hs +47 -16

@@ 4,19 4,21 @@ import Data.Text (Text, pack)
 import Data.Text.Glyphize (Direction (..), emptyFont)
 
 import Test.Hspec
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
 import Data.Text.ParagraphLayout.Internal.BoxOptions
 import Data.Text.ParagraphLayout.Internal.LineHeight
 import Data.Text.ParagraphLayout.Internal.ResolvedBox
 import Data.Text.ParagraphLayout.Internal.ResolvedSpan
 import Data.Text.ParagraphLayout.Internal.Run
 import Data.Text.ParagraphLayout.Internal.TextOptions
+import Data.Text.ParagraphLayout.RunLengthEncoding
 import Data.Text.ParagraphLayout.TextData
 
 defaultBox :: Direction -> ResolvedBox ()
 defaultBox dir = ResolvedBox () 0 defaultBoxOptions dir
 
-sampleSpan :: (Direction, String, Text, a) -> ResolvedSpan ()
-sampleSpan (dir, lang, text, _) = ResolvedSpan
+sampleSpan :: (Direction, String, Text, a) -> TextLevels -> ResolvedSpan ()
+sampleSpan (dir, lang, text, _) levels = ResolvedSpan
     { spanUserData = ()
     , spanIndex = 0
     , spanOffsetInParagraph = 0


@@ 27,106 29,135 @@ sampleSpan (dir, lang, text, _) = ResolvedSpan
         , textLanguage = lang
         }
     , spanBoxes = [defaultBox dir]
+    , spanBiDiLevels = levels
     , spanLineBreaks = []
     , spanCharacterBreaks = []
     }
 
+allLTR :: TextLevels
+allLTR = TextLevels $ repeat 0
+
+allRTL :: TextLevels
+allRTL = TextLevels $ repeat 1
+
+levelsRLE :: [(Int, Level)] -> TextLevels
+levelsRLE rls = TextLevels $ runLengthDecode rls
+
 spec :: Spec
 spec = do
     describe "spanToRuns" $ do
         it "handles span with no text" $ do
-            let inputSpan = sampleSpan englishEmpty
+            let inputSpan = sampleSpan englishEmpty allLTR
             let runs = spanToRuns inputSpan
             runs `shouldBe` []
         it "handles Czech hello" $ do
-            let inputSpan = sampleSpan czechHello
+            let inputSpan = sampleSpan czechHello allLTR
             let runs = spanToRuns inputSpan
             runs `shouldBe`
                 [ Run
                     { runOffsetInSpan = 0
                     , runText = spanText inputSpan
+                    , runLevel = 0
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }
                 ]
         it "handles Arabic hello" $ do
-            let inputSpan = sampleSpan arabicHello
+            let inputSpan = sampleSpan arabicHello allRTL
             let runs = spanToRuns inputSpan
             runs `shouldBe`
                 [ Run
                     { runOffsetInSpan = 0
                     , runText = spanText inputSpan
+                    , runLevel = 1
                     , runDirection = Just DirRTL
                     , runScript = Just "Arab"
                     }
                 ]
         it "handles Serbian with mixed script" $ do
-            let inputSpan = sampleSpan serbianMixedScript
+            let inputSpan = sampleSpan serbianMixedScript allLTR
             let runs = spanToRuns inputSpan
             runs `shouldBe`
                 [ Run
                     -- TODO: We might want both parentheses in the same run.
                     { runOffsetInSpan = 0
                     , runText = pack "Vikipedija ("
+                    , runLevel = 0
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }
                 , Run
                     { runOffsetInSpan = 12
                     , runText = pack "Википедија)"
+                    , runLevel = 0
                     , runDirection = Just DirLTR
                     , runScript = Just "Cyrl"
                     }
                 ]
         it "handles Arabic text with English inside" $ do
-            let inputSpan = sampleSpan arabicAroundEnglish
+            let levels = levelsRLE [(3, 1), (9, 2), (36, 1), (3, 2), (1, 1)]
+            let inputSpan = sampleSpan arabicAroundEnglish levels
             let runs = spanToRuns inputSpan
             runs `shouldBe`
                 [ Run
                     { runOffsetInSpan = 0
                     , runText = pack "في "
+                    , runLevel = 1
                     , runDirection = Just DirRTL
                     , runScript = Just "Arab"
                     }
                 , Run
                     { runOffsetInSpan = 5
-                    , runText = pack "XHTML 1.0 "
+                    , runText = pack "XHTML 1.0"
+                    , runLevel = 2
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }
                 , Run
-                    { runOffsetInSpan = 15
-                    , runText = pack "يتم تحقيق ذلك بإضافة العنصر المضمن "
+                    { runOffsetInSpan = 14
+                    , runText = pack " يتم تحقيق ذلك بإضافة العنصر المضمن "
+                    , runLevel = 1
                     , runDirection = Just DirRTL
                     , runScript = Just "Arab"
                     }
                 , Run
                     { runOffsetInSpan = 79
-                    , runText = pack "bdo."
+                    , runText = pack "bdo"
+                    , runLevel = 2
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }
+                , Run
+                    { runOffsetInSpan = 82
+                    , runText = pack "."
+                    , runLevel = 1
+                    , runDirection = Just DirRTL
+                    , runScript = Just "Zyyy"
+                    }
                 ]
         it "handles English text with Arabic inside" $ do
-            let inputSpan = sampleSpan englishAroundArabic
+            let levels = levelsRLE [(13, 0), (18, 1), (11, 0)]
+            let inputSpan = sampleSpan englishAroundArabic levels
             let runs = spanToRuns inputSpan
             runs `shouldBe`
                 [ Run
                     { runOffsetInSpan = 0
                     , runText = pack "The title is "
+                    , runLevel = 0
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }
                 , Run
                     { runOffsetInSpan = 13
-                    -- TODO: The final space should be excluded.
-                    , runText = pack "مفتاح معايير الويب "
+                    , runText = pack "مفتاح معايير الويب"
+                    , runLevel = 1
                     , runDirection = Just DirRTL
                     , runScript = Just "Arab"
                     }
                 , Run
-                    { runOffsetInSpan = 48
-                    , runText = pack "in Arabic."
+                    { runOffsetInSpan = 47
+                    , runText = pack " in Arabic."
+                    , runLevel = 0
                     , runDirection = Just DirLTR
                     , runScript = Just "Latn"
                     }