~jaro/balkon

86c64fdf20be56f5a5002990a6c193d09a35788e — Jaro 1 year, 5 months ago 5e3e1e9
Integrate BiDi levels with spanToRuns algorithm.

Span/box boundaries no longer reset directionality.

BREAKING: Text with no strongly directional characters would
previously be treated as having no direction. It will now be
treated as having the direction defined in TextOptions.
This affects calculated positions.
M .golden/paragraphLayout/devanagariAccent.golden => .golden/paragraphLayout/devanagariAccent.golden +3 -3
@@ 1,5 1,5 @@
ParagraphLayout
    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
    , spanLayouts = [
        SpanLayout
        [ Fragment


@@ 8,8 8,8 @@ ParagraphLayout
            , fragmentAncestorBoxes =
                [ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
                ]
            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
            , fragmentPen = (0, -500)
            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
            , fragmentPen = (0, -1171)
            , fragmentGlyphs =
                [ (GlyphInfo {codepoint = 529, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 645, y_advance = 0, x_offset = 0, y_offset = 0})
                , (GlyphInfo {codepoint = 424, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})

M .golden/paragraphLayout/devanagariPrefixedAccent.golden => .golden/paragraphLayout/devanagariPrefixedAccent.golden +3 -3
@@ 1,5 1,5 @@
ParagraphLayout
    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
    { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
    , spanLayouts = [
        SpanLayout
        [ Fragment


@@ 8,8 8,8 @@ ParagraphLayout
            , fragmentAncestorBoxes =
                [ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
                ]
            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
            , fragmentPen = (0, -500)
            , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
            , fragmentPen = (0, -1171)
            , fragmentGlyphs =
                [ (GlyphInfo {codepoint = 424, cluster = 1, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})
                ]

M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs => src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +19 -0
@@ 2,7 2,11 @@ module Data.Text.ParagraphLayout.Internal.BiDiLevels
    ( Level
    , TextLevels (TextLevels)
    , WithLevel
    , dropLevels
    , headLevel
    , level
    , levelDirectionH
    , tailLevels
    , textLevels
    )
where


@@ 29,6 33,15 @@ class WithLevel a where
newtype TextLevels = TextLevels [Level]
    deriving (Eq, Show)

headLevel :: TextLevels -> Level
headLevel (TextLevels xs) = head xs

tailLevels :: TextLevels -> TextLevels
tailLevels (TextLevels xs) = TextLevels (tail xs)

dropLevels :: Int -> TextLevels -> TextLevels
dropLevels n (TextLevels xs) = TextLevels (drop n xs)

-- | Determine the BiDi level of each character in the input text.
--
-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,


@@ 81,6 94,12 @@ strongDirection c = case ICUChar.direction c of
firstJust :: [Maybe a] -> Maybe a
firstJust = listToMaybe . catMaybes

-- | Convert embedding level to horizontal text direction.
levelDirectionH :: Level -> Direction
levelDirectionH lvl
    | even lvl = DirLTR
    | otherwise = DirRTL

-- | Convert text direction to the smallest corresponding embedding level,
-- but no smaller than the given minimum.
directionLevel :: Level -> Direction -> Level

M src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs => src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs +3 -3
@@ 1,8 1,7 @@
module Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (..))
where

import Data.Text.Glyphize (Direction)

import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.Script
import Data.Text.ParagraphLayout.Internal.Zipper



@@ 10,6 9,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
-- `Data.Text.ParagraphLayout.Internal.Run.Run` values.
data ProtoRun = ProtoRun
    { zipper :: Zipper
    , direction :: Maybe Direction
    , followingLevels :: TextLevels
    , level :: Level
    , script :: ScriptCode
    }

M src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs => src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs +2 -0
@@ 4,6 4,7 @@ where
import Data.Text (Text)
import qualified Data.Text.ICU as BreakStatus (Line)

import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextContainer
import Data.Text.ParagraphLayout.Internal.TextOptions


@@ 17,6 18,7 @@ data ResolvedSpan d = ResolvedSpan
    , spanText :: Text
    , spanTextOptions :: TextOptions
    , spanBoxes :: [ResolvedBox d]
    , spanBiDiLevels :: TextLevels
    , spanLineBreaks :: [(Int, BreakStatus.Line)]
    -- TODO: Can be optimised by starting with the shortest line break.
    , spanCharacterBreaks :: [(Int, ())]

M src/Data/Text/ParagraphLayout/Internal/Rich.hs => src/Data/Text/ParagraphLayout/Internal/Rich.hs +12 -1
@@ 5,8 5,10 @@ import Control.Applicative (ZipList (ZipList), getZipList)
import Data.List.NonEmpty (nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.ICU (Breaker, LocaleName, breakCharacter, breakLine)

import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.Break
import Data.Text.ParagraphLayout.Internal.Fragment
import Data.Text.ParagraphLayout.Internal.Layout


@@ 43,7 45,9 @@ spanToRunsWrapped :: RS.ResolvedSpan d -> [WithSpan d Run]
spanToRunsWrapped s = map (WithSpan s) (spanToRuns s)

resolveSpans :: Paragraph d -> [RS.ResolvedSpan d]
resolveSpans p@(Paragraph _ pStart root _) = do
resolveSpans p = do
    let Paragraph _ pStart root _ = p
    let RootBox (Box _ rootTextOpts) = root
    let leaves = flatten root
    let sTexts = paragraphSpanTexts p
    let sBounds = paragraphSpanBounds p


@@ 57,8 61,14 @@ resolveSpans p@(Paragraph _ pStart root _) = do
        <*> ZipList sTexts
    let (TextLeaf userData _ textOpts boxes) = leaf
    let lang = textLanguage textOpts
    -- TODO: Allow BiDi embedding/isolation for inner nodes.
    let pLevels = textLevels (textDirection rootTextOpts) pText
    let lBreaks = paragraphBreaks breakLine pText lang
    let cBreaks = paragraphBreaks breakCharacter pText lang
    -- TODO: Optimise. This has time complexity O(n*s), where n is number of
    --       characters and s is number of resolved spans.
    --       Maybe include byte offsets in the TextLevels data structure?
    let pPrefixLen = Text.length $ paragraphPrefix p sStart
    return RS.ResolvedSpan
        { RS.spanUserData = userData
        , RS.spanIndex = i


@@ 67,6 77,7 @@ resolveSpans p@(Paragraph _ pStart root _) = do
        , RS.spanText = sText
        , RS.spanTextOptions = textOpts
        , RS.spanBoxes = boxes
        , RS.spanBiDiLevels = dropLevels pPrefixLen pLevels
        , RS.spanLineBreaks = subOffsetsDesc (sStart - pStart) lBreaks
        , RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks
        }

M src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs => src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs +9 -0
@@ 1,6 1,7 @@
module Data.Text.ParagraphLayout.Internal.Rich.Paragraph
    ( Paragraph (..)
    , constructParagraph
    , paragraphPrefix
    , paragraphSpanBounds
    , paragraphSpanTexts
    , paragraphText


@@ 111,3 112,11 @@ paragraphText p@(Paragraph arr _ _ _) = Text arr start (end - start)
        start = NonEmpty.head sBounds
        end = NonEmpty.last sBounds
        sBounds = paragraphSpanBounds p

-- | Turn all text nodes from the input `Paragraph` up to the given end offset
-- into one combined `Text`. No bounds checking is performed!
paragraphPrefix :: Paragraph d -> Int -> Text
paragraphPrefix p@(Paragraph arr _ _ _) end = Text arr start (end - start)
    where
        start = NonEmpty.head sBounds
        sBounds = paragraphSpanBounds p

M src/Data/Text/ParagraphLayout/Internal/Run.hs => src/Data/Text/ParagraphLayout/Internal/Run.hs +29 -34
@@ 7,8 7,8 @@ import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
import Data.Text.Glyphize (Direction (..))
import qualified Data.Text.ICU.Char as ICUChar

import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (ProtoRun))
import qualified Data.Text.ParagraphLayout.Internal.ProtoRun as PR
import Data.Text.ParagraphLayout.Internal.ResolvedSpan


@@ 22,6 22,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
data Run = Run
    { runOffsetInSpan :: Int
    , runText :: Text
    , runLevel :: Level
    , runDirection :: Maybe Direction
    , runScript :: Maybe ScriptCode
    }


@@ 73,56 74,58 @@ considerNext z = case next z of
data Merged a = Incompatible | Merged a

spanToRuns :: ResolvedSpan d -> [Run]
spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper
spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper levels
    where
        wholeText = spanText s
        zipper = start wholeText
        levels = spanBiDiLevels s
        run acc pr = let t = preceding (PR.zipper pr) in
            ( acc + lengthWord8 t
            , Run
                { runOffsetInSpan = acc
                , runText = t
                , runDirection = PR.direction pr
                , runLevel = PR.level pr
                , runDirection = Just $ levelDirectionH $ PR.level pr
                , runScript = Just $ PR.script pr
                }
            )

protoRuns :: Zipper -> [ProtoRun]
protoRuns z = reverse $ protoRuns' z []
protoRuns :: Zipper -> TextLevels -> [ProtoRun]
protoRuns z bl = reverse $ protoRuns' z bl []

protoRuns' :: Zipper -> [ProtoRun] -> [ProtoRun]
protoRuns' curZipper curRuns = case considerNext curZipper of
protoRuns' :: Zipper -> TextLevels -> [ProtoRun] -> [ProtoRun]
protoRuns' curZipper curLevels curRuns = case considerNext curZipper of
    Nothing -> curRuns
    Just choice ->
        let headRun :| tailRuns = foldRun choice curRuns
        in protoRuns' (PR.zipper headRun) (headRun : tailRuns)
        let headRun :| tailRuns = foldRun (choice, curLevels) curRuns
        in protoRuns' (PR.zipper headRun) curLevels (headRun : tailRuns)

foldRun :: ZipperChoice -> [ProtoRun] -> NonEmpty ProtoRun
foldRun :: (ZipperChoice, TextLevels) -> [ProtoRun] -> NonEmpty ProtoRun

-- If there are no runs, create a new run with a single character.
foldRun x [] = ProtoRun (continuingRun x) d s :| []
foldRun (x, bl) [] = ProtoRun (continuingRun x) lvls lvl s :| []
    where
        d = charDirection (nextChar x)
        lvl = headLevel bl
        lvls = tailLevels bl
        s = charScript (nextChar x)

foldRun x (previousRun : tailRuns) =
    case (mergeDirections d1 d2, mergeScripts s1 s2) of
        (Merged d, Merged s) -> ProtoRun (continuingRun x) d s :| tailRuns
        _ -> ProtoRun (startingNewRun x) d2 s2 :| previousRun : tailRuns
foldRun (x, _) (previousRun : tailRuns) =
    case (mergeLevels l1 l2, mergeScripts s1 s2) of
        (Merged l, Merged s) ->
            ProtoRun (continuingRun x) bl' l s :| tailRuns
        _ ->
            ProtoRun (startingNewRun x) bl' l2 s2 :| previousRun : tailRuns
    where
        d1 = PR.direction previousRun
        bl = PR.followingLevels previousRun
        l1 = PR.level previousRun
        s1 = PR.script previousRun
        d2 = charDirection (nextChar x)
        l2 = headLevel bl
        s2 = charScript (nextChar x)
        bl' = tailLevels bl

-- | Simplified detection of text direction for unidirectional text.
mergeDirections :: Maybe Direction -> Maybe Direction ->
    Merged (Maybe Direction)
mergeDirections Nothing Nothing = Merged Nothing
mergeDirections (Just d1) Nothing = Merged (Just d1)
mergeDirections Nothing (Just d2) = Merged (Just d2)
mergeDirections (Just d1) (Just d2)
    | d1 == d2 = Merged (Just d1)
mergeLevels :: Level -> Level -> Merged Level
mergeLevels l1 l2
    | l1 == l2 = Merged l1
    | otherwise = Incompatible

-- TODO: Implement proper inheritance rules.


@@ 133,11 136,3 @@ mergeScripts s1 "Zinh" = Merged s1
mergeScripts s1 s2
    | s1 == s2 = Merged s1
    | otherwise = Incompatible

-- TODO: Use the BiDi algorithm to support bidirectional text.
charDirection :: Char -> Maybe Direction
charDirection c = case ICUChar.direction c of
    ICUChar.LeftToRight -> Just DirLTR
    ICUChar.RightToLeft -> Just DirRTL
    ICUChar.RightToLeftArabic -> Just DirRTL
    _ -> Nothing

M test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs => test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs +2 -0
@@ 6,6 6,7 @@ import Data.Text.Glyphize (Direction (DirLTR, DirRTL))

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.ApplyBoxes
import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.ResolvedSpan


@@ 23,6 24,7 @@ trivialSpan d i dir bs = ResolvedSpan
    , spanText = empty
    , spanTextOptions = defaultTextOptions dir
    , spanBoxes = bs
    , spanBiDiLevels = TextLevels []
    , spanLineBreaks = []
    , spanCharacterBreaks = []
    }

M test/Data/Text/ParagraphLayout/Internal/RunSpec.hs => test/Data/Text/ParagraphLayout/Internal/RunSpec.hs +47 -16
@@ 4,19 4,21 @@ import Data.Text (Text, pack)
import Data.Text.Glyphize (Direction (..), emptyFont)

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.LineHeight
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.ResolvedSpan
import Data.Text.ParagraphLayout.Internal.Run
import Data.Text.ParagraphLayout.Internal.TextOptions
import Data.Text.ParagraphLayout.RunLengthEncoding
import Data.Text.ParagraphLayout.TextData

defaultBox :: Direction -> ResolvedBox ()
defaultBox dir = ResolvedBox () 0 defaultBoxOptions dir

sampleSpan :: (Direction, String, Text, a) -> ResolvedSpan ()
sampleSpan (dir, lang, text, _) = ResolvedSpan
sampleSpan :: (Direction, String, Text, a) -> TextLevels -> ResolvedSpan ()
sampleSpan (dir, lang, text, _) levels = ResolvedSpan
    { spanUserData = ()
    , spanIndex = 0
    , spanOffsetInParagraph = 0


@@ 27,106 29,135 @@ sampleSpan (dir, lang, text, _) = ResolvedSpan
        , textLanguage = lang
        }
    , spanBoxes = [defaultBox dir]
    , spanBiDiLevels = levels
    , spanLineBreaks = []
    , spanCharacterBreaks = []
    }

allLTR :: TextLevels
allLTR = TextLevels $ repeat 0

allRTL :: TextLevels
allRTL = TextLevels $ repeat 1

levelsRLE :: [(Int, Level)] -> TextLevels
levelsRLE rls = TextLevels $ runLengthDecode rls

spec :: Spec
spec = do
    describe "spanToRuns" $ do
        it "handles span with no text" $ do
            let inputSpan = sampleSpan englishEmpty
            let inputSpan = sampleSpan englishEmpty allLTR
            let runs = spanToRuns inputSpan
            runs `shouldBe` []
        it "handles Czech hello" $ do
            let inputSpan = sampleSpan czechHello
            let inputSpan = sampleSpan czechHello allLTR
            let runs = spanToRuns inputSpan
            runs `shouldBe`
                [ Run
                    { runOffsetInSpan = 0
                    , runText = spanText inputSpan
                    , runLevel = 0
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }
                ]
        it "handles Arabic hello" $ do
            let inputSpan = sampleSpan arabicHello
            let inputSpan = sampleSpan arabicHello allRTL
            let runs = spanToRuns inputSpan
            runs `shouldBe`
                [ Run
                    { runOffsetInSpan = 0
                    , runText = spanText inputSpan
                    , runLevel = 1
                    , runDirection = Just DirRTL
                    , runScript = Just "Arab"
                    }
                ]
        it "handles Serbian with mixed script" $ do
            let inputSpan = sampleSpan serbianMixedScript
            let inputSpan = sampleSpan serbianMixedScript allLTR
            let runs = spanToRuns inputSpan
            runs `shouldBe`
                [ Run
                    -- TODO: We might want both parentheses in the same run.
                    { runOffsetInSpan = 0
                    , runText = pack "Vikipedija ("
                    , runLevel = 0
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }
                , Run
                    { runOffsetInSpan = 12
                    , runText = pack "Википедија)"
                    , runLevel = 0
                    , runDirection = Just DirLTR
                    , runScript = Just "Cyrl"
                    }
                ]
        it "handles Arabic text with English inside" $ do
            let inputSpan = sampleSpan arabicAroundEnglish
            let levels = levelsRLE [(3, 1), (9, 2), (36, 1), (3, 2), (1, 1)]
            let inputSpan = sampleSpan arabicAroundEnglish levels
            let runs = spanToRuns inputSpan
            runs `shouldBe`
                [ Run
                    { runOffsetInSpan = 0
                    , runText = pack "في "
                    , runLevel = 1
                    , runDirection = Just DirRTL
                    , runScript = Just "Arab"
                    }
                , Run
                    { runOffsetInSpan = 5
                    , runText = pack "XHTML 1.0 "
                    , runText = pack "XHTML 1.0"
                    , runLevel = 2
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }
                , Run
                    { runOffsetInSpan = 15
                    , runText = pack "يتم تحقيق ذلك بإضافة العنصر المضمن "
                    { runOffsetInSpan = 14
                    , runText = pack " يتم تحقيق ذلك بإضافة العنصر المضمن "
                    , runLevel = 1
                    , runDirection = Just DirRTL
                    , runScript = Just "Arab"
                    }
                , Run
                    { runOffsetInSpan = 79
                    , runText = pack "bdo."
                    , runText = pack "bdo"
                    , runLevel = 2
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }
                , Run
                    { runOffsetInSpan = 82
                    , runText = pack "."
                    , runLevel = 1
                    , runDirection = Just DirRTL
                    , runScript = Just "Zyyy"
                    }
                ]
        it "handles English text with Arabic inside" $ do
            let inputSpan = sampleSpan englishAroundArabic
            let levels = levelsRLE [(13, 0), (18, 1), (11, 0)]
            let inputSpan = sampleSpan englishAroundArabic levels
            let runs = spanToRuns inputSpan
            runs `shouldBe`
                [ Run
                    { runOffsetInSpan = 0
                    , runText = pack "The title is "
                    , runLevel = 0
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }
                , Run
                    { runOffsetInSpan = 13
                    -- TODO: The final space should be excluded.
                    , runText = pack "مفتاح معايير الويب "
                    , runText = pack "مفتاح معايير الويب"
                    , runLevel = 1
                    , runDirection = Just DirRTL
                    , runScript = Just "Arab"
                    }
                , Run
                    { runOffsetInSpan = 48
                    , runText = pack "in Arabic."
                    { runOffsetInSpan = 47
                    , runText = pack " in Arabic."
                    , runLevel = 0
                    , runDirection = Just DirLTR
                    , runScript = Just "Latn"
                    }