From 97faca5ffecc168c774af90056c365f03e047631 Mon Sep 17 00:00:00 2001 From: Jaro Date: Tue, 23 May 2023 06:17:41 +0200 Subject: [PATCH] Convert legacy span directions into levels. --- balkon.cabal | 1 + .../ParagraphLayout/Internal/BiDiLevels.hs | 87 ++++++++++++++++++- .../Internal/BiDiLevelsSpec.hs | 72 +++++++++++++++ 3 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs diff --git a/balkon.cabal b/balkon.cabal index 563f813..749313b 100644 --- a/balkon.cabal +++ b/balkon.cabal @@ -184,6 +184,7 @@ test-suite balkon-test other-modules: Data.Text.ParagraphLayout.FontLoader, Data.Text.ParagraphLayout.Internal.ApplyBoxesSpec, + Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec, Data.Text.ParagraphLayout.Internal.BiDiReorderSpec, Data.Text.ParagraphLayout.Internal.BreakSpec, Data.Text.ParagraphLayout.Internal.LinePaginationSpec, diff --git a/src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs b/src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs index decbc53..5ff8e58 100644 --- a/src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +++ b/src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs @@ -1,6 +1,16 @@ -module Data.Text.ParagraphLayout.Internal.BiDiLevels (Level, WithLevel, level) +module Data.Text.ParagraphLayout.Internal.BiDiLevels + ( Level + , TextLevels (TextLevels) + , WithLevel + , level + , textLevels + ) where +import Data.Maybe (catMaybes, listToMaybe) +import Data.Text (Text, uncons, unpack) +import Data.Text.Glyphize (Direction (..)) +import qualified Data.Text.ICU.Char as ICUChar import Data.Word (Word8) -- | BiDi level, between 0 and 125 inclusive. @@ -11,3 +21,78 @@ type Level = Word8 -- | Typeclass for any data structure with an associated BiDi level. class WithLevel a where level :: a -> Level + +-- | BiDi levels for each character from a given input text. +-- +-- This wrapper is meant to ease the transition to a different internal +-- representation, if required when integrating with the ICU. +newtype TextLevels = TextLevels [Level] + deriving (Eq, Show) + +-- | Determine the BiDi level of each character in the input text. +-- +-- TODO: Use Haskell bindings to the ICU BiDi implementation once available, +-- or provide a reasonable approximation. +-- +-- The current implementation is very simplified and only works well for +-- unidirectional text, or for very simple cases of LTR text with embedded +-- RTL words. It only takes strongly directional characters into account. +-- Each strongly directional character sets the directionality for itself and +-- the text that follows it. The beginning of the text is set according to the +-- first strongly directional character, or to the base direction if no such +-- character is found. +textLevels :: Direction -> Text -> TextLevels +textLevels baseDir txt = TextLevels $ textLevels' baseLevel startLevel txt + where + baseLevel = directionLevel 0 baseDir + startLevel = directionLevel baseLevel startDir + startDir = case firstStrongDirection txt of + Just dir -> dir + Nothing -> baseDir + +-- | Determine the BiDi level of each character in the input text by either +-- continuing the previous level, or resetting it when a strongly directional +-- character is encountered. +textLevels' :: Level -> Level -> Text -> [Level] +textLevels' baseLevel previousLevel txt = case uncons txt of + Just (char, rest) -> + let currentLevel = case strongDirection char of + Just dir -> directionLevel baseLevel dir + Nothing -> previousLevel + in currentLevel : textLevels' baseLevel currentLevel rest + Nothing -> [] + +-- | `Just` the direction of the first strongly directional character, +-- or `Nothing` if there is no strongly directional character. +firstStrongDirection :: Text -> Maybe Direction +firstStrongDirection = firstJust . map strongDirection . unpack + +-- | `Just` the direction of a strongly directional character, +-- or `Nothing` if the character is not strongly directional. +strongDirection :: Char -> Maybe Direction +strongDirection c = case ICUChar.direction c of + ICUChar.LeftToRight -> Just DirLTR + ICUChar.RightToLeft -> Just DirRTL + ICUChar.RightToLeftArabic -> Just DirRTL + _ -> Nothing + +-- | The first `Just` value found in the input list, if there is one, +-- otherwise `Nothing`. +firstJust :: [Maybe a] -> Maybe a +firstJust = listToMaybe . catMaybes + +-- | Convert text direction to the smallest corresponding embedding level, +-- but no smaller than the given minimum. +directionLevel :: Level -> Direction -> Level +directionLevel low DirLTR = smallestEvenAtLeast low +directionLevel low DirRTL = smallestOddAtLeast low +directionLevel low DirTTB = smallestEvenAtLeast low +directionLevel low DirBTT = smallestOddAtLeast low + +-- | Smallest even integer greater than or equal to @x@. +smallestEvenAtLeast :: Integral a => a -> a +smallestEvenAtLeast x = if even x then x else x + 1 + +-- | Smallest odd integer greater than or equal to @x@. +smallestOddAtLeast :: Integral a => a -> a +smallestOddAtLeast x = if odd x then x else x + 1 diff --git a/test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs b/test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs new file mode 100644 index 0000000..38f489a --- /dev/null +++ b/test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs @@ -0,0 +1,72 @@ +module Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec where + +import Data.Text (Text) +import Data.Text.Glyphize (Direction (DirLTR, DirRTL)) + +import Test.Hspec +import Data.Text.ParagraphLayout.Internal.BiDiLevels +import Data.Text.ParagraphLayout.RunLengthEncoding +import Data.Text.ParagraphLayout.TextData + +-- | Test that `textLevels` produces the levels of the lengths given in +-- "Data.Text.ParagraphLayout.TextData" and values given here. +shouldHaveLevels :: (Direction, a, Text, [Int]) -> [Level] -> SpecWith () +shouldHaveLevels (dir, _, text, lens) levels = it description $ + result `shouldBe` TextLevels (runLengthDecode rls) + where + result = textLevels dir text + rls = zip lens levels + description = case rls of + [] -> "should be empty" + [(_, lvl)] -> "should all have level " ++ show lvl + _ -> "should have multiple levels" + +-- | Override the paragraph direction of sample data. +setDirection :: Direction -> (Direction, a, b, c) -> (Direction, a, b, c) +setDirection dir (_, a, b, c) = (dir, a, b, c) + +-- | Override the paragraph direction of sample data to LTR. +setLTR :: (Direction, a, b, c) -> (Direction, a, b, c) +setLTR = setDirection DirLTR + +-- | Override the paragraph direction of sample data to RTL. +setRTL :: (Direction, a, b, c) -> (Direction, a, b, c) +setRTL = setDirection DirRTL + +spec :: Spec +spec = do + + describe "textLevels" $ do + + -- Empty input should produce empty output. + -- Infinite list of level 0 is also acceptable. + describe "on English input" $ + englishEmpty `shouldHaveLevels` [] + + -- Empty input should produce empty output. + -- Infinite list of level 1 is also acceptable. + describe "on Arabic empty" $ + arabicEmpty `shouldHaveLevels` [] + + -- All LTR text without numbers should always stay at the base level. + describe "on English word in LTR" $ + setLTR englishWord `shouldHaveLevels` [0] + + -- All characters in a RTL paragraph must be at least at level 1. + describe "on English word in RTL" $ + setRTL englishWord `shouldHaveLevels` [2] + + describe "on Arabic word in LTR" $ + setLTR arabicHello `shouldHaveLevels` [1] + + describe "on Arabic word in RTL" $ + setRTL arabicHello `shouldHaveLevels` [1] + + describe "on Serbian mixed script" $ + serbianMixedScript `shouldHaveLevels` [0] + + describe "on Arabic around English" $ + arabicAroundEnglish `shouldHaveLevels` [1, 2, 1, 2, 1] + + describe "on English around Arabic" $ + englishAroundArabic `shouldHaveLevels` [0, 1, 0] -- 2.30.2