M balkon.cabal => balkon.cabal +1 -0
@@ 184,6 184,7 @@ test-suite balkon-test
other-modules:
Data.Text.ParagraphLayout.FontLoader,
Data.Text.ParagraphLayout.Internal.ApplyBoxesSpec,
+ Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec,
Data.Text.ParagraphLayout.Internal.BiDiReorderSpec,
Data.Text.ParagraphLayout.Internal.BreakSpec,
Data.Text.ParagraphLayout.Internal.LinePaginationSpec,
M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs => src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +86 -1
@@ 1,6 1,16 @@
-module Data.Text.ParagraphLayout.Internal.BiDiLevels (Level, WithLevel, level)
+module Data.Text.ParagraphLayout.Internal.BiDiLevels
+ ( Level
+ , TextLevels (TextLevels)
+ , WithLevel
+ , level
+ , textLevels
+ )
where
+import Data.Maybe (catMaybes, listToMaybe)
+import Data.Text (Text, uncons, unpack)
+import Data.Text.Glyphize (Direction (..))
+import qualified Data.Text.ICU.Char as ICUChar
import Data.Word (Word8)
-- | BiDi level, between 0 and 125 inclusive.
@@ 11,3 21,78 @@ type Level = Word8
-- | Typeclass for any data structure with an associated BiDi level.
class WithLevel a where
level :: a -> Level
+
+-- | BiDi levels for each character from a given input text.
+--
+-- This wrapper is meant to ease the transition to a different internal
+-- representation, if required when integrating with the ICU.
+newtype TextLevels = TextLevels [Level]
+ deriving (Eq, Show)
+
+-- | Determine the BiDi level of each character in the input text.
+--
+-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,
+-- or provide a reasonable approximation.
+--
+-- The current implementation is very simplified and only works well for
+-- unidirectional text, or for very simple cases of LTR text with embedded
+-- RTL words. It only takes strongly directional characters into account.
+-- Each strongly directional character sets the directionality for itself and
+-- the text that follows it. The beginning of the text is set according to the
+-- first strongly directional character, or to the base direction if no such
+-- character is found.
+textLevels :: Direction -> Text -> TextLevels
+textLevels baseDir txt = TextLevels $ textLevels' baseLevel startLevel txt
+ where
+ baseLevel = directionLevel 0 baseDir
+ startLevel = directionLevel baseLevel startDir
+ startDir = case firstStrongDirection txt of
+ Just dir -> dir
+ Nothing -> baseDir
+
+-- | Determine the BiDi level of each character in the input text by either
+-- continuing the previous level, or resetting it when a strongly directional
+-- character is encountered.
+textLevels' :: Level -> Level -> Text -> [Level]
+textLevels' baseLevel previousLevel txt = case uncons txt of
+ Just (char, rest) ->
+ let currentLevel = case strongDirection char of
+ Just dir -> directionLevel baseLevel dir
+ Nothing -> previousLevel
+ in currentLevel : textLevels' baseLevel currentLevel rest
+ Nothing -> []
+
+-- | `Just` the direction of the first strongly directional character,
+-- or `Nothing` if there is no strongly directional character.
+firstStrongDirection :: Text -> Maybe Direction
+firstStrongDirection = firstJust . map strongDirection . unpack
+
+-- | `Just` the direction of a strongly directional character,
+-- or `Nothing` if the character is not strongly directional.
+strongDirection :: Char -> Maybe Direction
+strongDirection c = case ICUChar.direction c of
+ ICUChar.LeftToRight -> Just DirLTR
+ ICUChar.RightToLeft -> Just DirRTL
+ ICUChar.RightToLeftArabic -> Just DirRTL
+ _ -> Nothing
+
+-- | The first `Just` value found in the input list, if there is one,
+-- otherwise `Nothing`.
+firstJust :: [Maybe a] -> Maybe a
+firstJust = listToMaybe . catMaybes
+
+-- | Convert text direction to the smallest corresponding embedding level,
+-- but no smaller than the given minimum.
+directionLevel :: Level -> Direction -> Level
+directionLevel low DirLTR = smallestEvenAtLeast low
+directionLevel low DirRTL = smallestOddAtLeast low
+directionLevel low DirTTB = smallestEvenAtLeast low
+directionLevel low DirBTT = smallestOddAtLeast low
+
+-- | Smallest even integer greater than or equal to @x@.
+smallestEvenAtLeast :: Integral a => a -> a
+smallestEvenAtLeast x = if even x then x else x + 1
+
+-- | Smallest odd integer greater than or equal to @x@.
+smallestOddAtLeast :: Integral a => a -> a
+smallestOddAtLeast x = if odd x then x else x + 1
A test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs => test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs +72 -0
@@ 0,0 1,72 @@
+module Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec where
+
+import Data.Text (Text)
+import Data.Text.Glyphize (Direction (DirLTR, DirRTL))
+
+import Test.Hspec
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
+import Data.Text.ParagraphLayout.RunLengthEncoding
+import Data.Text.ParagraphLayout.TextData
+
+-- | Test that `textLevels` produces the levels of the lengths given in
+-- "Data.Text.ParagraphLayout.TextData" and values given here.
+shouldHaveLevels :: (Direction, a, Text, [Int]) -> [Level] -> SpecWith ()
+shouldHaveLevels (dir, _, text, lens) levels = it description $
+ result `shouldBe` TextLevels (runLengthDecode rls)
+ where
+ result = textLevels dir text
+ rls = zip lens levels
+ description = case rls of
+ [] -> "should be empty"
+ [(_, lvl)] -> "should all have level " ++ show lvl
+ _ -> "should have multiple levels"
+
+-- | Override the paragraph direction of sample data.
+setDirection :: Direction -> (Direction, a, b, c) -> (Direction, a, b, c)
+setDirection dir (_, a, b, c) = (dir, a, b, c)
+
+-- | Override the paragraph direction of sample data to LTR.
+setLTR :: (Direction, a, b, c) -> (Direction, a, b, c)
+setLTR = setDirection DirLTR
+
+-- | Override the paragraph direction of sample data to RTL.
+setRTL :: (Direction, a, b, c) -> (Direction, a, b, c)
+setRTL = setDirection DirRTL
+
+spec :: Spec
+spec = do
+
+ describe "textLevels" $ do
+
+ -- Empty input should produce empty output.
+ -- Infinite list of level 0 is also acceptable.
+ describe "on English input" $
+ englishEmpty `shouldHaveLevels` []
+
+ -- Empty input should produce empty output.
+ -- Infinite list of level 1 is also acceptable.
+ describe "on Arabic empty" $
+ arabicEmpty `shouldHaveLevels` []
+
+ -- All LTR text without numbers should always stay at the base level.
+ describe "on English word in LTR" $
+ setLTR englishWord `shouldHaveLevels` [0]
+
+ -- All characters in a RTL paragraph must be at least at level 1.
+ describe "on English word in RTL" $
+ setRTL englishWord `shouldHaveLevels` [2]
+
+ describe "on Arabic word in LTR" $
+ setLTR arabicHello `shouldHaveLevels` [1]
+
+ describe "on Arabic word in RTL" $
+ setRTL arabicHello `shouldHaveLevels` [1]
+
+ describe "on Serbian mixed script" $
+ serbianMixedScript `shouldHaveLevels` [0]
+
+ describe "on Arabic around English" $
+ arabicAroundEnglish `shouldHaveLevels` [1, 2, 1, 2, 1]
+
+ describe "on English around Arabic" $
+ englishAroundArabic `shouldHaveLevels` [0, 1, 0]