~jaro/balkon

97faca5ffecc168c774af90056c365f03e047631 — Jaro 1 year, 6 months ago 8537ba9
Convert legacy span directions into levels.
M balkon.cabal => balkon.cabal +1 -0
@@ 184,6 184,7 @@ test-suite balkon-test
    other-modules:
        Data.Text.ParagraphLayout.FontLoader,
        Data.Text.ParagraphLayout.Internal.ApplyBoxesSpec,
        Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec,
        Data.Text.ParagraphLayout.Internal.BiDiReorderSpec,
        Data.Text.ParagraphLayout.Internal.BreakSpec,
        Data.Text.ParagraphLayout.Internal.LinePaginationSpec,

M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs => src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +86 -1
@@ 1,6 1,16 @@
module Data.Text.ParagraphLayout.Internal.BiDiLevels (Level, WithLevel, level)
module Data.Text.ParagraphLayout.Internal.BiDiLevels
    ( Level
    , TextLevels (TextLevels)
    , WithLevel
    , level
    , textLevels
    )
where

import Data.Maybe (catMaybes, listToMaybe)
import Data.Text (Text, uncons, unpack)
import Data.Text.Glyphize (Direction (..))
import qualified Data.Text.ICU.Char as ICUChar
import Data.Word (Word8)

-- | BiDi level, between 0 and 125 inclusive.


@@ 11,3 21,78 @@ type Level = Word8
-- | Typeclass for any data structure with an associated BiDi level.
class WithLevel a where
    level :: a -> Level

-- | BiDi levels for each character from a given input text.
--
-- This wrapper is meant to ease the transition to a different internal
-- representation, if required when integrating with the ICU.
newtype TextLevels = TextLevels [Level]
    deriving (Eq, Show)

-- | Determine the BiDi level of each character in the input text.
--
-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,
--       or provide a reasonable approximation.
--
-- The current implementation is very simplified and only works well for
-- unidirectional text, or for very simple cases of LTR text with embedded
-- RTL words. It only takes strongly directional characters into account.
-- Each strongly directional character sets the directionality for itself and
-- the text that follows it. The beginning of the text is set according to the
-- first strongly directional character, or to the base direction if no such
-- character is found.
textLevels :: Direction -> Text -> TextLevels
textLevels baseDir txt = TextLevels $ textLevels' baseLevel startLevel txt
    where
        baseLevel = directionLevel 0 baseDir
        startLevel = directionLevel baseLevel startDir
        startDir = case firstStrongDirection txt of
            Just dir -> dir
            Nothing -> baseDir

-- | Determine the BiDi level of each character in the input text by either
-- continuing the previous level, or resetting it when a strongly directional
-- character is encountered.
textLevels' :: Level -> Level -> Text -> [Level]
textLevels' baseLevel previousLevel txt = case uncons txt of
    Just (char, rest) ->
        let currentLevel = case strongDirection char of
                Just dir -> directionLevel baseLevel dir
                Nothing -> previousLevel
        in currentLevel : textLevels' baseLevel currentLevel rest
    Nothing -> []

-- | `Just` the direction of the first strongly directional character,
-- or `Nothing` if there is no strongly directional character.
firstStrongDirection :: Text -> Maybe Direction
firstStrongDirection = firstJust . map strongDirection . unpack

-- | `Just` the direction of a strongly directional character,
-- or `Nothing` if the character is not strongly directional.
strongDirection :: Char -> Maybe Direction
strongDirection c = case ICUChar.direction c of
    ICUChar.LeftToRight -> Just DirLTR
    ICUChar.RightToLeft -> Just DirRTL
    ICUChar.RightToLeftArabic -> Just DirRTL
    _ -> Nothing

-- | The first `Just` value found in the input list, if there is one,
-- otherwise `Nothing`.
firstJust :: [Maybe a] -> Maybe a
firstJust = listToMaybe . catMaybes

-- | Convert text direction to the smallest corresponding embedding level,
-- but no smaller than the given minimum.
directionLevel :: Level -> Direction -> Level
directionLevel low DirLTR = smallestEvenAtLeast low
directionLevel low DirRTL = smallestOddAtLeast low
directionLevel low DirTTB = smallestEvenAtLeast low
directionLevel low DirBTT = smallestOddAtLeast low

-- | Smallest even integer greater than or equal to @x@.
smallestEvenAtLeast :: Integral a => a -> a
smallestEvenAtLeast x = if even x then x else x + 1

-- | Smallest odd integer greater than or equal to @x@.
smallestOddAtLeast :: Integral a => a -> a
smallestOddAtLeast x = if odd x then x else x + 1

A test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs => test/Data/Text/ParagraphLayout/Internal/BiDiLevelsSpec.hs +72 -0
@@ 0,0 1,72 @@
module Data.Text.ParagraphLayout.Internal.BiDiLevelsSpec where

import Data.Text (Text)
import Data.Text.Glyphize (Direction (DirLTR, DirRTL))

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.RunLengthEncoding
import Data.Text.ParagraphLayout.TextData

-- | Test that `textLevels` produces the levels of the lengths given in
-- "Data.Text.ParagraphLayout.TextData" and values given here.
shouldHaveLevels :: (Direction, a, Text, [Int]) -> [Level] -> SpecWith ()
shouldHaveLevels (dir, _, text, lens) levels = it description $
    result `shouldBe` TextLevels (runLengthDecode rls)
    where
        result = textLevels dir text
        rls = zip lens levels
        description = case rls of
            [] -> "should be empty"
            [(_, lvl)] -> "should all have level " ++ show lvl
            _ -> "should have multiple levels"

-- | Override the paragraph direction of sample data.
setDirection :: Direction -> (Direction, a, b, c) -> (Direction, a, b, c)
setDirection dir (_, a, b, c) = (dir, a, b, c)

-- | Override the paragraph direction of sample data to LTR.
setLTR :: (Direction, a, b, c) -> (Direction, a, b, c)
setLTR = setDirection DirLTR

-- | Override the paragraph direction of sample data to RTL.
setRTL :: (Direction, a, b, c) -> (Direction, a, b, c)
setRTL = setDirection DirRTL

spec :: Spec
spec = do

    describe "textLevels" $ do

        -- Empty input should produce empty output.
        -- Infinite list of level 0 is also acceptable.
        describe "on English input" $
            englishEmpty `shouldHaveLevels` []

        -- Empty input should produce empty output.
        -- Infinite list of level 1 is also acceptable.
        describe "on Arabic empty" $
            arabicEmpty `shouldHaveLevels` []

        -- All LTR text without numbers should always stay at the base level.
        describe "on English word in LTR" $
            setLTR englishWord `shouldHaveLevels` [0]

        -- All characters in a RTL paragraph must be at least at level 1.
        describe "on English word in RTL" $
            setRTL englishWord `shouldHaveLevels` [2]

        describe "on Arabic word in LTR" $
            setLTR arabicHello `shouldHaveLevels` [1]

        describe "on Arabic word in RTL" $
            setRTL arabicHello `shouldHaveLevels` [1]

        describe "on Serbian mixed script" $
            serbianMixedScript `shouldHaveLevels` [0]

        describe "on Arabic around English" $
            arabicAroundEnglish `shouldHaveLevels` [1, 2, 1, 2, 1]

        describe "on English around Arabic" $
            englishAroundArabic `shouldHaveLevels` [0, 1, 0]