~jaro/balkon

a46ca7a4959ee1fdff13f57e9da27cc54fc29525 — Jaro 1 year, 1 month ago dc979f4
Implement BiDi resolved level reordering.
M balkon.cabal => balkon.cabal +2 -0
@@ 124,6 124,7 @@ library balkon-internal

    -- Modules exported to tests and to the public part of the library.
    exposed-modules:
        Data.Text.ParagraphLayout.Internal.BiDiReorder,
        Data.Text.ParagraphLayout.Internal.Break,
        Data.Text.ParagraphLayout.Internal.Fragment,
        Data.Text.ParagraphLayout.Internal.LineHeight,


@@ 185,6 186,7 @@ test-suite balkon-test
    other-modules:
        Data.Text.ParagraphLayoutSpec,
        Data.Text.ParagraphLayout.FontLoader,
        Data.Text.ParagraphLayout.Internal.BiDiReorderSpec,
        Data.Text.ParagraphLayout.Internal.BreakSpec,
        Data.Text.ParagraphLayout.Internal.LinePaginationSpec,
        Data.Text.ParagraphLayout.Internal.RunSpec,

A src/Data/Text/ParagraphLayout/Internal/BiDiReorder.hs => src/Data/Text/ParagraphLayout/Internal/BiDiReorder.hs +57 -0
@@ 0,0 1,57 @@
module Data.Text.ParagraphLayout.Internal.BiDiReorder
    ( Level
    , WithLevel
    , level
    , reorder
    )
where

import Data.List.NonEmpty (NonEmpty ((:|)))
import qualified Data.List.NonEmpty as NonEmpty
import Data.Semigroup (sconcat)
import Data.Word (Word8)

-- | BiDi level, between 0 and 125 inclusive.
-- Even values mean left-to-right text.
-- Odd values mean right-to-left text.
type Level = Word8

-- | Typeclass for any data structure with an associated BiDi level.
class WithLevel a where
    level :: a -> Level

-- | Generic reordering of bidirectional text according to rule L2 of UAX #9
-- <https://www.unicode.org/reports/tr9/#Reordering_Resolved_Levels>.
--
-- Given an input in logical order and its corresponding BiDi levels,
-- this algorithm produces output in visual order, always from left to right.
--
-- Although defined by UAX #9 for reordering on the glyph level, this can also
-- be used for reordering runs of text, provided that the glyphs within each
-- shaped run are already ordered visually from left to right. This is the case
-- for HarfBuzz output.
reorder :: WithLevel a => NonEmpty a -> NonEmpty a
reorder xs = reorderLevels minOddLevel maxLevel xs
    where
        minOddLevel = minimum oddLevels
        maxLevel = maximum levels
        oddLevels = 1 :| NonEmpty.filter odd levels
        levels = NonEmpty.map level xs

-- | For each integer value from @high@ to @low@ inclusive, reverse any
-- contiguous sequence of items that are at the given level or higher.
--
-- The value of @low@ must be at least 1 to avoid integer overflow.
reorderLevels :: WithLevel a => Level -> Level -> NonEmpty a -> NonEmpty a
reorderLevels low high xs =
    if low > high
        then xs
        else reorderLevels low (high - 1) $ reorderLevel high xs

-- | Reverse any contiguous sequence of items that are at level @lvl@ or higher.
reorderLevel :: WithLevel a => Level -> NonEmpty a -> NonEmpty a
reorderLevel lvl xs = sconcat $ NonEmpty.map reverseHigh $ groupHigh xs
    where
        reverseHigh g@(x :| _) = if isHigh x then NonEmpty.reverse g else g
        groupHigh = NonEmpty.groupWith1 isHigh
        isHigh x = level x >= lvl

A test/Data/Text/ParagraphLayout/Internal/BiDiReorderSpec.hs => test/Data/Text/ParagraphLayout/Internal/BiDiReorderSpec.hs +230 -0
@@ 0,0 1,230 @@
module Data.Text.ParagraphLayout.Internal.BiDiReorderSpec where

import Data.Foldable (toList)
import Data.List.NonEmpty (NonEmpty, fromList)
import Data.Word (Word8)

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.BiDiReorder

ne :: [a] -> NonEmpty a
ne = fromList

data ExampleItem = ExampleItem Word8 String
    deriving (Eq, Show)

instance WithLevel ExampleItem where
    level (ExampleItem lvl _) = lvl

toString :: ExampleItem -> String
toString (ExampleItem _ str) = str

toStrings :: Foldable t => t ExampleItem -> [String]
toStrings = map toString . toList

singleItemLevel :: Level -> NonEmpty ExampleItem
singleItemLevel lvl = ne
    [ ExampleItem lvl "item"
    ]

flatLevel :: Level -> NonEmpty ExampleItem
flatLevel lvl = ne
    [ ExampleItem lvl "start"
    , ExampleItem lvl "mid"
    , ExampleItem lvl "end"
    ]

-- | Example 1 from <https://www.unicode.org/reports/tr9/#L2>,
-- split into words and individual punctuation marks.
example1Words :: NonEmpty ExampleItem
example1Words = ne
    [ ExampleItem 0 "car"
    , ExampleItem 0 "means"
    , ExampleItem 1 "CAR"
    , ExampleItem 0 "."
    ]

-- | Example 2 from <https://www.unicode.org/reports/tr9/#L2>,
-- split into words and individual punctuation marks.
example2Words :: NonEmpty ExampleItem
example2Words = ne
    [ ExampleItem 2 "car"
    , ExampleItem 1 "MEANS"
    , ExampleItem 1 "CAR"
    , ExampleItem 1 "."
    ]

-- | Example 3 from <https://www.unicode.org/reports/tr9/#L2>,
-- split into words and individual punctuation marks.
example3Words :: NonEmpty ExampleItem
example3Words = ne
    [ ExampleItem 0 "he"
    , ExampleItem 0 "said"
    , ExampleItem 0 "“"
    , ExampleItem 2 "car"
    , ExampleItem 1 "MEANS"
    , ExampleItem 1 "CAR"
    , ExampleItem 0 "."
    , ExampleItem 0 "”"
    , ExampleItem 0 "“"
    , ExampleItem 1 "IT"
    , ExampleItem 1 "DOES"
    , ExampleItem 0 ","
    , ExampleItem 0 "”"
    , ExampleItem 0 "she"
    , ExampleItem 0 "agreed"
    , ExampleItem 0 "."
    ]

-- | Example 4 from <https://www.unicode.org/reports/tr9/#L2>,
-- split into words and individual punctuation marks.
example4Words :: NonEmpty ExampleItem
example4Words = ne
    [ ExampleItem 1 "DID"
    , ExampleItem 1 "YOU"
    , ExampleItem 1 "SAY"
    , ExampleItem 1 "’"
    , ExampleItem 2 "he"
    , ExampleItem 2 "said"
    , ExampleItem 2 "“"
    , ExampleItem 4 "car"
    , ExampleItem 3 "MEANS"
    , ExampleItem 3 "CAR"
    , ExampleItem 2 "”"
    , ExampleItem 1 "‘"
    , ExampleItem 1 "?"
    ]

-- | Example 4 from <https://www.unicode.org/reports/tr9/#L2>,
-- split into individual characters.
example4Characters :: NonEmpty ExampleItem
example4Characters = ne
    [ ExampleItem 1 "D"
    , ExampleItem 1 "I"
    , ExampleItem 1 "D"
    , ExampleItem 1 " "
    , ExampleItem 1 "Y"
    , ExampleItem 1 "O"
    , ExampleItem 1 "U"
    , ExampleItem 1 " "
    , ExampleItem 1 "S"
    , ExampleItem 1 "A"
    , ExampleItem 1 "Y"
    , ExampleItem 1 " "
    , ExampleItem 1 "’"
    , ExampleItem 1 ">"
    , ExampleItem 2 "h"
    , ExampleItem 2 "e"
    , ExampleItem 2 " "
    , ExampleItem 2 "s"
    , ExampleItem 2 "a"
    , ExampleItem 2 "i"
    , ExampleItem 2 "d"
    , ExampleItem 2 " "
    , ExampleItem 2 "“"
    , ExampleItem 2 "<"
    , ExampleItem 4 "c"
    , ExampleItem 4 "a"
    , ExampleItem 4 "r"
    , ExampleItem 3 " "
    , ExampleItem 3 "M"
    , ExampleItem 3 "E"
    , ExampleItem 3 "A"
    , ExampleItem 3 "N"
    , ExampleItem 3 "S"
    , ExampleItem 3 " "
    , ExampleItem 3 "C"
    , ExampleItem 3 "A"
    , ExampleItem 3 "R"
    , ExampleItem 2 "="
    , ExampleItem 2 "”"
    , ExampleItem 1 "="
    , ExampleItem 1 "‘"
    , ExampleItem 1 "?"
    ]

spec :: Spec
spec = do

    describe "reorder" $ do

        it "passes through single item at level 0" $ do
            toStrings (reorder $ singleItemLevel 0) `shouldBe` ["item"]

        it "passes through single item at level 1" $ do
            toStrings (reorder $ singleItemLevel 1) `shouldBe` ["item"]

        it "passes through single item at level 2" $ do
            toStrings (reorder $ singleItemLevel 2) `shouldBe` ["item"]

        it "passes through single item at level 3" $ do
            toStrings (reorder $ singleItemLevel 3) `shouldBe` ["item"]

        it "does not reverse at level 0" $ do
            toStrings (reorder $ flatLevel 0) `shouldBe` ["start", "mid", "end"]

        it "reverses at level 1" $ do
            toStrings (reorder $ flatLevel 1) `shouldBe` ["end", "mid", "start"]

        it "does not reverse at level 2" $ do
            toStrings (reorder $ flatLevel 2) `shouldBe` ["start", "mid", "end"]

        it "reverses at level 3" $ do
            toStrings (reorder $ flatLevel 3) `shouldBe` ["end", "mid", "start"]

        it "reorders UAX #9 example 1 as words" $
            toStrings (reorder example1Words) `shouldBe`
                [ "car"
                , "means"
                , "CAR"
                , "."
                ]

        it "reorders UAX #9 example 2 as words" $
            toStrings (reorder example2Words) `shouldBe`
                [ "."
                , "CAR"
                , "MEANS"
                , "car"
                ]

        it "reorders UAX #9 example 3 as words" $
            toStrings (reorder example3Words) `shouldBe`
                [ "he"
                , "said"
                , "“"
                , "CAR"
                , "MEANS"
                , "car"
                , "."
                , "”"
                , "“"
                , "DOES"
                , "IT"
                , ","
                , "”"
                , "she"
                , "agreed"
                , "."
                ]

        it "reorders UAX #9 example 4 as words" $
            toStrings (reorder example4Words) `shouldBe`
                [ "?"
                , "‘"
                , "he"
                , "said"
                , "“"
                , "CAR"
                , "MEANS"
                , "car"
                , "”"
                , "’"
                , "SAY"
                , "YOU"
                , "DID"
                ]

        it "reorders UAX #9 example 4 as characters" $
            concat (toStrings (reorder example4Characters)) `shouldBe`
                "?‘=he said “<RAC SNAEM car=”>’ YAS UOY DID"