1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
module Data.Text.ParagraphLayout.Internal.BiDiLevels
( Level
, TextLevels (TextLevels)
, WithLevel
, level
, textLevels
)
where
import Data.Maybe (catMaybes, listToMaybe)
import Data.Text (Text, uncons, unpack)
import Data.Text.Glyphize (Direction (..))
import qualified Data.Text.ICU.Char as ICUChar
import Data.Word (Word8)
-- | BiDi level, between 0 and 125 inclusive.
-- Even values mean left-to-right text.
-- Odd values mean right-to-left text.
type Level = Word8
-- | Typeclass for any data structure with an associated BiDi level.
class WithLevel a where
level :: a -> Level
-- | BiDi levels for each character from a given input text.
--
-- This wrapper is meant to ease the transition to a different internal
-- representation, if required when integrating with the ICU.
newtype TextLevels = TextLevels [Level]
deriving (Eq, Show)
-- | Determine the BiDi level of each character in the input text.
--
-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,
-- or provide a reasonable approximation.
--
-- The current implementation is very simplified and only works well for
-- unidirectional text, or for very simple cases of LTR text with embedded
-- RTL words. It only takes strongly directional characters into account.
-- Each strongly directional character sets the directionality for itself and
-- the text that follows it. The beginning of the text is set according to the
-- first strongly directional character, or to the base direction if no such
-- character is found.
textLevels :: Direction -> Text -> TextLevels
textLevels baseDir txt = TextLevels $ textLevels' baseLevel startLevel txt
where
baseLevel = directionLevel 0 baseDir
startLevel = directionLevel baseLevel startDir
startDir = case firstStrongDirection txt of
Just dir -> dir
Nothing -> baseDir
-- | Determine the BiDi level of each character in the input text by either
-- continuing the previous level, or resetting it when a strongly directional
-- character is encountered.
textLevels' :: Level -> Level -> Text -> [Level]
textLevels' baseLevel previousLevel txt = case uncons txt of
Just (char, rest) ->
let currentLevel = case strongDirection char of
Just dir -> directionLevel baseLevel dir
Nothing -> previousLevel
in currentLevel : textLevels' baseLevel currentLevel rest
Nothing -> []
-- | `Just` the direction of the first strongly directional character,
-- or `Nothing` if there is no strongly directional character.
firstStrongDirection :: Text -> Maybe Direction
firstStrongDirection = firstJust . map strongDirection . unpack
-- | `Just` the direction of a strongly directional character,
-- or `Nothing` if the character is not strongly directional.
strongDirection :: Char -> Maybe Direction
strongDirection c = case ICUChar.direction c of
ICUChar.LeftToRight -> Just DirLTR
ICUChar.RightToLeft -> Just DirRTL
ICUChar.RightToLeftArabic -> Just DirRTL
_ -> Nothing
-- | The first `Just` value found in the input list, if there is one,
-- otherwise `Nothing`.
firstJust :: [Maybe a] -> Maybe a
firstJust = listToMaybe . catMaybes
-- | Convert text direction to the smallest corresponding embedding level,
-- but no smaller than the given minimum.
directionLevel :: Level -> Direction -> Level
directionLevel low DirLTR = smallestEvenAtLeast low
directionLevel low DirRTL = smallestOddAtLeast low
directionLevel low DirTTB = smallestEvenAtLeast low
directionLevel low DirBTT = smallestOddAtLeast low
-- | Smallest even integer greater than or equal to @x@.
smallestEvenAtLeast :: Integral a => a -> a
smallestEvenAtLeast x = if even x then x else x + 1
-- | Smallest odd integer greater than or equal to @x@.
smallestOddAtLeast :: Integral a => a -> a
smallestOddAtLeast x = if odd x then x else x + 1