~jaro/balkon

ref: 0615750e4579cfdb72cebbc24c22e23491d5e102 balkon/src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs -rw-r--r-- 4.3 KiB
0615750eJaro Autodetect text direction for legacy interface. 1 year, 3 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
module Data.Text.ParagraphLayout.Internal.BiDiLevels
    ( Level
    , TextLevels (TextLevels)
    , WithLevel
    , dropLevels
    , firstStrongDirection
    , headLevel
    , level
    , levelDirectionH
    , tailLevels
    , textLevels
    )
where

import Data.Maybe (catMaybes, listToMaybe)
import Data.Text (Text, uncons, unpack)
import Data.Text.Glyphize (Direction (..))
import qualified Data.Text.ICU.Char as ICUChar
import Data.Word (Word8)

-- | BiDi level, between 0 and 125 inclusive.
-- Even values mean left-to-right text.
-- Odd values mean right-to-left text.
type Level = Word8

-- | Typeclass for any data structure with an associated BiDi level.
class WithLevel a where
    level :: a -> Level

-- | BiDi levels for each character from a given input text.
--
-- This wrapper is meant to ease the transition to a different internal
-- representation, if required when integrating with the ICU.
newtype TextLevels = TextLevels [Level]
    deriving (Eq, Show)

headLevel :: TextLevels -> Level
headLevel (TextLevels xs) = head xs

tailLevels :: TextLevels -> TextLevels
tailLevels (TextLevels xs) = TextLevels (tail xs)

dropLevels :: Int -> TextLevels -> TextLevels
dropLevels n (TextLevels xs) = TextLevels (drop n xs)

-- | Determine the BiDi level of each character in the input text.
--
-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,
--       or provide a reasonable approximation.
--
-- The current implementation is very simplified and only works well for
-- unidirectional text, or for very simple cases of LTR text with embedded
-- RTL words. It only takes strongly directional characters into account.
-- Each strongly directional character sets the directionality for itself and
-- the text that follows it. The beginning of the text is set according to the
-- first strongly directional character, or to the base direction if no such
-- character is found.
textLevels :: Direction -> Text -> TextLevels
textLevels baseDir txt = TextLevels $ textLevels' baseLevel startLevel txt
    where
        baseLevel = directionLevel 0 baseDir
        startLevel = directionLevel baseLevel startDir
        startDir = case firstStrongDirection txt of
            Just dir -> dir
            Nothing -> baseDir

-- | Determine the BiDi level of each character in the input text by either
-- continuing the previous level, or resetting it when a strongly directional
-- character is encountered.
textLevels' :: Level -> Level -> Text -> [Level]
textLevels' baseLevel previousLevel txt = case uncons txt of
    Just (char, rest) ->
        let currentLevel = case strongDirection char of
                Just dir -> directionLevel baseLevel dir
                Nothing -> previousLevel
        in currentLevel : textLevels' baseLevel currentLevel rest
    Nothing -> []

-- | `Just` the direction of the first strongly directional character,
-- or `Nothing` if there is no strongly directional character.
firstStrongDirection :: Text -> Maybe Direction
firstStrongDirection = firstJust . map strongDirection . unpack

-- | `Just` the direction of a strongly directional character,
-- or `Nothing` if the character is not strongly directional.
strongDirection :: Char -> Maybe Direction
strongDirection c = case ICUChar.direction c of
    ICUChar.LeftToRight -> Just DirLTR
    ICUChar.RightToLeft -> Just DirRTL
    ICUChar.RightToLeftArabic -> Just DirRTL
    _ -> Nothing

-- | The first `Just` value found in the input list, if there is one,
-- otherwise `Nothing`.
firstJust :: [Maybe a] -> Maybe a
firstJust = listToMaybe . catMaybes

-- | Convert embedding level to horizontal text direction.
levelDirectionH :: Level -> Direction
levelDirectionH lvl
    | even lvl = DirLTR
    | otherwise = DirRTL

-- | Convert text direction to the smallest corresponding embedding level,
-- but no smaller than the given minimum.
directionLevel :: Level -> Direction -> Level
directionLevel low DirLTR = smallestEvenAtLeast low
directionLevel low DirRTL = smallestOddAtLeast low
directionLevel low DirTTB = smallestEvenAtLeast low
directionLevel low DirBTT = smallestOddAtLeast low

-- | Smallest even integer greater than or equal to @x@.
smallestEvenAtLeast :: Integral a => a -> a
smallestEvenAtLeast x = if even x then x else x + 1

-- | Smallest odd integer greater than or equal to @x@.
smallestOddAtLeast :: Integral a => a -> a
smallestOddAtLeast x = if odd x then x else x + 1