~jaro/balkon

ref: 58f69b24f9a2138833dd86fbc8a1590760fecf64 balkon/src/Data/Text/ParagraphLayout/Internal/Run.hs -rw-r--r-- 4.9 KiB
58f69b24Jaro Add helper function for run-length encoding. 1 year, 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
module Data.Text.ParagraphLayout.Internal.Run (Run (..), spanToRuns)
where

import Data.List (mapAccumL)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
import Data.Text.Glyphize (Direction (..))
import qualified Data.Text.ICU.Char as ICUChar

import Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (ProtoRun))
import qualified Data.Text.ParagraphLayout.Internal.ProtoRun as PR
import Data.Text.ParagraphLayout.Internal.ResolvedSpan
import Data.Text.ParagraphLayout.Internal.Script
import Data.Text.ParagraphLayout.Internal.TextContainer
import Data.Text.ParagraphLayout.Internal.Zipper

-- | Each span can be broken into one or more runs by Balkón.
--
-- Each run could have a different script, language, or direction.
data Run = Run
    { runOffsetInSpan :: Int
    , runText :: Text
    , runDirection :: Maybe Direction
    , runScript :: Maybe ScriptCode
    }
    deriving (Eq, Show)

instance TextContainer Run where
    getText = runText

instance SeparableTextContainer Run where
    splitTextAt8 n r =
        ( r { runText = t1 }
        , r { runText = t2, runOffsetInSpan = runOffsetInSpan r + l1 }
        )
        where
            l1 = lengthWord8 t1
            t1 = takeWord8 (fromIntegral n) t
            t2 = dropWord8 (fromIntegral n) t
            t = getText r
    dropWhileStart p r = r { runText = t', runOffsetInSpan = o' }
        where
            t = runText r
            t' = Text.dropWhile p t
            l = lengthWord8 t
            l' = lengthWord8 t'
            o = runOffsetInSpan r
            o' = o + l - l'
    dropWhileEnd p r = r { runText = Text.dropWhileEnd p (runText r) }

-- | Represents a zipper that can advance by at least one character.
data ZipperChoice = ZipperChoice
    { nextChar :: Char
    , continuingRun :: Zipper
    -- ^ The zipper will advance over the next character,
    -- merging it with all preceding characters.
    , startingNewRun :: Zipper
    -- ^ The zipper will forget all preceding characters and then advance over
    -- the next character, making it the first character in a new run of text.
    }

considerNext :: Zipper -> Maybe ZipperChoice
considerNext z = case next z of
    Nothing -> Nothing
    Just c -> Just ZipperChoice
        { nextChar = c
        , continuingRun = step z
        , startingNewRun = step $ start $ following z
        }

data Merged a = Incompatible | Merged a

spanToRuns :: ResolvedSpan d -> [Run]
spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper
    where
        wholeText = spanText s
        zipper = start wholeText
        run acc pr = let t = preceding (PR.zipper pr) in
            ( acc + lengthWord8 t
            , Run
                { runOffsetInSpan = acc
                , runText = t
                , runDirection = PR.direction pr
                , runScript = Just $ PR.script pr
                }
            )

protoRuns :: Zipper -> [ProtoRun]
protoRuns z = reverse $ protoRuns' z []

protoRuns' :: Zipper -> [ProtoRun] -> [ProtoRun]
protoRuns' curZipper curRuns = case considerNext curZipper of
    Nothing -> curRuns
    Just choice ->
        let headRun :| tailRuns = foldRun choice curRuns
        in protoRuns' (PR.zipper headRun) (headRun : tailRuns)

foldRun :: ZipperChoice -> [ProtoRun] -> NonEmpty ProtoRun

-- If there are no runs, create a new run with a single character.
foldRun x [] = ProtoRun (continuingRun x) d s :| []
    where
        d = charDirection (nextChar x)
        s = charScript (nextChar x)

foldRun x (previousRun : tailRuns) =
    case (mergeDirections d1 d2, mergeScripts s1 s2) of
        (Merged d, Merged s) -> ProtoRun (continuingRun x) d s :| tailRuns
        _ -> ProtoRun (startingNewRun x) d2 s2 :| previousRun : tailRuns
    where
        d1 = PR.direction previousRun
        s1 = PR.script previousRun
        d2 = charDirection (nextChar x)
        s2 = charScript (nextChar x)

-- | Simplified detection of text direction for unidirectional text.
mergeDirections :: Maybe Direction -> Maybe Direction ->
    Merged (Maybe Direction)
mergeDirections Nothing Nothing = Merged Nothing
mergeDirections (Just d1) Nothing = Merged (Just d1)
mergeDirections Nothing (Just d2) = Merged (Just d2)
mergeDirections (Just d1) (Just d2)
    | d1 == d2 = Merged (Just d1)
    | otherwise = Incompatible

-- TODO: Implement proper inheritance rules.
mergeScripts :: ScriptCode -> ScriptCode -> Merged ScriptCode
mergeScripts "Zyyy" s2 = Merged s2
mergeScripts s1 "Zyyy" = Merged s1
mergeScripts s1 "Zinh" = Merged s1
mergeScripts s1 s2
    | s1 == s2 = Merged s1
    | otherwise = Incompatible

-- TODO: Use the BiDi algorithm to support bidirectional text.
charDirection :: Char -> Maybe Direction
charDirection c = case ICUChar.direction c of
    ICUChar.LeftToRight -> Just DirLTR
    ICUChar.RightToLeft -> Just DirRTL
    ICUChar.RightToLeftArabic -> Just DirRTL
    _ -> Nothing