~jaro/balkon

ref: cfc868c1a40565206517ffe83f7cbb5ed8e41441 balkon/src/Data/Text/ParagraphLayout/Internal/Zipper.hs -rw-r--r-- 4.0 KiB
cfc868c1Jaro Expose text slicing functions. 1 year, 7 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
-- | Zipper API for reading text from start to end.
--
-- All measurements are in UTF-8 code points, each of which can be between
-- 1 and 4 bytes long (inclusive).
module Data.Text.ParagraphLayout.Internal.Zipper
    ( Zipper (preceding, following)
    , advanceBy
    , atEnd
    , atStart
    , next
    , recombine
    , splitAt
    , start
    , step
    )
where

import Data.Bool (Bool, otherwise)
import Data.Char (Char)
import Data.Functor (fmap)
import Data.Int (Int)
import Data.Maybe (Maybe (Just, Nothing))
import Data.Ord ((<=), (>=))
import Data.Text (measureOff, null, uncons)
import Data.Text.Internal (Text (Text), empty)
import Prelude (Eq, Show, fst, (+), (-), (.))

-- | Represents a body of text with a read cursor which can be moved forward.
data Zipper = Zipper { preceding :: Text, following :: Text }
    deriving
        ( Show
        , Eq
        -- ^ /O(n)/ Compare zippers by their contents. Mostly for tests.
        )

-- | /O(1)/ Create a zipper located at the beginning of the given `Text`.
start :: Text -> Zipper
start = splitAt 0

-- | /O(n)/ Create a zipper located @n@ code points into the `Text`,
-- if possible, or located at the beginning or end of the `Text` otherwise.
--
-- Similar to `Data.Text.splitAt`, except the resulting structure holds both
-- halves of the original `Text` and can be moved forward.
splitAt :: Int -> Text -> Zipper
splitAt n t
    | n <= 0 =
        Zipper { preceding = empty, following = t }
    | otherwise = case measureI8 n t of
        Just m ->
            Zipper { preceding = takeWord8 m t, following = dropWord8 m t }
        Nothing ->
            Zipper { preceding = t, following = empty }

-- | /O(1)/ Move the zipper forward one code point, if possible.
step :: Zipper -> Zipper
step = advanceBy 1

-- | /O(n)/ Move the zipper forward at most @n@ code points.
advanceBy :: Int -> Zipper -> Zipper
advanceBy n z
    | n <= 0 = z
    | atEnd z = z
    | otherwise = case measureI8 n (following z) of
        Just m -> advanceByWord8 m z
        Nothing -> Zipper (recombine z) empty

-- | /O(1)/ Produce the original `Text`.
recombine :: Zipper -> Text
recombine (Zipper t1 t2) = recombine' t1 t2

-- | /O(1)/ Test whether the zipper is at the start of a `Text`.
atStart :: Zipper -> Bool
atStart = null . preceding

-- | /O(1)/ Test whether the zipper is at the end of a `Text`.
atEnd :: Zipper -> Bool
atEnd = null . following

-- | /O(1)/ Read the next code point.
next :: Zipper -> Maybe Char
next = fmap fst . uncons . following

-- | /O(n)/ If @t@ is long enough to contain @n@ characters, return their size
-- in `Data.Word.Word8`.
measureI8 :: Int -> Text -> Maybe Int
measureI8 n t =
    let m = measureOff n t in
    if m >= 0
        then Just m
        else Nothing

-- | /O(1)/ Unsafe recombination of two `Text`s.
--
-- Requires that both `Text`s are based on the same `Array` and adjacent to
-- each other.
recombine' :: Text -> Text -> Text
recombine' (Text _ _ 0) t = t
recombine' t (Text _ _ 0) = t
recombine' (Text arr off len1) (Text _ _ len2) = Text arr off (len1 + len2)

-- | /O(1)/ Unsafely move the zipper forward @m@ `Data.Word.Word8` units.
advanceByWord8 :: Int -> Zipper -> Zipper
advanceByWord8 m z = Zipper (recombine' a b) c
    where
        a = preceding z
        b = takeWord8 m (following z)
        c = dropWord8 m (following z)

-- | /O(1)/ Unsafe version of `Data.Text.Foreign.dropWord8`.
--
-- Return the prefix of the `Text` of @m@ `Data.Word.Word8` units in length.
--
-- Requires that @m@ be within the bounds of the `Text`, not at the beginning
-- or at the end, and not inside a code point.
takeWord8 :: Int -> Text -> Text
takeWord8 m (Text arr off _) = Text arr off m

-- | /O(1)/ Unsafe version of `Data.Text.Foreign.dropWord8`.
--
-- Return the suffix of the `Text`, with @m@ `Data.Word.Word8` units dropped
-- from its beginning.
--
-- Requires that @m@ be within the bounds of the `Text`, not at the beginning
-- or at the end, and not inside a code point.
dropWord8 :: Int -> Text -> Text
dropWord8 m (Text arr off len) = Text arr (off + m) (len - m)