~jaro/balkon

ref: 4bd95db2a46001cccd030584d3de570b5e9366fa balkon/src/Data/Text/Zipper.hs -rw-r--r-- 4.1 KiB
4bd95db2Jaro Clean up ParagraphConstruction. 1 year, 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
-- | Zipper API for reading text from start to end.
--
-- All measurements are in UTF-8 code points, each of which can be between
-- 1 and 4 bytes long (inclusive).
module Data.Text.Zipper
    -- TODO: Consider renaming the module to avoid conflict with text-zipper
    --       from Hackage.
    (Zipper(preceding, following)
    ,advanceBy
    ,atEnd
    ,atStart
    ,next
    ,recombine
    ,splitAt
    ,start
    ,step
    )
where

import Data.Text (measureOff, null, uncons)
import Data.Text.Internal (Text(Text), empty)
import Prelude
    (Bool
    ,Char
    ,Eq
    ,Int
    ,Maybe(Just, Nothing)
    ,Show
    ,fmap
    ,fst
    ,otherwise
    ,(+)
    ,(-)
    ,(.)
    ,(<=)
    ,(>=)
    )

-- | A type representing a number of UTF-8 code units, that is `Word8` units.
newtype I8 = I8 Int

-- | Represents a body of text with a read cursor which can be moved forward.
data Zipper = Zipper { preceding :: Text, following :: Text }
    deriving
        ( Show
        , Eq
        -- ^ /O(n)/ Compare zippers by their contents. Mostly for tests.
        )

-- | /O(1)/ Create a zipper located at the beginning of the given `Text`.
start :: Text -> Zipper
start = splitAt 0

-- | /O(n)/ Create a zipper located @n@ code points into the `Text`,
-- if possible, or located at the beginning or end of the `Text` otherwise.
--
-- Similar to `Data.Text.splitAt`, except the resulting structure holds both
-- halves of the original `Text` and can be moved forward.
splitAt :: Int -> Text -> Zipper
splitAt n t
    | n <= 0 =
        Zipper { preceding = empty, following = t }
    | otherwise = case measureI8 n t of
        Just m ->
            Zipper { preceding = takeWord8 m t, following = dropWord8 m t }
        Nothing ->
            Zipper { preceding = t, following = empty }

-- | /O(1)/ Move the zipper forward one code point, if possible.
step :: Zipper -> Zipper
step = advanceBy 1

-- | /O(n)/ Move the zipper forward at most @n@ code points.
advanceBy :: Int -> Zipper -> Zipper
advanceBy n z
    | n <= 0 = z
    | atEnd z = z
    | otherwise = case measureI8 n (following z) of
        Just m -> advanceByWord8 m z
        Nothing -> Zipper (recombine z) empty

-- | /O(1)/ Produce the original `Text`.
recombine :: Zipper -> Text
recombine (Zipper t1 t2) = recombine' t1 t2

-- | /O(1)/ Test whether the zipper is at the start of a `Text`.
atStart :: Zipper -> Bool
atStart = null . preceding

-- | /O(1)/ Test whether the zipper is at the end of a `Text`.
atEnd :: Zipper -> Bool
atEnd = null . following

-- | /O(1)/ Read the next code point.
next :: Zipper -> Maybe Char
next = fmap fst . uncons . following

-- | /O(n)/ If @t@ is long enough to contain @n@ characters, return their size
-- in `Word8`.
measureI8 :: Int -> Text -> Maybe I8
measureI8 n t =
    let m = measureOff n t in
    if m >= 0
        then Just (I8 m)
        else Nothing

-- | /O(1)/ Unsafe recombination of two `Text`s.
--
-- Requires that both `Text`s are based on the same `Array` and adjacent to
-- each other.
recombine' :: Text -> Text -> Text
recombine' (Text _ _ 0) t = t
recombine' t (Text _ _ 0) = t
recombine' (Text arr off len1) (Text _ _ len2) = Text arr off (len1 + len2)

-- | /O(1)/ Unsafely move the zipper forward @m@ `Word8` units.
advanceByWord8 :: I8 -> Zipper -> Zipper
advanceByWord8 (I8 m) z = Zipper (recombine' a b) c
    where
        a = preceding z
        b = takeWord8 (I8 m) (following z)
        c = dropWord8 (I8 m) (following z)

-- | /O(1)/ Unsafe version of `Data.Text.Foreign.dropWord8`.
--
-- Return the prefix of the `Text` of @m@ `Word8` units in length.
--
-- Requires that @m@ be within the bounds of the `Text`, not at the beginning
-- or at the end, and not inside a code point.
takeWord8 :: I8 -> Text -> Text
takeWord8 (I8 m) (Text arr off _) = Text arr off m

-- | /O(1)/ Unsafe version of `Data.Text.Foreign.dropWord8`.
--
-- Return the suffix of the `Text`, with @m@ `Word8` units dropped from its
-- beginning.
--
-- Requires that @m@ be within the bounds of the `Text`, not at the beginning
-- or at the end, and not inside a code point.
dropWord8 :: I8 -> Text -> Text
dropWord8 (I8 m) (Text arr off len) = Text arr (off+m) (len-m)