1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
module Data.Text.ParagraphLayout.Internal.TextContainer
( SeparableTextContainer
, TextContainer
, dropWhileEnd
, dropWhileStart
, getText
, splitTextAt8
, splitTextsBy
, trimTextsEnd
, trimTextsEndPreserve
, trimTextsStart
, trimTextsStartPreserve
)
where
import Data.Foldable (toList)
import Data.List.NonEmpty (NonEmpty ((:|)), nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)
-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
-- | Extract a `Text` from its container.
getText :: a -> Text
-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
getText = id
-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where
-- | Split the given `SeparableTextContainer` at the given number of
-- `Data.Word.Word8` units from the start of the text, preserving whatever
-- constraints the instance requires.
splitTextAt8 :: Int -> a -> (a, a)
-- | Return the suffix remaining after dropping characters that satisfy the
-- given predicate from the beginning of the given `SeparableTextContainer`.
dropWhileStart :: (Char -> Bool) -> a -> a
-- | Return the prefix remaining after dropping characters that satisfy the
-- given predicate from the end of the given `SeparableTextContainer`.
dropWhileEnd :: (Char -> Bool) -> a -> a
-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
splitTextAt8 n t = (t1, t2)
where
t1 = takeWord8 (fromIntegral n) t
t2 = dropWord8 (fromIntegral n) t
dropWhileStart = Text.dropWhile
dropWhileEnd = Text.dropWhileEnd
-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
(a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
splitTextsBy breakFunc tcs =
nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs
splitTextsBy' :: SeparableTextContainer a =>
(a -> [Int]) -> [a] -> [a] -> [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc : tcs) =
fullSplits ++ splitTextsBy' breakFunc (tc : closed) tcs
where
fullSplits = map mergeWithRest tcSplits
mergeWithRest (x1, x2) =
(reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
tcSplits = map (\ i -> splitTextAt8 i tc) tcBreakOffsets
tcBreakOffsets = breakFunc tc
-- | Turn pairs of normal lists into pairs of `NonEmpty` lists,
-- removing pairs in which either list is empty.
nonEmptyPairs :: [([a], [b])] -> [(NonEmpty a, NonEmpty b)]
nonEmptyPairs = catMaybes . map nonEmptyPair
-- | Turn a pair of normal lists into `Just` a pair of `NonEmpty` lists,
-- or `Nothing` if either list is empty.
nonEmptyPair :: ([a], [b]) -> Maybe (NonEmpty a, NonEmpty b)
nonEmptyPair (xs, ys) = case (nonEmpty xs, nonEmpty ys) of
(Just xs1, Just ys1) -> Just (xs1, ys1)
(_, _) -> Nothing
-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
| Text.null (getText tc) = tcs
| otherwise = tc : tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsStart :: (SeparableTextContainer a, Foldable f) =>
(Char -> Bool) -> f a -> [a]
trimTextsStart p tcs = trimTextsStart' p $ toList tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsStartPreserve :: SeparableTextContainer a =>
(Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsStartPreserve p tcs =
case nonEmpty $ trimTextsStart p $ NonEmpty.toList tcs of
Nothing -> truncateText (NonEmpty.head tcs) :| []
Just out -> out
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: (SeparableTextContainer a, Foldable f) =>
(Char -> Bool) -> f a -> [a]
trimTextsEnd p tcs = trimTextsEnd' p $ reverse $ toList tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve :: SeparableTextContainer a =>
(Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsEndPreserve p tcs =
case nonEmpty $ trimTextsEnd p $ NonEmpty.toList tcs of
Nothing -> truncateText (NonEmpty.head tcs) :| []
Just out -> out
trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart' _ [] = []
trimTextsStart' p (tc : tcs)
| Text.null (getText trimmed) = trimTextsStart' p tcs
| otherwise = trimmed : tcs
where
trimmed = dropWhileStart p tc
trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc : tcs)
| Text.null (getText trimmed) = trimTextsEnd' p tcs
| otherwise = reverse $ trimmed : tcs
where
trimmed = dropWhileEnd p tc
-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc