1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
module Data.Text.ParagraphLayout.Internal.TextContainer
(SeparableTextContainer
,TextContainer
,dropWhileEnd
,dropWhileStart
,getText
,splitTextAt8
,splitTextsBy
,trimTextsEnd
,trimTextsEndPreserve
,trimTextsStart
,trimTextsStartPreserve
)
where
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)
-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
-- | Extract a `Text` from its container.
getText :: a -> Text
-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
getText = id
-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where
-- | Split the given `SeparableTextContainer` at the given number of
-- `Data.Word.Word8` units from the start of the text, preserving whatever
-- constraints the instance requires.
splitTextAt8 :: Int -> a -> (a, a)
-- | Return the suffix remaining after dropping characters that satisfy the
-- given predicate from the beginning of the given `SeparableTextContainer`.
dropWhileStart :: (Char -> Bool) -> a -> a
-- | Return the prefix remaining after dropping characters that satisfy the
-- given predicate from the end of the given `SeparableTextContainer`.
dropWhileEnd :: (Char -> Bool) -> a -> a
-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
splitTextAt8 n t = (t1, t2)
where
t1 = takeWord8 (fromIntegral n) t
t2 = dropWord8 (fromIntegral n) t
dropWhileStart = Text.dropWhile
dropWhileEnd = Text.dropWhileEnd
-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valit split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])]
splitTextsBy breakFunc tcs =
filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs)
where
notEmpty (prefix, suffix) = not (null prefix || null suffix)
splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] ->
[([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc:tcs) =
fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
where
fullSplits = map mergeWithRest tcSplits
mergeWithRest (x1, x2) =
(reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
tcBreakOffsets = breakFunc tc
-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
| Text.null (getText tc) = tcs
| otherwise = tc:tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsStart :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart p tcs = trimTextsStart' p tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsStartPreserve ::
SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStartPreserve _ [] = []
trimTextsStartPreserve p ins@(in1:_) = case trimTextsStart' p ins of
[] -> [truncateText in1]
out -> out
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd p tcs = trimTextsEnd' p (reverse tcs)
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve ::
SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEndPreserve _ [] = []
trimTextsEndPreserve p ins@(in1:_) = case trimTextsEnd' p (reverse ins) of
[] -> [truncateText in1]
out -> out
trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart' _ [] = []
trimTextsStart' p (tc:tcs)
| Text.null (getText trimmed) = trimTextsStart' p tcs
| otherwise = trimmed:tcs
where
trimmed = dropWhileStart p tc
trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc:tcs)
| Text.null (getText trimmed) = trimTextsEnd' p tcs
| otherwise = reverse $ trimmed:tcs
where
trimmed = dropWhileEnd p tc
-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc