~jaro/balkon

ref: 39ec2358151d0d99dbeb4528efde98158031920a balkon/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs -rw-r--r-- 6.2 KiB
39ec2358Jaro Define tree for structuring paragraph content. 1 year, 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
module Data.Text.ParagraphLayout.Internal.TextContainer
    ( SeparableTextContainer
    , TextContainer
    , dropWhileEnd
    , dropWhileStart
    , getText
    , splitTextAt8
    , splitTextsBy
    , trimTextsEnd
    , trimTextsEndPreserve
    , trimTextsStart
    , trimTextsStartPreserve
    )
where

import Data.Foldable (toList)
import Data.List.NonEmpty (NonEmpty ((:|)), nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
    -- | Extract a `Text` from its container.
    getText :: a -> Text

-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
    getText = id

-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where

    -- | Split the given `SeparableTextContainer` at the given number of
    -- `Data.Word.Word8` units from the start of the text, preserving whatever
    -- constraints the instance requires.
    splitTextAt8 :: Int -> a -> (a, a)

    -- | Return the suffix remaining after dropping characters that satisfy the
    -- given predicate from the beginning of the given `SeparableTextContainer`.
    dropWhileStart :: (Char -> Bool) -> a -> a

    -- | Return the prefix remaining after dropping characters that satisfy the
    -- given predicate from the end of the given `SeparableTextContainer`.
    dropWhileEnd :: (Char -> Bool) -> a -> a

-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
    splitTextAt8 n t = (t1, t2)
        where
            t1 = takeWord8 (fromIntegral n) t
            t2 = dropWord8 (fromIntegral n) t
    dropWhileStart = Text.dropWhile
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
    (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
splitTextsBy breakFunc tcs =
    nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs

splitTextsBy' :: SeparableTextContainer a =>
    (a -> [Int]) -> [a] -> [a] -> [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc : tcs) =
    fullSplits ++ splitTextsBy' breakFunc (tc : closed) tcs
    where
        fullSplits = map mergeWithRest tcSplits
        mergeWithRest (x1, x2) =
            (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
        tcSplits = map (\ i -> splitTextAt8 i tc) tcBreakOffsets
        tcBreakOffsets = breakFunc tc

-- | Turn pairs of normal lists into pairs of `NonEmpty` lists,
-- removing pairs in which either list is empty.
nonEmptyPairs :: [([a], [b])] -> [(NonEmpty a, NonEmpty b)]
nonEmptyPairs = catMaybes . map nonEmptyPair

-- | Turn a pair of normal lists into `Just` a pair of `NonEmpty` lists,
-- or `Nothing` if either list is empty.
nonEmptyPair :: ([a], [b]) -> Maybe (NonEmpty a, NonEmpty b)
nonEmptyPair (xs, ys) = case (nonEmpty xs, nonEmpty ys) of
    (Just xs1, Just ys1) -> Just (xs1, ys1)
    (_, _) -> Nothing

-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
    | Text.null (getText tc) = tcs
    | otherwise = tc : tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsStart :: (SeparableTextContainer a, Foldable f) =>
    (Char -> Bool) -> f a -> [a]
trimTextsStart p tcs = trimTextsStart' p $ toList tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsStartPreserve :: SeparableTextContainer a =>
    (Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsStartPreserve p tcs =
    case nonEmpty $ trimTextsStart p $ NonEmpty.toList tcs of
        Nothing -> truncateText (NonEmpty.head tcs) :| []
        Just out -> out

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: (SeparableTextContainer a, Foldable f) =>
    (Char -> Bool) -> f a -> [a]
trimTextsEnd p tcs = trimTextsEnd' p $ reverse $ toList tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve :: SeparableTextContainer a =>
    (Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsEndPreserve p tcs =
    case nonEmpty $ trimTextsEnd p $ NonEmpty.toList tcs of
        Nothing -> truncateText (NonEmpty.head tcs) :| []
        Just out -> out

trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart' _ [] = []
trimTextsStart' p (tc : tcs)
    | Text.null (getText trimmed) = trimTextsStart' p tcs
    | otherwise = trimmed : tcs
    where
        trimmed = dropWhileStart p tc

trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc : tcs)
    | Text.null (getText trimmed) = trimTextsEnd' p tcs
    | otherwise = reverse $ trimmed : tcs
    where
        trimmed = dropWhileEnd p tc

-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc