~jaro/balkon

ref: 44ea112944690211d7c2da3c19bdadf1a0ca9c14 balkon/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs -rw-r--r-- 5.4 KiB
44ea1129Jaro Implement splitting ParagraphLayout into lines. 1 year, 7 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
module Data.Text.ParagraphLayout.Internal.TextContainer
    (SeparableTextContainer
    ,TextContainer
    ,dropWhileEnd
    ,dropWhileStart
    ,getText
    ,splitTextAt8
    ,splitTextsBy
    ,trimTextsEnd
    ,trimTextsEndPreserve
    ,trimTextsStart
    ,trimTextsStartPreserve
    )
where

import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
    -- | Extract a `Text` from its container.
    getText :: a -> Text

-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
    getText = id

-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where

    -- | Split the given `SeparableTextContainer` at the given number of
    -- `Data.Word.Word8` units from the start of the text, preserving whatever
    -- constraints the instance requires.
    splitTextAt8 :: Int -> a -> (a, a)

    -- | Return the suffix remaining after dropping characters that satisfy the
    -- given predicate from the beginning of the given `SeparableTextContainer`.
    dropWhileStart :: (Char -> Bool) -> a -> a

    -- | Return the prefix remaining after dropping characters that satisfy the
    -- given predicate from the end of the given `SeparableTextContainer`.
    dropWhileEnd :: (Char -> Bool) -> a -> a

-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
    splitTextAt8 n t = (t1, t2)
        where
            t1 = takeWord8 (fromIntegral n) t
            t2 = dropWord8 (fromIntegral n) t
    dropWhileStart = Text.dropWhile
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valit split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])]
splitTextsBy breakFunc tcs =
    filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs)
    where
        notEmpty (prefix, suffix) = not (null prefix || null suffix)

splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] ->
    [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc:tcs) =
    fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
    where
        fullSplits = map mergeWithRest tcSplits
        mergeWithRest (x1, x2) =
            (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
        tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
        tcBreakOffsets = breakFunc tc

-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
    | Text.null (getText tc) = tcs
    | otherwise = tc:tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsStart :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart p tcs = trimTextsStart' p tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsStartPreserve ::
    SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStartPreserve _ [] = []
trimTextsStartPreserve p ins@(in1:_) = case trimTextsStart' p ins of
    [] -> [truncateText in1]
    out -> out

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd p tcs = trimTextsEnd' p (reverse tcs)

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve ::
    SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEndPreserve _ [] = []
trimTextsEndPreserve p ins@(in1:_) = case trimTextsEnd' p (reverse ins) of
    [] -> [truncateText in1]
    out -> out

trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart' _ [] = []
trimTextsStart' p (tc:tcs)
    | Text.null (getText trimmed) = trimTextsStart' p tcs
    | otherwise = trimmed:tcs
    where
        trimmed = dropWhileStart p tc

trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc:tcs)
    | Text.null (getText trimmed) = trimTextsEnd' p tcs
    | otherwise = reverse $ trimmed:tcs
    where
        trimmed = dropWhileEnd p tc

-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc