~jaro/balkon

ref: a084e3323207472c41ea5f174d13626080d6531d balkon/src/Data/Text/ParagraphLayout/Internal/TextContainer.hs -rw-r--r-- 4.9 KiB
a084e332Jaro Test shifted baseline. 1 year, 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
module Data.Text.ParagraphLayout.Internal.TextContainer
    ( SeparableTextContainer
    , TextContainer
    , dropWhileEnd
    , dropWhileEndCascade
    , dropWhileStart
    , dropWhileStartCascade
    , getText
    , splitTextAt8
    , splitTextsBy
    )
where

import Data.Foldable (toList)
import Data.List (mapAccumL, mapAccumR)
import Data.List.NonEmpty (NonEmpty ((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
    -- | Extract a `Text` from its container.
    getText :: a -> Text

-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
    getText = id

-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where

    -- | Split the given `SeparableTextContainer` at the given number of
    -- `Data.Word.Word8` units from the start of the text, preserving whatever
    -- constraints the instance requires.
    splitTextAt8 :: Int -> a -> (a, a)

    -- | Return the suffix remaining after dropping characters that satisfy the
    -- given predicate from the beginning of the given `SeparableTextContainer`.
    dropWhileStart :: (Char -> Bool) -> a -> a

    -- | Return the prefix remaining after dropping characters that satisfy the
    -- given predicate from the end of the given `SeparableTextContainer`.
    dropWhileEnd :: (Char -> Bool) -> a -> a

-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
    splitTextAt8 n t = (t1, t2)
        where
            t1 = takeWord8 (fromIntegral n) t
            t2 = dropWord8 (fromIntegral n) t
    dropWhileStart = Text.dropWhile
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
    (a -> [Int]) -> f a -> [([a], [a])]
splitTextsBy breakFunc tcs =
    splitTextsBy' breakFunc [] $ reverse $ toList tcs

splitTextsBy' :: SeparableTextContainer a =>
    (a -> [Int]) -> [a] -> [a] -> [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc : tcs) =
    fullSplits ++ splitTextsBy' breakFunc (tc : closed) tcs
    where
        fullSplits = map mergeWithRest tcSplits
        mergeWithRest (x1, x2) =
            (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
        tcSplits = map (\ i -> splitTextAt8 i tc) tcBreakOffsets
        tcBreakOffsets = breakFunc tc

-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
    | Text.null (getText tc) = tcs
    | otherwise = tc : tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- All text containers are preserved but their contents may end up having
-- zero length.
dropWhileStartCascade :: (SeparableTextContainer a, Traversable t) =>
    (Char -> Bool) -> t a -> t a
dropWhileStartCascade p tcs = trimTextsStartCascade (dropWhileStart p) tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- All text containers are preserved but their contents may end up having
-- zero length.
dropWhileEndCascade :: (SeparableTextContainer a, Traversable t) =>
    (Char -> Bool) -> t a -> t a
dropWhileEndCascade p tcs = trimTextsEndCascade (dropWhileEnd p) tcs

-- | Traverse the given structure from start to end, applying the given
-- text trimming function to each text container until a non-empty container
-- is produced.
trimTextsStartCascade :: (SeparableTextContainer a, Traversable t) =>
    (a -> a) -> t a -> t a
trimTextsStartCascade trimFunc tcs =
    snd $ mapAccumL (cascadingTrim trimFunc) True tcs

-- | Traverse the given structure from end to start, applying the given
-- text trimming function to each text container until a non-empty container
-- is produced.
trimTextsEndCascade :: (SeparableTextContainer a, Traversable t) =>
    (a -> a) -> t a -> t a
trimTextsEndCascade trimFunc tcs =
    snd $ mapAccumR (cascadingTrim trimFunc) True tcs

-- | Wraps a text trimming function in a controlled cascade.
-- When the trim produces an empty text, the cascade continues.
cascadingTrim :: SeparableTextContainer a => (a -> a) -> Bool -> a -> (Bool, a)
cascadingTrim _ False tc = (False, tc)
cascadingTrim trimFunc True tc = (continue, trimmed)
    where
        trimmed = trimFunc tc
        continue = Text.null $ getText trimmed