module Data.Text.ParagraphLayout.Internal.TextContainer
(SeparableTextContainer
,TextContainer
,dropWhileEnd
,getText
,splitTextAt8
,splitTextsBy
,trimTextsEnd
,trimTextsEndPreserve
)
where
import Data.List.NonEmpty (NonEmpty((:|)))
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)
-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
-- | Extract a `Text` from its container.
getText :: a -> Text
-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
getText = id
-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where
-- | Split the given `SeparableTextContainer` at the given number of
-- `Data.Word.Word8` units from the start of the text, preserving whatever
-- constraints the instance requires.
splitTextAt8 :: Int -> a -> (a, a)
-- | Return the prefix remaining after dropping characters that satisfy
-- the given predicate from the end of the given `SeparableTextContainer`.
dropWhileEnd :: (Char -> Bool) -> a -> a
-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
splitTextAt8 n t = (t1, t2)
where
t1 = takeWord8 (fromIntegral n) t
t2 = dropWord8 (fromIntegral n) t
dropWhileEnd = Text.dropWhileEnd
-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valit split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [([a], [a])]
splitTextsBy breakFunc tcs =
filter notEmpty $ splitTextsBy' breakFunc [] (reverse tcs)
where
notEmpty (prefix, suffix) = not (null prefix || null suffix)
splitTextsBy' :: SeparableTextContainer a => (a -> [Int]) -> [a] -> [a] ->
[([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc:tcs) =
fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
where
fullSplits = map mergeWithRest tcSplits
mergeWithRest (x1, x2) =
(reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
tcBreakOffsets = breakFunc tc
-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
| Text.null (getText tc) = tcs
| otherwise = tc:tcs
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd p tcs = trimTextsEnd' p (reverse tcs)
-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve ::
SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEndPreserve _ [] = []
trimTextsEndPreserve p ins@(in1:_) = case trimTextsEnd' p (reverse ins) of
[] -> [truncateText in1]
out -> out
trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc:tcs)
| Text.null (getText trimmed) = trimTextsEnd' p tcs
| otherwise = reverse $ trimmed:tcs
where
trimmed = dropWhileEnd p tc
-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc