~jaro/balkon: src/Data/Text/ParagraphLayout/Internal/TextContainer.hs

module Data.Text.ParagraphLayout.Internal.TextContainer
    (SeparableTextContainer
    ,TextContainer
    ,dropWhileEnd
    ,dropWhileStart
    ,getText
    ,splitTextAt8
    ,splitTextsBy
    ,trimTextsEnd
    ,trimTextsEndPreserve
    ,trimTextsStart
    ,trimTextsStartPreserve
    )
where

import Data.Foldable (toList)
import Data.List.NonEmpty (NonEmpty((:|)), nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Maybe (catMaybes)
import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, takeWord8)

-- | Class of data types containing `Text` that can be accessed.
class TextContainer a where
    -- | Extract a `Text` from its container.
    getText :: a -> Text

-- | As a trivial instance, each `Text` contains itself.
instance TextContainer Text where
    getText = id

-- | Class of data types containing `Text` that can be split at a given number
-- of `Data.Word.Word8` units from the start of the text.
class TextContainer a => SeparableTextContainer a where

    -- | Split the given `SeparableTextContainer` at the given number of
    -- `Data.Word.Word8` units from the start of the text, preserving whatever
    -- constraints the instance requires.
    splitTextAt8 :: Int -> a -> (a, a)

    -- | Return the suffix remaining after dropping characters that satisfy the
    -- given predicate from the beginning of the given `SeparableTextContainer`.
    dropWhileStart :: (Char -> Bool) -> a -> a

    -- | Return the prefix remaining after dropping characters that satisfy the
    -- given predicate from the end of the given `SeparableTextContainer`.
    dropWhileEnd :: (Char -> Bool) -> a -> a

-- | As a trivial instance, each `Text` can be split directly.
instance SeparableTextContainer Text where
    splitTextAt8 n t = (t1, t2)
        where
            t1 = takeWord8 (fromIntegral n) t
            t2 = dropWord8 (fromIntegral n) t
    dropWhileStart = Text.dropWhile
    dropWhileEnd = Text.dropWhileEnd

-- | Treat a list of text containers as a contiguous sequence,
-- and find all possible ways to split them into two non-empty lists,
-- using the given function to find valid split offsets in `Data.Word.Word8`
-- units from the beginning of each container.
--
-- The results in the form (prefix, suffix) will be ordered from the longest
-- prefix to shortest.
splitTextsBy :: (SeparableTextContainer a, Foldable f) =>
    (a -> [Int]) -> f a -> [(NonEmpty a, NonEmpty a)]
splitTextsBy breakFunc tcs =
    nonEmptyPairs $ splitTextsBy' breakFunc [] $ reverse $ toList tcs

splitTextsBy' :: SeparableTextContainer a =>
    (a -> [Int]) -> [a] -> [a] -> [([a], [a])]
splitTextsBy' _ _ [] = []
splitTextsBy' breakFunc closed (tc:tcs) =
    fullSplits ++ splitTextsBy' breakFunc (tc:closed) tcs
    where
        fullSplits = map mergeWithRest tcSplits
        mergeWithRest (x1, x2) =
            (reverse $ collapse $ x1 :| tcs, collapse $ x2 :| closed)
        tcSplits = map (\i -> splitTextAt8 i tc) tcBreakOffsets
        tcBreakOffsets = breakFunc tc

-- | Turn pairs of normal lists into pairs of `NonEmpty` lists,
-- removing pairs in which either list is empty.
nonEmptyPairs :: [([a], [b])] -> [(NonEmpty a, NonEmpty b)]
nonEmptyPairs = catMaybes . map nonEmptyPair

-- | Turn a pair of normal lists into `Just` a pair of `NonEmpty` lists,
-- or `Nothing` if either list is empty.
nonEmptyPair :: ([a], [b]) -> Maybe (NonEmpty a, NonEmpty b)
nonEmptyPair (xs, ys) = case (nonEmpty xs, nonEmpty ys) of
    (Just xs1, Just ys1) -> Just (xs1, ys1)
    (_, _) -> Nothing

-- | If the first container in the list is empty, remove it.
collapse :: SeparableTextContainer a => NonEmpty a -> [a]
collapse (tc :| tcs)
    | Text.null (getText tc) = tcs
    | otherwise = tc:tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsStart :: (SeparableTextContainer a, Foldable f) =>
    (Char -> Bool) -> f a -> [a]
trimTextsStart p tcs = trimTextsStart' p $ toList tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a prefix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsStartPreserve :: SeparableTextContainer a =>
    (Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsStartPreserve p tcs =
    case nonEmpty $ trimTextsStart p $ NonEmpty.toList tcs of
        Nothing -> NonEmpty.singleton $ truncateText $ NonEmpty.head tcs
        Just out -> out

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output, so the result may
-- potentially be an empty list.
trimTextsEnd :: (SeparableTextContainer a, Foldable f) =>
    (Char -> Bool) -> f a -> [a]
trimTextsEnd p tcs = trimTextsEnd' p $ reverse $ toList tcs

-- | Treat a list of text containers as a contiguous sequence,
-- and remove a suffix of characters that match the given predicate.
--
-- Empty text containers are removed from the output except the first one,
-- which is instead truncated to zero length.
trimTextsEndPreserve :: SeparableTextContainer a =>
    (Char -> Bool) -> NonEmpty a -> NonEmpty a
trimTextsEndPreserve p tcs =
    case nonEmpty $ trimTextsEnd p $ NonEmpty.toList tcs of
        Nothing -> NonEmpty.singleton $ truncateText $ NonEmpty.head tcs
        Just out -> out

trimTextsStart' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsStart' _ [] = []
trimTextsStart' p (tc:tcs)
    | Text.null (getText trimmed) = trimTextsStart' p tcs
    | otherwise = trimmed:tcs
    where
        trimmed = dropWhileStart p tc

trimTextsEnd' :: SeparableTextContainer a => (Char -> Bool) -> [a] -> [a]
trimTextsEnd' _ [] = []
trimTextsEnd' p (tc:tcs)
    | Text.null (getText trimmed) = trimTextsEnd' p tcs
    | otherwise = reverse $ trimmed:tcs
    where
        trimmed = dropWhileEnd p tc

-- | Discard all text from the container by creating a prefix of length 0.
truncateText :: SeparableTextContainer a => a -> a
truncateText tc = fst $ splitTextAt8 0 tc