~jaro/balkon: src/Data/Text/ParagraphLayout/Internal/Break.hs blame

259fe6a1 Jaro 


32e1c839 Jaro 



7364410d Jaro 
e2944b4d Jaro 

764fa6eb Jaro 
e2944b4d Jaro 

7364410d Jaro 
259fe6a1 Jaro 



7364410d Jaro 
e2944b4d Jaro 


764fa6eb Jaro 
e2944b4d Jaro 
764fa6eb Jaro 
e2944b4d Jaro 

7364410d Jaro 
























cb3f4787 Jaro 
259fe6a1 Jaro 
764fa6eb Jaro 









259fe6a1 Jaro 











764fa6eb Jaro 


259fe6a1 Jaro 





764fa6eb Jaro 





259fe6a1 Jaro

1 year, 8 months ago


1 year, 8 months ago



1 year, 8 months ago
1 year, 7 months ago

1 year, 5 months ago
1 year, 7 months ago

1 year, 8 months ago
1 year, 8 months ago



1 year, 8 months ago
1 year, 7 months ago


1 year, 5 months ago
1 year, 7 months ago
1 year, 5 months ago
1 year, 7 months ago

1 year, 8 months ago
























1 year, 7 months ago
1 year, 8 months ago
1 year, 5 months ago









1 year, 8 months ago











1 year, 5 months ago


1 year, 8 months ago





1 year, 5 months ago





1 year, 8 months ago

-- | Boundary analysis using `Data.Text.ICU`, but returning numeric offsets
-- instead of text slices.
--
-- Within this module, each /offset/ refers to the number of `Data.Word.Word8`
-- items (also called UTF-8 code units or bytes) between the start of the input
-- `Text` and the position of the break. The internal offset of the `Text` from
-- the start of its underlying byte array is excluded.
module Data.Text.ParagraphLayout.Internal.Break
    ( LineBreak (..)
    , locale
    , breaksAsc
    , breaksDesc
    , subOffsetsDesc
    )
where

import Data.Text (Text)
import Data.Text.Foreign (lengthWord8)
import Data.Text.ICU
    ( Break
    , Breaker
    , LocaleName (Locale)
    , breaks
    , breaksRight
    , brkBreak
    , brkPrefix
    , brkStatus
    )

-- | Strictness levels of line-breaking rules,
-- corresponding to the CSS @line-break@ property.
data LineBreak = LBAuto | LBLoose | LBNormal | LBStrict

-- | Line breaking keyword to use in an ICU locale identifier.
lbKeyword :: LineBreak -> String
lbKeyword LBAuto = ""
lbKeyword LBLoose = "@lb=loose"
lbKeyword LBNormal = "@lb=normal"
lbKeyword LBStrict = "@lb=strict"

-- | An ICU locale identifier corresponding to the given IETF BCP 47 language
-- tag and line breaking strictness.
--
-- For line breaking, the differences are mostly in the strictness of breaking
-- Chinese and Japanese text.
locale :: String -> LineBreak -> LocaleName
locale lang lb = Locale $ (clean lang) ++ (lbKeyword lb)
    where
        -- ICU's "level 1 canonicalisation" can handle most BCP 47 tags,
        -- including case changes and converting hyphens to underscores.
        --
        -- This filter is here just to stop syntactically incorrect input.
        clean = takeWhile (`elem` ['A' .. 'Z'] ++ ['a' .. 'z'] ++ "_-")

-- | List of all breaks in the given text, with offsets in ascending order,
-- including the status of the break if applicable.
--
-- Excludes the start of the text (with offset 0).
--
-- Includes the end of the text (with offset equal to the text length)
-- as the last list item.
breaksAsc :: Breaker a -> Text -> [(Int, a)]
breaksAsc breaker input = map brkEndOffsetStatus $ breaks breaker input

-- | List of all breaks in the given text, with offsets in descending order,
-- including the status of the break if applicable.
--
-- Includes the start of the text (with offset 0) as the last list item.
--
-- Excludes the end of the text (with offset equal to the text length).
breaksDesc :: Breaker a -> Text -> [(Int, a)]
breaksDesc breaker input = map brkStartOffsetStatus $ breaksRight breaker input

brkStartOffsetStatus :: Break a -> (Int, a)
brkStartOffsetStatus brk = (brkStartOffset brk, brkStatus brk)

brkEndOffsetStatus :: Break a -> (Int, a)
brkEndOffsetStatus brk = (brkEndOffset brk, brkStatus brk)

-- | The ICU library returns "breaks" as slices of text with two boundaries.
-- This gives the smaller of the two distances from the start of the text
-- to the boundaries of the break.
brkStartOffset :: Break a -> Int
brkStartOffset brk = lengthWord8 (brkPrefix brk)

-- | The ICU library returns "breaks" as slices of text with two boundaries.
-- This gives the larger of the two distances from the start of the text
-- to the boundaries of the break.
brkEndOffset :: Break a -> Int
brkEndOffset brk = lengthWord8 (brkPrefix brk) + lengthWord8 (brkBreak brk)

-- | Given a list of offsets into a text in descending order, produce a list of
-- corresponding offsets into a slice of the text starting at a given offset.
subOffsetsDesc :: Int -> [(Int, a)] -> [(Int, a)]
subOffsetsDesc d = takeWhile valid . map adjust
    where
        valid (off, _) = off >= 0
        adjust (off, a) = (off - d, a)