~jaro/balkon: test/Data/Text/ParagraphLayout/Internal/BreakSpec.hs blame

1555a8cc Jaro 
259fe6a1 Jaro 
7364410d Jaro 
259fe6a1 Jaro 

e2944b4d Jaro 




259fe6a1 Jaro 
e2944b4d Jaro 
259fe6a1 Jaro 

1555a8cc Jaro 
259fe6a1 Jaro 















7319cb0d Jaro 
259fe6a1 Jaro 


7319cb0d Jaro 

259fe6a1 Jaro 
dbcfcd67 Jaro 


7319cb0d Jaro 


dbcfcd67 Jaro 
259fe6a1 Jaro 


7319cb0d Jaro 


259fe6a1 Jaro 

56b3ee16 Jaro 

7319cb0d Jaro 





56b3ee16 Jaro 

259fe6a1 Jaro 

7319cb0d Jaro 



259fe6a1 Jaro 



7319cb0d Jaro 








259fe6a1 Jaro 

7364410d Jaro 

7319cb0d Jaro 




7364410d Jaro 

7319cb0d Jaro 






7364410d Jaro 





7319cb0d Jaro 







7364410d Jaro 

7319cb0d Jaro 







7364410d Jaro 

1c4f51d8 Jaro 
7364410d Jaro 


1c4f51d8 Jaro 
7364410d Jaro 


259fe6a1 Jaro 






















7319cb0d Jaro 

259fe6a1 Jaro 


7319cb0d Jaro 




259fe6a1 Jaro 

cb8b67f9 Jaro 

259fe6a1 Jaro

1 year, 8 months ago
1 year, 8 months ago
1 year, 7 months ago
1 year, 8 months ago

1 year, 7 months ago




1 year, 8 months ago
1 year, 7 months ago
1 year, 8 months ago

1 year, 8 months ago
1 year, 8 months ago















1 year, 7 months ago
1 year, 8 months ago


1 year, 7 months ago

1 year, 8 months ago
1 year, 7 months ago


1 year, 7 months ago


1 year, 7 months ago
1 year, 8 months ago


1 year, 7 months ago


1 year, 8 months ago

1 year, 7 months ago

1 year, 7 months ago





1 year, 7 months ago

1 year, 8 months ago

1 year, 7 months ago



1 year, 8 months ago



1 year, 7 months ago








1 year, 8 months ago

1 year, 7 months ago

1 year, 7 months ago




1 year, 7 months ago

1 year, 7 months ago






1 year, 7 months ago





1 year, 7 months ago







1 year, 7 months ago

1 year, 7 months ago







1 year, 7 months ago

1 year, 7 months ago
1 year, 7 months ago


1 year, 7 months ago
1 year, 7 months ago


1 year, 8 months ago






















1 year, 7 months ago

1 year, 8 months ago


1 year, 7 months ago




1 year, 8 months ago

1 year, 8 months ago

1 year, 8 months ago

module Data.Text.ParagraphLayout.Internal.BreakSpec (spec) where

import Control.Monad (forM_)
import Data.Text (empty, pack, singleton)
import Data.Text.ICU
    ( LocaleName (Locale)
    , breakCharacter
    , breakLine
    , breakSentence
    , breakWord
    )
import qualified Data.Text.ICU as BreakStatus (Line (..), Word (..))

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.Break

-- | Example-based tests for 'breaksDesc' (applied to the ICU line, sentence,
-- word and character breakers) and for 'subOffsetsDesc'.
--
-- Every expected list is written with offsets in descending order, and none
-- of the expectations includes the offset at the very end of the input.
spec :: Spec
spec = do

    describe "breaksDesc" $ do

        -- Line breaking is one of the essential pieces that a text layout
        -- engine is built from.
        describe "breakLine" $ do
            let lineBreaks lang = breaksDesc (breakLine (Locale lang))
                -- Shorthands for building expected (offset, status) pairs.
                softAt offset = (offset, BreakStatus.Soft)
                hardAt offset = (offset, BreakStatus.Hard)

            it "finds no breaks in empty input" $
                lineBreaks "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                lineBreaks "en" (singleton 'a') `shouldBe` [softAt 0]

            it "finds hard break after newline" $
                lineBreaks "en" (pack "hello\nworld") `shouldBe`
                    [hardAt 6, softAt 0]

            it "finds hard break after each of newlines" $
                lineBreaks "en" (pack "hello\n\nworld") `shouldBe`
                    [hardAt 7, hardAt 6, softAt 0]

            it "finds soft breaks after spaces and tabs" $
                lineBreaks "en" (pack "a few\twords") `shouldBe`
                    map softAt [6, 2, 0]

            it "finds soft breaks after each run of whitespace" $
                lineBreaks "en" (pack " a  few\t more \n words\n") `shouldBe`
                    [ softAt 16
                    , hardAt 15
                    , softAt 9
                    , softAt 4
                    , softAt 1
                    , softAt 0
                    ]

            -- The expected offsets here count "á" and "ý" as two units each,
            -- which is consistent with UTF-8 code units
            -- (TODO confirm against the definition of breaksDesc).
            it "finds soft breaks after spaces and hyphens" $
                lineBreaks "cs" (pack "následuje stanice Frýdek-Místek")
                    `shouldBe` map softAt [27, 19, 11, 0]

            -- Each kana advances the expected offset by 3.
            it "finds soft breaks in Japanese kana" $
                lineBreaks "ja" (pack "トイレはどこですか？") `shouldBe`
                    map softAt [24, 21, 18, 15, 12, 9, 6, 3, 0]

            -- Shared sample for the locale-dependent tests below, together
            -- with the two break lists that were observed for it.
            -- The loose list has breaks at offsets 3 and 15
            -- that the strict list lacks.
            let jaText = pack "五ヶ月‡コード"
                jaBreaksStrict = map softAt [18, 12, 9, 6, 0]
                jaBreaksLoose = map softAt [18, 15, 12, 9, 6, 3, 0]

            -- These groupings record what was observed, not a specification.
            -- It is unclear why the Chinese locales apply stricter rules
            -- to Japanese text than the Japanese locales do.
            -- Future versions of ICU may behave differently.
            let expectedStrictLocales =
                    [ ""
                    , "en"
                    , "ja@lb=strict"
                    , "zh"
                    , "zh_Hans"
                    , "zh_Hant"
                    , "zxx"
                    , "zxx-any-invalid-suffix"
                    ]
                expectedLooseLocales =
                    [ "@lb=loose"
                    , "en@lb=loose"
                    , "ja"
                    , "ja_JP"
                    , "ja-JP"
                    , "ja-any-invalid-suffix"
                    , "zh@lb=loose"
                    , "zxx-any-invalid-suffix@lb=loose"
                    ]

            -- One generated test case per locale name.
            forM_ expectedStrictLocales $ \loc ->
                it ("uses strict line breaks for " ++ loc ++ " locale") $
                    lineBreaks loc jaText `shouldBe` jaBreaksStrict

            forM_ expectedLooseLocales $ \loc ->
                it ("uses loose line breaks for " ++ loc ++ " locale") $
                    lineBreaks loc jaText `shouldBe` jaBreaksLoose

        -- A web browser rendering engine probably has no use for this.
        describe "breakSentence" $ do
            let sentenceBreaks lang = breaksDesc (breakSentence (Locale lang))

            it "finds no breaks in empty input" $
                sentenceBreaks "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                sentenceBreaks "en" (singleton 'a') `shouldBe` [(0, ())]

        -- Probably not needed for rendering in a web browser engine either,
        -- though text search and selection may make use of it.
        describe "breakWord" $ do
            let wordBreaks lang = breaksDesc (breakWord (Locale lang))
                -- Shorthands for building expected (offset, status) pairs.
                letterAt offset = (offset, BreakStatus.Letter)
                uncategorizedAt offset = (offset, BreakStatus.Uncategorized)

            it "finds no breaks in empty input" $
                wordBreaks "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                wordBreaks "en" (singleton 'a') `shouldBe`
                    [uncategorizedAt 0]

            it "finds breaks after runs of letters and spaces" $
                wordBreaks "en" (pack "a few   words") `shouldBe`
                    [ uncategorizedAt 8
                    , letterAt 5
                    , uncategorizedAt 2
                    , letterAt 1
                    , uncategorizedAt 0
                    ]

        -- Allows breaking in the middle of a word when the output is narrow.
        -- Doing so can split ligatures apart.
        describe "breakCharacter" $ do
            let characterBreaks lang =
                    breaksDesc (breakCharacter (Locale lang))

            it "finds no breaks in empty input" $
                characterBreaks "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                characterBreaks "en" (singleton 'a') `shouldBe` [(0, ())]

    describe "subOffsetsDesc" $ do

        -- The input pair with offset 2 is expected to be absent from the
        -- result; the remaining offsets are expected to be lowered by 5.
        let shifted = subOffsetsDesc 5 [(11, 'a'), (8, 'b'), (5, 'c'), (2, 'd')]
            (offsets, payloads) = unzip shifted

        it "should reduce offsets" $
            offsets `shouldBe` [6, 3, 0]

        it "should preserve payload" $
            payloads `shouldBe` ['a', 'b', 'c']