-- | Hspec tests for the ICU break helpers @breaksDesc@ and @subOffsetsDesc@.
--
-- Every expectation lists breaks in descending offset order. Judging by the
-- multi-byte test data (each kana advances the offset by 3), offsets appear
-- to be counted in UTF-8 code units rather than in characters.
module Data.Text.ParagraphLayout.Internal.BreakSpec (spec) where

import Control.Monad (forM_)
import Data.Text (empty, pack, singleton)
import Data.Text.ICU
    ( LocaleName (Locale)
    , breakCharacter
    , breakLine
    , breakSentence
    , breakWord
    )
import qualified Data.Text.ICU as BreakStatus (Line (..), Word (..))

import Test.Hspec
import Data.Text.ParagraphLayout.Internal.Break

-- | Test tree covering each ICU breaker passed through @breaksDesc@,
-- followed by the offset arithmetic of @subOffsetsDesc@.
spec :: Spec
spec = do

    describe "breaksDesc" $ do

        -- One of the crucial building blocks of a text layout engine.
        describe "breakLine" $ do

            -- Line breaks for the given locale name, in descending order.
            let b lang = breaksDesc $ breakLine (Locale lang)

            it "finds no breaks in empty input" $
                b "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe` [(0, BreakStatus.Soft)]

            it "finds hard break after newline" $
                b "en" (pack "hello\nworld") `shouldBe`
                    [ (6, BreakStatus.Hard)
                    , (0, BreakStatus.Soft)
                    ]

            it "finds soft breaks after spaces and tabs" $
                b "en" (pack "a few\twords") `shouldBe`
                    [ (6, BreakStatus.Soft)
                    , (2, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

            it "finds soft breaks after spaces and hyphens" $
                b "cs" (pack "následuje stanice Frýdek-Místek") `shouldBe`
                    [ (27, BreakStatus.Soft)
                    , (19, BreakStatus.Soft)
                    , (11, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

            it "finds soft breaks in Japanese kana" $
                b "ja" (pack "トイレはどこですか?") `shouldBe`
                    [ (24, BreakStatus.Soft)
                    , (21, BreakStatus.Soft)
                    , (18, BreakStatus.Soft)
                    , (15, BreakStatus.Soft)
                    , (12, BreakStatus.Soft)
                    , (9, BreakStatus.Soft)
                    , (6, BreakStatus.Soft)
                    , (3, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

            -- Sample text on which the strict and loose rule sets disagree:
            -- only the loose expectation has breaks at offsets 3 and 15.
            let jaText = pack "五ヶ月‡コード"

            let jaBreaksStrict =
                    [ (18, BreakStatus.Soft)
                    , (12, BreakStatus.Soft)
                    , (9, BreakStatus.Soft)
                    , (6, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

            let jaBreaksLoose =
                    [ (18, BreakStatus.Soft)
                    , (15, BreakStatus.Soft)
                    , (12, BreakStatus.Soft)
                    , (9, BreakStatus.Soft)
                    , (6, BreakStatus.Soft)
                    , (3, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

            -- Observed behaviour.
            -- Not sure why Chinese rules are stricter for Japanese text.
            -- This behaviour may change with future versions of ICU.
            let expectedStrictLocales =
                    [ ""
                    , "en"
                    , "ja@lb=strict"
                    , "zh"
                    , "zh_Hans"
                    , "zh_Hant"
                    , "zxx"
                    , "zxx-any-invalid-suffix"
                    ]

            let expectedLooseLocales =
                    [ "@lb=loose"
                    , "en@lb=loose"
                    , "ja"
                    , "ja_JP"
                    , "ja-JP"
                    , "ja-any-invalid-suffix"
                    , "zh@lb=loose"
                    , "zxx-any-invalid-suffix@lb=loose"
                    ]

            -- One generated test case per locale name in each list.
            expectedStrictLocales `forM_` \l ->
                it ("uses strict line breaks for " ++ l ++ " locale") $
                    b l jaText `shouldBe` jaBreaksStrict

            expectedLooseLocales `forM_` \l ->
                it ("uses loose line breaks for " ++ l ++ " locale") $
                    b l jaText `shouldBe` jaBreaksLoose

        -- Probably not useful for a web browser rendering engine.
        describe "breakSentence" $ do

            -- Sentence breaks for the given locale name, in descending order.
            let b lang = breaksDesc $ breakSentence (Locale lang)

            it "finds no breaks in empty input" $
                b "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe` [(0, ())]

        -- Probably not useful for a web browser rendering engine,
        -- but may be used for text search and selection.
        describe "breakWord" $ do

            -- Word breaks for the given locale name, in descending order.
            let b lang = breaksDesc $ breakWord (Locale lang)

            it "finds no breaks in empty input" $
                b "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe`
                    [(0, BreakStatus.Uncategorized)]

            -- The input deliberately contains a run of three spaces between
            -- "few" and "words": the run occupies offsets 5 to 7, which is
            -- why the last break is expected at offset 8 and none at 6.
            it "finds breaks after runs of letters and spaces" $
                b "en" (pack "a few   words") `shouldBe`
                    [ (8, BreakStatus.Uncategorized)
                    , (5, BreakStatus.Letter)
                    , (2, BreakStatus.Uncategorized)
                    , (1, BreakStatus.Letter)
                    , (0, BreakStatus.Uncategorized)
                    ]

        -- Useful for breaking inside words for narrow output.
        -- This can result in breaking ligatures.
        describe "breakCharacter" $ do

            -- Character breaks for the given locale name, in descending order.
            let b lang = breaksDesc $ breakCharacter (Locale lang)

            it "finds no breaks in empty input" $
                b "en" empty `shouldBe` []

            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe` [(0, ())]

    describe "subOffsetsDesc" $ do

        -- Subtracting 5 is expected to drop the entry at offset 2 (it lies
        -- below 5) and to shift the remaining offsets down to 6, 3 and 0.
        let result = subOffsetsDesc 5 [(11, 'a'), (8, 'b'), (5, 'c'), (2, 'd')]

        it "should reduce offsets" $
            map fst result `shouldBe` [6, 3, 0]

        it "should preserve payload" $
            map snd result `shouldBe` ['a', 'b', 'c']