module Data.Text.ParagraphLayout.Internal.BreakSpec (spec) where
import Control.Monad (forM_)
import Data.Text (empty, pack, singleton)
import Data.Text.ICU
( LocaleName (Locale)
, breakCharacter
, breakLine
, breakSentence
, breakWord
)
import qualified Data.Text.ICU as BreakStatus (Line (..), Word (..))
import Test.Hspec
import Data.Text.ParagraphLayout.Internal.Break
spec :: Spec
spec = do
describe "breaksDesc" $ do
-- One of the crucial building blocks of a text layout engine.
describe "breakLine" $ do
let b lang = breaksDesc $ breakLine (Locale lang)
it "finds no breaks in empty input" $
b "en" empty `shouldBe`
[]
it "finds break at offset 0 in non-empty input" $
b "en" (singleton 'a') `shouldBe`
[ (0, BreakStatus.Soft)]
it "finds hard break after newline" $
b "en" (pack "hello\nworld") `shouldBe`
[ (6, BreakStatus.Hard)
, (0, BreakStatus.Soft)
]
it "finds hard break after each of newlines" $
b "en" (pack "hello\n\nworld") `shouldBe`
[ (7, BreakStatus.Hard)
, (6, BreakStatus.Hard)
, (0, BreakStatus.Soft)
]
it "finds soft breaks after spaces and tabs" $
b "en" (pack "a few\twords") `shouldBe`
[ (6, BreakStatus.Soft)
, (2, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
it "finds soft breaks after each run of whitespace" $
b "en" (pack " a few\t more \n words\n") `shouldBe`
[ (16, BreakStatus.Soft)
, (15, BreakStatus.Hard)
, (9, BreakStatus.Soft)
, (4, BreakStatus.Soft)
, (1, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
it "finds soft breaks after spaces and hyphens" $
b "cs" (pack "následuje stanice Frýdek-Místek") `shouldBe`
[ (27, BreakStatus.Soft)
, (19, BreakStatus.Soft)
, (11, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
it "finds soft breaks in Japanese kana" $
b "ja" (pack "トイレはどこですか?") `shouldBe`
[ (24, BreakStatus.Soft)
, (21, BreakStatus.Soft)
, (18, BreakStatus.Soft)
, (15, BreakStatus.Soft)
, (12, BreakStatus.Soft)
, (9, BreakStatus.Soft)
, (6, BreakStatus.Soft)
, (3, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
let jaText = pack "五ヶ月‡コード"
let jaBreaksStrict =
[ (18, BreakStatus.Soft)
, (12, BreakStatus.Soft)
, (9, BreakStatus.Soft)
, (6, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
let jaBreaksLoose =
[ (18, BreakStatus.Soft)
, (15, BreakStatus.Soft)
, (12, BreakStatus.Soft)
, (9, BreakStatus.Soft)
, (6, BreakStatus.Soft)
, (3, BreakStatus.Soft)
, (0, BreakStatus.Soft)
]
-- Observed behaviour.
-- Not sure why Chinese rules are stricter for Japanese text.
-- This behaviour may change with future versions of ICU.
let expectedStrictLocales =
[ ""
, "en"
, "ja@lb=strict"
, "zh"
, "zh_Hans"
, "zh_Hant"
, "zxx"
, "zxx-any-invalid-suffix"
]
let expectedLooseLocales =
[ "@lb=loose"
, "en@lb=loose"
, "ja"
, "ja_JP"
, "ja-JP"
, "ja-any-invalid-suffix"
, "zh@lb=loose"
, "zxx-any-invalid-suffix@lb=loose"
]
expectedStrictLocales `forM_` \ l ->
it ("uses strict line breaks for " ++ l ++ " locale") $
b l jaText `shouldBe` jaBreaksStrict
expectedLooseLocales `forM_` \ l ->
it ("uses loose line breaks for " ++ l ++ " locale") $
b l jaText `shouldBe` jaBreaksLoose
-- Probably not useful for a web browser rendering engine.
describe "breakSentence" $ do
let b lang = breaksDesc $ breakSentence (Locale lang)
it "finds no breaks in empty input" $
b "en" empty `shouldBe`
[]
it "finds break at offset 0 in non-empty input" $
b "en" (singleton 'a') `shouldBe`
[(0, ())]
-- Probably not useful for a web browser rendering engine,
-- but may be used for text search and selection.
describe "breakWord" $ do
let b lang = breaksDesc $ breakWord (Locale lang)
it "finds no breaks in empty input" $
b "en" empty `shouldBe`
[]
it "finds break at offset 0 in non-empty input" $
b "en" (singleton 'a') `shouldBe`
[ (0, BreakStatus.Uncategorized)
]
it "finds breaks after runs of letters and spaces" $
b "en" (pack "a few words") `shouldBe`
[ (8, BreakStatus.Uncategorized)
, (5, BreakStatus.Letter)
, (2, BreakStatus.Uncategorized)
, (1, BreakStatus.Letter)
, (0, BreakStatus.Uncategorized)
]
-- Useful for breaking inside words for narrow output.
-- This can result in breaking ligatures.
describe "breakCharacter" $ do
let b lang = breaksDesc $ breakCharacter (Locale lang)
it "finds no breaks in empty input" $
b "en" empty `shouldBe`
[]
it "finds break at offset 0 in non-empty input" $
b "en" (singleton 'a') `shouldBe`
[(0, ())]
describe "subOffsetsDesc" $ do
let result = subOffsetsDesc 5 [(11, 'a'), (8, 'b'), (5, 'c'), (2, 'd')]
it "should reduce offsets" $
map fst result `shouldBe` [6, 3, 0]
it "should preserve payload" $
map snd result `shouldBe` ['a', 'b', 'c']