module Data.Text.ParagraphLayout.Internal.BreakSpec (spec) where
import Data.Text (empty, pack, singleton)
import Data.Text.ICU
(LocaleName(Locale)
,breakCharacter
,breakLine
,breakSentence
,breakWord
)
import qualified Data.Text.ICU as BreakStatus (Line(..), Word(..))
import Test.Hspec
import Data.Text.ParagraphLayout.Internal.Break
-- | Example-based tests for 'breaksDesc' (applied to each of the ICU
-- breakers imported above) and for 'subOffsetsDesc'.
--
-- Every expected list is written in descending offset order. The expected
-- lists never contain the end-of-input offset, only offset 0 and the
-- interior boundaries.
--
-- NOTE(review): the multi-byte examples (Czech diacritics, Japanese kana
-- advancing by 3 per character) suggest offsets are counted in UTF-8 code
-- units — confirm against the 'breaksDesc' implementation.
spec :: Spec
spec = do

    describe "breaksDesc" $ do

        -- One of the crucial building blocks of a text layout engine.
        describe "breakLine" $ do
            let b lang = breaksDesc $ breakLine (Locale lang)
            it "finds no breaks in empty input" $
                b "en" empty `shouldBe`
                    []
            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe`
                    [ (0, BreakStatus.Soft)
                    ]
            it "finds hard break after newline" $
                b "en" (pack "hello\nworld") `shouldBe`
                    [ (6, BreakStatus.Hard)
                    , (0, BreakStatus.Soft)
                    ]
            it "finds soft breaks after spaces and tabs" $
                b "en" (pack "a few\twords") `shouldBe`
                    [ (6, BreakStatus.Soft)
                    , (2, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]
            it "finds soft breaks after spaces and hyphens" $
                b "cs" (pack "následuje stanice Frýdek-Místek") `shouldBe`
                    [ (27, BreakStatus.Soft)
                    , (19, BreakStatus.Soft)
                    , (11, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]
            it "finds soft breaks in Japanese kana" $
                b "ja" (pack "トイレはどこですか?") `shouldBe`
                    [ (24, BreakStatus.Soft)
                    , (21, BreakStatus.Soft)
                    , (18, BreakStatus.Soft)
                    , (15, BreakStatus.Soft)
                    , (12, BreakStatus.Soft)
                    , (9, BreakStatus.Soft)
                    , (6, BreakStatus.Soft)
                    , (3, BreakStatus.Soft)
                    , (0, BreakStatus.Soft)
                    ]

        -- Probably not useful for a web browser rendering engine.
        describe "breakSentence" $ do
            let b lang = breaksDesc $ breakSentence (Locale lang)
            it "finds no breaks in empty input" $
                b "en" empty `shouldBe`
                    []
            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe`
                    [ (0, ())
                    ]

        -- Probably not useful for a web browser rendering engine,
        -- but may be used for text search and selection.
        describe "breakWord" $ do
            let b lang = breaksDesc $ breakWord (Locale lang)
            it "finds no breaks in empty input" $
                b "en" empty `shouldBe`
                    []
            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe`
                    [ (0, BreakStatus.Uncategorized)
                    ]
            it "finds breaks after runs of letters and spaces" $
                -- Boundaries in "a few words": after "a" (1), after the
                -- first space (2), after "few" (5), after the second
                -- space (6). Offset 8 would fall inside "words", so the
                -- last interior boundary must be 6.
                b "en" (pack "a few words") `shouldBe`
                    [ (6, BreakStatus.Uncategorized)
                    , (5, BreakStatus.Letter)
                    , (2, BreakStatus.Uncategorized)
                    , (1, BreakStatus.Letter)
                    , (0, BreakStatus.Uncategorized)
                    ]

        -- Useful for breaking inside words for narrow output.
        -- This can result in breaking ligatures.
        describe "breakCharacter" $ do
            let b lang = breaksDesc $ breakCharacter (Locale lang)
            it "finds no breaks in empty input" $
                b "en" empty `shouldBe`
                    []
            it "finds break at offset 0 in non-empty input" $
                b "en" (singleton 'a') `shouldBe`
                    [ (0, ())
                    ]

    describe "subOffsetsDesc" $ do
        -- The input offsets are descending; the expectations below show
        -- that the entry below the subtracted value (2 < 5) is absent
        -- from the result and the remaining offsets are reduced by 5.
        let result = subOffsetsDesc 5 [(11, 'a'), (8, 'b'), (5, 'c'), (2, 'd')]
        it "should reduce offsets" $
            map fst result `shouldBe` [6, 3, 0]
        it "should preserve payload" $
            map snd result `shouldBe` ['a', 'b', 'c']