M .golden/paragraphLayout/devanagariAccent.golden => .golden/paragraphLayout/devanagariAccent.golden +3 -3
@@ 1,5 1,5 @@
ParagraphLayout
- { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
+ { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
, spanLayouts = [
SpanLayout
[ Fragment
@@ 8,8 8,8 @@ ParagraphLayout
, fragmentAncestorBoxes =
[ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
]
- , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1000}
- , fragmentPen = (0, -500)
+ , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 645, y_size = -1630}
+ , fragmentPen = (0, -1171)
, fragmentGlyphs =
[ (GlyphInfo {codepoint = 529, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 645, y_advance = 0, x_offset = 0, y_offset = 0})
, (GlyphInfo {codepoint = 424, cluster = 0, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})
M .golden/paragraphLayout/devanagariPrefixedAccent.golden => .golden/paragraphLayout/devanagariPrefixedAccent.golden +3 -3
@@ 1,5 1,5 @@
ParagraphLayout
- { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
+ { paragraphRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
, spanLayouts = [
SpanLayout
[ Fragment
@@ 8,8 8,8 @@ ParagraphLayout
, fragmentAncestorBoxes =
[ AncestorBox {boxUserData = (), boxLeftEdge = SpacedEdge 0, boxRightEdge = SpacedEdge 0, boxStartEdge = SpacedEdge 0, boxEndEdge = SpacedEdge 0}
]
- , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1000}
- , fragmentPen = (0, -500)
+ , fragmentRect = Rect {x_origin = 0, y_origin = 0, x_size = 0, y_size = -1630}
+ , fragmentPen = (0, -1171)
, fragmentGlyphs =
[ (GlyphInfo {codepoint = 424, cluster = 1, unsafeToBreak = False, unsafeToConcat = False, safeToInsertTatweel = False}, GlyphPos {x_advance = 0, y_advance = 0, x_offset = 0, y_offset = 0})
]
M src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs => src/Data/Text/ParagraphLayout/Internal/BiDiLevels.hs +19 -0
@@ 2,7 2,11 @@ module Data.Text.ParagraphLayout.Internal.BiDiLevels
( Level
, TextLevels (TextLevels)
, WithLevel
+ , dropLevels
+ , headLevel
, level
+ , levelDirectionH
+ , tailLevels
, textLevels
)
where
@@ 29,6 33,15 @@ class WithLevel a where
newtype TextLevels = TextLevels [Level]
deriving (Eq, Show)
+headLevel :: TextLevels -> Level
+headLevel (TextLevels xs) = head xs
+
+tailLevels :: TextLevels -> TextLevels
+tailLevels (TextLevels xs) = TextLevels (tail xs)
+
+dropLevels :: Int -> TextLevels -> TextLevels
+dropLevels n (TextLevels xs) = TextLevels (drop n xs)
+
-- | Determine the BiDi level of each character in the input text.
--
-- TODO: Use Haskell bindings to the ICU BiDi implementation once available,
@@ 81,6 94,12 @@ strongDirection c = case ICUChar.direction c of
firstJust :: [Maybe a] -> Maybe a
firstJust = listToMaybe . catMaybes
+-- | Convert embedding level to horizontal text direction.
+levelDirectionH :: Level -> Direction
+levelDirectionH lvl
+ | even lvl = DirLTR
+ | otherwise = DirRTL
+
-- | Convert text direction to the smallest corresponding embedding level,
-- but no smaller than the given minimum.
directionLevel :: Level -> Direction -> Level
M src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs => src/Data/Text/ParagraphLayout/Internal/ProtoRun.hs +3 -3
@@ 1,8 1,7 @@
module Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (..))
where
-import Data.Text.Glyphize (Direction)
-
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.Script
import Data.Text.ParagraphLayout.Internal.Zipper
@@ 10,6 9,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
-- `Data.Text.ParagraphLayout.Internal.Run.Run` values.
data ProtoRun = ProtoRun
{ zipper :: Zipper
- , direction :: Maybe Direction
+ , followingLevels :: TextLevels
+ , level :: Level
, script :: ScriptCode
}
M src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs => src/Data/Text/ParagraphLayout/Internal/ResolvedSpan.hs +2 -0
@@ 4,6 4,7 @@ where
import Data.Text (Text)
import qualified Data.Text.ICU as BreakStatus (Line)
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.TextContainer
import Data.Text.ParagraphLayout.Internal.TextOptions
@@ 17,6 18,7 @@ data ResolvedSpan d = ResolvedSpan
, spanText :: Text
, spanTextOptions :: TextOptions
, spanBoxes :: [ResolvedBox d]
+ , spanBiDiLevels :: TextLevels
, spanLineBreaks :: [(Int, BreakStatus.Line)]
-- TODO: Can be optimised by starting with the shortest line break.
, spanCharacterBreaks :: [(Int, ())]
M src/Data/Text/ParagraphLayout/Internal/Rich.hs => src/Data/Text/ParagraphLayout/Internal/Rich.hs +12 -1
@@ 5,8 5,10 @@ import Control.Applicative (ZipList (ZipList), getZipList)
import Data.List.NonEmpty (nonEmpty)
import qualified Data.List.NonEmpty as NonEmpty
import Data.Text (Text)
+import qualified Data.Text as Text
import Data.Text.ICU (Breaker, LocaleName, breakCharacter, breakLine)
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.Break
import Data.Text.ParagraphLayout.Internal.Fragment
import Data.Text.ParagraphLayout.Internal.Layout
@@ 43,7 45,9 @@ spanToRunsWrapped :: RS.ResolvedSpan d -> [WithSpan d Run]
spanToRunsWrapped s = map (WithSpan s) (spanToRuns s)
resolveSpans :: Paragraph d -> [RS.ResolvedSpan d]
-resolveSpans p@(Paragraph _ pStart root _) = do
+resolveSpans p = do
+ let Paragraph _ pStart root _ = p
+ let RootBox (Box _ rootTextOpts) = root
let leaves = flatten root
let sTexts = paragraphSpanTexts p
let sBounds = paragraphSpanBounds p
@@ 57,8 61,14 @@ resolveSpans p@(Paragraph _ pStart root _) = do
<*> ZipList sTexts
let (TextLeaf userData _ textOpts boxes) = leaf
let lang = textLanguage textOpts
+ -- TODO: Allow BiDi embedding/isolation for inner nodes.
+ let pLevels = textLevels (textDirection rootTextOpts) pText
let lBreaks = paragraphBreaks breakLine pText lang
let cBreaks = paragraphBreaks breakCharacter pText lang
+ -- TODO: Optimise. This has time complexity O(n*s), where n is number of
+ -- characters and s is number of resolved spans.
+ -- Maybe include byte offsets in the TextLevels data structure?
+ let pPrefixLen = Text.length $ paragraphPrefix p sStart
return RS.ResolvedSpan
{ RS.spanUserData = userData
, RS.spanIndex = i
@@ 67,6 77,7 @@ resolveSpans p@(Paragraph _ pStart root _) = do
, RS.spanText = sText
, RS.spanTextOptions = textOpts
, RS.spanBoxes = boxes
+ , RS.spanBiDiLevels = dropLevels pPrefixLen pLevels
, RS.spanLineBreaks = subOffsetsDesc (sStart - pStart) lBreaks
, RS.spanCharacterBreaks = subOffsetsDesc (sStart - pStart) cBreaks
}
M src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs => src/Data/Text/ParagraphLayout/Internal/Rich/Paragraph.hs +9 -0
@@ 1,6 1,7 @@
module Data.Text.ParagraphLayout.Internal.Rich.Paragraph
( Paragraph (..)
, constructParagraph
+ , paragraphPrefix
, paragraphSpanBounds
, paragraphSpanTexts
, paragraphText
@@ 111,3 112,11 @@ paragraphText p@(Paragraph arr _ _ _) = Text arr start (end - start)
start = NonEmpty.head sBounds
end = NonEmpty.last sBounds
sBounds = paragraphSpanBounds p
+
+-- | Turn all text nodes from the input `Paragraph` up to the given end offset
+-- into one combined `Text`. No bounds checking is performed!
+paragraphPrefix :: Paragraph d -> Int -> Text
+paragraphPrefix p@(Paragraph arr _ _ _) end = Text arr start (end - start)
+ where
+ start = NonEmpty.head sBounds
+ sBounds = paragraphSpanBounds p
M src/Data/Text/ParagraphLayout/Internal/Run.hs => src/Data/Text/ParagraphLayout/Internal/Run.hs +29 -34
@@ 7,8 7,8 @@ import Data.Text (Text)
import qualified Data.Text as Text
import Data.Text.Foreign (dropWord8, lengthWord8, takeWord8)
import Data.Text.Glyphize (Direction (..))
-import qualified Data.Text.ICU.Char as ICUChar
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.ProtoRun (ProtoRun (ProtoRun))
import qualified Data.Text.ParagraphLayout.Internal.ProtoRun as PR
import Data.Text.ParagraphLayout.Internal.ResolvedSpan
@@ 22,6 22,7 @@ import Data.Text.ParagraphLayout.Internal.Zipper
data Run = Run
{ runOffsetInSpan :: Int
, runText :: Text
+ , runLevel :: Level
, runDirection :: Maybe Direction
, runScript :: Maybe ScriptCode
}
@@ 73,56 74,58 @@ considerNext z = case next z of
data Merged a = Incompatible | Merged a
spanToRuns :: ResolvedSpan d -> [Run]
-spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper
+spanToRuns s = snd $ mapAccumL run 0 $ protoRuns zipper levels
where
wholeText = spanText s
zipper = start wholeText
+ levels = spanBiDiLevels s
run acc pr = let t = preceding (PR.zipper pr) in
( acc + lengthWord8 t
, Run
{ runOffsetInSpan = acc
, runText = t
- , runDirection = PR.direction pr
+ , runLevel = PR.level pr
+ , runDirection = Just $ levelDirectionH $ PR.level pr
, runScript = Just $ PR.script pr
}
)
-protoRuns :: Zipper -> [ProtoRun]
-protoRuns z = reverse $ protoRuns' z []
+protoRuns :: Zipper -> TextLevels -> [ProtoRun]
+protoRuns z bl = reverse $ protoRuns' z bl []
-protoRuns' :: Zipper -> [ProtoRun] -> [ProtoRun]
-protoRuns' curZipper curRuns = case considerNext curZipper of
+protoRuns' :: Zipper -> TextLevels -> [ProtoRun] -> [ProtoRun]
+protoRuns' curZipper curLevels curRuns = case considerNext curZipper of
Nothing -> curRuns
Just choice ->
- let headRun :| tailRuns = foldRun choice curRuns
- in protoRuns' (PR.zipper headRun) (headRun : tailRuns)
+ let headRun :| tailRuns = foldRun (choice, curLevels) curRuns
+ in protoRuns' (PR.zipper headRun) curLevels (headRun : tailRuns)
-foldRun :: ZipperChoice -> [ProtoRun] -> NonEmpty ProtoRun
+foldRun :: (ZipperChoice, TextLevels) -> [ProtoRun] -> NonEmpty ProtoRun
-- If there are no runs, create a new run with a single character.
-foldRun x [] = ProtoRun (continuingRun x) d s :| []
+foldRun (x, bl) [] = ProtoRun (continuingRun x) lvls lvl s :| []
where
- d = charDirection (nextChar x)
+ lvl = headLevel bl
+ lvls = tailLevels bl
s = charScript (nextChar x)
-foldRun x (previousRun : tailRuns) =
- case (mergeDirections d1 d2, mergeScripts s1 s2) of
- (Merged d, Merged s) -> ProtoRun (continuingRun x) d s :| tailRuns
- _ -> ProtoRun (startingNewRun x) d2 s2 :| previousRun : tailRuns
+foldRun (x, _) (previousRun : tailRuns) =
+ case (mergeLevels l1 l2, mergeScripts s1 s2) of
+ (Merged l, Merged s) ->
+ ProtoRun (continuingRun x) bl' l s :| tailRuns
+ _ ->
+ ProtoRun (startingNewRun x) bl' l2 s2 :| previousRun : tailRuns
where
- d1 = PR.direction previousRun
+ bl = PR.followingLevels previousRun
+ l1 = PR.level previousRun
s1 = PR.script previousRun
- d2 = charDirection (nextChar x)
+ l2 = headLevel bl
s2 = charScript (nextChar x)
+ bl' = tailLevels bl
--- | Simplified detection of text direction for unidirectional text.
-mergeDirections :: Maybe Direction -> Maybe Direction ->
- Merged (Maybe Direction)
-mergeDirections Nothing Nothing = Merged Nothing
-mergeDirections (Just d1) Nothing = Merged (Just d1)
-mergeDirections Nothing (Just d2) = Merged (Just d2)
-mergeDirections (Just d1) (Just d2)
- | d1 == d2 = Merged (Just d1)
+mergeLevels :: Level -> Level -> Merged Level
+mergeLevels l1 l2
+ | l1 == l2 = Merged l1
| otherwise = Incompatible
-- TODO: Implement proper inheritance rules.
@@ 133,11 136,3 @@ mergeScripts s1 "Zinh" = Merged s1
mergeScripts s1 s2
| s1 == s2 = Merged s1
| otherwise = Incompatible
-
--- TODO: Use the BiDi algorithm to support bidirectional text.
-charDirection :: Char -> Maybe Direction
-charDirection c = case ICUChar.direction c of
- ICUChar.LeftToRight -> Just DirLTR
- ICUChar.RightToLeft -> Just DirRTL
- ICUChar.RightToLeftArabic -> Just DirRTL
- _ -> Nothing
M test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs => test/Data/Text/ParagraphLayout/Internal/ApplyBoxesSpec.hs +2 -0
@@ 6,6 6,7 @@ import Data.Text.Glyphize (Direction (DirLTR, DirRTL))
import Test.Hspec
import Data.Text.ParagraphLayout.Internal.ApplyBoxes
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.ResolvedSpan
@@ 23,6 24,7 @@ trivialSpan d i dir bs = ResolvedSpan
, spanText = empty
, spanTextOptions = defaultTextOptions dir
, spanBoxes = bs
+ , spanBiDiLevels = TextLevels []
, spanLineBreaks = []
, spanCharacterBreaks = []
}
M test/Data/Text/ParagraphLayout/Internal/RunSpec.hs => test/Data/Text/ParagraphLayout/Internal/RunSpec.hs +47 -16
@@ 4,19 4,21 @@ import Data.Text (Text, pack)
import Data.Text.Glyphize (Direction (..), emptyFont)
import Test.Hspec
+import Data.Text.ParagraphLayout.Internal.BiDiLevels
import Data.Text.ParagraphLayout.Internal.BoxOptions
import Data.Text.ParagraphLayout.Internal.LineHeight
import Data.Text.ParagraphLayout.Internal.ResolvedBox
import Data.Text.ParagraphLayout.Internal.ResolvedSpan
import Data.Text.ParagraphLayout.Internal.Run
import Data.Text.ParagraphLayout.Internal.TextOptions
+import Data.Text.ParagraphLayout.RunLengthEncoding
import Data.Text.ParagraphLayout.TextData
defaultBox :: Direction -> ResolvedBox ()
defaultBox dir = ResolvedBox () 0 defaultBoxOptions dir
-sampleSpan :: (Direction, String, Text, a) -> ResolvedSpan ()
-sampleSpan (dir, lang, text, _) = ResolvedSpan
+sampleSpan :: (Direction, String, Text, a) -> TextLevels -> ResolvedSpan ()
+sampleSpan (dir, lang, text, _) levels = ResolvedSpan
{ spanUserData = ()
, spanIndex = 0
, spanOffsetInParagraph = 0
@@ 27,106 29,135 @@ sampleSpan (dir, lang, text, _) = ResolvedSpan
, textLanguage = lang
}
, spanBoxes = [defaultBox dir]
+ , spanBiDiLevels = levels
, spanLineBreaks = []
, spanCharacterBreaks = []
}
+allLTR :: TextLevels
+allLTR = TextLevels $ repeat 0
+
+allRTL :: TextLevels
+allRTL = TextLevels $ repeat 1
+
+levelsRLE :: [(Int, Level)] -> TextLevels
+levelsRLE rls = TextLevels $ runLengthDecode rls
+
spec :: Spec
spec = do
describe "spanToRuns" $ do
it "handles span with no text" $ do
- let inputSpan = sampleSpan englishEmpty
+ let inputSpan = sampleSpan englishEmpty allLTR
let runs = spanToRuns inputSpan
runs `shouldBe` []
it "handles Czech hello" $ do
- let inputSpan = sampleSpan czechHello
+ let inputSpan = sampleSpan czechHello allLTR
let runs = spanToRuns inputSpan
runs `shouldBe`
[ Run
{ runOffsetInSpan = 0
, runText = spanText inputSpan
+ , runLevel = 0
, runDirection = Just DirLTR
, runScript = Just "Latn"
}
]
it "handles Arabic hello" $ do
- let inputSpan = sampleSpan arabicHello
+ let inputSpan = sampleSpan arabicHello allRTL
let runs = spanToRuns inputSpan
runs `shouldBe`
[ Run
{ runOffsetInSpan = 0
, runText = spanText inputSpan
+ , runLevel = 1
, runDirection = Just DirRTL
, runScript = Just "Arab"
}
]
it "handles Serbian with mixed script" $ do
- let inputSpan = sampleSpan serbianMixedScript
+ let inputSpan = sampleSpan serbianMixedScript allLTR
let runs = spanToRuns inputSpan
runs `shouldBe`
[ Run
-- TODO: We might want both parentheses in the same run.
{ runOffsetInSpan = 0
, runText = pack "Vikipedija ("
+ , runLevel = 0
, runDirection = Just DirLTR
, runScript = Just "Latn"
}
, Run
{ runOffsetInSpan = 12
, runText = pack "Википедија)"
+ , runLevel = 0
, runDirection = Just DirLTR
, runScript = Just "Cyrl"
}
]
it "handles Arabic text with English inside" $ do
- let inputSpan = sampleSpan arabicAroundEnglish
+ let levels = levelsRLE [(3, 1), (9, 2), (36, 1), (3, 2), (1, 1)]
+ let inputSpan = sampleSpan arabicAroundEnglish levels
let runs = spanToRuns inputSpan
runs `shouldBe`
[ Run
{ runOffsetInSpan = 0
, runText = pack "في "
+ , runLevel = 1
, runDirection = Just DirRTL
, runScript = Just "Arab"
}
, Run
{ runOffsetInSpan = 5
- , runText = pack "XHTML 1.0 "
+ , runText = pack "XHTML 1.0"
+ , runLevel = 2
, runDirection = Just DirLTR
, runScript = Just "Latn"
}
, Run
- { runOffsetInSpan = 15
- , runText = pack "يتم تحقيق ذلك بإضافة العنصر المضمن "
+ { runOffsetInSpan = 14
+ , runText = pack " يتم تحقيق ذلك بإضافة العنصر المضمن "
+ , runLevel = 1
, runDirection = Just DirRTL
, runScript = Just "Arab"
}
, Run
{ runOffsetInSpan = 79
- , runText = pack "bdo."
+ , runText = pack "bdo"
+ , runLevel = 2
, runDirection = Just DirLTR
, runScript = Just "Latn"
}
+ , Run
+ { runOffsetInSpan = 82
+ , runText = pack "."
+ , runLevel = 1
+ , runDirection = Just DirRTL
+ , runScript = Just "Zyyy"
+ }
]
it "handles English text with Arabic inside" $ do
- let inputSpan = sampleSpan englishAroundArabic
+ let levels = levelsRLE [(13, 0), (18, 1), (11, 0)]
+ let inputSpan = sampleSpan englishAroundArabic levels
let runs = spanToRuns inputSpan
runs `shouldBe`
[ Run
{ runOffsetInSpan = 0
, runText = pack "The title is "
+ , runLevel = 0
, runDirection = Just DirLTR
, runScript = Just "Latn"
}
, Run
{ runOffsetInSpan = 13
- -- TODO: The final space should be excluded.
- , runText = pack "مفتاح معايير الويب "
+ , runText = pack "مفتاح معايير الويب"
+ , runLevel = 1
, runDirection = Just DirRTL
, runScript = Just "Arab"
}
, Run
- { runOffsetInSpan = 48
- , runText = pack "in Arabic."
+ { runOffsetInSpan = 47
+ , runText = pack " in Arabic."
+ , runLevel = 0
, runDirection = Just DirLTR
, runScript = Just "Latn"
}