~alcinnz/harfbuzz-pure: Document Buffer API.

1 files changed, 97 insertions(+), 22 deletions(-)

M Data/Text/Glyphize/Buffer.hs

M Data/Text/Glyphize/Buffer.hs => Data/Text/Glyphize/Buffer.hs +97 -22

@@ 6,6 6,7 @@ import qualified Data.Text.Lazy as Lazy
 import qualified Data.Text.Internal as Txt
 import Data.Char (toUpper, toLower)
 import Control.Monad (forM)
+import Control.Exception (bracket)
 
 --- To fill computed text property.
 import Data.Text.Encoding (decodeUtf8Lenient)


@@ 29,6 30,7 @@ import Foreign.Storable.Generic (GStorable(..))
 --- Public Datastructures
 ------
 
+-- | Text to be shaped or the resulting glyphs, for which language/script/direction/etc.
 data Buffer = Buffer {
     text :: Lazy.Text,
     -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs.


@@ 85,7 87,9 @@ data Buffer = Buffer {
     -- ^ the glyph number that replaces replaces characters not found in the font.
   } deriving (Eq, Show, Read, Ord)
 
+-- | Whether the given text is Unicode or font-specific "glyphs".
 data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show, Read, Ord)
+-- | Defines how fine the groupings represented by `GlyphInfo`'s `cluster` property are.`
 data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars
     deriving (Eq, Show, Read, Ord)
 


@@ 111,42 115,63 @@ defaultBuffer = Buffer {
 --- Directions
 ------
 
+-- | The direction of a text segment or buffer.
 data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show, Read, Ord)
-dirFromStr ('L':_) = DirLTR
-dirFromStr ('l':_) = DirLTR
-dirFromStr ('R':_) = DirRTL
-dirFromStr ('r':_) = DirRTL
-dirFromStr ('T':_) = DirTTB
-dirFromStr ('t':_) = DirTTB
-dirFromStr ('B':_) = DirBTT
-dirFromStr ('b':_) = DirBTT
+-- | Converts a string to an hb_direction_t.
+-- Matching is loose and applies only to the first letter. For examples, 
+-- "LTR" and "left-to-right" will both return HB_DIRECTION_LTR.
+dirFromStr ('L':_) = Just DirLTR
+dirFromStr ('l':_) = Just DirLTR
+dirFromStr ('R':_) = Just DirRTL
+dirFromStr ('r':_) = Just DirRTL
+dirFromStr ('T':_) = Just DirTTB
+dirFromStr ('t':_) = Just DirTTB
+dirFromStr ('B':_) = Just DirBTT
+dirFromStr ('b':_) = Just DirBTT
+dirFromStr _ = Nothing
+-- | Converts an hb_direction_t to a string.
 dirToStr DirLTR = "ltr"
 dirToStr DirRTL = "rtl"
 dirToStr DirTTB = "ttb"
 dirToStr DirBTT = "btt"
+-- | Reverses a text direction.
 dirReverse DirLTR = DirRTL
 dirReverse DirRTL = DirLTR
 dirReverse DirTTB = DirBTT
 dirReverse DirBTT = DirTTB
+-- | Tests whether a text direction moves backward
+-- (from right to left, or from bottom to top).
 dirBackward dir = dir `Prelude.elem` [DirRTL, DirBTT]
+-- | Tests whether a text direction moves forward
+-- (from left to right, or from top to bottom).
 dirForward dir = dir `Prelude.elem` [DirLTR, DirTTB]
+-- | Tests whether a text direction is horizontal.
 dirHorizontal dir = dir `Prelude.elem` [DirLTR, DirRTL]
+-- | Tests whether a text direction is vertical.
 dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT]
 
+-- | Converts a `Direction` to C encoding.
 dir2int Nothing = 0
 dir2int (Just DirLTR) = 4
 dir2int (Just DirRTL) = 5
 dir2int (Just DirTTB) = 6
 dir2int (Just DirBTT) = 7
+-- | Sets `direction` property on C `Buffer'` struct.
 foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction
     :: Buffer' -> Int -> IO ()
 
+-- | Converts a `Direction` from C encoding.
 int2dir 4 = Just DirLTR
 int2dir 5 = Just DirRTL
 int2dir 6 = Just DirTTB
 int2dir 7 = Just DirBTT
 int2dir _ = Nothing
 
+-- | Fetches the hb_direction_t of a script when it is set horizontally.
+-- All right-to-left scripts will return `DirRTL`.
+-- All left-to-right scripts will return `DirLTR`.
+-- Scripts that can be written either horizontally or vertically will return `Nothing`.
+-- Unknown scripts will return `DirLTR`.
 scriptHorizontalDir :: String -> Maybe Direction
 scriptHorizontalDir = int2dir . hb_script_get_horizontal_direction . script_from_string
 foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizontal_direction


@@ 156,19 181,32 @@ foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizont
 --- Locales
 ------
 data Language'
+-- | Represents a natural written language.
+-- Corresponds to a BCP47 language tag.
 type Language = Ptr Language'
+-- | Fetch the default language from current locale.
+-- NOTE that the first time this function is called, it calls (C code)
+-- "setlocale (LC_CTYPE, nullptr)" to fetch current locale.
+-- The underlying setlocale function is, in many implementations, NOT threadsafe.
+-- To avoid problems, call this function once before multiple threads can call it.
+-- This function may be used to fill in missing fields on a `Buffer`.
 languageDefault :: IO String
 languageDefault = hb_language_get_default >>= hb_language_to_string >>= peekCString
 foreign import ccall "hb_language_to_string" hb_language_to_string :: Language -> IO CString
 foreign import ccall "hb_language_get_default" hb_language_get_default :: IO Language
 
-foreign import ccall "hb_language_from_string" hb_language_from_string'
-    :: CString -> Int -> IO Language
+-- | Converts a `String` representing a BCP 47 language tag to the corresponding `Language`.
 hb_language_from_string :: String -> IO Language
 hb_language_from_string str =
     withCString str $ \str' -> hb_language_from_string' str' (-1)
+foreign import ccall "hb_language_from_string" hb_language_from_string'
+    :: CString -> Int -> IO Language
 
-{-languageMatches :: String -> String -> Bool
+{-
+-- | Check whether a second language tag is the same or a more specific version
+-- of the provided language tag.
+-- For example, "fa_IR.utf8" is a more specific tag for "fa" or for "fa_IR".
+languageMatches :: String -> String -> Bool
 languageMatches lang specific = unsafePerformIO $ do
     lang' <- hb_language_from_string lang
     specific' <- hb_language_from_string specific


@@ 179,23 217,20 @@ foreign import ccall "hb_language_matches" hb_language_matches :: Language -> La
 --- FFI Support
 ------
 
+-- | Directly corresponds to "hb_buffer_t".
 data Buffer''
 type Buffer' = Ptr Buffer''
 
+-- | Temporarily allocates a `Buffer'`
 withNewBuffer :: (Buffer' -> IO a) -> IO a
-withNewBuffer cb = do
-    buf <- hb_buffer_create
-    ret <- cb buf
-    hb_buffer_destroy buf
-    return ret
+withNewBuffer cb = bracket hb_buffer_create hb_buffer_destroy cb
 foreign import ccall "hb_buffer_create" hb_buffer_create :: IO Buffer'
 foreign import ccall "hb_buffer_destroy" hb_buffer_destroy :: Buffer' -> IO ()
 
-{-txt2primitiveArray (A.ByteArray arr') = ByteArray arr'
-withByteArrayLen :: ByteArray -> (CString -> Int -> IO a) -> IO a
-withByteArrayLen arr cb = let len = sizeofByteArray arr in allocaBytes len $ \arr' -> do
-    copyByteArrayToPtr arr' arr 0 len
-    cb arr' len-}
+-- | Decodes given lazy `Text` into given `Buffer'`.
+-- Should be valid Unicode data.
+-- Captures a few trailing & preceding chars when possible to give additional
+-- context to the shaping.
 bufferWithText _ Lazy.Empty cb = cb
 bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length) txts) cb = do
     hb_buffer_add_utf8 buffer arr (sizeofByteArray# arr) (toEnum offset) length


@@ 203,9 238,13 @@ bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length)
 foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8
     :: Buffer' -> ByteArray# -> Int# -> Word -> Int -> IO ()
 
+-- | Converts initial char to uppercase & all others to lowercase.
+-- Internal utility for reimplementation `script_from_string`.
 titlecase :: String -> String
 titlecase "" = ""
 titlecase (c:cs) = toUpper c : Prelude.map toLower cs
+-- | Converts a string str representing an ISO 15924 script tag to a corresponding "tag" `Word32`.
+script_from_string :: String -> Word32
 script_from_string str = tag_from_string $ case titlecase str of
     'Q':'a':'a':'i':_ -> "Zinh"
     'Q':'a':'a':'c':_ -> "Copt"


@@ 222,8 261,11 @@ script_from_string str = tag_from_string $ case titlecase str of
     'S':'y':'r':'j':_ -> "Syrc"
     'S':'y':'r':'n':_ -> "Syrc"
     x -> x
+-- | Converts a `String` into a "tag" `Word32`. Valid tags are 4 `Char`s.
+-- Shorter input `String`s will be padded with spaces.
+-- Longer input strings will be truncated.
 tag_from_string :: String -> Word32
-tag_from_string str = case str ++ Prelude.repeat '\0' of
+tag_from_string str = case str ++ Prelude.repeat ' ' of
     c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [
         shiftL (c2w c1 .&. 0x7) 24,
         shiftL (c2w c2 .&. 0x7) 16,


@@ 231,6 273,7 @@ tag_from_string str = case str ++ Prelude.repeat '\0' of
         shiftL (c2w c4 .&. 0x7) 0
       ]
     _ -> 0
+-- | Converts a "tag" `Word32` into a 4 `Char` `String`.
 tag_to_string :: Word32 -> String
 tag_to_string tag = [
     w2c (shiftR tag 24 .&. 0x7),


@@ 243,6 286,8 @@ tag_to_string tag = [
 --- Haskell-to-C conversion
 ------
 
+-- | Temporarily allocates a `Buffer'` corresponding to the given `Buffer`
+-- to be processed entirely within the given callback.
 withBuffer :: Buffer -> (Buffer' -> IO a) -> IO a
 withBuffer buf cb = withNewBuffer $ \buf' -> bufferWithText buf' (text buf) $ do
     hb_buffer_set_content_type buf' $ case contentType buf of


@@ 296,6 341,8 @@ foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segmen
 ------
 --- C-to-Haskell conversion
 ------
+
+-- | Holds information about the glyphs & their relation to input text.
 data GlyphInfo = GlyphInfo {
     codepoint :: Word32,
     -- ^ Glyph index (or unicode codepoint)


@@ 309,8 356,30 @@ data GlyphInfo = GlyphInfo {
     -- they are seperate glyphs, `Buffer`'s `clusterLevel` property allows
     -- selecting more fine grained cluster handling.
     unsafeToBreak :: Bool,
+    -- ^ Indicates that if input text is broken at the beginning of the cluster
+    -- this glyph is part of, then both sides need to be re-shaped,
+    -- as the result might be different.
+    -- On the flip side, it means that when this flag is not present,
+    -- then it is safe to break the glyph-run at the beginning of this cluster,
+    -- and the two sides will represent the exact same result one would get
+    -- if breaking input text at the beginning of this cluster and shaping
+    -- the two sides separately. This can be used to optimize paragraph layout,
+    -- by avoiding re-shaping of each line after line-breaking.
     unsafeToConcat :: Bool,
+    -- ^ Indicates that if input text is changed on one side of the beginning
+    -- of the cluster this glyph is part of, then the shaping results for
+    -- the other side might change.
+    -- Note that the absence of this flag will NOT by itself mean that
+    -- it IS safe to concat text. Only two pieces of text both of which
+    -- clear of this flag can be concatenated safely.
+    -- See https://harfbuzz.github.io/harfbuzz-hb-buffer.html#HB_GLYPH_FLAG_UNSAFE_TO_CONCAT
+    -- for more details.
     safeToInsertTatweel :: Bool
+    -- ^ In scripts that use elongation (Arabic, Mongolian, Syriac, etc.),
+    -- this flag signifies that it is safe to insert a U+0640 TATWEEL character
+    -- before this cluster for elongation.
+    -- This flag does not determine the script-specific elongation places,
+    -- but only when it is safe to do the elongation without interrupting text shaping.
 } deriving (Show, Read, Eq)
 instance Storable GlyphInfo where
     sizeOf _ = sizeOf (undefined :: Word32) * 5


@@ 336,6 405,7 @@ instance Storable GlyphInfo where
         pokeElemOff ptr' 2 cluster'
         pokeElemOff ptr' 3 0
         pokeElemOff ptr' 4 0
+-- | Decodes `Buffer'`'s glyph information array.'
 glyphInfos buf' = do
     arr <- hb_buffer_get_glyph_infos buf' nullPtr
     length <- hb_buffer_get_length buf'


@@ 347,6 417,8 @@ foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos
 foreign import ccall "hb_buffer_get_length" hb_buffer_get_length :: Buffer' -> IO Word
 -- NOTE: The array returned from FFI is valid as long as the buffer is.
 
+-- | Holds positions of the glyph in both horizontal & vertical directions.
+-- All positions are relative to current point.
 data GlyphPos = GlyphPos {
     x_advance :: Int32,
     -- ^ How much the line advances after drawing this glyph when setting text


@@ 378,6 450,8 @@ instance Storable GlyphPos where
         pokeElemOff ptr' 2 x_offset'
         pokeElemOff ptr' 3 y_offset'
         pokeElemOff ptr' 4 0 -- Zero private field.
+-- | Decodes `Buffer'`'s glyph position array.
+-- If buffer did not have positions before, they will be initialized to zeros.'
 glyphsPos buf' = do
     arr <- hb_buffer_get_glyph_positions buf' nullPtr
     length <- hb_buffer_get_length buf'


@@ 388,6 462,7 @@ foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positio
     :: Buffer' -> Ptr Word -> IO (Ptr GlyphPos)
 -- NOTE: The array returned from FFI is valid as long as the buffer is.
 
+-- | Decodes a `Buffer'` back to corresponding pure-functional `Buffer`.
 thawBuffer :: Buffer' -> IO Buffer
 thawBuffer buf' = do
     glyphInfos' <- glyphInfos buf'