From 0a28bb844c31d7f4327bfab61144c3e01400b959 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Wed, 9 Nov 2022 21:06:26 +1300 Subject: [PATCH] Document Buffer API. --- Data/Text/Glyphize/Buffer.hs | 119 ++++++++++++++++++++++++++++------- 1 file changed, 97 insertions(+), 22 deletions(-) diff --git a/Data/Text/Glyphize/Buffer.hs b/Data/Text/Glyphize/Buffer.hs index 4d3f713..67fe9a0 100644 --- a/Data/Text/Glyphize/Buffer.hs +++ b/Data/Text/Glyphize/Buffer.hs @@ -6,6 +6,7 @@ import qualified Data.Text.Lazy as Lazy import qualified Data.Text.Internal as Txt import Data.Char (toUpper, toLower) import Control.Monad (forM) +import Control.Exception (bracket) --- To fill computed text property. import Data.Text.Encoding (decodeUtf8Lenient) @@ -29,6 +30,7 @@ import Foreign.Storable.Generic (GStorable(..)) --- Public Datastructures ------ +-- | Text to be shaped or the resulting glyphs, for which language/script/direction/etc. data Buffer = Buffer { text :: Lazy.Text, -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs. @@ -85,7 +87,9 @@ data Buffer = Buffer { -- ^ the glyph number that replaces replaces characters not found in the font. } deriving (Eq, Show, Read, Ord) +-- | Whether the given text is Unicode or font-specific "glyphs". data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show, Read, Ord) +-- | Defines how fine the groupings represented by `GlyphInfo`'s `cluster` property are.` data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars deriving (Eq, Show, Read, Ord) @@ -111,42 +115,63 @@ defaultBuffer = Buffer { --- Directions ------ +-- | The direction of a text segment or buffer. data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show, Read, Ord) -dirFromStr ('L':_) = DirLTR -dirFromStr ('l':_) = DirLTR -dirFromStr ('R':_) = DirRTL -dirFromStr ('r':_) = DirRTL -dirFromStr ('T':_) = DirTTB -dirFromStr ('t':_) = DirTTB -dirFromStr ('B':_) = DirBTT -dirFromStr ('b':_) = DirBTT +-- | Converts a string to an hb_direction_t. +-- Matching is loose and applies only to the first letter. For examples, +-- "LTR" and "left-to-right" will both return HB_DIRECTION_LTR. +dirFromStr ('L':_) = Just DirLTR +dirFromStr ('l':_) = Just DirLTR +dirFromStr ('R':_) = Just DirRTL +dirFromStr ('r':_) = Just DirRTL +dirFromStr ('T':_) = Just DirTTB +dirFromStr ('t':_) = Just DirTTB +dirFromStr ('B':_) = Just DirBTT +dirFromStr ('b':_) = Just DirBTT +dirFromStr _ = Nothing +-- | Converts an hb_direction_t to a string. dirToStr DirLTR = "ltr" dirToStr DirRTL = "rtl" dirToStr DirTTB = "ttb" dirToStr DirBTT = "btt" +-- | Reverses a text direction. dirReverse DirLTR = DirRTL dirReverse DirRTL = DirLTR dirReverse DirTTB = DirBTT dirReverse DirBTT = DirTTB +-- | Tests whether a text direction moves backward +-- (from right to left, or from bottom to top). dirBackward dir = dir `Prelude.elem` [DirRTL, DirBTT] +-- | Tests whether a text direction moves forward +-- (from left to right, or from top to bottom). dirForward dir = dir `Prelude.elem` [DirLTR, DirTTB] +-- | Tests whether a text direction is horizontal. dirHorizontal dir = dir `Prelude.elem` [DirLTR, DirRTL] +-- | Tests whether a text direction is vertical. dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT] +-- | Converts a `Direction` to C encoding. dir2int Nothing = 0 dir2int (Just DirLTR) = 4 dir2int (Just DirRTL) = 5 dir2int (Just DirTTB) = 6 dir2int (Just DirBTT) = 7 +-- | Sets `direction` property on C `Buffer'` struct. foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction :: Buffer' -> Int -> IO () +-- | Converts a `Direction` from C encoding. int2dir 4 = Just DirLTR int2dir 5 = Just DirRTL int2dir 6 = Just DirTTB int2dir 7 = Just DirBTT int2dir _ = Nothing +-- | Fetches the hb_direction_t of a script when it is set horizontally. +-- All right-to-left scripts will return `DirRTL`. +-- All left-to-right scripts will return `DirLTR`. +-- Scripts that can be written either horizontally or vertically will return `Nothing`. +-- Unknown scripts will return `DirLTR`. scriptHorizontalDir :: String -> Maybe Direction scriptHorizontalDir = int2dir . hb_script_get_horizontal_direction . script_from_string foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizontal_direction @@ -156,19 +181,32 @@ foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizont --- Locales ------ data Language' +-- | Represents a natural written language. +-- Corresponds to a BCP47 language tag. type Language = Ptr Language' +-- | Fetch the default language from current locale. +-- NOTE that the first time this function is called, it calls (C code) +-- "setlocale (LC_CTYPE, nullptr)" to fetch current locale. +-- The underlying setlocale function is, in many implementations, NOT threadsafe. +-- To avoid problems, call this function once before multiple threads can call it. +-- This function may be used to fill in missing fields on a `Buffer`. languageDefault :: IO String languageDefault = hb_language_get_default >>= hb_language_to_string >>= peekCString foreign import ccall "hb_language_to_string" hb_language_to_string :: Language -> IO CString foreign import ccall "hb_language_get_default" hb_language_get_default :: IO Language -foreign import ccall "hb_language_from_string" hb_language_from_string' - :: CString -> Int -> IO Language +-- | Converts a `String` representing a BCP 47 language tag to the corresponding `Language`. hb_language_from_string :: String -> IO Language hb_language_from_string str = withCString str $ \str' -> hb_language_from_string' str' (-1) +foreign import ccall "hb_language_from_string" hb_language_from_string' + :: CString -> Int -> IO Language -{-languageMatches :: String -> String -> Bool +{- +-- | Check whether a second language tag is the same or a more specific version +-- of the provided language tag. +-- For example, "fa_IR.utf8" is a more specific tag for "fa" or for "fa_IR". +languageMatches :: String -> String -> Bool languageMatches lang specific = unsafePerformIO $ do lang' <- hb_language_from_string lang specific' <- hb_language_from_string specific @@ -179,23 +217,20 @@ foreign import ccall "hb_language_matches" hb_language_matches :: Language -> La --- FFI Support ------ +-- | Directly corresponds to "hb_buffer_t". data Buffer'' type Buffer' = Ptr Buffer'' +-- | Temporarily allocates a `Buffer'` withNewBuffer :: (Buffer' -> IO a) -> IO a -withNewBuffer cb = do - buf <- hb_buffer_create - ret <- cb buf - hb_buffer_destroy buf - return ret +withNewBuffer cb = bracket hb_buffer_create hb_buffer_destroy cb foreign import ccall "hb_buffer_create" hb_buffer_create :: IO Buffer' foreign import ccall "hb_buffer_destroy" hb_buffer_destroy :: Buffer' -> IO () -{-txt2primitiveArray (A.ByteArray arr') = ByteArray arr' -withByteArrayLen :: ByteArray -> (CString -> Int -> IO a) -> IO a -withByteArrayLen arr cb = let len = sizeofByteArray arr in allocaBytes len $ \arr' -> do - copyByteArrayToPtr arr' arr 0 len - cb arr' len-} +-- | Decodes given lazy `Text` into given `Buffer'`. +-- Should be valid Unicode data. +-- Captures a few trailing & preceding chars when possible to give additional +-- context to the shaping. bufferWithText _ Lazy.Empty cb = cb bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length) txts) cb = do hb_buffer_add_utf8 buffer arr (sizeofByteArray# arr) (toEnum offset) length @@ -203,9 +238,13 @@ bufferWithText buffer txt@(Lazy.Chunk (Txt.Text (A.ByteArray arr) offset length) foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8 :: Buffer' -> ByteArray# -> Int# -> Word -> Int -> IO () +-- | Converts initial char to uppercase & all others to lowercase. +-- Internal utility for reimplementation `script_from_string`. titlecase :: String -> String titlecase "" = "" titlecase (c:cs) = toUpper c : Prelude.map toLower cs +-- | Converts a string str representing an ISO 15924 script tag to a corresponding "tag" `Word32`. +script_from_string :: String -> Word32 script_from_string str = tag_from_string $ case titlecase str of 'Q':'a':'a':'i':_ -> "Zinh" 'Q':'a':'a':'c':_ -> "Copt" @@ -222,8 +261,11 @@ script_from_string str = tag_from_string $ case titlecase str of 'S':'y':'r':'j':_ -> "Syrc" 'S':'y':'r':'n':_ -> "Syrc" x -> x +-- | Converts a `String` into a "tag" `Word32`. Valid tags are 4 `Char`s. +-- Shorter input `String`s will be padded with spaces. +-- Longer input strings will be truncated. tag_from_string :: String -> Word32 -tag_from_string str = case str ++ Prelude.repeat '\0' of +tag_from_string str = case str ++ Prelude.repeat ' ' of c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [ shiftL (c2w c1 .&. 0x7) 24, shiftL (c2w c2 .&. 0x7) 16, @@ -231,6 +273,7 @@ tag_from_string str = case str ++ Prelude.repeat '\0' of shiftL (c2w c4 .&. 0x7) 0 ] _ -> 0 +-- | Converts a "tag" `Word32` into a 4 `Char` `String`. tag_to_string :: Word32 -> String tag_to_string tag = [ w2c (shiftR tag 24 .&. 0x7), @@ -243,6 +286,8 @@ tag_to_string tag = [ --- Haskell-to-C conversion ------ +-- | Temporarily allocates a `Buffer'` corresponding to the given `Buffer` +-- to be processed entirely within the given callback. withBuffer :: Buffer -> (Buffer' -> IO a) -> IO a withBuffer buf cb = withNewBuffer $ \buf' -> bufferWithText buf' (text buf) $ do hb_buffer_set_content_type buf' $ case contentType buf of @@ -296,6 +341,8 @@ foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segmen ------ --- C-to-Haskell conversion ------ + +-- | Holds information about the glyphs & their relation to input text. data GlyphInfo = GlyphInfo { codepoint :: Word32, -- ^ Glyph index (or unicode codepoint) @@ -309,8 +356,30 @@ data GlyphInfo = GlyphInfo { -- they are seperate glyphs, `Buffer`'s `clusterLevel` property allows -- selecting more fine grained cluster handling. unsafeToBreak :: Bool, + -- ^ Indicates that if input text is broken at the beginning of the cluster + -- this glyph is part of, then both sides need to be re-shaped, + -- as the result might be different. + -- On the flip side, it means that when this flag is not present, + -- then it is safe to break the glyph-run at the beginning of this cluster, + -- and the two sides will represent the exact same result one would get + -- if breaking input text at the beginning of this cluster and shaping + -- the two sides separately. This can be used to optimize paragraph layout, + -- by avoiding re-shaping of each line after line-breaking. unsafeToConcat :: Bool, + -- ^ Indicates that if input text is changed on one side of the beginning + -- of the cluster this glyph is part of, then the shaping results for + -- the other side might change. + -- Note that the absence of this flag will NOT by itself mean that + -- it IS safe to concat text. Only two pieces of text both of which + -- clear of this flag can be concatenated safely. + -- See https://harfbuzz.github.io/harfbuzz-hb-buffer.html#HB_GLYPH_FLAG_UNSAFE_TO_CONCAT + -- for more details. safeToInsertTatweel :: Bool + -- ^ In scripts that use elongation (Arabic, Mongolian, Syriac, etc.), + -- this flag signifies that it is safe to insert a U+0640 TATWEEL character + -- before this cluster for elongation. + -- This flag does not determine the script-specific elongation places, + -- but only when it is safe to do the elongation without interrupting text shaping. } deriving (Show, Read, Eq) instance Storable GlyphInfo where sizeOf _ = sizeOf (undefined :: Word32) * 5 @@ -336,6 +405,7 @@ instance Storable GlyphInfo where pokeElemOff ptr' 2 cluster' pokeElemOff ptr' 3 0 pokeElemOff ptr' 4 0 +-- | Decodes `Buffer'`'s glyph information array.' glyphInfos buf' = do arr <- hb_buffer_get_glyph_infos buf' nullPtr length <- hb_buffer_get_length buf' @@ -347,6 +417,8 @@ foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos foreign import ccall "hb_buffer_get_length" hb_buffer_get_length :: Buffer' -> IO Word -- NOTE: The array returned from FFI is valid as long as the buffer is. +-- | Holds positions of the glyph in both horizontal & vertical directions. +-- All positions are relative to current point. data GlyphPos = GlyphPos { x_advance :: Int32, -- ^ How much the line advances after drawing this glyph when setting text @@ -378,6 +450,8 @@ instance Storable GlyphPos where pokeElemOff ptr' 2 x_offset' pokeElemOff ptr' 3 y_offset' pokeElemOff ptr' 4 0 -- Zero private field. +-- | Decodes `Buffer'`'s glyph position array. +-- If buffer did not have positions before, they will be initialized to zeros.' glyphsPos buf' = do arr <- hb_buffer_get_glyph_positions buf' nullPtr length <- hb_buffer_get_length buf' @@ -388,6 +462,7 @@ foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positio :: Buffer' -> Ptr Word -> IO (Ptr GlyphPos) -- NOTE: The array returned from FFI is valid as long as the buffer is. +-- | Decodes a `Buffer'` back to corresponding pure-functional `Buffer`. thawBuffer :: Buffer' -> IO Buffer thawBuffer buf' = do glyphInfos' <- glyphInfos buf' -- 2.30.2