From 7aae895f30d192648e4257d7c695e541b07a1686 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Sat, 12 Feb 2022 15:51:05 +1300 Subject: [PATCH] Integrate guessing chars, document properties, expose default property values. --- Data/Text/Glyphize/Buffer.hs | 80 ++++++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 7 deletions(-) diff --git a/Data/Text/Glyphize/Buffer.hs b/Data/Text/Glyphize/Buffer.hs index 910c102..ce740ba 100644 --- a/Data/Text/Glyphize/Buffer.hs +++ b/Data/Text/Glyphize/Buffer.hs @@ -2,6 +2,7 @@ module Data.Text.Glyphize.Buffer where import Data.Text.Lazy as Lazy import Data.ByteString.Lazy as Lazy +import Data.ByteString.Lazy as LBS import Data.Text.Short import Foreign.ForeignPtr @@ -19,21 +20,80 @@ import Data.Char (ord) data Buffer = Buffer { text :: Either Lazy.Text Lazy.ByteString, + -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs. contentType :: Maybe ContentType, + -- ^ What the bytes of the ByteString contents represents, + -- namely unicode characters (before shaping) or glyphs (result of shaping). + -- Typically callers should leave this as `Just ContentTypeUnicode`. direction :: Maybe Direction, + -- ^ The text flow direction of the buffer. + -- No shaping can happen without setting buffer direction, and it controls + -- the visual direction for the output glyphs; for RTL direction the glyphs + -- will be reversed. Many layout features depend on the proper setting of + -- the direction, for example, reversing RTL text before shaping, + -- then shaping with LTR direction is not the same as keeping the text in + -- logical order and shaping with RTL direction. script :: Maybe ShortText, + -- ^ Script is crucial for choosing the proper shaping behaviour for scripts + -- that require it (e.g. Arabic) and the which OpenType features defined in + -- the font to be applied. language :: Maybe ShortText, + -- ^ Languages are crucial for selecting which OpenType feature to apply to + -- the buffer which can result in applying language-specific behaviour. + -- Languages are orthogonal to the scripts, and though they are related, + -- they are different concepts and should not be confused with each other. beginsText :: Bool, + -- ^ special handling of the beginning of text paragraph can be applied to + -- this buffer. Should usually be set, unless you are passing to the buffer + -- only part of the text without the full context. endsText :: Bool, + -- ^ special handling of the end of text paragraph can be applied to this buffer. preserveDefaultIgnorables :: Bool, + -- ^ character with Default_Ignorable Unicode property should use the + -- corresponding glyph from the font, instead of hiding them (done by + -- replacing them with the space glyph and zeroing the advance width.) + -- Takes precedance over `removeDefaultIgnorables`. removeDefaultIgnorables :: Bool, + -- ^ character with Default_Ignorable Unicode property should be removed + -- from glyph string instead of hiding them (done by replacing them with + -- the space glyph and zeroing the advance width.) don'tInsertDottedCircle :: Bool, + -- ^ a dotted circle should not be inserted in the rendering of incorrect + -- character sequences (such at <0905 093E>). clusterLevel :: ClusterLevel, - invisibleGlyph :: Char, - notFoundGlyph :: Char, + -- ^ dictates one aspect of how HarfBuzz will treat non-base characters + -- during shaping. + invisibleGlyph :: Int, + -- ^ The glyph number that replaces invisible characters in the + -- shaping result. If set to zero (default), the glyph for the U+0020 + -- SPACE character is used. Otherwise, this value is used verbatim. + notFoundGlyph :: Int, + -- ^ the glyph number that replaces characters not found in the font during shaping. + -- The not-found glyph defaults to zero, sometimes knows as the ".notdef" glyph. + -- This API allows for differentiating the two. replacementCodepoint :: Char + -- ^ the hb_codepoint_t that replaces invalid entries for a given encoding + -- when adding text to buffer . } +-- | An empty buffer with sensible default properties. +defaultBuffer = Buffer { + text = Right LBS.empty, + contentType = Just ContentTypeUnicode, + direction = Nothing, + script = Nothing, + language = Nothing, + beginsText = True, + endsText = True, + preserveDefaultIgnorables = False, + removeDefaultIgnorables = False, + don'tInsertDottedCircle = False, + clusterLevel = ClusterMonotoneGraphemes, + invisibleGlyph = 0, + notFoundGlyph = 0, + replacementCodepoint = '\xFFFD' + } + data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show) data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show) data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars deriving (Eq, Show) @@ -65,7 +125,8 @@ dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT] type Buffer' = ForeignPtr Buffer'' data Buffer'' -buffer2buffer' buf = unsafePerformIO $ do +freeze = unsafePerformIO . freeze' +freeze' buf = do buffer <- hb_buffer_create case text buf of Right bs -> hb_buffer_add_bytestring buffer bs @@ -100,13 +161,16 @@ buffer2buffer' buf = unsafePerformIO $ do ClusterMonotoneGraphemes -> 0 ClusterMonotoneChars -> 1 ClusterChars -> 2 - hb_buffer_set_invisible_glyph buffer $ ord $ invisibleGlyph buf - hb_buffer_set_not_found_glyph buffer $ ord $ notFoundGlyph buf + hb_buffer_set_invisible_glyph buffer $ invisibleGlyph buf + hb_buffer_set_not_found_glyph buffer $ notFoundGlyph buf hb_buffer_set_replacement_codepoint buffer $ ord $ replacementCodepoint buf + case (contentType buf, direction buf, script buf, language buf) of + (Just ContentTypeUnicode, Nothing, _, _) -> hb_buffer_guess_segment_properties buffer + (Just ContentTypeUnicode, _, Nothing, _) -> hb_buffer_guess_segment_properties buffer + (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buffer newForeignPtr hb_buffer_destroy buffer -buffer'2buffer buf' = unsafePerformIO $ do - return () +thaw buf' = () foreign import ccall "hb_buffer_create" hb_buffer_create :: IO (Ptr Buffer'') foreign import ccall "&hb_buffer_destroy" hb_buffer_destroy :: FunPtr (Ptr Buffer'' -> IO ()) @@ -141,3 +205,5 @@ foreign import ccall "hb_buffer_set_not_found_glyph" hb_buffer_set_not_found_gly :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint :: Ptr Buffer'' -> Int -> IO () +foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties + :: Ptr Buffer'' -> IO () -- 2.30.2