From 4674b43acebc33f553b45c1238c0c37509774778 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Sat, 12 Feb 2022 19:13:57 +1300 Subject: [PATCH] Reimplement parts of Harfbuzz where that's easier than writing a binding. Removed a note where that turned out not to be the case. --- Data/Text/Glyphize/Buffer.hs | 193 ++++++++++++++++++++++++++++++----- harfbuzz-pure.cabal | 2 +- 2 files changed, 170 insertions(+), 25 deletions(-) diff --git a/Data/Text/Glyphize/Buffer.hs b/Data/Text/Glyphize/Buffer.hs index ce740ba..687480e 100644 --- a/Data/Text/Glyphize/Buffer.hs +++ b/Data/Text/Glyphize/Buffer.hs @@ -1,22 +1,28 @@ module Data.Text.Glyphize.Buffer where -import Data.Text.Lazy as Lazy -import Data.ByteString.Lazy as Lazy +import Data.Text.Lazy as Lazy hiding (toUpper, toLower) +import Data.ByteString.Lazy as Lazy hiding (toUpper, toLower) import Data.ByteString.Lazy as LBS -import Data.Text.Short +import Data.Text.Short as Short import Foreign.ForeignPtr import Foreign.Ptr +import Foreign.Storable +import Foreign.Marshal.Alloc import Foreign.C.Types +import Foreign.C.String import Data.Word -import System.IO.Unsafe (unsafePerformIO) +import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO) import Data.Text.Lazy.Encoding import Data.ByteString.Lazy.Internal as Lazy -import Data.ByteString.Internal as Strict -import Data.ByteString.Short.Internal as Strict -import Data.Bits ((.|.)) -import Data.Char (ord) +import Data.ByteString.Internal as Strict hiding (w2c, c2w) +import Data.ByteString.Short.Internal as Strict hiding (w2c, c2w) +import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit) +import Data.Char (ord, chr, toUpper, toLower) + +import Control.Monad (forM) +import Codec.Binary.UTF8.Light (encodeUTF8, w2c, c2w) data Buffer = Buffer { text :: Either Lazy.Text Lazy.ByteString, @@ -63,11 +69,11 @@ data Buffer = Buffer { clusterLevel :: ClusterLevel, -- ^ dictates one aspect of how HarfBuzz will treat non-base characters -- during shaping. - invisibleGlyph :: Int, + invisibleGlyph :: Word32, -- ^ The glyph number that replaces invisible characters in the -- shaping result. If set to zero (default), the glyph for the U+0020 -- SPACE character is used. Otherwise, this value is used verbatim. - notFoundGlyph :: Int, + notFoundGlyph :: Word32, -- ^ the glyph number that replaces characters not found in the font during shaping. -- The not-found glyph defaults to zero, sometimes knows as the ".notdef" glyph. -- This API allows for differentiating the two. @@ -99,13 +105,42 @@ data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show) data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars deriving (Eq, Show) data GlyphInfo = GlyphInfo { - codepoint :: Int, - cluster :: Int + codepoint :: Word32, + cluster :: Word32 } +instance Storable GlyphInfo where + sizeOf (GlyphInfo a b) = sizeOf a + sizeOf b + alignment (GlyphInfo a b) = alignment a + peek p = do + q <- return $ castPtr p + codepoint' <- peek q + cluster' <- peekElemOff q 1 + return $ GlyphInfo codepoint' cluster' + poke p (GlyphInfo a b) = do + q <- return $ castPtr p + poke q a + pokeElemOff q 1 b + data GlyphPos = GlyphPos { - x_advance :: Int, y_advance :: Int, - x_offset :: Int, y_offset :: Int + x_advance :: Word32, y_advance :: Word32, + x_offset :: Word32, y_offset :: Word32 } +instance Storable GlyphPos where + sizeOf (GlyphPos a _ _ _) = 4 * sizeOf a + alignment (GlyphPos a _ _ _) = alignment a + peek p = do + q <- return $ castPtr p + xa <- peek q + ya <- peekElemOff q 1 + xoff <- peekElemOff q 2 + yoff <- peekElemOff q 3 + return $ GlyphPos xa ya xoff yoff + poke p (GlyphPos xa ya xoff yoff) = do + q <- return $ castPtr p + poke q xa + pokeElemOff q 1 ya + pokeElemOff q 2 xoff + pokeElemOff q 3 yoff -- guessSegmentProperties :: Buffer -> Buffer -- glyphInfo & glyphPositions to be zipped & return from shape function @@ -145,7 +180,7 @@ freeze' buf = do Just DirTTB -> 6 Just DirBTT -> 7 case script buf of - Just script' -> hb_buffer_set_script buffer =<< hb_script_from_txt script' + Just script' -> hb_buffer_set_script buffer $ hb_script_from_txt script' Nothing -> return () case language buf of Just lang' -> hb_buffer_set_language buffer =<< hb_language_from_txt lang' @@ -170,7 +205,64 @@ freeze' buf = do (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buffer newForeignPtr hb_buffer_destroy buffer -thaw buf' = () +glyphInfos :: Buffer' -> [GlyphInfo] +glyphInfos = unsafePerformIO . glyphInfos' +glyphInfos' :: Buffer' -> IO [GlyphInfo] +glyphInfos' buf' = alloca $ \length' -> do + arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_infos buf'' length' + length <- peek length' + forM [0..length - 1] $ peekElemOff arr +glyphsPos = unsafePerformIO . glyphsPos' +glyphsPos' buf' = do + has_positions <- withForeignPtr buf' $ \buf'' -> hb_buffer_has_positions buf'' + if has_positions + then alloca $ \length' -> do + arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_positions buf'' length' + length <- peek length' + forM [0..length-1] $ peekElemOff arr + else return [] + +thaw :: Buffer' -> Buffer +thaw = unsafePerformIO . thaw' +thaw' buf' = do + let getter cb = unsafeInterleaveIO $ withForeignPtr buf' cb + glyphInfos' <- glyphInfos' buf' + contentType' <- getter hb_buffer_get_content_type + direction' <- getter hb_buffer_get_direction + script' <- getter hb_buffer_get_script + language' <- unsafeInterleaveIO $ do + lang <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_language buf'' + peekCString $ hb_language_to_string lang + flags' <- getter hb_buffer_get_flags + clusterLevel' <- getter hb_buffer_get_cluster_level + invisibleGlyph' <- getter hb_buffer_get_invisible_glyph + notFoundGlyph' <- getter hb_buffer_get_not_found_glyph + replacementCodepoint' <- getter hb_buffer_get_replacement_codepoint + return Buffer { + text = Right $ LBS.fromStrict $ encodeUTF8 $ Prelude.map codepoint glyphInfos', + contentType = case contentType' of + 1 -> Just ContentTypeUnicode + 2 -> Just ContentTypeGlyphs + _ -> Nothing, + direction = case direction' of + 4 -> Just DirLTR + 5 -> Just DirRTL + 6 -> Just DirTTB + 7 -> Just DirBTT + _ -> Nothing, + language = Just $ Short.fromString language', + script = Just $ Short.fromString $ hb_tag_to_string script', + beginsText = testBit flags' 0, endsText = testBit flags' 1, + preserveDefaultIgnorables = testBit flags' 2, + removeDefaultIgnorables = testBit flags' 3, + don'tInsertDottedCircle = testBit flags' 4, + clusterLevel = case clusterLevel' of + 1 -> ClusterMonotoneChars + 2 -> ClusterChars + _ -> ClusterMonotoneGraphemes, + invisibleGlyph = invisibleGlyph', notFoundGlyph = notFoundGlyph', + replacementCodepoint = w2c replacementCodepoint' + } foreign import ccall "hb_buffer_create" hb_buffer_create :: IO (Ptr Buffer'') foreign import ccall "&hb_buffer_destroy" hb_buffer_destroy :: FunPtr (Ptr Buffer'' -> IO ()) @@ -184,26 +276,79 @@ foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction :: Ptr Buffer'' -> Int -> IO () -foreign import ccall "hb_script_from_string" hb_script_from_string - :: Ptr Word8 -> Int -> Int -hb_script_from_txt txt = let Strict.PS ptr offset size = toByteString txt - in withForeignPtr ptr $ \ptr' -> return $ hb_script_from_string ptr' size +hb_tag_from_string :: String -> Word32 +hb_tag_from_string str = case str ++ Prelude.repeat '\0' of + c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [ + shiftL (c2w c1 .&. 0x7) 24, + shiftL (c2w c2 .&. 0x7) 16, + shiftL (c2w c3 .&. 0x7) 8, + shiftL (c2w c4 .&. 0x7) 0 + ] + _ -> 0 +titlecase :: String -> String +titlecase "" = "" +titlecase (c:cs) = toUpper c : Prelude.map toLower cs +hb_script_from_string str = hb_tag_from_string $ case titlecase str of + 'Q':'a':'a':'i':_ -> "Zinh" + 'Q':'a':'a':'c':_ -> "Copt" + + 'A':'r':'a':'n':_ -> "Arab" + 'C':'y':'r':'s':_ -> "Cyrl" + 'G':'e':'o':'k':_ -> "Geor" + 'H':'a':'n':'s':_ -> "Hani" + 'H':'a':'n':'t':_ -> "Hani" + 'J':'a':'m':'o':_ -> "Hang" + 'L':'a':'t':'f':_ -> "Latn" + 'L':'a':'t':'g':_ -> "Latn" + 'S':'y':'r':'e':_ -> "Syrc" + 'S':'y':'r':'j':_ -> "Syrc" + 'S':'y':'r':'n':_ -> "Syrc" +hb_script_from_txt txt = hb_script_from_string $ Short.toString txt foreign import ccall "hb_buffer_set_script" hb_buffer_set_script - :: Ptr Buffer'' -> Int -> IO () + :: Ptr Buffer'' -> Word32 -> IO () foreign import ccall "hb_language_from_string" hb_language_from_string :: Ptr Word8 -> Int -> Int hb_language_from_txt txt = let Strict.PS ptr offset size = toByteString txt - in withForeignPtr ptr $ \ptr' -> return $ hb_script_from_string ptr' size + in withForeignPtr ptr $ \ptr' -> return $ hb_language_from_string ptr' size foreign import ccall "hb_buffer_set_language" hb_buffer_set_language :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph - :: Ptr Buffer'' -> Int -> IO () + :: Ptr Buffer'' -> Word32 -> IO () foreign import ccall "hb_buffer_set_not_found_glyph" hb_buffer_set_not_found_glyph - :: Ptr Buffer'' -> Int -> IO () + :: Ptr Buffer'' -> Word32 -> IO () foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint :: Ptr Buffer'' -> Int -> IO () foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties :: Ptr Buffer'' -> IO () + + +foreign import ccall "hb_buffer_get_content_type" hb_buffer_get_content_type + :: Ptr Buffer'' -> IO Int +foreign import ccall "hb_buffer_get_direction" hb_buffer_get_direction :: Ptr Buffer'' -> IO Int +foreign import ccall "hb_buffer_get_script" hb_buffer_get_script :: Ptr Buffer'' -> IO Word32 +hb_tag_to_string :: Word32 -> String +hb_tag_to_string tag = [ + w2c (shiftR tag 24 .&. 0x7), + w2c (shiftR tag 16 .&. 0x7), + w2c (shiftR tag 8 .&. 0x7), + w2c (shiftR tag 0 .&. 0x7) + ] +foreign import ccall "hb_buffer_get_language" hb_buffer_get_language :: Ptr Buffer'' -> IO (Ptr ()) +foreign import ccall "hb_language_to_string" hb_language_to_string :: Ptr () -> CString +foreign import ccall "hb_buffer_get_flags" hb_buffer_get_flags :: Ptr Buffer'' -> IO Int +foreign import ccall "hb_buffer_get_cluster_level" hb_buffer_get_cluster_level + :: Ptr Buffer'' -> IO Int +foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos + :: Ptr Buffer'' -> Ptr Int -> IO (Ptr GlyphInfo) +foreign import ccall "hb_buffer_has_positions" hb_buffer_has_positions :: Ptr Buffer'' -> IO Bool +foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions + :: Ptr Buffer'' -> Ptr Int -> IO (Ptr GlyphPos) +foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_glyph + :: Ptr Buffer'' -> IO Word32 +foreign import ccall "hb_buffer_get_not_found_glyph" hb_buffer_get_not_found_glyph + :: Ptr Buffer'' -> IO Word32 +foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint + :: Ptr Buffer'' -> IO Word32 diff --git a/harfbuzz-pure.cabal b/harfbuzz-pure.cabal index 052de30..3ce2588 100644 --- a/harfbuzz-pure.cabal +++ b/harfbuzz-pure.cabal @@ -60,7 +60,7 @@ library -- other-extensions: -- Other library packages from which modules are imported. - build-depends: base >=4.9 && <4.10, bytestring, text, text-short + build-depends: base >=4.9 && <4.10, bytestring, text, text-short, utf8-light extra-libraries: harfbuzz -- Directories containing source files. -- 2.30.2