From b7564ba8f687c05b74999b6f480f68de1b6f1d43 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Fri, 22 Sep 2023 14:04:55 +1200 Subject: [PATCH] Optimize Harfbuzz-Pure. --- Data/Text/Glyphize.hs | 10 +-- Data/Text/Glyphize/Buffer.hs | 129 +++++++++++++++++++++-------------- Data/Text/Glyphize/Font.hs | 23 ++++--- Main.hs | 4 +- bench/Main.hs | 31 ++++++++- harfbuzz-pure.cabal | 6 +- 6 files changed, 127 insertions(+), 76 deletions(-) diff --git a/Data/Text/Glyphize.hs b/Data/Text/Glyphize.hs index 54ffdf8..3114068 100644 --- a/Data/Text/Glyphize.hs +++ b/Data/Text/Glyphize.hs @@ -49,12 +49,12 @@ import Foreign.Marshal.Array (withArrayLen) -- the value of the `Feature` with the higher index takes precedance. shape :: Font -> Buffer -> [Feature] -> [(GlyphInfo, GlyphPos)] shape _ Buffer {text = ""} _ = [] -shape font buffer features = unsafePerformIO $ withForeignPtr font $ \font' -> - withBuffer buffer $ \buffer' -> withArrayLen features $ \len features' -> do +shape font buffer features = unsafePerformIO $ withBuffer buffer $ \buffer' -> do + withForeignPtr font $ \font' -> withArrayLen features $ \len features' -> hb_shape font' buffer' features' $ toEnum len - infos <- glyphInfos buffer' - pos <- glyphsPos buffer' - return $ zip infos pos + infos <- glyphInfos buffer' + pos <- glyphsPos buffer' + return $ zip infos pos foreign import ccall "hb_shape" hb_shape :: Font_ -> Buffer' -> Ptr Feature -> Word -> IO () -- | Fills in unset segment properties based on buffer unicode contents. diff --git a/Data/Text/Glyphize/Buffer.hs b/Data/Text/Glyphize/Buffer.hs index 2c43d09..f777359 100644 --- a/Data/Text/Glyphize/Buffer.hs +++ b/Data/Text/Glyphize/Buffer.hs @@ -1,4 +1,5 @@ -{-# LANGUAGE MagicHash, UnliftedFFITypes, DeriveGeneric #-} +{-# LANGUAGE MagicHash, UnboxedTuples #-} +{-# LANGUAGE UnliftedFFITypes, DeriveGeneric #-} module Data.Text.Glyphize.Buffer where import qualified Data.Text.Internal.Lazy as Lazy @@ -7,18 +8,24 @@ import qualified Data.Text.Internal as Txt import Data.Char (toUpper, toLower) import Control.Monad (forM) import Control.Exception (bracket) +import Control.DeepSeq (NFData) import Data.Text.Glyphize.Oom (throwFalse, throwNull) import qualified Data.Text.Array as A -import GHC.Exts (ByteArray#, sizeofByteArray#, Int#) +import GHC.Exts (ByteArray#, sizeofByteArray#, Int#, realWorld#) import Data.Word (Word32) import Data.Int (Int32) import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit) -import System.IO.Unsafe (unsafePerformIO) +import System.IO.Unsafe (unsafePerformIO, unsafeDupablePerformIO) +import GHC.IO (IO(IO)) + import Foreign.Marshal.Alloc (alloca, allocaBytes) +import Foreign.Marshal.Array (peekArray, copyArray) import Foreign.Ptr +import Foreign.ForeignPtr (ForeignPtr, + withForeignPtr, plusForeignPtr, mallocForeignPtrArray) import Foreign.C.String (CString, withCString, peekCString) import Foreign.Storable (Storable(..)) import GHC.Generics (Generic(..)) @@ -389,40 +396,21 @@ data GlyphInfo = GlyphInfo { -- before this cluster for elongation. -- This flag does not determine the script-specific elongation places, -- but only when it is safe to do the elongation without interrupting text shaping. -} deriving (Show, Read, Eq) -instance Storable GlyphInfo where - sizeOf _ = sizeOf (undefined :: Word32) * 5 - alignment _ = alignment (undefined :: Word32) - peek ptr = do - let ptr' :: Ptr Word32 - ptr' = castPtr ptr - -- Ignore private fields. - codepoint' <- ptr' `peekElemOff` 0 - mask <- ptr' `peekElemOff` 1 - cluster' <- ptr' `peekElemOff` 2 - return $ GlyphInfo codepoint' cluster' (mask `testBit` 1) (mask `testBit` 2) (mask `testBit` 3) - poke ptr (GlyphInfo codepoint' cluster' flag1 flag2 flag3) = do - -- Zero private fields. - let ptr' :: Ptr Word32 - ptr' = castPtr ptr - pokeElemOff ptr' 0 codepoint' - pokeElemOff ptr' 1 $ Prelude.foldl (.|.) 0 [ - if flag1 then 1 else 0, - if flag2 then 2 else 0, - if flag3 then 4 else 0 - ] - pokeElemOff ptr' 2 cluster' - pokeElemOff ptr' 3 0 - pokeElemOff ptr' 4 0 +} deriving (Show, Read, Eq, Generic) +instance NFData GlyphInfo +decodeInfos :: [Word32] -> [GlyphInfo] +decodeInfos (codepoint':cluster':mask:_:_:rest) = + GlyphInfo codepoint' cluster' (mask `testBit` 1) (mask `testBit` 2) + (mask `testBit` 3):decodeInfos rest +decodeInfos _ = [] -- | Decodes `Buffer'`'s glyph information array.' glyphInfos buf' = do arr <- throwNull $ hb_buffer_get_glyph_infos buf' nullPtr length <- hb_buffer_get_length buf' - if length == 0 || arr == nullPtr - then return [] - else forM [0..fromEnum length - 1] $ peekElemOff arr + words <- iterateLazy arr (fromEnum length * 5) + return $ decodeInfos words foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos - :: Buffer' -> Ptr Word -> IO (Ptr GlyphInfo) + :: Buffer' -> Ptr Word -> IO (Ptr Word32) foreign import ccall "hb_buffer_get_length" hb_buffer_get_length :: Buffer' -> IO Word -- NOTE: The array returned from FFI is valid as long as the buffer is. @@ -442,33 +430,19 @@ data GlyphPos = GlyphPos { -- ^ How much the glyph moves on the Y-axis before drawing it, this should -- not effect how much the line advances. } deriving (Show, Read, Eq, Generic) -instance Storable GlyphPos where - sizeOf _ = sizeOf (undefined :: Int32) * 5 - alignment _ = alignment (undefined :: Int32) - peek ptr = let ptr' = castPtr ptr in do - x_advance' <- ptr' `peekElemOff` 0 - y_advance' <- ptr' `peekElemOff` 1 - x_offset' <- ptr' `peekElemOff` 2 - y_offset' <- ptr' `peekElemOff` 3 - return $ GlyphPos x_advance' y_advance' x_offset' y_offset' - poke ptr (GlyphPos x_advance' y_advance' x_offset' y_offset') = do - let ptr' :: Ptr Int32 - ptr' = castPtr ptr - pokeElemOff ptr' 0 x_advance' - pokeElemOff ptr' 1 y_advance' - pokeElemOff ptr' 2 x_offset' - pokeElemOff ptr' 3 y_offset' - pokeElemOff ptr' 4 0 -- Zero private field. +instance NFData GlyphPos +decodePositions (x_advance':y_advance':x_offset':y_offset':_:rest) = + GlyphPos x_advance' y_advance' x_offset' y_offset':decodePositions rest +decodePositions _ = [] -- | Decodes `Buffer'`'s glyph position array. -- If buffer did not have positions before, they will be initialized to zeros.' glyphsPos buf' = do arr <- throwNull $ hb_buffer_get_glyph_positions buf' nullPtr length <- hb_buffer_get_length buf' - if length == 0 || arr == nullPtr - then return [] - else forM [0..fromEnum length - 1] $ peekElemOff arr + words <- iterateLazy arr (fromEnum length * 5) + return $ decodePositions words foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions - :: Buffer' -> Ptr Word -> IO (Ptr GlyphPos) + :: Buffer' -> Ptr Word -> IO (Ptr Int32) -- NOTE: The array returned from FFI is valid as long as the buffer is. -- | Decodes a `Buffer'` back to corresponding pure-functional `Buffer`. @@ -516,3 +490,52 @@ foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_gly :: Buffer' -> IO Word32 foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint :: Buffer' -> IO Word32 + +--- Even more optimized peekArray + +clonePtr ptr l = do + ret <- mallocForeignPtrArray l + withForeignPtr ret $ \ptr' -> copyArray ptr ptr' l + return ret +peekLazy :: Storable a => ForeignPtr a -> Int -> [a] +peekLazy = peekLazy' 0 +peekLazy' n fp l + | n < l = x:peekLazy' (succ n) fp l + | otherwise = [] + where x = accursedUnutterablePerformIO $ withForeignPtr fp $ flip peekElemOff n +iterateLazy :: Storable a => Ptr a -> Int -> IO [a] +iterateLazy ptr l = do + fp <- clonePtr ptr l + return $ peekLazy fp $ fromEnum l + +-- | This \"function\" has a superficial similarity to 'System.IO.Unsafe.unsafePerformIO' but +-- it is in fact a malevolent agent of chaos. It unpicks the seams of reality +-- (and the 'IO' monad) so that the normal rules no longer apply. It lulls you +-- into thinking it is reasonable, but when you are not looking it stabs you +-- in the back and aliases all of your mutable buffers. The carcass of many a +-- seasoned Haskell programmer lie strewn at its feet. +-- +-- Witness the trail of destruction: +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- * +-- +-- Do not talk about \"safe\"! You do not know what is safe! +-- +-- Yield not to its blasphemous call! Flee traveller! Flee or you will be +-- corrupted and devoured! +-- +{-# INLINE accursedUnutterablePerformIO #-} +accursedUnutterablePerformIO :: IO a -> a +accursedUnutterablePerformIO (IO m) = case m realWorld# of (# _, r #) -> r diff --git a/Data/Text/Glyphize/Font.hs b/Data/Text/Glyphize/Font.hs index 22c9c8a..feb4c9b 100644 --- a/Data/Text/Glyphize/Font.hs +++ b/Data/Text/Glyphize/Font.hs @@ -11,6 +11,7 @@ import Data.Text.Glyphize.Buffer (tag_to_string, tag_from_string, Direction, dir import Data.Text.Glyphize.Oom (throwNull, throwFalse) import Control.Monad (forM, unless) +import Control.Exception (bracket) import Data.Maybe (fromMaybe) import System.IO.Unsafe (unsafePerformIO) @@ -121,9 +122,7 @@ globalEnd = maxBound -- | Fetches the number of `Face`s in a `ByteString`. countFace :: ByteString -> Word -countFace bytes = unsafePerformIO $ do - blob <- bs2blob bytes - withForeignPtr blob hb_face_count +countFace bytes = unsafePerformIO $ withBlob bytes hb_face_count foreign import ccall "hb_face_count" hb_face_count :: Blob_ -> IO Word -- | A Font face. @@ -140,8 +139,7 @@ data Face' -- load named-instances in variable fonts. See `createFont` for details. createFace :: ByteString -> Word -> Face createFace bytes index = unsafePerformIO $ do - blob <- bs2blob bytes - face <- withForeignPtr blob $ throwNull . flip hb_face_create index + face <- withBlob bytes $ throwNull . flip hb_face_create index hb_face_make_immutable face newForeignPtr hb_face_destroy face foreign import ccall "hb_face_create" hb_face_create :: Blob_ -> Word -> IO Face_ @@ -260,8 +258,7 @@ foreign import ccall "hb_face_set_upem" hb_face_set_upem :: Face_ -> Word -> IO -- | Variant of `createFace` which applies given options. createFaceWithOpts :: FaceOptions -> ByteString -> Word -> Face createFaceWithOpts opts bytes index = unsafePerformIO $ do - blob <- bs2blob bytes - face <- withForeignPtr blob $ throwNull . flip hb_face_create index + face <- withBlob bytes $ throwNull . flip hb_face_create index _setFaceOptions face opts hb_face_make_immutable face newForeignPtr hb_face_destroy face @@ -279,9 +276,8 @@ ftCreateFaceWithOpts opts ftFace = do buildFace :: [(String, ByteString)] -> FaceOptions -> Face buildFace tables opts = unsafePerformIO $ do builder <- throwNull hb_face_builder_create - forM tables $ \(tag, bytes) -> do - blob <- bs2blob bytes - throwFalse $ withForeignPtr blob $ + forM tables $ \(tag, bytes) -> + throwFalse $ withBlob bytes $ hb_face_builder_add_table builder $ tag_from_string tag _setFaceOptions builder opts hb_face_make_immutable builder @@ -926,9 +922,16 @@ bs2blob (BS bytes len) = do blob <- throwNull $ withForeignPtr bytes $ \bytes' -> hb_blob_create bytes' len hb_MEMORY_MODE_DUPLICATE nullPtr nullFunPtr newForeignPtr hb_blob_destroy blob +withBlob :: ByteString -> (Blob_ -> IO a) -> IO a +withBlob (BS bytes len) cb = withForeignPtr bytes $ \bytes' -> do + throwNull $ pure bytes' + bracket + (hb_blob_create bytes' len hb_MEMORY_MODE_READONLY nullPtr nullFunPtr) + hb_blob_destroy' cb foreign import ccall "hb_blob_create" hb_blob_create :: Ptr Word8 -> Int -> Int -> Ptr () -> FunPtr (Ptr () -> IO ()) -> IO Blob_ hb_MEMORY_MODE_DUPLICATE = 0 +hb_MEMORY_MODE_READONLY = 1 foreign import ccall "&hb_blob_destroy" hb_blob_destroy :: FunPtr (Blob_ -> IO ()) -- | Convert to a ByteString from Harfbuzz's equivalent. diff --git a/Main.hs b/Main.hs index b948326..755d674 100644 --- a/Main.hs +++ b/Main.hs @@ -2,7 +2,7 @@ module Main where import "harfbuzz-pure" Data.Text.Glyphize -import Control.Parallel.Strategies (parMap, rpar) +import Control.Parallel.Strategies (parMap, rdeepseq) import Data.Text.Lazy (pack) import qualified Data.ByteString as BS @@ -21,4 +21,4 @@ main = do let font = createFont $ createFace blob 0 case words of "!":words' -> print $ shape font (defaultBuffer { text = pack $ unwords words' }) [] - _ -> print $ parMap rpar (shapeStr font) words + _ -> print $ parMap rdeepseq (shapeStr font) words diff --git a/bench/Main.hs b/bench/Main.hs index 5447772..7b2d3f1 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -5,15 +5,40 @@ import Data.Text.Glyphize import FileEmbedLzma (embedLazyText, embedByteString) import Data.FileEmbed (makeRelativeToProject) import System.FilePath (()) +import qualified Data.Text.Foreign as Txt +import qualified Data.Text.Lazy as Txt + +import Control.Parallel.Strategies (parMap, rdeepseq) +import Data.Word (Word8) + +-- Benchmarking these as well... +import Foreign.Marshal.Array (peekArray, copyArray) +import Foreign.ForeignPtr (mallocForeignPtrArray, ForeignPtr) + +import Foreign.Ptr (Ptr) +import Foreign.Storable (Storable(..)) +import System.IO.Unsafe (unsafePerformIO) shapeStr txt = shape font defaultBuffer { text = txt } [] where font = createFont $ createFace $( makeRelativeToProject ("assets" "Lora-Regular.ttf") >>= embedByteString) 0 +dracula = $(makeRelativeToProject ("bench" "dracula.txt") >>= embedLazyText) + main = defaultMain [ - bgroup "literature" [ - bench "Dracula" $ whnf shapeStr - $(makeRelativeToProject ("bench" "dracula.txt") >>= embedLazyText) + bgroup "Dracula" [ + bench "Week-Head" $ whnf shapeStr dracula, + bench "Normal Form" $ nf shapeStr dracula, + bench "Paragraphs" $ nf (map shapeStr) $ Txt.lines dracula, + bench "Parallelised" $ nf (parMap rdeepseq shapeStr) $ Txt.lines dracula + ], + bgroup "building blocks" [ + bench "peekArray (NF)" $ nfIO $ Txt.useAsPtr (Txt.toStrict dracula) $ + \ptr l -> peekArray (fromEnum l) ptr, + bench "peekArray" $ whnfIO $ Txt.useAsPtr (Txt.toStrict dracula) $ + \ptr l -> peekArray (fromEnum l) ptr, + bench "alloc foreign ptr" $ whnfIO (mallocForeignPtrArray $ + fromEnum $ Txt.length dracula :: IO (ForeignPtr Word8)) ] ] diff --git a/harfbuzz-pure.cabal b/harfbuzz-pure.cabal index 8adebc6..dd3fa73 100644 --- a/harfbuzz-pure.cabal +++ b/harfbuzz-pure.cabal @@ -60,13 +60,13 @@ library exposed-modules: Data.Text.Glyphize -- Modules included in this library but not exported. - other-modules: Data.Text.Glyphize.Buffer, Data.Text.Glyphize.Font, Data.Text.Glyphize.Oom + other-modules: Data.Text.Glyphize.Font, Data.Text.Glyphize.Buffer, Data.Text.Glyphize.Oom -- LANGUAGE extensions used by modules in this package. -- other-extensions: -- Other library packages from which modules are imported. - build-depends: base >=4.12 && <5, text >= 2.0 && < 3, + build-depends: base >=4.12 && <5, text >= 2.0 && < 3, deepseq >= 1, bytestring >= 0.11, freetype2 >= 0.2, derive-storable >= 0.3 && < 1 pkgconfig-depends: harfbuzz >= 3.3 @@ -95,4 +95,4 @@ benchmark bench-harfbuzz type: exitcode-stdio-1.0 hs-source-dirs: bench main-is: Main.hs - build-depends: base, harfbuzz-pure, file-embed-lzma, file-embed, criterion, filepath + build-depends: base, harfbuzz-pure, file-embed-lzma, file-embed, criterion, filepath, text, parallel -- 2.30.2