~alcinnz/harfbuzz-pure

4674b43acebc33f553b45c1238c0c37509774778 — Adrian Cochrane 2 years ago 7aae895
Reimplement parts of Harfbuzz where that's easier than writing a binding.

Removed a note where that turned out not to be the case.
2 files changed, 170 insertions(+), 25 deletions(-)

M Data/Text/Glyphize/Buffer.hs
M harfbuzz-pure.cabal
M Data/Text/Glyphize/Buffer.hs => Data/Text/Glyphize/Buffer.hs +169 -24
@@ 1,22 1,28 @@
module Data.Text.Glyphize.Buffer where

import Data.Text.Lazy as Lazy
import Data.ByteString.Lazy as Lazy
import Data.Text.Lazy as Lazy hiding (toUpper, toLower)
import Data.ByteString.Lazy as Lazy hiding (toUpper, toLower)
import Data.ByteString.Lazy as LBS
import Data.Text.Short
import Data.Text.Short as Short

import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Storable
import Foreign.Marshal.Alloc
import Foreign.C.Types
import Foreign.C.String
import Data.Word
import System.IO.Unsafe (unsafePerformIO)
import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO)

import Data.Text.Lazy.Encoding
import Data.ByteString.Lazy.Internal as Lazy
import Data.ByteString.Internal as Strict
import Data.ByteString.Short.Internal as Strict
import Data.Bits ((.|.))
import Data.Char (ord)
import Data.ByteString.Internal as Strict hiding (w2c, c2w)
import Data.ByteString.Short.Internal as Strict hiding (w2c, c2w)
import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit)
import Data.Char (ord, chr, toUpper, toLower)

import Control.Monad (forM)
import Codec.Binary.UTF8.Light (encodeUTF8, w2c, c2w)

data Buffer = Buffer {
    text :: Either Lazy.Text Lazy.ByteString,


@@ 63,11 69,11 @@ data Buffer = Buffer {
    clusterLevel :: ClusterLevel,
    -- ^ dictates one aspect of how HarfBuzz will treat non-base characters
    -- during shaping.
    invisibleGlyph :: Int,
    invisibleGlyph :: Word32,
    -- ^ The glyph number that replaces invisible characters in the
    -- shaping result. If set to zero (default), the glyph for the U+0020
    -- SPACE character is used. Otherwise, this value is used verbatim.
    notFoundGlyph :: Int,
    notFoundGlyph :: Word32,
    -- ^ the glyph number that replaces characters not found in the font during shaping.
    -- The not-found glyph defaults to zero, sometimes knows as the ".notdef" glyph.
    -- This API allows for differentiating the two.


@@ 99,13 105,42 @@ data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show)
data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars deriving (Eq, Show)

data GlyphInfo = GlyphInfo {
    codepoint :: Int,
    cluster :: Int
    codepoint :: Word32,
    cluster :: Word32
}
instance Storable GlyphInfo where
    sizeOf (GlyphInfo a b) = sizeOf a + sizeOf b
    alignment (GlyphInfo a b) = alignment a
    peek p = do
        q <- return $ castPtr p
        codepoint' <- peek q
        cluster' <- peekElemOff q 1
        return $ GlyphInfo codepoint' cluster'
    poke p (GlyphInfo a b) = do
        q <- return $ castPtr p
        poke q a
        pokeElemOff q 1 b

data GlyphPos = GlyphPos {
    x_advance :: Int, y_advance :: Int,
    x_offset :: Int, y_offset :: Int
    x_advance :: Word32, y_advance :: Word32,
    x_offset :: Word32, y_offset :: Word32
}
instance Storable GlyphPos where
    sizeOf (GlyphPos a _ _ _) = 4 * sizeOf a
    alignment (GlyphPos a _ _ _) = alignment a
    peek p = do
        q <- return $ castPtr p
        xa <- peek q
        ya <- peekElemOff q 1
        xoff <- peekElemOff q 2
        yoff <- peekElemOff q 3
        return $ GlyphPos xa ya xoff yoff
    poke p (GlyphPos xa ya xoff yoff) = do
        q <- return $ castPtr p
        poke q xa
        pokeElemOff q 1 ya
        pokeElemOff q 2 xoff
        pokeElemOff q 3 yoff

-- guessSegmentProperties :: Buffer -> Buffer
-- glyphInfo & glyphPositions to be zipped & return from shape function


@@ 145,7 180,7 @@ freeze' buf = do
        Just DirTTB -> 6
        Just DirBTT -> 7
    case script buf of
        Just script' -> hb_buffer_set_script buffer =<< hb_script_from_txt script'
        Just script' -> hb_buffer_set_script buffer $ hb_script_from_txt script'
        Nothing -> return ()
    case language buf of
        Just lang' -> hb_buffer_set_language buffer =<< hb_language_from_txt lang'


@@ 170,7 205,64 @@ freeze' buf = do
        (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buffer
    newForeignPtr hb_buffer_destroy buffer

thaw buf' = ()
glyphInfos :: Buffer' -> [GlyphInfo]
glyphInfos = unsafePerformIO . glyphInfos'
glyphInfos' :: Buffer' -> IO [GlyphInfo]
glyphInfos' buf' = alloca $ \length' -> do
    arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_infos buf'' length'
    length <- peek length'
    forM [0..length - 1] $ peekElemOff arr
glyphsPos = unsafePerformIO . glyphsPos'
glyphsPos' buf' = do
    has_positions <- withForeignPtr buf' $ \buf'' -> hb_buffer_has_positions buf''
    if has_positions
    then alloca $ \length' -> do
        arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_positions buf'' length'
        length <- peek length'
        forM [0..length-1] $ peekElemOff arr
    else return []

thaw :: Buffer' -> Buffer
thaw = unsafePerformIO . thaw'
thaw' buf' = do
    let getter cb = unsafeInterleaveIO $ withForeignPtr buf' cb
    glyphInfos' <- glyphInfos' buf'
    contentType' <- getter hb_buffer_get_content_type
    direction' <- getter hb_buffer_get_direction
    script' <- getter hb_buffer_get_script
    language' <- unsafeInterleaveIO $ do
        lang <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_language buf''
        peekCString $ hb_language_to_string lang
    flags' <- getter hb_buffer_get_flags
    clusterLevel' <- getter hb_buffer_get_cluster_level
    invisibleGlyph' <- getter hb_buffer_get_invisible_glyph
    notFoundGlyph' <- getter hb_buffer_get_not_found_glyph
    replacementCodepoint' <- getter hb_buffer_get_replacement_codepoint
    return Buffer {
        text = Right $ LBS.fromStrict $ encodeUTF8 $ Prelude.map codepoint glyphInfos',
        contentType = case contentType' of
            1 -> Just ContentTypeUnicode
            2 -> Just ContentTypeGlyphs
            _ -> Nothing,
        direction = case direction' of
            4 -> Just DirLTR
            5 -> Just DirRTL
            6 -> Just DirTTB
            7 -> Just DirBTT
            _ -> Nothing,
        language = Just $ Short.fromString language',
        script = Just $ Short.fromString $ hb_tag_to_string script',
        beginsText = testBit flags' 0, endsText = testBit flags' 1,
        preserveDefaultIgnorables = testBit flags' 2,
        removeDefaultIgnorables = testBit flags' 3,
        don'tInsertDottedCircle = testBit flags' 4,
        clusterLevel = case clusterLevel' of
            1 -> ClusterMonotoneChars
            2 -> ClusterChars
            _ -> ClusterMonotoneGraphemes,
        invisibleGlyph = invisibleGlyph', notFoundGlyph = notFoundGlyph',
        replacementCodepoint = w2c replacementCodepoint'
      }

foreign import ccall "hb_buffer_create" hb_buffer_create :: IO (Ptr Buffer'')
foreign import ccall "&hb_buffer_destroy" hb_buffer_destroy :: FunPtr (Ptr Buffer'' -> IO ())


@@ 184,26 276,79 @@ foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type
    :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction
    :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_script_from_string" hb_script_from_string
    :: Ptr Word8 -> Int -> Int
hb_script_from_txt txt = let Strict.PS ptr offset size = toByteString txt
    in withForeignPtr ptr $ \ptr' -> return $ hb_script_from_string ptr' size
hb_tag_from_string :: String -> Word32
hb_tag_from_string str = case str ++ Prelude.repeat '\0' of
    c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [
        shiftL (c2w c1 .&. 0x7) 24,
        shiftL (c2w c2 .&. 0x7) 16,
        shiftL (c2w c3 .&. 0x7) 8,
        shiftL (c2w c4 .&. 0x7) 0
      ]
    _ -> 0
titlecase :: String -> String
titlecase "" = ""
titlecase (c:cs) = toUpper c : Prelude.map toLower cs
hb_script_from_string str = hb_tag_from_string $ case titlecase str of
    'Q':'a':'a':'i':_ -> "Zinh"
    'Q':'a':'a':'c':_ -> "Copt"

    'A':'r':'a':'n':_ -> "Arab"
    'C':'y':'r':'s':_ -> "Cyrl"
    'G':'e':'o':'k':_ -> "Geor"
    'H':'a':'n':'s':_ -> "Hani"
    'H':'a':'n':'t':_ -> "Hani"
    'J':'a':'m':'o':_ -> "Hang"
    'L':'a':'t':'f':_ -> "Latn"
    'L':'a':'t':'g':_ -> "Latn"
    'S':'y':'r':'e':_ -> "Syrc"
    'S':'y':'r':'j':_ -> "Syrc"
    'S':'y':'r':'n':_ -> "Syrc"
hb_script_from_txt txt = hb_script_from_string $ Short.toString txt
foreign import ccall "hb_buffer_set_script" hb_buffer_set_script
    :: Ptr Buffer'' -> Int -> IO ()
    :: Ptr Buffer'' -> Word32 -> IO ()
foreign import ccall "hb_language_from_string" hb_language_from_string
    :: Ptr Word8 -> Int -> Int
hb_language_from_txt txt = let Strict.PS ptr offset size = toByteString txt
    in withForeignPtr ptr $ \ptr' -> return $ hb_script_from_string ptr' size
    in withForeignPtr ptr $ \ptr' -> return $ hb_language_from_string ptr' size
foreign import ccall "hb_buffer_set_language" hb_buffer_set_language
    :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level
    :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph
    :: Ptr Buffer'' -> Int -> IO ()
    :: Ptr Buffer'' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_not_found_glyph" hb_buffer_set_not_found_glyph
    :: Ptr Buffer'' -> Int -> IO ()
    :: Ptr Buffer'' -> Word32 -> IO ()
foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint
    :: Ptr Buffer'' -> Int -> IO ()
foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties
    :: Ptr Buffer'' -> IO ()


foreign import ccall "hb_buffer_get_content_type" hb_buffer_get_content_type
    :: Ptr Buffer'' -> IO Int
foreign import ccall "hb_buffer_get_direction" hb_buffer_get_direction :: Ptr Buffer'' -> IO Int
foreign import ccall "hb_buffer_get_script" hb_buffer_get_script :: Ptr Buffer'' -> IO Word32
hb_tag_to_string :: Word32 -> String
hb_tag_to_string tag = [
    w2c (shiftR tag 24 .&. 0x7),
    w2c (shiftR tag 16 .&. 0x7),
    w2c (shiftR tag 8 .&. 0x7),
    w2c (shiftR tag 0 .&. 0x7)
  ]
foreign import ccall "hb_buffer_get_language" hb_buffer_get_language :: Ptr Buffer'' -> IO (Ptr ())
foreign import ccall "hb_language_to_string" hb_language_to_string :: Ptr () -> CString
foreign import ccall "hb_buffer_get_flags" hb_buffer_get_flags :: Ptr Buffer'' -> IO Int
foreign import ccall "hb_buffer_get_cluster_level" hb_buffer_get_cluster_level
    :: Ptr Buffer'' -> IO Int
foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos
    :: Ptr Buffer'' -> Ptr Int -> IO (Ptr GlyphInfo)
foreign import ccall "hb_buffer_has_positions" hb_buffer_has_positions :: Ptr Buffer'' -> IO Bool
foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions
    :: Ptr Buffer'' -> Ptr Int -> IO (Ptr GlyphPos)
foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_glyph
    :: Ptr Buffer'' -> IO Word32
foreign import ccall "hb_buffer_get_not_found_glyph" hb_buffer_get_not_found_glyph
    :: Ptr Buffer'' -> IO Word32
foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint
    :: Ptr Buffer'' -> IO Word32

M harfbuzz-pure.cabal => harfbuzz-pure.cabal +1 -1
@@ 60,7 60,7 @@ library
  -- other-extensions:    
  
  -- Other library packages from which modules are imported.
  build-depends:       base >=4.9 && <4.10, bytestring, text, text-short
  build-depends:       base >=4.9 && <4.10, bytestring, text, text-short, utf8-light
  extra-libraries:     harfbuzz
  
  -- Directories containing source files.