module Data.Text.Glyphize.Buffer where import Data.Text.Lazy as Lazy hiding (toUpper, toLower) import Data.ByteString.Lazy as Lazy hiding (toUpper, toLower) import Data.ByteString.Lazy as LBS import Data.Text.Short as Short import Foreign.ForeignPtr import Foreign.Ptr import Foreign.Storable import Foreign.Marshal.Alloc import Foreign.C.Types import Foreign.C.String import Data.Word import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO) import Data.Text.Lazy.Encoding import Data.ByteString.Lazy.Internal as Lazy import Data.ByteString.Internal as Strict hiding (w2c, c2w) import Data.ByteString.Short.Internal as Strict hiding (w2c, c2w) import Data.Bits ((.|.), (.&.), shiftR, shiftL, testBit) import Data.Char (ord, chr, toUpper, toLower) import Control.Monad (forM) import Codec.Binary.UTF8.Light (encodeUTF8, w2c, c2w) data Buffer = Buffer { text :: Either Lazy.Text Lazy.ByteString, -- ^ The Unicode text, in visual order, for HarfBuzz to convert into glyphs. contentType :: Maybe ContentType, -- ^ What the bytes of the ByteString contents represents, -- namely unicode characters (before shaping) or glyphs (result of shaping). -- Typically callers should leave this as `Just ContentTypeUnicode`. direction :: Maybe Direction, -- ^ The text flow direction of the buffer. -- No shaping can happen without setting buffer direction, and it controls -- the visual direction for the output glyphs; for RTL direction the glyphs -- will be reversed. Many layout features depend on the proper setting of -- the direction, for example, reversing RTL text before shaping, -- then shaping with LTR direction is not the same as keeping the text in -- logical order and shaping with RTL direction. script :: Maybe ShortText, -- ^ Script is crucial for choosing the proper shaping behaviour for scripts -- that require it (e.g. Arabic) and the which OpenType features defined in -- the font to be applied. language :: Maybe ShortText, -- ^ Languages are crucial for selecting which OpenType feature to apply to -- the buffer which can result in applying language-specific behaviour. -- Languages are orthogonal to the scripts, and though they are related, -- they are different concepts and should not be confused with each other. beginsText :: Bool, -- ^ special handling of the beginning of text paragraph can be applied to -- this buffer. Should usually be set, unless you are passing to the buffer -- only part of the text without the full context. endsText :: Bool, -- ^ special handling of the end of text paragraph can be applied to this buffer. preserveDefaultIgnorables :: Bool, -- ^ character with Default_Ignorable Unicode property should use the -- corresponding glyph from the font, instead of hiding them (done by -- replacing them with the space glyph and zeroing the advance width.) -- Takes precedance over `removeDefaultIgnorables`. removeDefaultIgnorables :: Bool, -- ^ character with Default_Ignorable Unicode property should be removed -- from glyph string instead of hiding them (done by replacing them with -- the space glyph and zeroing the advance width.) don'tInsertDottedCircle :: Bool, -- ^ a dotted circle should not be inserted in the rendering of incorrect -- character sequences (such at <0905 093E>). clusterLevel :: ClusterLevel, -- ^ dictates one aspect of how HarfBuzz will treat non-base characters -- during shaping. invisibleGlyph :: Char, -- ^ The glyph number that replaces invisible characters in the -- shaping result. If set to zero (default), the glyph for the U+0020 -- SPACE character is used. Otherwise, this value is used verbatim. replacementCodepoint :: Char -- ^ the hb_codepoint_t that replaces invalid entries for a given encoding -- when adding text to buffer . } deriving (Eq, Show, Read) -- | An empty buffer with sensible default properties. defaultBuffer = Buffer { text = Right LBS.empty, contentType = Just ContentTypeUnicode, direction = Nothing, script = Nothing, language = Nothing, beginsText = True, endsText = True, preserveDefaultIgnorables = False, removeDefaultIgnorables = False, don'tInsertDottedCircle = False, clusterLevel = ClusterMonotoneGraphemes, invisibleGlyph = '\0', replacementCodepoint = '\xFFFD' } data ContentType = ContentTypeUnicode | ContentTypeGlyphs deriving (Eq, Show, Read) data Direction = DirLTR | DirRTL | DirTTB | DirBTT deriving (Eq, Show, Read) data ClusterLevel = ClusterMonotoneGraphemes | ClusterMonotoneChars | ClusterChars deriving (Eq, Show, Read) data GlyphInfo = GlyphInfo { codepoint :: Word32, cluster :: Word32 } deriving (Show, Read, Eq) instance Storable GlyphInfo where sizeOf _ = 2 * sizeOf (undefined :: Word32) alignment _ = alignment (undefined :: Word32) peek p = do codepoint' <- peek $ castPtr p cluster' <- peekElemOff (castPtr p) 1 return $ GlyphInfo codepoint' cluster' poke p (GlyphInfo a b) = do q <- return $ castPtr p poke q a pokeElemOff q 1 b data GlyphPos = GlyphPos { x_advance :: Word32, y_advance :: Word32, x_offset :: Word32, y_offset :: Word32 } deriving (Show, Read, Eq) instance Storable GlyphPos where sizeOf _ = 4 * sizeOf (undefined :: Word32) alignment _ = alignment (undefined :: Word32) peek p = do q <- return $ castPtr p xa <- peek q ya <- peekElemOff q 1 xoff <- peekElemOff q 2 yoff <- peekElemOff q 3 return $ GlyphPos xa ya xoff yoff poke p (GlyphPos xa ya xoff yoff) = do q <- return $ castPtr p poke q xa pokeElemOff q 1 ya pokeElemOff q 2 xoff pokeElemOff q 3 yoff guessSegmentProperties :: Buffer -> Buffer guessSegmentProperties = thaw . freeze scriptHorizontalDir :: ShortText -> Maybe Direction scriptHorizontalDir script = int2dir $ hb_script_get_horizontal_direction $ hb_script_from_txt script int2dir 4 = Just DirLTR int2dir 5 = Just DirRTL int2dir 6 = Just DirTTB int2dir 7 = Just DirBTT int2dir _ = Nothing dir2int Nothing = 0 dir2int (Just DirLTR) = 4 dir2int (Just DirRTL) = 5 dir2int (Just DirTTB) = 6 dir2int (Just DirBTT) = 7 dirReverse DirLTR = DirRTL dirReverse DirRTL = DirLTR dirReverse DirTTB = DirBTT dirReverse DirBTT = DirTTB dirBackward dir = dir `Prelude.elem` [DirRTL, DirBTT] dirForward dir = dir `Prelude.elem` [DirLTR, DirTTB] dirHorizontal dir = dir `Prelude.elem` [DirLTR, DirRTL] dirVertical dir = dir `Prelude.elem` [DirTTB, DirBTT] --- type Buffer' = ForeignPtr Buffer'' data Buffer'' type Buffer_ = Ptr Buffer'' freeze = unsafePerformIO . freeze' freeze' buf = do buffer <- hb_buffer_create case text buf of Right bs -> hb_buffer_add_bytestring buffer bs -- Convert text to bytestring for now due to the text 2.0 UTF-8 transition. -- Unfortunately this may prevent Harfbuzz from reading opening context -- So for correctness we'll eventually want to depend on text>2.0 Left txt -> hb_buffer_add_bytestring buffer $ encodeUtf8 txt hb_buffer_set_content_type buffer $ case contentType buf of Nothing -> 0 Just ContentTypeUnicode -> 1 Just ContentTypeGlyphs -> 2 hb_buffer_set_direction buffer $ dir2int $ direction buf case script buf of Just script' -> hb_buffer_set_script buffer $ hb_script_from_txt script' Nothing -> return () case language buf of Just lang' -> hb_buffer_set_language buffer =<< hb_language_from_txt lang' Nothing -> return () hb_buffer_set_flags buffer $ Prelude.foldl (.|.) 0 [ if beginsText buf then 1 else 0, if endsText buf then 2 else 0, if preserveDefaultIgnorables buf then 4 else 0, if removeDefaultIgnorables buf then 8 else 0, if don'tInsertDottedCircle buf then 16 else 0 ] hb_buffer_set_cluster_level buffer $ case clusterLevel buf of ClusterMonotoneGraphemes -> 0 ClusterMonotoneChars -> 1 ClusterChars -> 2 hb_buffer_set_invisible_glyph buffer $ c2w $ invisibleGlyph buf hb_buffer_set_replacement_codepoint buffer $ c2w $ replacementCodepoint buf case (contentType buf, direction buf, script buf, language buf) of (Just ContentTypeUnicode, Nothing, _, _) -> hb_buffer_guess_segment_properties buffer (Just ContentTypeUnicode, _, Nothing, _) -> hb_buffer_guess_segment_properties buffer (Just ContentTypeUnicode, _, _, Nothing) -> hb_buffer_guess_segment_properties buffer newForeignPtr hb_buffer_destroy buffer glyphInfos :: Buffer' -> [GlyphInfo] glyphInfos = unsafePerformIO . glyphInfos' glyphInfos' :: Buffer' -> IO [GlyphInfo] glyphInfos' buf' = alloca $ \length' -> do arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_infos buf'' length' length <- peek length' if length == 0 then return [] else forM [0..length - 1] $ peekElemOff arr glyphsPos = unsafePerformIO . glyphsPos' glyphsPos' buf' = alloca $ \length' -> do arr <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_glyph_positions buf'' length' length <- peek length' if length == 0 then return [] else forM [0..length-1] $ peekElemOff arr thaw :: Buffer' -> Buffer thaw = unsafePerformIO . thaw' thaw' buf' = do let getter cb = unsafeInterleaveIO $ withForeignPtr buf' cb glyphInfos' <- glyphInfos' buf' contentType' <- getter hb_buffer_get_content_type direction' <- getter hb_buffer_get_direction script' <- getter hb_buffer_get_script language' <- unsafeInterleaveIO $ do lang <- withForeignPtr buf' $ \buf'' -> hb_buffer_get_language buf'' peekCString $ hb_language_to_string lang flags' <- getter hb_buffer_get_flags clusterLevel' <- getter hb_buffer_get_cluster_level invisibleGlyph' <- getter hb_buffer_get_invisible_glyph replacementCodepoint' <- getter hb_buffer_get_replacement_codepoint return Buffer { text = Right $ LBS.fromStrict $ encodeUTF8 $ Prelude.map codepoint glyphInfos', contentType = case contentType' of 1 -> Just ContentTypeUnicode 2 -> Just ContentTypeGlyphs _ -> Nothing, direction = int2dir direction', language = Just $ Short.fromString language', script = Just $ Short.fromString $ hb_tag_to_string script', beginsText = testBit flags' 0, endsText = testBit flags' 1, preserveDefaultIgnorables = testBit flags' 2, removeDefaultIgnorables = testBit flags' 3, don'tInsertDottedCircle = testBit flags' 4, clusterLevel = case clusterLevel' of 1 -> ClusterMonotoneChars 2 -> ClusterChars _ -> ClusterMonotoneGraphemes, invisibleGlyph = w2c invisibleGlyph', replacementCodepoint = w2c replacementCodepoint' } foreign import ccall "hb_buffer_create" hb_buffer_create :: IO Buffer_ foreign import ccall "&hb_buffer_destroy" hb_buffer_destroy :: FunPtr (Buffer_ -> IO ()) foreign import ccall "hb_buffer_add_utf8" hb_buffer_add_utf8 :: Buffer_ -> Ptr Word8 -> Int -> Int -> Int -> IO () hb_buffer_add_bytestring _ Lazy.Empty = return () hb_buffer_add_bytestring buf (Lazy.Chunk (Strict.PS ptr offset length) next) = do withForeignPtr ptr $ \ptr' -> hb_buffer_add_utf8 buf ptr' length offset (length - offset) hb_buffer_add_bytestring buf next foreign import ccall "hb_buffer_set_content_type" hb_buffer_set_content_type :: Buffer_ -> Int -> IO () foreign import ccall "hb_buffer_set_direction" hb_buffer_set_direction :: Buffer_ -> Int -> IO () hb_tag_from_string :: String -> Word32 hb_tag_from_string str = case str ++ Prelude.repeat '\0' of c1:c2:c3:c4:_ -> Prelude.foldl (.|.) 0 [ shiftL (c2w c1 .&. 0x7) 24, shiftL (c2w c2 .&. 0x7) 16, shiftL (c2w c3 .&. 0x7) 8, shiftL (c2w c4 .&. 0x7) 0 ] _ -> 0 titlecase :: String -> String titlecase "" = "" titlecase (c:cs) = toUpper c : Prelude.map toLower cs hb_script_from_string str = hb_tag_from_string $ case titlecase str of 'Q':'a':'a':'i':_ -> "Zinh" 'Q':'a':'a':'c':_ -> "Copt" 'A':'r':'a':'n':_ -> "Arab" 'C':'y':'r':'s':_ -> "Cyrl" 'G':'e':'o':'k':_ -> "Geor" 'H':'a':'n':'s':_ -> "Hani" 'H':'a':'n':'t':_ -> "Hani" 'J':'a':'m':'o':_ -> "Hang" 'L':'a':'t':'f':_ -> "Latn" 'L':'a':'t':'g':_ -> "Latn" 'S':'y':'r':'e':_ -> "Syrc" 'S':'y':'r':'j':_ -> "Syrc" 'S':'y':'r':'n':_ -> "Syrc" hb_script_from_txt txt = hb_script_from_string $ Short.toString txt foreign import ccall "hb_buffer_set_script" hb_buffer_set_script :: Buffer_ -> Word32 -> IO () foreign import ccall "hb_script_get_horizontal_direction" hb_script_get_horizontal_direction :: Word32 -> Int foreign import ccall "hb_language_from_string" hb_language_from_string :: Ptr Word8 -> Int -> Int hb_language_from_txt txt = let Strict.PS ptr offset size = toByteString txt in withForeignPtr ptr $ \ptr' -> return $ hb_language_from_string (plusPtr ptr' offset) (size - offset) foreign import ccall "hb_buffer_set_language" hb_buffer_set_language :: Buffer_ -> Int -> IO () foreign import ccall "hb_buffer_set_flags" hb_buffer_set_flags :: Buffer_ -> Int -> IO () foreign import ccall "hb_buffer_set_cluster_level" hb_buffer_set_cluster_level :: Buffer_ -> Int -> IO () foreign import ccall "hb_buffer_set_invisible_glyph" hb_buffer_set_invisible_glyph :: Buffer_ -> Word32 -> IO () foreign import ccall "hb_buffer_set_replacement_codepoint" hb_buffer_set_replacement_codepoint :: Buffer_ -> Word32 -> IO () foreign import ccall "hb_buffer_guess_segment_properties" hb_buffer_guess_segment_properties :: Buffer_ -> IO () foreign import ccall "hb_buffer_get_content_type" hb_buffer_get_content_type :: Buffer_ -> IO Int foreign import ccall "hb_buffer_get_direction" hb_buffer_get_direction :: Buffer_ -> IO Int foreign import ccall "hb_buffer_get_script" hb_buffer_get_script :: Buffer_ -> IO Word32 hb_tag_to_string :: Word32 -> String hb_tag_to_string tag = [ w2c (shiftR tag 24 .&. 0x7), w2c (shiftR tag 16 .&. 0x7), w2c (shiftR tag 8 .&. 0x7), w2c (shiftR tag 0 .&. 0x7) ] foreign import ccall "hb_buffer_get_language" hb_buffer_get_language :: Buffer_ -> IO (Ptr ()) foreign import ccall "hb_language_to_string" hb_language_to_string :: Ptr () -> CString foreign import ccall "hb_buffer_get_flags" hb_buffer_get_flags :: Buffer_ -> IO Int foreign import ccall "hb_buffer_get_cluster_level" hb_buffer_get_cluster_level :: Buffer_ -> IO Int foreign import ccall "hb_buffer_get_glyph_infos" hb_buffer_get_glyph_infos :: Buffer_ -> Ptr Int -> IO (Ptr GlyphInfo) foreign import ccall "hb_buffer_get_glyph_positions" hb_buffer_get_glyph_positions :: Buffer_ -> Ptr Int -> IO (Ptr GlyphPos) foreign import ccall "hb_buffer_get_invisible_glyph" hb_buffer_get_invisible_glyph :: Buffer_ -> IO Word32 foreign import ccall "hb_buffer_get_replacement_codepoint" hb_buffer_get_replacement_codepoint :: Buffer_ -> IO Word32