~alcinnz/rhapsode

ref: d94a3ecb9788980310707c7bc6a4a66ab68d6d6c rhapsode/src/SSML.hs -rw-r--r-- 5.2 KiB
d94a3ecb — Adrian Cochrane Propagate local attrs through to output. 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
{-# LANGUAGE OverloadedStrings #-}
module SSML(styleToSSML, postorder) where

import Text.XML
import qualified Data.Text as Txt
import Data.Map
import Data.Maybe (isNothing, fromMaybe, fromJust)
import qualified Data.Map as M
import Data.CSS.StyleTree

import Data.CSS.Syntax.Tokens
import Data.Scientific (toRealFloat)
import Data.List (elemIndex)

import SpeechStyle

styleToSSML :: StyleTree SpeechStyle -> Element
styleToSSML = Element "{http://www.w3.org/2001/10/synthesis}speak" M.empty .
    collapseBreaks' . floatBreaks' . styleToNodes
styleToNodes :: StyleTree SpeechStyle -> [Node]
styleToNodes = Prelude.map style . postorder styleToSSML'

styleToSSML' SpeechStyle { speak = False } _ = []
styleToSSML' self@SpeechStyle {content = ""} children = el "prosody" [
        ("xml:lang", lang self),
        ("rhapsode:pseudo", pseudoEl self),
        ("volume", volume self),
        ("rate", rate self),
        ("pitch", pitch2txt <$> pitch self),
        ("range", pitch2txt <$> range self)
    ] $ el "prosody" [
        ("volume", unit2txt <$> volumeAdjust self),
        ("rate", unit2txt <$> rateAdjust self),
        ("pitch", unit2txt <$> (pitchAdjust =<< pitch self)),
        ("range", unit2txt <$> (pitchAdjust =<< range self))
    ] $ el "emphasis" [("level", stress self)] $
    el "say-as" [("interpret-as", speakAs self)] $
    el "tts:style" [
        ("field", (\_ -> "punctuation") <$> speakAs self),
        ("mode", (\b -> if b then "all" else "none") <$> punctuation self)
    ] $ buildVoices (reverse $ voices self) $
    buildBox self children
styleToSSML' style childs = styleToSSML' style {content = ""} (
        pseudo "before" ++ [NodeContent $ content style] ++ pseudo "after")
    where
        pseudo n = [child | child@(NodeElement (Element _ attrs _)) <- childs,
            M.lookup "rhapsode:pseudo" attrs == Just n]

buildVoices (Voice name:voices) children =
    el "voice" [("name", Just name)] $ buildVoices voices children
buildVoices (VoicePattern age gender variant:voices) children = el "voice" [
        ("age", Txt.pack <$> show <$> age),
        ("gender", Just gender),
        ("variant", Txt.pack <$> show <$> variant)
    ] $ buildVoices voices children
buildVoices [] children = children

buildBox self childs = concat [
        breakEl $ pauseBefore self,
        audioEl $ cueBefore self,
        breakEl $ restBefore self,
        el "mark" [("name", marker self)] [],
        childs,
        breakEl $ restAfter self,
        audioEl $ cueAfter self,
        breakEl $ pauseAfter self
    ]
breakEl self = el "break" [("strength", strength self), ("time", unit2txt <$> time self)] []
audioEl NoCue = []
audioEl self = el "prosody" [("volume", unit2txt <$> cueVolume self)] $
    el "audio" [("src", Just $ src self)] []

-- support
postorder :: (a -> [b] -> [b]) -> StyleTree a -> [StyleTree b]
postorder cb (StyleTree self children) =
    [StyleTree self' children' | self' <- cb self $ Prelude.map style children']
  where children' = concat $ Prelude.map (postorder cb) children

el :: Name -> [(Name, Maybe Txt.Text)] -> [Node] -> [Node]
el n attrs children | all (isNothing . snd) attrs = children
    | otherwise = [NodeElement $ Element {
        elementName = n,
        elementAttributes = M.fromList [(k, v) | (k, Just v) <- attrs],
        elementNodes = children
    }]

relativeUnit n | n < 0 = Txt.pack (show n)
    | otherwise = Txt.pack ('+':show n)
unit2txt (Unit' unit n) = relativeUnit n `Txt.append` unit

pitch2txt (Pitch kw _) = kw
pitch2txt (Absolute (Unit' unit n)) = Txt.pack (show n) `Txt.append` unit
pitch2txt (Relative n) = unit2txt n

-- <break> collapse
floatBreaks :: Element -> [Node]
floatBreaks el@(Element _ _ childs)
    | break@(NodeElement (Element "break" _ _)):nodes <- floatBreaks' childs =
        break : floatBreaks el{elementNodes = nodes}
    | break@(NodeElement (Element "break" _ _)):nodes <- reverse $ floatBreaks' childs =
        floatBreaks el{elementNodes = reverse nodes} ++ [break]
    | otherwise = [NodeElement el]
floatBreaks' (NodeElement el:nodes) = floatBreaks el ++ floatBreaks' nodes
floatBreaks' (node:nodes) = node : floatBreaks' nodes
floatBreaks' [] = []

collapseBreaks :: Element -> Element
collapseBreaks (Element name attrs elChildren) =
    Element name attrs $ collapseBreaks' elChildren
collapseBreaks' :: [Node] -> [Node]
collapseBreaks' (
        NodeElement a@(Element "break" _ _):
        NodeElement b@(Element "break" _ _):
        nodes
    ) = NodeElement (maxBreak a b) : collapseBreaks' nodes
collapseBreaks' (NodeElement el:nodes) = NodeElement (collapseBreaks el) : collapseBreaks' nodes
collapseBreaks' (node:nodes) = node:collapseBreaks' nodes
collapseBreaks' [] = []

volumes = ["x-weak", "weak", "medium", "strong", "x-strong"]
maxBreak x@(Element _ a _) y@(Element _ b _)
    | Just a' <- "strength" `M.lookup` a, Just b' <- "strength" `M.lookup` b, a' /= b' =
        if fromJust (elemIndex a' volumes) > fromJust (elemIndex b' volumes) then x else y
    | Just a' <- "time" `M.lookup` a, Just b' <- "time" `M.lookup` b, a' /= b' =
        if toMS (tokenize a') > toMS (tokenize b') then x else y
    | otherwise = x
toMS [Dimension _ n "s"] = cssFloat n * 1000
toMS [Dimension _ n "ms"] = cssFloat n
toMS _ = 0 -- Should never happen.
cssFloat (NVInteger i) = fromInteger i
cssFloat (NVNumber n) = toRealFloat n