~alcinnz/rhapsode

ref: 888d9d25377ac1f373c5c65b4172d62e15c05189 rhapsode/src/SSML.hs -rw-r--r-- 5.2 KiB
888d9d25 — Adrian Cochrane Save feedback regarding form design. 4 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
{-# LANGUAGE OverloadedStrings #-}
module SSML(styleToSSML, postorder) where

import Text.XML
import qualified Data.Text as Txt
import Data.Map
import Data.Maybe (isNothing, fromMaybe, fromJust)
import qualified Data.Map as M
import Data.CSS.StyleTree

import Data.CSS.Syntax.Tokens
import Data.Scientific (toRealFloat)
import Data.List (elemIndex)

import SpeechStyle

styleToSSML :: StyleTree SpeechStyle -> Element
styleToSSML = Element "{http://www.w3.org/2001/10/synthesis}speak" M.empty .
    collapseBreaks' . floatBreaks' . styleToNodes
styleToNodes :: StyleTree SpeechStyle -> [Node]
styleToNodes = Prelude.map style . postorder styleToSSML'

styleToSSML' SpeechStyle { speak = False } _ = []
styleToSSML' self@SpeechStyle {content = ""} children = el "prosody" [
        ("rhapsode:pseudo", pseudoEl self),
        ("volume", volume self),
        ("rate", rate self),
        ("pitch", pitch2txt <$> pitch self),
        ("range", pitch2txt <$> range self)
    ] $ el "prosody" [
        ("volume", unit2txt <$> volumeAdjust self),
        ("rate", unit2txt <$> rateAdjust self),
        ("pitch", unit2txt <$> (pitchAdjust =<< pitch self)),
        ("range", unit2txt <$> (pitchAdjust =<< range self))
    ] $ el "emphasis" [("level", stress self)] $
    el "say-as" [("interpret-as", speakAs self)] $
    el "tts:style" [
        ("field", (\_ -> "punctuation") <$> speakAs self),
        ("mode", (\b -> if b then "all" else "none") <$> punctuation self)
    ] $ buildVoices (reverse $ voices self) $
    buildBox self children
styleToSSML' style childs = styleToSSML' style {content = ""} (
        pseudo "before" ++ [NodeContent $ content style] ++ pseudo "after")
    where
        pseudo n = [child | child@(NodeElement (Element _ attrs _)) <- childs,
            M.lookup "rhapsode:pseudo" attrs == Just n]

buildVoices (Voice name:voices) children =
    el "voice" [("name", Just name)] $ buildVoices voices children
buildVoices (VoicePattern age gender variant:voices) children = el "voice" [
        ("age", Txt.pack <$> show <$> age),
        ("gender", Just gender),
        ("variant", Txt.pack <$> show <$> variant)
    ] $ buildVoices voices children
buildVoices [] children = children

buildBox self childs = concat [
        breakEl $ pauseBefore self,
        audioEl $ cueBefore self,
        breakEl $ restBefore self,
        el "mark" [("name", marker self)] [],
        childs,
        breakEl $ restAfter self,
        audioEl $ cueAfter self,
        breakEl $ pauseAfter self
    ]
breakEl self = el "break" [("strength", strength self), ("time", unit2txt <$> time self)] []
audioEl NoCue = []
audioEl self = el "prosody" [("volume", unit2txt <$> cueVolume self)] $
    el "audio" [("src", Just $ src self)] []

-- support
postorder :: (a -> [b] -> [b]) -> StyleTree a -> [StyleTree b]
postorder cb (StyleTree self children) =
    [StyleTree self' children' | self' <- cb self $ Prelude.map style children']
  where children' = concat $ Prelude.map (postorder cb) children

el :: Name -> [(Name, Maybe Txt.Text)] -> [Node] -> [Node]
el n attrs children | all (isNothing . snd) attrs = children
    | otherwise = [NodeElement $ Element {
        elementName = n,
        elementAttributes = M.fromList [(k, v) | (k, Just v) <- attrs],
        elementNodes = children
    }]

relativeUnit n | n < 0 = Txt.pack (show n)
    | otherwise = Txt.pack ('+':show n)
unit2txt (Unit' unit n) = relativeUnit n `Txt.append` unit

pitch2txt (Pitch kw _) = kw
pitch2txt (Absolute (Unit' unit n)) = Txt.pack (show n) `Txt.append` unit
pitch2txt (Relative n) = unit2txt n

-- <break> collapse
floatBreaks :: Element -> [Node]
floatBreaks el@(Element _ _ childs)
    | break@(NodeElement (Element "break" _ _)):nodes <- floatBreaks' childs =
        break : floatBreaks el{elementNodes = nodes}
    | break@(NodeElement (Element "break" _ _)):nodes <- reverse $ floatBreaks' childs =
        floatBreaks el{elementNodes = reverse nodes} ++ [break]
    | otherwise = [NodeElement el]
floatBreaks' (NodeElement el:nodes) = floatBreaks el ++ floatBreaks' nodes
floatBreaks' (node:nodes) = node : floatBreaks' nodes
floatBreaks' [] = []

collapseBreaks :: Element -> Element
collapseBreaks (Element name attrs elChildren) =
    Element name attrs $ collapseBreaks' elChildren
collapseBreaks' :: [Node] -> [Node]
collapseBreaks' (
        NodeElement a@(Element "break" _ _):
        NodeElement b@(Element "break" _ _):
        nodes
    ) = NodeElement (maxBreak a b) : collapseBreaks' nodes
collapseBreaks' (NodeElement el:nodes) = NodeElement (collapseBreaks el) : collapseBreaks' nodes
collapseBreaks' (node:nodes) = node:collapseBreaks' nodes
collapseBreaks' [] = []

volumes = ["x-weak", "weak", "medium", "strong", "x-strong"]
maxBreak x@(Element _ a _) y@(Element _ b _)
    | Just a' <- "strength" `M.lookup` a, Just b' <- "strength" `M.lookup` b, a' /= b' =
        if fromJust (elemIndex a' volumes) > fromJust (elemIndex b' volumes) then x else y
    | Just a' <- "time" `M.lookup` a, Just b' <- "time" `M.lookup` b, a' /= b' =
        if toMS (tokenize a') > toMS (tokenize b') then x else y
    | otherwise = x
toMS [Dimension _ n "s"] = cssFloat n * 1000
toMS [Dimension _ n "ms"] = cssFloat n
toMS _ = 0 -- Should never happen.
cssFloat (NVInteger i) = fromInteger i
cssFloat (NVNumber n) = toRealFloat n