~alcinnz/rhapsode: Add wakeword detection. Currently uses the default 'Hey Mycroft' wakeword from Voice2JSON/Mycroft Precise.

2 files changed, 97 insertions(+), 11 deletions(-)

M src/Links.hs
M src/main.c

M src/Links.hs => src/Links.hs +12 -0

@@ 286,6 286,18 @@ c_formatLink c_label c_title c_url = do
     prosody attrs txt = el "prosody" attrs [NodeContent txt]
     style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]
 
+foreign export ccall c_dading :: IO CString
+
+-- | Render a minimal SSML document whose only content is an @\<audio\>@
+-- element pointing at @link.wav@ inside the XDG cache directory for
+-- @rhapsode@, and hand it to C as a freshly allocated C string.
+--
+-- The string comes from 'newCString', so ownership passes to the caller.
+c_dading = do
+    cacheDir <- getXdgDirectory XdgCache "rhapsode"
+    let chime = element "audio" [("src", pack $ cacheDir </> "link.wav")] []
+        speakRoot = element "speak" [] [NodeElement chime]
+        document = Document (Prologue [] Nothing []) speakRoot []
+    newCString . LTxt.unpack $ renderText def document
+  where
+    -- Build an XML element from a tag name, an attribute list and children.
+    element tag attrs kids = Element tag (M.fromList attrs) kids
+
 --- For Voice2JSON
 
 foreign export ccall c_dataDir :: CString -> IO CString

M src/main.c => src/main.c +85 -11

@@ 6,6 6,7 @@
 #include <termios.h>
 #include <limits.h>
 #include <ctype.h>
+#include <sys/wait.h>
 
 #include <dirent.h>
 #include <errno.h>


@@ 40,6 41,7 @@ char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME se
 
 int c_ssmlHasMark(char*, char*);
 char *c_formatLink(char *label, char *title, char *url);
+char *c_dading();
 char *c_dataDir(char *subdir);
 char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs.
 


@@ 220,9 222,33 @@ char *select_link(char **links, const char *command) {
     return NULL;
 }
 
+/* Start `voice2json wait-wake` in a child process, so that the child's exit
+ * can be used as the "wakeword heard" notification.
+ *
+ * v2j_profile: forwarded to voice2json as its `--profile` argument.
+ * Returns the child's PID in the parent, or a negative value if fork() failed.
+ * Never returns in the child: it either becomes voice2json or exits. */
+pid_t v2j_waitwake(char *v2j_profile) {
+    pid_t pid = fork();
+    if (pid == 0) {/* child */
+        int pipefds[2];
+        // Silence standard pipes, not a biggy if it fails.
+        if (pipe(pipefds) == 0) dup2(pipefds[0], 0);
+        if (pipe(pipefds) == 0) dup2(pipefds[1], 1);
+        if (pipe(pipefds) == 0) dup2(pipefds[1], 2);
+        // The sentinel of a variadic exec call must be a null *pointer*.
+        execlp("voice2json", "voice2json", "--profile", v2j_profile, "wait-wake", "--exit-count", "1", (char *)NULL);
+        // execlp() only returns on failure. Exit right away so the child does
+        // not return 0 to the caller and carry on as a second copy of the
+        // parent; _exit() skips atexit handlers and stdio flushing that
+        // belong to the parent. 127 mirrors the shell's "command not found".
+        _exit(127);
+    }
+    return pid;
+}
+
+/* PID of the running `voice2json wait-wake` child. check_sigchld() resets it
+ * to 0 once that child has been reaped. */
+volatile pid_t pid_waitwake = 0;
+/* SIGCHLD handler: reaps every child that has exited, without blocking, and
+ * clears pid_waitwake when the wakeword listener is one of them. */
+static void check_sigchld(int sig) {
+    (void) sig;
+    // waitpid() may overwrite errno (e.g. ECHILD); keep the interrupted
+    // code's value intact.
+    int saved_errno = errno;
+    pid_t pid;
+    // Only a genuinely reaped child (pid > 0) is compared, so waitpid()'s
+    // 0 / -1 results can never match a 0 or failed-fork (-1) value stored
+    // in pid_waitwake.
+    while ((pid = waitpid(-1, NULL, WNOHANG)) > 0) {
+        if (pid == pid_waitwake) pid_waitwake = 0;
+    }
+    errno = saved_errno;
+}
+
 struct termios stored_settings, no_echo;
 int read_keyboard = 1;
+int use_wakeword = 1;
 int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) {
+    // Keyboard input mainloop.
     while (read_keyboard) {
         if (out_link != NULL && *out_link != NULL) return 0;
 


@@ 289,9 315,15 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
                 speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
                 break;
             }
-        } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') {
+        } else if ((ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') ||
+                (use_wakeword && pid_waitwake == 0)) {
+            espeak_Cancel();
+            speak(c_dading(), NULL, NULL);
             char *line = c_recognizeIntent(v2j_profile);
-            if (out_link != NULL) *out_link = select_link(links, line);
+            if (out_link != NULL) {
+                *out_link = select_link(links, line);
+                if (*out_link == NULL) pid_waitwake = v2j_waitwake(v2j_profile);
+            }
             free(line);
         } else {
             // Read in a line


@@ 310,6 342,20 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
             tcsetattr(0, TCSANOW, &no_echo);
         }
     }
+
+    // Wakeword mainloop
+    while (use_wakeword && out_link != NULL) {
+        int status;
+        pid_t child = v2j_waitwake(v2j_profile);
+        if (child < 0) continue; // error
+        if (waitpid(child, &status, 0) == 0) continue;
+        espeak_Cancel();
+        char *line = c_recognizeIntent(v2j_profile);
+        *out_link = select_link(links, line);
+        if (*out_link != NULL) return 0;
+    }
+
+    // Otherwise, wait until eSpeak has had its say.
     espeak_ng_STATUS result = espeak_ng_Synchronize();
     if (result != ENS_OK) {
         espeak_ng_PrintStatusCodeMessage(result, stderr, context);


@@ 369,9 415,9 @@ int main(int argc, char **argv) {
     int c;
     opterr = 0;
     #ifdef WITH_SPEECHD
-    while ((c = getopt(argc, argv, "xs::l::L:kKvVw::dh")) != -1) {
+    while ((c = getopt(argc, argv, "xs::l::L:kKv::VWw::dh")) != -1) {
     #else
-    while ((c = getopt(argc, argv, "xs::l::kKv::Vw::h")) != -1) {
+    while ((c = getopt(argc, argv, "xs::l::kKv::VWw::h")) != -1) {
     #endif
         switch (c) {
         case 'x':


@@ 400,6 446,9 @@ int main(int argc, char **argv) {
             if (optarg != NULL) v2j_profile = optarg;
             validate_v2j_profile = 1;
             break;
+        case 'W':
+            use_wakeword = 0;
+            break;
         case 'w':
             use_espeak = 1;
             path_wav = optarg;


@@ 424,6 473,7 @@ int main(int argc, char **argv) {
             fprintf(stderr, "\t-K\t\tDon't read input from stdin.");
             fprintf(stderr, "\t-v\tvoice in\tEnsure voice input is enabled & optionally sets the Voice2JSON profile.\n");
             fprintf(stderr, "\t-V\t\tDon't listen for voice input.\n");
+            fprintf(stderr, "\t-W\t\tNo wakeword\tDon't listen for the configured/trained wakeword regardless if voice recognition is enabled.\n");
             fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n");
             #ifdef WITH_SPEECHD
             fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n");


@@ 436,13 486,13 @@ int main(int argc, char **argv) {
         }
     }
     if (read_keyboard) {
-            // Read input character by character, not line by line.
-            no_echo = stored_settings;
-            no_echo.c_lflag &= (~ICANON);
-            no_echo.c_lflag &= (~ECHO);
-            no_echo.c_cc[VTIME] = 0;
-            no_echo.c_cc[VMIN] = 1;
-            tcsetattr(0, TCSANOW, &no_echo);
+        // Read input character by character, not line by line.
+        no_echo = stored_settings;
+        no_echo.c_lflag &= (~ICANON);
+        no_echo.c_lflag &= (~ECHO);
+        no_echo.c_cc[VTIME] = 0;
+        no_echo.c_cc[VMIN] = 1;
+        tcsetattr(0, TCSANOW, &no_echo);
     }
     if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr;
     #ifdef WITH_SPEECHD


@@ 521,6 571,25 @@ read_uri:
         speak_err = espeak_ng_Synchronize();
         if (speak_err == 0) select_link(links, "");
     }
+
+    if (read_keyboard && use_wakeword && v2j_profile != NULL && *v2j_profile != 0) {
+        // Interrupt read when `voice2json wait-wake` exits.
+        pid_waitwake = v2j_waitwake(v2j_profile);
+        if (pid_waitwake < 0) {
+            fprintf(stderr, "Failed to run wakeword detection.\n");
+            use_wakeword = 0;
+        }
+        else {
+            struct sigaction act;
+            memset(&act, 0, sizeof(act));
+            act.sa_handler = check_sigchld;
+            if (sigaction(SIGCHLD, &act, 0)) {
+                fprintf(stderr, "Failed to wait upon wakeword detection.\n");
+                use_wakeword = 0;
+            }
+        }
+    }
+
     if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri);
     if (uri != NULL) goto read_uri;
 


@@ 528,6 597,11 @@ read_uri:
     if (spd_conn != NULL) spd_close(spd_conn);
     #endif
 
+    if (pid_waitwake > 0) {
+        kill(pid_waitwake, SIGTERM);
+        kill(pid_waitwake+1, SIGTERM); // Yuck, likely fragile! Also kill voice2json wait-wake's subprocesses.
+    }
+
     c_freePage(referer);
     c_freeSession(session);
     hs_exit();