From 6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Mon, 7 Jun 2021 15:43:04 +1200 Subject: [PATCH] Add wakeword detection. Currently uses Voice2JSON/Mycroft Precise's default 'Hey mycroft' --- src/Links.hs | 12 +++++++ src/main.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 97 insertions(+), 11 deletions(-) diff --git a/src/Links.hs b/src/Links.hs index b5803e3..2a4b0fd 100644 --- a/src/Links.hs +++ b/src/Links.hs @@ -286,6 +286,18 @@ c_formatLink c_label c_title c_url = do prosody attrs txt = el "prosody" attrs [NodeContent txt] style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner] +foreign export ccall c_dading :: IO CString + +c_dading = do + sfx <- getXdgDirectory XdgCache "rhapsode" + let link_el = audio (sfx "link.wav") + let root = el "speak" [] [NodeElement link_el] + let ssml = renderText def $ Document (Prologue [] Nothing []) root [] + newCString $ LTxt.unpack ssml + where + el name attrs childs = Element name (M.fromList attrs) childs + audio src = el "audio" [("src", pack src)] [] + --- For Voice2JSON foreign export ccall c_dataDir :: CString -> IO CString diff --git a/src/main.c b/src/main.c index 383687e..fca7328 100644 --- a/src/main.c +++ b/src/main.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -40,6 +41,7 @@ char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME se int c_ssmlHasMark(char*, char*); char *c_formatLink(char *label, char *title, char *url); +char *c_dading(); char *c_dataDir(char *subdir); char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs. @@ -220,9 +222,33 @@ char *select_link(char **links, const char *command) { return NULL; } +pid_t v2j_waitwake(char *v2j_profile) { + pid_t pid = fork(); + if (pid == 0) {/* child */ + int pipefds[2]; + // Silence standard pipes, not a biggy if it fails. 
+ if (pipe(pipefds) == 0) dup2(pipefds[0], 0); + if (pipe(pipefds) == 0) dup2(pipefds[1], 1); + if (pipe(pipefds) == 0) dup2(pipefds[1], 2); + execlp("voice2json", "voice2json", "--profile", v2j_profile, "wait-wake", "--exit-count", "1", NULL); + } + return pid; +} + +volatile pid_t pid_waitwake = 0; +static void check_sigchld(int sig) { + pid_t pid = 0; + do { + pid = waitpid(-1, NULL, WNOHANG); + if (pid == pid_waitwake) pid_waitwake = 0; + } while (pid > 0); +} + struct termios stored_settings, no_echo; int read_keyboard = 1; +int use_wakeword = 1; int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) { + // Keyboard input mainloop. while (read_keyboard) { if (out_link != NULL && *out_link != NULL) return 0; @@ -289,9 +315,15 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) speak(ssml, paragraph_no > 0 ? mark : NULL, NULL); break; } - } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') { + } else if ((ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') || + (use_wakeword && pid_waitwake == 0)) { + espeak_Cancel(); + speak(c_dading(), NULL, NULL); char *line = c_recognizeIntent(v2j_profile); - if (out_link != NULL) *out_link = select_link(links, line); + if (out_link != NULL) { + *out_link = select_link(links, line); + if (*out_link == NULL) pid_waitwake = v2j_waitwake(v2j_profile); + } free(line); } else { // Read in a line @@ -310,6 +342,20 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) tcsetattr(0, TCSANOW, &no_echo); } } + + // Wakeword mainloop + while (use_wakeword && out_link != NULL) { + int status; + pid_t child = v2j_waitwake(v2j_profile); + if (child < 0) continue; // error + if (waitpid(child, &status, 0) == 0) continue; + espeak_Cancel(); + char *line = c_recognizeIntent(v2j_profile); + *out_link = select_link(links, line); + if (*out_link != NULL) return 0; + } + + // Otherwise, wait until eSpeak has had its say. 
espeak_ng_STATUS result = espeak_ng_Synchronize(); if (result != ENS_OK) { espeak_ng_PrintStatusCodeMessage(result, stderr, context); @@ -369,9 +415,9 @@ int main(int argc, char **argv) { int c; opterr = 0; #ifdef WITH_SPEECHD - while ((c = getopt(argc, argv, "xs::l::L:kKvVw::dh")) != -1) { + while ((c = getopt(argc, argv, "xs::l::L:kKv::VWw::dh")) != -1) { #else - while ((c = getopt(argc, argv, "xs::l::kKv::Vw::h")) != -1) { + while ((c = getopt(argc, argv, "xs::l::kKv::VWw::h")) != -1) { #endif switch (c) { case 'x': @@ -400,6 +446,9 @@ int main(int argc, char **argv) { if (optarg != NULL) v2j_profile = optarg; validate_v2j_profile = 1; break; + case 'W': + use_wakeword = 0; + break; case 'w': use_espeak = 1; path_wav = optarg; @@ -424,6 +473,7 @@ int main(int argc, char **argv) { fprintf(stderr, "\t-K\t\tDon't read input from stdin."); fprintf(stderr, "\t-v\tvoice in\tEnsure voice input is enabled & optionally sets the Voice2JSON profile.\n"); fprintf(stderr, "\t-V\t\tDon't listen for voice input.\n"); + fprintf(stderr, "\t-W\t\tNo wakeword\tDon't listen for the configured/trained wakeword regardless if voice recognition is enabled.\n"); fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n"); #ifdef WITH_SPEECHD fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n"); @@ -436,13 +486,13 @@ int main(int argc, char **argv) { } } if (read_keyboard) { - // Read input character by character, not line by line. - no_echo = stored_settings; - no_echo.c_lflag &= (~ICANON); - no_echo.c_lflag &= (~ECHO); - no_echo.c_cc[VTIME] = 0; - no_echo.c_cc[VMIN] = 1; - tcsetattr(0, TCSANOW, &no_echo); + // Read input character by character, not line by line. 
+ no_echo = stored_settings; + no_echo.c_lflag &= (~ICANON); + no_echo.c_lflag &= (~ECHO); + no_echo.c_cc[VTIME] = 0; + no_echo.c_cc[VMIN] = 1; + tcsetattr(0, TCSANOW, &no_echo); } if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr; #ifdef WITH_SPEECHD @@ -521,6 +571,25 @@ read_uri: speak_err = espeak_ng_Synchronize(); if (speak_err == 0) select_link(links, ""); } + + if (read_keyboard && use_wakeword && v2j_profile != NULL && *v2j_profile != 0) { + // Interrupt read when `voice2json wait-wake` exits. + pid_waitwake = v2j_waitwake(v2j_profile); + if (pid_waitwake < 0) { + fprintf(stderr, "Failed to run wakeword detection.\n"); + use_wakeword = 0; + } + else { + struct sigaction act; + memset(&act, 0, sizeof(act)); + act.sa_handler = check_sigchld; + if (sigaction(SIGCHLD, &act, 0)) { + fprintf(stderr, "Failed to wait upon wakeword detection.\n"); + use_wakeword = 0; + } + } + } + if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri); if (uri != NULL) goto read_uri; @@ -528,6 +597,11 @@ read_uri: if (spd_conn != NULL) spd_close(spd_conn); #endif + if (pid_waitwake > 0) { + kill(pid_waitwake, SIGTERM); + kill(pid_waitwake+1, SIGTERM); // Yuck, likely fragile! Also kill voice2json wait-wake's subprocesses. + } + c_freePage(referer); c_freeSession(session); hs_exit(); -- 2.30.2