~alcinnz/rhapsode

6cee31ba85e77a7edfcf7e3d8f32bb4b004c481d — Adrian Cochrane 2 years ago 17a4d92
Add wakeword detection. Currently uses Voice2JSON/Mycroft Precise's default 'Hey mycroft'
2 files changed, 97 insertions(+), 11 deletions(-)

M src/Links.hs
M src/main.c
M src/Links.hs => src/Links.hs +12 -0
@@ 286,6 286,18 @@ c_formatLink c_label c_title c_url = do
    prosody attrs txt = el "prosody" attrs [NodeContent txt]
    style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]

foreign export ccall c_dading :: IO CString

-- | Render a tiny SSML document whose only content is an @<audio>@ element
-- pointing at @link.wav@ inside the "rhapsode" XDG cache directory, and hand
-- it back as a freshly allocated C string.
c_dading = do
    cacheDir <- getXdgDirectory XdgCache "rhapsode"
    newCString $ LTxt.unpack $ renderText def $ document cacheDir
  where
    -- Whole document: empty prologue/epilogue around the <speak> root.
    document dir = Document (Prologue [] Nothing []) (speakRoot dir) []
    -- <speak> root wrapping the single <audio> child.
    speakRoot dir = element "speak" [] [NodeElement $ chime dir]
    -- <audio src=".../link.wav"/>
    chime dir = element "audio" [("src", pack (dir </> "link.wav"))] []
    element tag attributes children = Element tag (M.fromList attributes) children

--- For Voice2JSON

foreign export ccall c_dataDir :: CString -> IO CString

M src/main.c => src/main.c +85 -11
@@ 6,6 6,7 @@
#include <termios.h>
#include <limits.h>
#include <ctype.h>
#include <sys/wait.h>

#include <dirent.h>
#include <errno.h>


@@ 40,6 41,7 @@ char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME se

int c_ssmlHasMark(char*, char*);
char *c_formatLink(char *label, char *title, char *url);
char *c_dading();
char *c_dataDir(char *subdir);
char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs.



@@ 220,9 222,33 @@ char *select_link(char **links, const char *command) {
    return NULL;
}

/*
 * Launch `voice2json --profile <v2j_profile> wait-wake --exit-count 1` in a
 * forked child process.
 *
 * Returns the child's PID in the parent, or a negative value if fork() failed.
 * The call does not wait for the child; callers reap it themselves.
 */
pid_t v2j_waitwake(char *v2j_profile) {
    pid_t pid = fork();
    if (pid == 0) {/* child */
        int pipefds[2];
        // Silence standard pipes, not a biggy if it fails.
        if (pipe(pipefds) == 0) dup2(pipefds[0], 0);
        if (pipe(pipefds) == 0) dup2(pipefds[1], 1);
        if (pipe(pipefds) == 0) dup2(pipefds[1], 2);
        execlp("voice2json", "voice2json", "--profile", v2j_profile, "wait-wake", "--exit-count", "1", (char *) NULL);
        // execlp() only returns on failure (e.g. voice2json isn't installed).
        // Terminate the child here; otherwise it would return 0 to the caller
        // and keep running as a duplicate of the parent process.
        // _exit() rather than exit() so the parent's atexit handlers and
        // stdio buffers are not run/flushed a second time.
        _exit(127);
    }
    return pid;
}

volatile pid_t pid_waitwake = 0;
static void check_sigchld(int sig) {
    pid_t pid = 0;
    do {
        pid = waitpid(-1, NULL, WNOHANG);
        if (pid == pid_waitwake) pid_waitwake = 0;
    } while (pid > 0);
}

struct termios stored_settings, no_echo;
int read_keyboard = 1;
int use_wakeword = 1;
int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) {
    // Keyboard input mainloop.
    while (read_keyboard) {
        if (out_link != NULL && *out_link != NULL) return 0;



@@ 289,9 315,15 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
                speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
                break;
            }
        } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') {
        } else if ((ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') ||
                (use_wakeword && pid_waitwake == 0)) {
            espeak_Cancel();
            speak(c_dading(), NULL, NULL);
            char *line = c_recognizeIntent(v2j_profile);
            if (out_link != NULL) *out_link = select_link(links, line);
            if (out_link != NULL) {
                *out_link = select_link(links, line);
                if (*out_link == NULL) pid_waitwake = v2j_waitwake(v2j_profile);
            }
            free(line);
        } else {
            // Read in a line


@@ 310,6 342,20 @@ int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link)
            tcsetattr(0, TCSANOW, &no_echo);
        }
    }

    // Wakeword mainloop
    while (use_wakeword && out_link != NULL) {
        int status;
        pid_t child = v2j_waitwake(v2j_profile);
        if (child < 0) continue; // error
        if (waitpid(child, &status, 0) == 0) continue;
        espeak_Cancel();
        char *line = c_recognizeIntent(v2j_profile);
        *out_link = select_link(links, line);
        if (*out_link != NULL) return 0;
    }

    // Otherwise, wait until eSpeak has had its say.
    espeak_ng_STATUS result = espeak_ng_Synchronize();
    if (result != ENS_OK) {
        espeak_ng_PrintStatusCodeMessage(result, stderr, context);


@@ 369,9 415,9 @@ int main(int argc, char **argv) {
    int c;
    opterr = 0;
    #ifdef WITH_SPEECHD
    while ((c = getopt(argc, argv, "xs::l::L:kKvVw::dh")) != -1) {
    while ((c = getopt(argc, argv, "xs::l::L:kKv::VWw::dh")) != -1) {
    #else
    while ((c = getopt(argc, argv, "xs::l::kKv::Vw::h")) != -1) {
    while ((c = getopt(argc, argv, "xs::l::kKv::VWw::h")) != -1) {
    #endif
        switch (c) {
        case 'x':


@@ 400,6 446,9 @@ int main(int argc, char **argv) {
            if (optarg != NULL) v2j_profile = optarg;
            validate_v2j_profile = 1;
            break;
        case 'W':
            use_wakeword = 0;
            break;
        case 'w':
            use_espeak = 1;
            path_wav = optarg;


@@ 424,6 473,7 @@ int main(int argc, char **argv) {
            fprintf(stderr, "\t-K\t\tDon't read input from stdin.");
            fprintf(stderr, "\t-v\tvoice in\tEnsure voice input is enabled & optionally sets the Voice2JSON profile.\n");
            fprintf(stderr, "\t-V\t\tDon't listen for voice input.\n");
            fprintf(stderr, "\t-W\t\tNo wakeword\tDon't listen for the configured/trained wakeword regardless if voice recognition is enabled.\n");
            fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n");
            #ifdef WITH_SPEECHD
            fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n");


@@ 436,13 486,13 @@ int main(int argc, char **argv) {
        }
    }
    if (read_keyboard) {
            // Read input character by character, not line by line.
            no_echo = stored_settings;
            no_echo.c_lflag &= (~ICANON);
            no_echo.c_lflag &= (~ECHO);
            no_echo.c_cc[VTIME] = 0;
            no_echo.c_cc[VMIN] = 1;
            tcsetattr(0, TCSANOW, &no_echo);
        // Read input character by character, not line by line.
        no_echo = stored_settings;
        no_echo.c_lflag &= (~ICANON);
        no_echo.c_lflag &= (~ECHO);
        no_echo.c_cc[VTIME] = 0;
        no_echo.c_cc[VMIN] = 1;
        tcsetattr(0, TCSANOW, &no_echo);
    }
    if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr;
    #ifdef WITH_SPEECHD


@@ 521,6 571,25 @@ read_uri:
        speak_err = espeak_ng_Synchronize();
        if (speak_err == 0) select_link(links, "");
    }

    if (read_keyboard && use_wakeword && v2j_profile != NULL && *v2j_profile != 0) {
        // Interrupt read when `voice2json wait-wake` exits.
        pid_waitwake = v2j_waitwake(v2j_profile);
        if (pid_waitwake < 0) {
            fprintf(stderr, "Failed to run wakeword detection.\n");
            use_wakeword = 0;
        }
        else {
            struct sigaction act;
            memset(&act, 0, sizeof(act));
            act.sa_handler = check_sigchld;
            if (sigaction(SIGCHLD, &act, 0)) {
                fprintf(stderr, "Failed to wait upon wakeword detection.\n");
                use_wakeword = 0;
            }
        }
    }

    if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri);
    if (uri != NULL) goto read_uri;



@@ 528,6 597,11 @@ read_uri:
    if (spd_conn != NULL) spd_close(spd_conn);
    #endif

    if (pid_waitwake > 0) {
        kill(pid_waitwake, SIGTERM);
        kill(pid_waitwake+1, SIGTERM); // Yuck, likely fragile! Also kill voice2json wait-wake's subprocesses.
    }

    c_freePage(referer);
    c_freeSession(session);
    hs_exit();