From fa07e2d4c0541ef85d3737e9e7887a77476a2e0e Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Thu, 14 Jan 2021 19:18:26 +1300 Subject: [PATCH] Switch to event-based input & integrate CMU PocketSphinx. --- src/main.c | 80 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/src/main.c b/src/main.c index 26d4956..d632c8f 100644 --- a/src/main.c +++ b/src/main.c @@ -61,18 +61,6 @@ int choose_format(char *path) { return SF_FORMAT_WAV; } -#define BUFFER_LEN 1024 -#define MAX_CHANNELS 6 -// FIXME convert samplerate -int read_mono(SNDFILE *fd, SF_INFO *info, short *out) { - sf_count_t count = sf_read_short(fd, out, BUFFER_LEN); - if (info->channels == 1) return count; - - int i = 0; - for (int j = 0; j < count; j += info->channels) out[i++] = out[j]; - return i; -} - int paragraph_no = 0, section_no = 0; int tablerow = 0, tablecol = 0, tableno = 0, in_table = 0; int capture_marks(short *wav, int numsamples, espeak_EVENT *events) { @@ -195,7 +183,7 @@ int levenshtein_distance(const char *a, const char* b) { int MAX_DIST = 3; // Is this ideal? char **links = NULL; -char *select_link(char *command) { +char *select_link(const char *command) { // Pass 1, min distance int score = INT_MAX; for (int i = 0; strcmp(links[i], " ") != 0; i++) { @@ -312,6 +300,33 @@ gboolean read_stdin(GIOChannel *source, GIOCondition condition, gpointer data) { } return TRUE; } +static gboolean stt_bus_call(GstBus * bus, GstMessage * msg, gpointer data) { + switch (GST_MESSAGE_TYPE(msg)) { + case GST_MESSAGE_EOS: + fprintf(stderr, "Audio input ended. Please type commands.\n"); + return FALSE; + case GST_MESSAGE_ERROR: { + gchar *debug; + GError *error; + gst_message_parse_error(msg, &error, &debug); + g_free(debug); + + fprintf(stderr, "Audio input error. Please type commands.\n%s\n", error->message); + g_error_free(error); + return FALSE; + } + } + + const GstStructure *st = gst_message_get_structure(msg); + if (st && strcmp(gst_structure_get_name(st), "pocketsphinx") == 0) { + const gchar *cmd = g_value_get_string(gst_structure_get_value(st, "hypothesis")); + gboolean final = g_value_get_boolean(gst_structure_get_value(st, "final")); + char *uri = select_link(cmd); + if (uri != NULL && final) read_page(uri, 1); + } + + return TRUE; +} int speak_finalize(char *ssml) { while (read_keyboard) { @@ -340,6 +355,8 @@ void write_links() { } void read_page(char *uri, int read_input) { + if (uri == NULL) return; // Leave things as-is... + for (int i = 0; strcmp(links[i], " ") != 0; i++) free(links[i]); free(links); @@ -391,6 +408,7 @@ int main(int argc, char **argv) { hs_init(&argc, &argv); gst_init(&argc, &argv); GMainLoop *loop = g_main_loop_new(NULL, FALSE); + guint bus_watch_id = 0; tcgetattr(0, &stored_settings); #ifdef WITH_SPEECHD @@ -401,9 +419,9 @@ int main(int argc, char **argv) { int c; opterr = 0; #ifdef WITH_SPEECHD - while ((c = getopt(argc, argv, "xs::l::L:kw::dvh")) != -1) { + while ((c = getopt(argc, argv, "xs::l::L:kKw::dvh")) != -1) { #else - while ((c = getopt(argc, argv, "xs::l::kw::h")) != -1) { + while ((c = getopt(argc, argv, "xs::l::kKw::vh")) != -1) { #endif switch (c) { case 'x': @@ -418,15 +436,11 @@ int main(int argc, char **argv) { case 'L': logpath = optarg; break; + case 'K': + read_keyboard = 0; + break; case 'k': read_keyboard = 1; - // Read input character by character, not line by line. - no_echo = stored_settings; - no_echo.c_lflag &= (~ICANON); - no_echo.c_lflag &= (~ECHO); - no_echo.c_cc[VTIME] = 0; - no_echo.c_cc[VMIN] = 1; - tcsetattr(0, TCSANOW, &no_echo); break; case 'w': use_espeak = 1; @@ -458,10 +472,9 @@ int main(int argc, char **argv) { g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL); g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL); - // FIXME: Implement callback function - /*GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); - bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, loop); // FIXME clean this up on shutdown. - gst_object_unref(bus);*/ + GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(stt_pipeline)); + bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, NULL); + gst_object_unref(bus); gst_bin_add_many(GST_BIN(stt_pipeline), src, convert, resample, decoder, sink, NULL); gst_element_link_many(src, convert, resample, decoder, sink, NULL); @@ -475,7 +488,8 @@ int main(int argc, char **argv) { fprintf(stderr, "\t\t\thttps://xkcd.com/1692/\n"); fprintf(stderr, "\t-l\tlinks\tWrite extracted links to specifed file or stdout as TSV.\n"); fprintf(stderr, "\t-L\tlog\tWrite (append) network request timing to specified filepath.\n"); - fprintf(stderr, "\t-k\tkeyboard\tRead arrow key navigation & links from stdin.\n"); + fprintf(stderr, "\t-k\tkeyboard\tRead arrow key navigation & links from stdin. Default behaviour, noop.\n"); + fprintf(stderr, "\t-K\tDon't read input from stdin."); fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n"); #ifdef WITH_SPEECHD fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n"); @@ -487,6 +501,15 @@ int main(int argc, char **argv) { return c == 'h' ? 0 : 1; } } + if (read_keyboard) { + // Read input character by character, not line by line. + no_echo = stored_settings; + no_echo.c_lflag &= (~ICANON); + no_echo.c_lflag &= (~ECHO); + no_echo.c_cc[VTIME] = 0; + no_echo.c_cc[VMIN] = 1; + tcsetattr(0, TCSANOW, &no_echo); + } if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr; #ifdef WITH_SPEECHD if (fd_ssml == NULL && fd_links == NULL && spd_conn == NULL && !use_espeak) @@ -515,6 +538,8 @@ int main(int argc, char **argv) { } else for (int i = optind; i < argc; i++) read_page(argv[i], i+1 == argc); g_main_loop_run(loop); + if (bus_watch_id) g_source_remove(bus_watch_id); + g_main_loop_unref(loop); #ifdef WITH_SPEECHD if (spd_conn != NULL) spd_close(spd_conn); @@ -528,6 +553,7 @@ int main(int argc, char **argv) { if (path_wav != NULL) sf_close(fd_wav); espeak_ng_Terminate(); + if (stt_pipeline != NULL) gst_object_unref(GST_OBJECT(stt_pipeline)); c_freePage(referer); c_freeSession(session); hs_exit(); -- 2.30.2