~alcinnz/rhapsode

fa07e2d4c0541ef85d3737e9e7887a77476a2e0e — Adrian Cochrane 3 years ago d57e119
Switch to event-based input & integrate CMU PocketSphinx.
1 files changed, 53 insertions(+), 27 deletions(-)

M src/main.c
M src/main.c => src/main.c +53 -27
@@ 61,18 61,6 @@ int choose_format(char *path) {
    return SF_FORMAT_WAV;
}

#define		BUFFER_LEN	1024
#define		MAX_CHANNELS	6
// FIXME convert samplerate
// Read the next chunk of audio from `fd` into `out`, reduced to a single channel.
//
// At most BUFFER_LEN interleaved samples are read per call; for multi-channel
// files only the first channel of each frame is kept, compacted in place at
// the start of `out`.
//
// fd:   open libsndfile handle to read from.
// info: format description for `fd`; only `channels` is consulted.
// out:  caller-provided buffer with room for at least BUFFER_LEN shorts.
//
// Returns the number of mono samples now stored in `out` (0 once nothing more
// could be read, or when `info` reports no channels).
int read_mono(SNDFILE *fd, SF_INFO *info, short *out) {
    // A non-positive channel count would make the downmix loop below never
    // advance, so there is nothing sensible to read.
    if (info->channels <= 0) return 0;

    // libsndfile rejects item counts that are not a whole number of frames,
    // so round the request down to a multiple of the channel count
    // (BUFFER_LEN is not divisible by 3, 5 or 6).
    sf_count_t wanted = BUFFER_LEN - (BUFFER_LEN % info->channels);
    sf_count_t count = sf_read_short(fd, out, wanted);
    if (info->channels == 1) return count;

    // Keep the first sample of every frame, overwriting the buffer in place.
    int kept = 0;
    for (sf_count_t j = 0; j < count; j += info->channels) out[kept++] = out[j];
    return kept;
}

int paragraph_no = 0, section_no = 0;
int tablerow = 0, tablecol = 0, tableno = 0, in_table = 0;
int capture_marks(short *wav, int numsamples, espeak_EVENT *events) {


@@ 195,7 183,7 @@ int levenshtein_distance(const char *a, const char* b) {

// Edit-distance threshold — presumably the largest levenshtein_distance() at
// which a command still counts as matching a link; TODO confirm, its use is
// not visible in this excerpt.
int MAX_DIST = 3; // Is this ideal?
// Heap-allocated array of heap-allocated strings. Code elsewhere in this file
// walks it until it reaches an entry equal to " ", which acts as terminator.
char **links = NULL;
char *select_link(char *command) {
char *select_link(const char *command) {
    // Pass 1, min distance
    int score = INT_MAX;
    for (int i = 0; strcmp(links[i], " ") != 0; i++) {


@@ 312,6 300,33 @@ gboolean read_stdin(GIOChannel *source, GIOCondition condition, gpointer data) {
    }
    return TRUE;
}
// Bus watch callback for the speech-to-text pipeline.
//
// End-of-stream and error messages are reported on stderr. Element messages
// named "pocketsphinx" are treated as voice commands: the "hypothesis" text is
// passed to select_link(), and if that yields a URI and the message is marked
// "final" the page is read via read_page().
//
// bus:  unused.
// msg:  the message delivered by the bus.
// data: unused.
//
// Returns FALSE after end-of-stream or an error and TRUE otherwise. Per the
// GstBus documentation a FALSE return removes the watch.
static gboolean stt_bus_call(GstBus * bus, GstMessage * msg, gpointer data) {
    switch (GST_MESSAGE_TYPE(msg)) {
    case GST_MESSAGE_EOS:
        fprintf(stderr, "Audio input ended. Please type commands.\n");
        return FALSE;
    case GST_MESSAGE_ERROR: {
        gchar *debug = NULL;
        GError *error = NULL;
        gst_message_parse_error(msg, &error, &debug);
        g_free(debug);

        // Never dereference or free a GError that was not filled in.
        fprintf(stderr, "Audio input error. Please type commands.\n%s\n",
                error != NULL ? error->message : "");
        if (error != NULL) g_error_free(error);
        return FALSE;
      }
    default:
        // Every other message type falls through to the structure check.
        break;
    }

    const GstStructure *st = gst_message_get_structure(msg);
    if (st == NULL || strcmp(gst_structure_get_name(st), "pocketsphinx") != 0)
        return TRUE;

    // The typed accessors report a missing or mistyped field instead of
    // handing back a value read through a NULL GValue.
    const gchar *cmd = gst_structure_get_string(st, "hypothesis");
    gboolean final = FALSE;
    if (!gst_structure_get_boolean(st, "final", &final)) final = FALSE;

    // Without hypothesis text there is nothing to match against the links.
    if (cmd == NULL) return TRUE;

    char *uri = select_link(cmd);
    if (uri != NULL && final) read_page(uri, 1);

    return TRUE;
}

int speak_finalize(char *ssml) {
    while (read_keyboard) {


@@ 340,6 355,8 @@ void write_links() {
}

void read_page(char *uri, int read_input) {
    if (uri == NULL) return; // Leave things as-is...

    for (int i = 0; strcmp(links[i], " ") != 0; i++) free(links[i]);
    free(links);



@@ 391,6 408,7 @@ int main(int argc, char **argv) {
    hs_init(&argc, &argv);
    gst_init(&argc, &argv);
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);
    guint bus_watch_id = 0;

    tcgetattr(0, &stored_settings);
    #ifdef WITH_SPEECHD


@@ 401,9 419,9 @@ int main(int argc, char **argv) {
    int c;
    opterr = 0;
    #ifdef WITH_SPEECHD
    while ((c = getopt(argc, argv, "xs::l::L:kw::dvh")) != -1) {
    while ((c = getopt(argc, argv, "xs::l::L:kKw::dvh")) != -1) {
    #else
    while ((c = getopt(argc, argv, "xs::l::kw::h")) != -1) {
    while ((c = getopt(argc, argv, "xs::l::kKw::vh")) != -1) {
    #endif
        switch (c) {
        case 'x':


@@ 418,15 436,11 @@ int main(int argc, char **argv) {
        case 'L':
            logpath = optarg;
            break;
        case 'K':
            read_keyboard = 0;
            break;
        case 'k':
            read_keyboard = 1;
            // Read input character by character, not line by line.
            no_echo = stored_settings;
            no_echo.c_lflag &= (~ICANON);
            no_echo.c_lflag &= (~ECHO);
            no_echo.c_cc[VTIME] = 0;
            no_echo.c_cc[VMIN] = 1;
            tcsetattr(0, TCSANOW, &no_echo);
            break;
        case 'w':
            use_espeak = 1;


@@ 458,10 472,9 @@ int main(int argc, char **argv) {
            g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL);
            g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL);

            // FIXME: Implement callback function
            /*GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline));
            bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, loop); // FIXME clean this up on shutdown.
            gst_object_unref(bus);*/
            GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(stt_pipeline));
            bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, NULL);
            gst_object_unref(bus);

            gst_bin_add_many(GST_BIN(stt_pipeline), src, convert, resample, decoder, sink, NULL);
            gst_element_link_many(src, convert, resample, decoder, sink, NULL);


@@ 475,7 488,8 @@ int main(int argc, char **argv) {
            fprintf(stderr, "\t\t\thttps://xkcd.com/1692/\n");
            fprintf(stderr, "\t-l\tlinks\tWrite extracted links to specifed file or stdout as TSV.\n");
            fprintf(stderr, "\t-L\tlog\tWrite (append) network request timing to specified filepath.\n");
            fprintf(stderr, "\t-k\tkeyboard\tRead arrow key navigation & links from stdin.\n");
            fprintf(stderr, "\t-k\tkeyboard\tRead arrow key navigation & links from stdin. Default behaviour, noop.\n");
            fprintf(stderr, "\t-K\tDon't read input from stdin.");
            fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n");
            #ifdef WITH_SPEECHD
            fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n");


@@ 487,6 501,15 @@ int main(int argc, char **argv) {
            return c == 'h' ? 0 : 1;
        }
    }
    if (read_keyboard) {
            // Read input character by character, not line by line.
            no_echo = stored_settings;
            no_echo.c_lflag &= (~ICANON);
            no_echo.c_lflag &= (~ECHO);
            no_echo.c_cc[VTIME] = 0;
            no_echo.c_cc[VMIN] = 1;
            tcsetattr(0, TCSANOW, &no_echo);
    }
    if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr;
    #ifdef WITH_SPEECHD
    if (fd_ssml == NULL && fd_links == NULL && spd_conn == NULL && !use_espeak)


@@ 515,6 538,8 @@ int main(int argc, char **argv) {
    } else for (int i = optind; i < argc; i++) read_page(argv[i], i+1 == argc);

    g_main_loop_run(loop);
    if (bus_watch_id) g_source_remove(bus_watch_id);
    g_main_loop_unref(loop);

    #ifdef WITH_SPEECHD
    if (spd_conn != NULL) spd_close(spd_conn);


@@ 528,6 553,7 @@ int main(int argc, char **argv) {
    if (path_wav != NULL) sf_close(fd_wav);
    espeak_ng_Terminate();

    if (stt_pipeline != NULL) gst_object_unref(GST_OBJECT(stt_pipeline));
    c_freePage(referer);
    c_freeSession(session);
    hs_exit();