From 0844f28d1fcacde4f2b830ff192d501bb18eab54 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Tue, 12 Jan 2021 20:10:48 +1300 Subject: [PATCH] Initialize GStreamer for CMU PocketSphinx speech-to-text. --- rhapsode.cabal | 3 +-- src/main.c | 35 +++++++++++++++++++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/rhapsode.cabal b/rhapsode.cabal index 1dcf0cf..2e887f2 100644 --- a/rhapsode.cabal +++ b/rhapsode.cabal @@ -78,8 +78,7 @@ executable rhapsode main-is: main.c ghc-options: -no-hs-main -threaded extra-libraries: espeak-ng, sndfile - -- pkgconfig-depends: pocketsphinx, sphinxbase -- FIXME errors out - cc-options: -I/usr/include/sphinxbase -I/usr/include/pocketsphinx -I/usr/include/x86_64-linux-gnu -I/usr/include/x86_64-linux-gnu/sphinxbase -lpocketsphinx -lsphinxbase -lsphinxad + pkgconfig-depends: gstreamer-1.0 build-depends: base >=4.9 && <=4.12, rhapsode other-modules: Stub hs-source-dirs: src diff --git a/src/main.c b/src/main.c index f670eda..b9c657d 100644 --- a/src/main.c +++ b/src/main.c @@ -10,7 +10,8 @@ #include #include -#include +#include +#include // #define WITH_SPEECHD // FIXME Doesn't support audio cues, navigation, or even read the full page. #ifdef WITH_SPEECHD @@ -341,6 +342,8 @@ FILE *parse_opt_file() { int main(int argc, char **argv) { int speak_err = 0; hs_init(&argc, &argv); + gst_init(&argc, &argv); + GMainLoop *loop = g_main_loop_new(NULL, FALSE); char *mimes = "text/html text/xml application/xml application/xhtml+xml text/plain"; char *logpath = NULL; @@ -351,11 +354,12 @@ int main(int argc, char **argv) { #ifdef WITH_SPEECHD SPDConnection *spd_conn = NULL; #endif + GstElement *stt_pipeline = NULL; int c; opterr = 0; #ifdef WITH_SPEECHD - while ((c = getopt(argc, argv, "xs::l::L:kw::dh")) != -1) { + while ((c = getopt(argc, argv, "xs::l::L:kw::dvh")) != -1) { #else while ((c = getopt(argc, argv, "xs::l::kw::h")) != -1) { #endif @@ -393,6 +397,33 @@ int main(int argc, char **argv) { spd_set_data_mode(spd_conn, SPD_DATA_SSML); break; #endif + case 'v': + stt_pipeline = gst_pipeline_new("speech2text"); + GstElement *src = gst_element_factory_make("autoaudiosrc", "microphone"); + GstElement *convert = gst_element_factory_make("audioconvert", "convert"); + GstElement *resample = gst_element_factory_make("audioresample", "resample"); + GstElement *decoder = gst_element_factory_make("pocketsphinx", "asr"); + GstElement *sink = gst_element_factory_make("fakesink", "output"); + + if (!src || !convert || !resample || !decoder || !sink) { + fprintf(stderr, "Failed to initialize voice recognition. You'll just have to type your commands.\n"); + gst_object_unref(stt_pipeline); + stt_pipeline = NULL; + break; + } + + // FIXME: Set something more appropriate + g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL); + g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL); + + // FIXME: Implement callback function + /*GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); + bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, loop); // FIXME clean this up on shutdown. + gst_object_unref(bus);*/ + + gst_bin_add_many(GST_BIN(stt_pipeline), src, convert, resample, decoder, sink, NULL); + gst_element_link_many(src, convert, resample, decoder, sink, NULL); + break; case '?': fprintf(stderr, "Invalid flag %c\n\n", optopt); case 'h': -- 2.30.2