#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <termios.h>
#include <limits.h>
#include "HsFFI.h"
#include <espeak-ng/espeak_ng.h>
#include <sndfile.h>
#include <gst/gst.h>
#include <glib.h>
// #define WITH_SPEECHD // FIXME Doesn't support audio cues, navigation, or even read the full page.
#ifdef WITH_SPEECHD
#include <speechd_types.h>
#include <libspeechd.h>
#endif
/*
 * Exported Haskell functions/types.
 *
 * These are not defined in this file; "HsFFI.h" is included above and main()
 * brackets their use with hs_init()/hs_exit(). `struct session` and
 * `struct page` are opaque here: they are only ever handled by pointer.
 * NOTE(review): ownership and lifetime rules are set by the Haskell side —
 * the notes below only record how this file uses each function.
 */
struct session;
struct session *c_newSession();
void c_freeSession(struct session*);
struct page;
struct page *c_initialReferer();
// Called as c_fetchURL(session, mimes, referer, uri); the result is used as a `struct page*`.
void *c_fetchURL(struct session*, char*, struct page*, char*);
//struct page **c_fetchURLs(struct session*, struct page*, char**); // FIXME segfaults.
void c_freePage(struct page*);
struct session *c_enableLogging(struct session*);
// Called as c_writeLog(logpath, session).
void c_writeLog(char*, struct session*);
// The returned string is what gets written to the SSML output and spoken;
// this file releases it with free().
char *c_renderDoc(struct session*, struct page*, _Bool);
// The returned array is read in groups of three strings (label, title, URL)
// and ends at an entry equal to " "; this file free()s the entries and the array.
char **c_extractLinks(struct page*);
char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME segfaults.
// Called as c_ssmlHasMark(mark, ssml); a non-zero result is treated as "the mark exists".
int c_ssmlHasMark(char*, char*);
// The returned string is spoken and then released with free().
char *c_formatLink(char *label, char *title, char *url);
// Called with "about:welcome" when no URL was given on the command line.
char *c_lastVisited(char*);
/* espeak-ng integration. Based on the espeak-ng command source code. */
// libsndfile handle of the recording; stays NULL until save_audio() opens it.
SNDFILE *fd_wav = NULL;
// Path of the recording (taken from the -w option); when NULL, speak_initialize()
// selects direct audio playback instead of the save_audio() callback.
char *path_wav = NULL;
// Sample rate used when opening the recording; set by speak_initialize() and
// updated by save_audio() from espeakEVENT_SAMPLERATE events.
static int samplerate;
// Error context filled in by espeak_ng_Initialize() and passed to
// espeak_ng_PrintStatusCodeMessage() when reporting failures.
espeak_ng_ERROR_CONTEXT context;
/*
 * Picks the libsndfile major format whose file extension matches the end of
 * `path` (e.g. "talk.flac" selects the format whose extension is "flac").
 *
 * path: NUL-terminated output path; must not be NULL.
 * Returns the matching major format code, or SF_FORMAT_WAV when no known
 * extension matches.
 */
int choose_format(char *path) {
    SF_FORMAT_INFO format_info;
    int count = 0;
    size_t path_len = strlen(path);

    sf_command(fd_wav, SFC_GET_FORMAT_MAJOR_COUNT, &count, sizeof (int));
    for (int k = 0; k < count; k++) {
        format_info.format = k;
        // Skip entries libsndfile could not describe rather than reading stale data.
        if (sf_command(fd_wav, SFC_GET_FORMAT_MAJOR, &format_info, sizeof (format_info)) != 0) continue;
        if (format_info.extension == NULL) continue;

        size_t ext_len = strlen(format_info.extension);
        // The path needs room for at least ".<extension>"; this also keeps
        // `suffix - 1` inside the string.
        if (ext_len == 0 || path_len <= ext_len) continue;

        // Compare the tail of the path (not the whole path) with the extension.
        const char *suffix = path + path_len - ext_len;
        if (suffix[-1] == '.' && strcmp(suffix, format_info.extension) == 0)
            return format_info.format;
    }
    return SF_FORMAT_WAV;
}
// Paragraph and section numbers most recently parsed out of a mark event by
// capture_marks(); the arrow-key handlers adjust them to build the next mark name.
int paragraph_no = 0;
int section_no = 0;
// Table coordinates most recently parsed out of a table-cell mark, and whether
// the latest mark seen was such a table cell.
int tablerow = 0;
int tablecol = 0;
int tableno = 0;
int in_table = 0;
/*
 * espeak synthesis callback that tracks the reading position.
 *
 * Walks the event list (terminated by an event of type 0) and, for every mark
 * event, parses the mark name into the global position counters:
 *   "-rhaps-paragraph<N>"            -> paragraph_no
 *   "-rhaps-section<N>"              -> section_no
 *   "-rhaps-tablecell<T>:<R>x<C>"    -> tableno, tablerow, tablecol, in_table = 1
 * Any mark that is not a table cell clears in_table.
 *
 * wav, numsamples: unused here (the audio is handled elsewhere).
 * Always returns 0.
 */
int capture_marks(short *wav, int numsamples, espeak_EVENT *events) {
    (void) wav;
    (void) numsamples;

    for (; events->type != 0; events++) {
        if (events->type != espeakEVENT_MARK) continue;

        in_table = 0;
        const char *name = events->id.name;
        if (name == NULL) continue;

        if (sscanf(name, "-rhaps-paragraph%i", &paragraph_no) == 1) {}
        else if (sscanf(name, "-rhaps-section%i", &section_no) == 1) {}
        // %d rather than %i: with %i a row of 0 followed by the 'x' separator
        // ("0x3") would be consumed as one hexadecimal number.
        else if (sscanf(name, "-rhaps-tablecell%d:%dx%d",
                        &tableno, &tablerow, &tablecol) == 3) {in_table = 1;}
    }
    return 0;
}
/*
 * espeak synthesis callback used when recording to a file.
 *
 * Updates the reading position via capture_marks(), picks up sample-rate
 * events, lazily opens the output file named by path_wav on the first audio
 * chunk, and appends the samples to it.
 *
 * wav: 16-bit samples, or NULL when there is no audio in this call.
 * numsamples: number of samples in `wav`.
 * events: event list terminated by an event of type 0.
 * Returns 0 to continue synthesis, or 1 (abort) when the output file cannot
 * be opened.
 */
int save_audio(short *wav, int numsamples, espeak_EVENT *events) {
    capture_marks(wav, numsamples, events);
    if (wav == NULL) return 0;

    for (; events->type != 0; events++) {
        if (events->type == espeakEVENT_SAMPLERATE) samplerate = events->id.number;
    }

    if (fd_wav == NULL) {
        SF_INFO info;
        // Zero the fields we do not set explicitly instead of passing stack garbage.
        memset(&info, 0, sizeof info);
        info.samplerate = samplerate;
        info.channels = 1;
        info.format = choose_format(path_wav) | SF_FORMAT_PCM_16 | SF_ENDIAN_LITTLE;
        fd_wav = sf_open(path_wav, SFM_WRITE, &info);
        if (fd_wav == NULL) {
            // Without a file there is nothing useful left to synthesise into.
            fprintf(stderr, "Failed to open %s for writing: %s\n", path_wav, sf_strerror(NULL));
            return 1;
        }
    }

    if (numsamples > 0) sf_writef_short(fd_wav, wav, numsamples);
    return 0;
}
/*
 * Brings up espeak-ng and selects where the audio goes.
 *
 * With a recording path set (path_wav != NULL) synthesis runs synchronously
 * and save_audio() receives the samples; otherwise audio is played directly
 * and capture_marks() only tracks the reading position.
 *
 * Returns 0 on success, 2 if espeak-ng failed to initialise, 3 if the output
 * could not be initialised. Failures are reported on stderr.
 */
int speak_initialize() {
    espeak_ng_InitializePath(NULL);
    context = NULL;

    espeak_ng_STATUS status = espeak_ng_Initialize(&context);
    if (status != ENS_OK) {
        espeak_ng_PrintStatusCodeMessage(status, stderr, context);
        espeak_ng_ClearErrorContext(&context);
        return 2;
    }

    const int to_file = (path_wav != NULL);
    status = espeak_ng_InitializeOutput(
        to_file ? ENOUTPUT_MODE_SYNCHRONOUS : ENOUTPUT_MODE_SPEAK_AUDIO, 0, NULL);
    // The callback is installed before the status is examined.
    espeak_SetSynthCallback(to_file ? save_audio : capture_marks);
    if (status != ENS_OK) {
        espeak_ng_PrintStatusCodeMessage(status, stderr, context);
        return 3;
    }

    samplerate = espeak_ng_GetSampleRate();
    return 0;
}
/*
 * Speaks an SSML document, optionally starting from a named mark.
 *
 * ssml: NUL-terminated SSML text.
 * mark: preferred mark to start from, or NULL.
 * fallback: mark to try when `mark` is NULL or absent from the document, or NULL.
 * When neither mark is usable the document is spoken from the beginning.
 */
void speak(char *ssml, char *mark, char* fallback) {
    const int flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE | espeakCHARS_UTF8 | espeakSSML;

    // Decide where to start: preferred mark, then fallback, else the top.
    char *start = NULL;
    if (mark != NULL && c_ssmlHasMark(mark, ssml)) {
        start = mark;
    } else if (fallback != NULL && c_ssmlHasMark(fallback, ssml)) {
        start = fallback;
    }

    const size_t size = strlen(ssml)+1; // include the terminating NUL
    if (start == NULL) {
        espeak_Synth(ssml, size, 0, POS_CHARACTER, 0, flags, NULL, NULL);
    } else {
        espeak_Synth_Mark(ssml, size, start, 0, flags, NULL, NULL);
    }
}
/*
 * Speaks plain (non-SSML) text, optionally with one voice parameter adjusted.
 *
 * text: NUL-terminated text to speak.
 * param: parameter to adjust for this utterance; 0 means "adjust nothing".
 * value: adjustment, applied relative to the parameter's current setting.
 * After queuing the text the parameter is set back to whatever
 * espeak_GetParameter(param, 0) reports (presumably the default value —
 * see speak_lib.h).
 */
void speak_text(char *text, espeak_PARAMETER param, int value) {
    const int adjust = (param != 0);
    const int flags = espeakCHARS_AUTO | espeakPHONEMES | espeakENDPAUSE | espeakCHARS_UTF8;

    if (adjust) {
        espeak_SetParameter(param, value, /* relative */1);
    }

    espeak_Synth(text, strlen(text)+1, 0, POS_CHARACTER, 0, flags, NULL, NULL);

    if (adjust) {
        int restored = espeak_GetParameter(param, /* current */0);
        espeak_SetParameter(param, restored, /* relative */0);
    }
}
/* Utilities */
// Exchanges the values of two lvalues `a` and `b` of the given type.
// Wrapped in do/while(0) so that `SWAP(...);` is a single statement and stays
// valid as the body of an if/else; arguments are parenthesised.
#define SWAP(type, a, b) do { type swap_tmp_ = (a); (a) = (b); (b) = swap_tmp_; } while (0)
// Returns the smaller of two ints.
int min(int a, int b) {return a < b ? a : b;}
/* Keyboard input */
/*
 * Computes the Levenshtein edit distance between two NUL-terminated strings:
 * the minimum number of single-byte insertions, deletions and substitutions
 * that turn one into the other.
 *
 * Uses the two-row formulation, keeping only the previous and current row of
 * the distance matrix. Rows are sized by the shorter string.
 *
 * Returns the distance, or INT_MAX if the working rows cannot be allocated.
 */
int levenshtein_distance(const char *a, const char* b) {
    size_t a_len = strlen(a);
    size_t b_len = strlen(b);
    // The distance is symmetric, so put the shorter string along the row.
    if (a_len < b_len) {
        const char *longer = b; b = a; a = longer;
        size_t longer_len = b_len; b_len = a_len; a_len = longer_len;
    }

    int *v0 = malloc(sizeof *v0 * (b_len + 1));
    int *v1 = malloc(sizeof *v1 * (b_len + 1));
    if (v0 == NULL || v1 == NULL) {
        free(v0); free(v1);
        return INT_MAX;
    }

    // Row for an empty prefix of a: reaching b[0..j) takes j insertions.
    // All b_len+1 cells are needed — the last one is read by the first row.
    for (size_t j = 0; j <= b_len; j++) v0[j] = (int) j;

    for (size_t i = 0; i < a_len; i++) {
        // Turning a[0..i] (i+1 characters) into the empty string takes i+1 deletions.
        v1[0] = (int) i + 1;
        for (size_t j = 0; j < b_len; j++) {
            int best = v0[j] + (a[i] == b[j] ? 0 : 1);       // top-left cell + maybe substitution
            if (v0[j+1] + 1 < best) best = v0[j+1] + 1;      // above cell + deletion
            if (v1[j] + 1 < best) best = v1[j] + 1;          // left cell + insertion
            v1[j+1] = best;
        }
        // Progress to next row
        int *finished = v1; v1 = v0; v0 = finished;
    }

    int ret = v0[b_len];
    free(v0); free(v1);
    return ret;
}
// Slack used by select_link() when checking for ambiguity: a candidate whose
// edit distance is within MAX_DIST of the best one counts as a competing match.
int MAX_DIST = 3; // Is this ideal?
// Link table of the current page as returned by c_extractLinks(): strings in
// groups of three (label, title, URL), ending at an entry equal to " ".
// NULL until the first page has been read.
char **links = NULL;
/* Smallest edit distance between `command` and the label, title or URL of the
 * link whose three strings start at `link`. */
static int link_distance(const char *command, char **link) {
    int best = levenshtein_distance(command, link[0]);
    best = min(best, levenshtein_distance(command, link[1]));
    return min(best, levenshtein_distance(command, link[2]));
}

/*
 * Resolves a typed or spoken command to a link of the current page.
 *
 * command: text to match against each link's label, title and URL. An empty
 *          command (or a bare newline) means "read out the whole link table".
 *
 * A link is a candidate when one of its fields has the smallest edit distance
 * to the command. If it is the only link within MAX_DIST of that best
 * distance, its URL is returned. Otherwise every best-scoring link (or every
 * link, for an empty command) is printed as "URL<TAB>label<TAB>title" and
 * spoken, and NULL is returned.
 *
 * Any speech in progress is cancelled first. The returned pointer is an entry
 * of the global `links` table and stays valid only as long as that table does.
 * Returns NULL as well when there is no link table or no command.
 */
char *select_link(const char *command) {
    if (links == NULL || command == NULL) return NULL;

    // "" to read entire link table; a lone newline is what an empty typed line looks like.
    const int read_all = (command[0] == '\0' || command[0] == '\n');

    int score = INT_MAX;
    int num_matches = 0;
    if (!read_all) {
        // Pass 1, min distance
        for (int i = 0; strcmp(links[i], " ") != 0; i += 3)
            score = min(score, link_distance(command, &links[i]));
        // Pass 2: Is ambiguous? Counted per link, so a link whose label and
        // title both match does not compete with itself.
        for (int i = 0; strcmp(links[i], " ") != 0; i += 3)
            if (score >= link_distance(command, &links[i]) - MAX_DIST) num_matches++;
    }

    espeak_Cancel();

    // Pass 3: Retrieve answer
    for (int i = 0; strcmp(links[i], " ") != 0; i += 3) {
        if (!read_all) {
            if (score < link_distance(command, &links[i])) continue;
            if (num_matches == 1) return links[i+2];
        }
        // Communicate
        printf("%s\t%s\t%s\n", links[i+2], links[i], links[i+1]);
        char *ssml = c_formatLink(links[i], links[i+1], links[i+2]);
        speak(ssml, NULL, NULL);
        free(ssml);
    }
    return NULL;
}
// stored_settings: terminal attributes captured by main() at startup and
// restored at exit. no_echo: copy with ICANON and ECHO cleared, only set up
// by main() when read_keyboard is non-zero.
struct termios stored_settings, no_echo;
// Set to 1 by the -k option and to 0 by -K.
int read_keyboard = 0;
// Defined further down; declared here because read_stdin() calls it.
void read_page(char *uri, int read_input);
/*
 * GLib I/O watch callback handling input on stdin.
 *
 * Two kinds of input are understood:
 *  - Escape sequences ESC [ A/B/C/D (arrow keys) and ESC [ 5 ~ (page up):
 *    cancel the current speech and restart it from the previous/next section
 *    (up/down), paragraph (left/right) or, inside a table, the neighbouring
 *    cell. Page up inside a table jumps to row 0 of the current column.
 *  - Anything else starts a typed command: one full line is read with the
 *    terminal's normal line editing, matched against the page's links via
 *    select_link(), and the selected page (if any) is loaded.
 *
 * source, condition: unused.
 * data: the SSML of the current page (char *), used to restart speech.
 *
 * Returns TRUE to keep watching stdin, FALSE once a new page was loaded
 * (read_page() installs a fresh watch for it) or stdin reached end-of-file.
 */
gboolean read_stdin(GIOChannel *source, GIOCondition condition, gpointer data) {
    (void) source;
    (void) condition;
    char *ssml = data;

    // Kept as int so EOF stays distinguishable from every real byte.
    int first = getc(stdin);
    if (first == '\033') {
        char mark[200];
        char fallback[200];
        espeak_Cancel();
        int c = getc(stdin);
        if (c == 0 || c == EOF || c == '\033' || c == 'q') return TRUE; // skip [
        switch (getc(stdin)) {
        case 'A':
            // Up arrow
            if (in_table) {
                tablerow--;
                if (tablerow > 0) {
                    snprintf(mark, sizeof mark, "-rhaps-tablecell%i:%ix%i", tableno, tablerow, tablecol);
                    speak(ssml, mark, NULL);
                    break;
                } else in_table = 0;
            }
            section_no--;
            snprintf(mark, sizeof mark, "-rhaps-section%i", section_no);
            speak(ssml, section_no > 0 ? mark : NULL, NULL);
            break;
        case 'B':
            // Down arrow
            if (in_table) {
                tablerow++;
                snprintf(mark, sizeof mark, "-rhaps-tablecell%i:%ix%i", tableno, tablerow, tablecol);
                snprintf(fallback, sizeof fallback, "-rhaps-section%i", section_no+1);
                speak(ssml, mark, fallback);
                break; // FIXME What if that mark doesn't exist?
            }
            section_no++;
            snprintf(mark, sizeof mark, "-rhaps-section%i", section_no);
            speak(ssml, section_no > 0 ? mark : NULL, NULL);
            break;
        case 'C':
            // Right arrow
            if (in_table) {
                tablecol++;
                snprintf(mark, sizeof mark, "-rhaps-tablecell%i:%ix%i", tableno, tablerow, tablecol);
                snprintf(fallback, sizeof fallback, "-rhaps-paragraph%i", paragraph_no+1);
                speak(ssml, mark, fallback);
                break; // FIXME What if that mark doesn't exist?
            }
            paragraph_no++;
            snprintf(mark, sizeof mark, "-rhaps-paragraph%i", paragraph_no);
            speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
            break;
        case 'D':
            // Left arrow
            if (in_table) {
                tablecol--;
                if (tablecol > 0) {
                    snprintf(mark, sizeof mark, "-rhaps-tablecell%i:%ix%i", tableno, tablerow, tablecol);
                    speak(ssml, mark, NULL);
                    break;
                } else in_table = 0;
            }
            paragraph_no--;
            snprintf(mark, sizeof mark, "-rhaps-paragraph%i", paragraph_no);
            speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
            break;
        case '5': {
            // Page up arrives as ESC [ 5 ~ : swallow the trailing '~' so it is
            // not mistaken for the start of a typed command on the next call.
            int tail = getc(stdin);
            if (tail != '~' && tail != EOF) ungetc(tail, stdin);
            if (in_table) { // Jump to first row of table.
                tablerow = 0;
                snprintf(mark, sizeof mark, "-rhaps-tablecell%i:%ix%i", tableno, tablerow, tablecol);
                speak(ssml, mark, NULL);
            }
            break;
        }
        default:
            break;
        }
        return TRUE;
    }

    // Not an escape sequence: the byte just read is the first character of a
    // typed command, so hand it back before reading the line.
    if (first != EOF) ungetc(first, stdin);

    // Read in a line, with full terminal emulator i18n. The terminal mode is
    // only switched when key-by-key input was set up (no_echo is otherwise
    // never initialised).
    if (read_keyboard) tcsetattr(0, TCSANOW, &stored_settings);
    char *line = NULL;
    size_t len = 0;
    ssize_t nread = getline(&line, &len, stdin);
    if (read_keyboard) tcsetattr(0, TCSANOW, &no_echo);

    if (nread < 0) {
        free(line);
        fprintf(stderr, "Failed to read stdin line!\n");
        // At end-of-file there is nothing more to wait for; stop watching
        // instead of being woken up over and over.
        return feof(stdin) ? FALSE : TRUE;
    }
    // Drop the newline so it does not take part in the fuzzy match and an
    // empty line becomes the empty command.
    if (nread > 0 && line[nread-1] == '\n') line[nread-1] = '\0';

    char *uri = select_link(line); // points into `links`, not into `line`
    free(line);
    // Nothing (or nothing unambiguous) selected: stay on this page and keep listening.
    if (uri == NULL) return TRUE;

    read_page(uri, 1);
    return FALSE;
}
/*
 * GStreamer bus callback for the speech-to-text pipeline.
 *
 * End-of-stream and error messages are reported on stderr and end the watch.
 * Messages carrying a "pocketsphinx" structure are treated as recognition
 * results: the "hypothesis" string is matched against the page's links with
 * select_link(), and when the result is marked "final" and a link was
 * selected, that page is loaded.
 *
 * bus, data: unused.
 * Returns FALSE to remove the watch (EOS or error), TRUE otherwise.
 */
static gboolean stt_bus_call(GstBus * bus, GstMessage * msg, gpointer data) {
    (void) bus;
    (void) data;

    switch (GST_MESSAGE_TYPE(msg)) {
    case GST_MESSAGE_EOS:
        fprintf(stderr, "Audio input ended. Please type commands.\n");
        return FALSE;
    case GST_MESSAGE_ERROR: {
        gchar *debug = NULL;
        GError *error = NULL;
        gst_message_parse_error(msg, &error, &debug);
        g_free(debug);
        fprintf(stderr, "Audio input error. Please type commands.\n%s\n",
                error != NULL ? error->message : "");
        if (error != NULL) g_error_free(error);
        return FALSE;
    }
    default:
        break;
    }

    const GstStructure *st = gst_message_get_structure(msg);
    if (st == NULL || strcmp(gst_structure_get_name(st), "pocketsphinx") != 0) return TRUE;

    // Either field may be missing; select_link() must not be handed a NULL command.
    const GValue *hypothesis = gst_structure_get_value(st, "hypothesis");
    if (hypothesis == NULL || !G_VALUE_HOLDS_STRING(hypothesis)) return TRUE;
    const gchar *cmd = g_value_get_string(hypothesis);
    if (cmd == NULL) return TRUE;

    const GValue *final_value = gst_structure_get_value(st, "final");
    gboolean final = final_value != NULL && G_VALUE_HOLDS_BOOLEAN(final_value)
        ? g_value_get_boolean(final_value) : FALSE;

    char *uri = select_link(cmd);
    if (uri != NULL && final) read_page(uri, 1);
    return TRUE;
}
/*
 * Processes keyboard input by calling read_stdin() directly, for as long as
 * keyboard input is enabled and read_stdin() asks to continue.
 *
 * ssml: SSML of the current page, passed to read_stdin() as its data.
 * Always returns 0.
 */
int speak_finalize(char *ssml) {
    while (read_keyboard) {
        if (!read_stdin(NULL, 0, ssml)) break;
    }
    return 0;
}
/* Main driver */
// Result of speak_initialize(); pages are only spoken through espeak while it is 0.
int speak_err = 0;
// Session handle created by c_newSession() in main().
struct session *session = NULL;
// Page passed to c_fetchURL() as the referer of the next request.
struct page *referer = NULL;
// Non-zero when pages are spoken through espeak-ng (set by -w, or by default
// when neither SSML nor link output was requested).
int use_espeak = 0;
// Path given with -L; when set, c_writeLog() is called after each page load.
char *logpath = NULL;
// Space-separated MIME types passed to c_fetchURL(); -x swaps in an ordering
// that lists the XML types first.
char *mimes = "text/html text/xml application/xml application/xhtml+xml text/plain";
// Stream the rendered SSML is written to (-s), or NULL.
FILE *fd_ssml = NULL;
// Stream the extracted links are written to as TSV (-l), or NULL.
FILE *fd_links = NULL;
/*
 * Writes the global link table to fd_links as tab-separated values: the three
 * strings of each link on one line, separated by tabs.
 * Both `links` and `fd_links` must be non-NULL.
 */
void write_links() {
    int column = 0;
    for (char **entry = links; strcmp(*entry, " ") != 0; entry++) {
        // The third string of a link ends the row.
        char separator = '\t';
        if (column == 2) separator = '\n';
        fprintf(fd_links, "%s%c", *entry, separator);
        column = (column + 1) % 3;
    }
}
void read_page(char *uri, int read_input) {
if (uri == NULL) return; // Leave things as-is...
if (links != NULL) { // Ensure links is freed...
for (int i = 0; strcmp(links[i], " ") != 0; i++) free(links[i]);
free(links);
}
if (use_espeak && speak_err == 0) speak_text(uri, espeakRATE, 10);
#ifdef WITH_SPEECHD
else if (spd_conn != NULL) spd_say(spd_conn, SPD_MESSAGE, uri);
#endif
else printf("%s\n", uri);
struct page *page = c_fetchURL(session, mimes, referer, uri);
referer = page;
char *ssml = c_renderDoc(session, page, use_espeak);
links = c_extractLinks(page);
if (logpath != NULL) c_writeLog(logpath, session);
if (fd_ssml != NULL) fprintf(fd_ssml, "%s\n", ssml);
if (read_keyboard && strcmp(uri, "about:welcome")) select_link("");
if (fd_links != NULL) write_links(fd_links, links);
if (use_espeak & speak_err == 0) speak(ssml, "main", NULL);
#ifdef WITH_SPEECHD
if (spd_conn != NULL) spd_say(spd_conn, SPD_MESSAGE, ssml);
#endif
c_freePage(page);
//free(uri);
if (read_input) {
g_io_add_watch(g_io_channel_unix_new(0), G_IO_IN|G_IO_HUP, read_stdin, ssml);
} else {
free(ssml);
for (int i = 0; strcmp(links[i], " ") != 0; i++) free(links[i]);
free(links);
}
}
/*
 * Turns the argument of the option getopt() just parsed into an output stream.
 *
 * Returns stdout when the option had no argument, otherwise the file named by
 * `optarg` opened for writing. If the file cannot be opened, an error is
 * printed, the Haskell runtime is shut down and the process exits with -1.
 */
FILE *parse_opt_file() {
    if (optarg == NULL) return stdout;

    FILE *out = fopen(optarg, "w");
    if (out != NULL) return out;

    fprintf(stderr, "Failed to open file %s\n", optarg);
    hs_exit();
    exit(-1);
}
/*
 * Program entry point.
 *
 * Starts the Haskell runtime and GStreamer, parses the command-line flags
 * (see the usage text below), optionally switches the terminal to key-by-key
 * input, initialises espeak-ng when speech output is wanted, loads the URLs
 * given on the command line (or the last visited page / about:welcome when
 * none are given) and then runs the GLib main loop.
 *
 * Returns 0 on success, 1 for an invalid flag, 4 if espeak-ng failed while
 * finishing, otherwise the result of speak_initialize().
 */
int main(int argc, char **argv) {
    hs_init(&argc, &argv);
    gst_init(&argc, &argv);
    GMainLoop *loop = g_main_loop_new(NULL, FALSE);
    guint bus_watch_id = 0;
    tcgetattr(0, &stored_settings);
#ifdef WITH_SPEECHD
    SPDConnection *spd_conn = NULL;
#endif
    GstElement *stt_pipeline = NULL;

    int c;
    opterr = 0;
    // -L takes a mandatory argument in both configurations.
#ifdef WITH_SPEECHD
    while ((c = getopt(argc, argv, "xs::l::L:kKw::dvh")) != -1) {
#else
    while ((c = getopt(argc, argv, "xs::l::L:kKw::vh")) != -1) {
#endif
        switch (c) {
        case 'x':
            mimes = "text/xml application/xml application/xhtml+xml text/html text/plain";
            break;
        case 's':
            fd_ssml = parse_opt_file();
            break;
        case 'l':
            fd_links = parse_opt_file();
            break;
        case 'L':
            logpath = optarg;
            break;
        case 'K':
            read_keyboard = 0;
            break;
        case 'k':
            read_keyboard = 1;
            break;
        case 'w':
            use_espeak = 1;
            path_wav = optarg;
            break;
#ifdef WITH_SPEECHD
        case 'd':
            spd_conn = spd_open("rhapsode", "main", NULL, SPD_MODE_SINGLE);
            if (spd_conn == NULL) fprintf(stderr, "Failed to open SpeechD connection, ignoring\n");
            else spd_set_data_mode(spd_conn, SPD_DATA_SSML);
            break;
#endif
        case 'v': {
            stt_pipeline = gst_pipeline_new("speech2text");
            GstElement *src = gst_element_factory_make("autoaudiosrc", "microphone");
            GstElement *convert = gst_element_factory_make("audioconvert", "convert");
            GstElement *resample = gst_element_factory_make("audioresample", "resample");
            GstElement *decoder = gst_element_factory_make("pocketsphinx", "asr");
            GstElement *sink = gst_element_factory_make("fakesink", "output");
            if (!src || !convert || !resample || !decoder || !sink) {
                fprintf(stderr, "Failed to initialize voice recognition. You'll just have to type your commands.\n");
                // Release whichever elements were created; none was added to the pipeline yet.
                if (src) gst_object_unref(src);
                if (convert) gst_object_unref(convert);
                if (resample) gst_object_unref(resample);
                if (decoder) gst_object_unref(decoder);
                if (sink) gst_object_unref(sink);
                gst_object_unref(stt_pipeline);
                stt_pipeline = NULL;
                break;
            }
            // FIXME: Set something more appropriate
            g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL);
            g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL);
            GstBus *bus = gst_pipeline_get_bus(GST_PIPELINE(stt_pipeline));
            bus_watch_id = gst_bus_add_watch(bus, stt_bus_call, NULL);
            gst_object_unref(bus);
            gst_bin_add_many(GST_BIN(stt_pipeline), src, convert, resample, decoder, sink, NULL);
            gst_element_link_many(src, convert, resample, decoder, sink, NULL);
            // NOTE(review): nothing in this function switches the pipeline to
            // GST_STATE_PLAYING — confirm whether that happens elsewhere.
            break;
        }
        case '?':
            fprintf(stderr, "Invalid flag %c\n\n", optopt);
            /* fallthrough: an invalid flag also prints the usage text */
        case 'h':
            fprintf(stderr, "USAGE: rhapsode [FLAGS] URL...\n");
            fprintf(stderr, "\t-x\tX(HT)ML\tIndicates to expect an X(HT)ML file.\n");
            fprintf(stderr, "\t-s\tsilent/SSML\tWrites SSML to the specified file or stdout.\n");
            fprintf(stderr, "\t\t\thttps://xkcd.com/1692/\n");
            fprintf(stderr, "\t-l\tlinks\tWrite extracted links to specified file or stdout as TSV.\n");
            fprintf(stderr, "\t-L\tlog\tWrite (append) network request timing to specified filepath.\n");
            fprintf(stderr, "\t-k\tkeyboard\tRead arrow key navigation & links from stdin. Default behaviour, noop.\n");
            fprintf(stderr, "\t-K\tDon't read input from stdin.\n");
            fprintf(stderr, "\t-w\t.wav\tWrite an audio recording of the webpage, or (DEFAULT) immediately output through speakers.\n");
            fprintf(stderr, "\t-v\tvoice\tListen for spoken commands via the microphone.\n");
#ifdef WITH_SPEECHD
            fprintf(stderr, "\t-d\tSpeechD\tSchedule page read via the SpeechD daemon. (BROKEN)\n");
#endif
            fprintf(stderr, "\t-h\thelp\tOutputs this usage information to stderr.\n");
            fprintf(stderr, "\t\t\tIf both -s & -l are enabled without an argument, writes to stderr instead.\n");
            hs_exit();
            return c == 'h' ? 0 : 1;
        }
    }

    if (read_keyboard) {
        // Read input character by character, not line by line.
        no_echo = stored_settings;
        no_echo.c_lflag &= (~ICANON);
        no_echo.c_lflag &= (~ECHO);
        no_echo.c_cc[VTIME] = 0;
        no_echo.c_cc[VMIN] = 1;
        tcsetattr(0, TCSANOW, &no_echo);
    }

    // SSML and links cannot both go to stdout; links move to stderr.
    if (fd_ssml == stdout && fd_links == stdout) fd_links = stderr;
    // With no other output requested, speak the page.
#ifdef WITH_SPEECHD
    if (fd_ssml == NULL && fd_links == NULL && spd_conn == NULL && !use_espeak)
#else
    if (fd_ssml == NULL && fd_links == NULL && !use_espeak)
#endif
        use_espeak = 1;

#if 0 // Unit-test levenshtein_distance
    int dist = levenshtein_distance("kitten", "kitten");
    printf("kitten => kitten: %i\n", dist);
    if (dist != 0) return 10;
    dist = levenshtein_distance("kitten", "sitting");
    printf("kitten => sitting: %i\n", dist);
    if (dist != 3) return 11;
#endif

    session = c_newSession();
    if (logpath != NULL) session = c_enableLogging(session);
    referer = c_initialReferer();
    if (use_espeak) speak_err = speak_initialize();

    if (optind >= argc) {
        // No URLs specified, restore previous session or goto about:welcome
        read_page(c_lastVisited("about:welcome"), 1);
    } else {
        // Only the last URL keeps listening for input.
        for (int i = optind; i < argc; i++) read_page(argv[i], i+1 == argc);
    }

    g_main_loop_run(loop);

    if (bus_watch_id) g_source_remove(bus_watch_id);
    g_main_loop_unref(loop);
#ifdef WITH_SPEECHD
    if (spd_conn != NULL) spd_close(spd_conn);
#endif

    int status = speak_err;
    // Only tear espeak-ng down if it was actually brought up.
    if (use_espeak && speak_err == 0) {
        espeak_ng_STATUS result = espeak_ng_Synchronize();
        if (result != ENS_OK) {
            espeak_ng_PrintStatusCodeMessage(result, stderr, context);
            status = 4; // keep going: the terminal still has to be restored
        }
        if (fd_wav != NULL) {
            sf_close(fd_wav);
            fd_wav = NULL;
        }
        espeak_ng_Terminate();
    }

    if (stt_pipeline != NULL) gst_object_unref(GST_OBJECT(stt_pipeline));
    c_freePage(referer);
    c_freeSession(session);
    hs_exit();
    tcsetattr(0, TCSANOW, &stored_settings);
    return status;
}