From 5199f051ab1452575e2cfc59ad16084f632ace87 Mon Sep 17 00:00:00 2001 From: Adrian Cochrane Date: Thu, 3 Jun 2021 20:36:53 +1200 Subject: [PATCH] Integrate Voice2JSON for voice command input. Ended up coding the commandline in Haskell because that, somewhat surprisingly, had better APIs for the purpose than C. Turned out to be well worth navigating the language barrier. --- rhapsode.cabal | 10 +++++++--- src/Links.hs | 33 +++++++++++++++++++++++++++++---- src/main.c | 24 ++++++++++++++++++------ 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/rhapsode.cabal b/rhapsode.cabal index 5a0cac0..4bce942 100644 --- a/rhapsode.cabal +++ b/rhapsode.cabal @@ -61,11 +61,15 @@ library -- Other library packages from which modules are imported. build-depends: base >=4.9 && <5, directory >= 1.3.2, bytestring, + file-embed >= 0.0.9 && < 0.1, time, parallel >= 1, + -- HTML parsing, SSML output html-conduit, xml-conduit, text, containers, data-default-class, - network-uri, + -- Networking + network-uri, async, hurl >= 2, filepath, temporary, + -- CSS stylist >= 2.4 && <3, css-syntax, xml-conduit-stylist >= 2.3 && <3, scientific, - async, hurl >= 2, filepath, temporary, - file-embed >= 0.0.9 && < 0.1, time, parallel >= 1, process + -- Voice2Json input + process, aeson >= 1.5 && <1.6, unordered-containers -- Directories containing source files. 
hs-source-dirs: src diff --git a/src/Links.hs b/src/Links.hs index 914df0c..0a6ac98 100644 --- a/src/Links.hs +++ b/src/Links.hs @@ -22,16 +22,19 @@ import Control.Exception (catch) import System.Directory -- For locating links.xml, suggestions.gmni import System.FilePath -import System.IO (hPrint, stderr) -- For error reporting +import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json -- For suggestions.gmni import qualified Data.Set as Set import Data.List (nub, intercalate) import Control.Concurrent (forkIO) --- For Voice2Json's sentences.ini +-- For Voice2Json import Data.Char -import System.Process (callProcess) +import System.Process +import Data.Aeson +import qualified Data.HashMap.Strict as HM +import qualified Data.ByteString.Lazy as LBS data Link = Link { label :: Text, @@ -249,6 +252,10 @@ c_extractLinks c_page c_v2jProfile = do text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString) +------ +--- C helper functions +------ + foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString c_formatLink c_label c_title c_url = do @@ -273,10 +280,28 @@ c_formatLink c_label c_title c_url = do prosody attrs txt = el "prosody" attrs [NodeContent txt] style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner] +--- For Voice2JSON + foreign export ccall c_dataDir :: CString -> IO CString --- | Used to find Voice2Json profile c_dataDir c_subdir = do subdir <- peekCString c_subdir cache <- getXdgDirectory XdgData "rhapsode" newCString (cache </> subdir) + +foreign export ccall c_recognizeIntent :: CString -> IO CString + +c_recognizeIntent c_profile = do + profile <- peekCString c_profile + (_, Just pipe, _, _) <- createProcess (proc "voice2json" [ + "--profile", profile, + "transcribe-stream", + "-c", "1"]){std_out = CreatePipe} + (_, Just out, _, _) <- createProcess (proc "voice2json" [ + "--profile", profile, + "recognize-intent"]){std_in = UseHandle pipe, 
std_out = CreatePipe} + intent <- LBS.hGetContents out + let transcript = case decode intent of + Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt + _ -> "" + newCString transcript diff --git a/src/main.c b/src/main.c index ec17e0b..ee129a8 100644 --- a/src/main.c +++ b/src/main.c @@ -36,9 +36,11 @@ void c_writeLog(char*, struct session*); char *c_renderDoc(struct session*, struct page*, _Bool); char **c_extractLinks(struct page*, char *v2jProfile); char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME segfaults. + int c_ssmlHasMark(char*, char*); char *c_formatLink(char *label, char *title, char *url); char *c_dataDir(char *subdir); +char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs. char *c_lastVisited(char*); @@ -219,11 +221,12 @@ char *select_link(char **links, const char *command) { struct termios stored_settings, no_echo; int read_keyboard = 1; -int speak_finalize(char *ssml, char **links, char **out_link) { +int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) { while (read_keyboard) { if (out_link != NULL && *out_link != NULL) return 0; - if (getc(stdin) == '\033') { + char ch = getc(stdin); + if (ch == '\033') { char mark[200]; char fallback[200]; espeak_Cancel(); @@ -285,15 +288,24 @@ int speak_finalize(char *ssml, char **links, char **out_link) { speak(ssml, paragraph_no > 0 ? 
mark : NULL, NULL); break; } + } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') { + char *line = c_recognizeIntent(v2j_profile); + if (out_link != NULL) *out_link = select_link(links, line); + free(line); } else { // Read in a line tcsetattr(0, TCSANOW, &stored_settings); - char *line = NULL; - size_t len = 0; + char buffer[512]; + + buffer[0] = ch; + putchar(ch); + + char *line = buffer + 1; + size_t len = 512; if (getline(&line, &len, stdin) < 0) fprintf(stderr, "Failed to read stdin line!\n"); else if (out_link != NULL) - *out_link = select_link(links, line); + *out_link = select_link(links, buffer); tcsetattr(0, TCSANOW, &no_echo); } } @@ -510,7 +522,7 @@ read_uri: speak_err = espeak_ng_Synchronize(); if (speak_err == 0) select_link(links, ""); } - if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, links, &uri); + if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri); if (uri != NULL) goto read_uri; #ifdef WITH_SPEECHD -- 2.30.2