~alcinnz/rhapsode

5199f051ab1452575e2cfc59ad16084f632ace87 — Adrian Cochrane 2 years ago 4ab0d7f
Integrate Voice2JSON for voice command input.

Ended up coding the command-line invocation in Haskell because that, somewhat surprisingly,
had better APIs for the purpose than C. It turned out to be well worth navigating the language barrier.
3 files changed, 54 insertions(+), 13 deletions(-)

M rhapsode.cabal
M src/Links.hs
M src/main.c
M rhapsode.cabal => rhapsode.cabal +7 -3
@@ 61,11 61,15 @@ library
  
  -- Other library packages from which modules are imported.
  build-depends:       base >=4.9 && <5, directory >= 1.3.2, bytestring,
        file-embed >= 0.0.9 && < 0.1, time, parallel >= 1,
        -- HTML parsing, SSML output
        html-conduit, xml-conduit, text, containers, data-default-class,
        network-uri,
        -- Networking
        network-uri, async, hurl >= 2, filepath, temporary,
        -- CSS
        stylist >= 2.4 && <3, css-syntax, xml-conduit-stylist >= 2.3 && <3, scientific,
        async, hurl >= 2, filepath, temporary,
        file-embed >= 0.0.9 && < 0.1, time, parallel >= 1, process
        -- Voice2Json input
        process, aeson >= 1.5 && <1.6, unordered-containers
  
  -- Directories containing source files.
  hs-source-dirs:      src

M src/Links.hs => src/Links.hs +29 -4
@@ 22,16 22,19 @@ import Control.Exception (catch)

import System.Directory -- For locating links.xml, suggestions.gmni
import System.FilePath
import System.IO (hPrint, stderr) -- For error reporting
import System.IO (hPrint, stderr, hGetContents) -- For error reporting, Voice2Json

-- For suggestions.gmni
import qualified Data.Set as Set
import Data.List (nub, intercalate)
import Control.Concurrent (forkIO)

-- For Voice2Json's sentences.ini
-- For Voice2Json
import Data.Char
import System.Process (callProcess)
import System.Process
import Data.Aeson
import qualified Data.HashMap.Strict as HM
import qualified Data.ByteString.Lazy as LBS

data Link = Link {
    label :: Text,


@@ 249,6 252,10 @@ c_extractLinks c_page c_v2jProfile = do

text2cstring txt = FTxt.withCStringLen txt $ \s -> (peekCStringLen s >>= newCString)

------
--- C helper functions
------

foreign export ccall c_formatLink :: CString -> CString -> CString -> IO CString

c_formatLink c_label c_title c_url = do


@@ 273,10 280,28 @@ c_formatLink c_label c_title c_url = do
    prosody attrs txt = el "prosody" attrs [NodeContent txt]
    style field mode inner = el "tts:style" [("field", field), ("mode", mode)] [NodeElement inner]

--- For Voice2JSON

foreign export ccall c_dataDir :: CString -> IO CString

-- | Resolve a path underneath Rhapsode's XDG data directory.
-- Used to find the Voice2Json profile.
--
-- The argument is a C string naming the entry to locate; the result is that
-- name appended to the XDG data directory for "rhapsode", returned as a
-- C string freshly allocated by 'newCString'.
c_dataDir c_subdir = do
    relPath <- peekCString c_subdir
    xdgRoot <- getXdgDirectory XdgData "rhapsode"
    newCString $ xdgRoot </> relPath

foreign export ccall c_recognizeIntent :: CString -> IO CString

-- | Listen for a single spoken command via Voice2Json and return its transcript.
--
-- Runs @voice2json transcribe-stream -c 1@ piped into
-- @voice2json recognize-intent@ for the given profile, parses the JSON the
-- second process prints, and returns its top-level @"text"@ field.
--
-- The result is a C string freshly allocated by 'newCString'. It is the empty
-- string when the output isn't a JSON object with a string @"text"@ field, or
-- when the subprocesses could not be started (the error is then reported on
-- stderr rather than propagating out of this foreign export).
c_recognizeIntent c_profile = do
    profile <- peekCString c_profile
    transcript <- recognize profile `catch` reportFailure
    newCString transcript
  where
    -- Launch failures (e.g. voice2json not installed) and failed pattern
    -- matches surface as IOError; treat them like "nothing recognised".
    reportFailure :: IOError -> IO String
    reportFailure err = hPrint stderr err >> return ""

    recognize profile = do
        (_, Just pipe, _, transcriber) <- createProcess (proc "voice2json" [
            "--profile", profile,
            "transcribe-stream",
            "-c", "1"]){std_out = CreatePipe}
        (_, Just out, _, recognizer) <- createProcess (proc "voice2json" [
            "--profile", profile,
            "recognize-intent"]){std_in = UseHandle pipe, std_out = CreatePipe}
        intent <- LBS.hGetContents out
        -- The read is lazy: drain the pipe to EOF *before* waiting, otherwise
        -- the child could block on a full pipe while we block on its exit.
        LBS.length intent `seq` return ()
        -- Reap both children so repeated voice commands don't pile up zombies.
        _ <- waitForProcess recognizer
        _ <- waitForProcess transcriber
        return $ case decode intent of
            Just (Object obj) | Just (String txt) <- "text" `HM.lookup` obj -> unpack txt
            _ -> ""

M src/main.c => src/main.c +18 -6
@@ 36,9 36,11 @@ void c_writeLog(char*, struct session*);
char *c_renderDoc(struct session*, struct page*, _Bool);
char **c_extractLinks(struct page*, char *v2jProfile);
char **c_docLinksAndRendering(struct session*, struct page*, _Bool); // FIXME segfaults.

int c_ssmlHasMark(char*, char*);
char *c_formatLink(char *label, char *title, char *url);
char *c_dataDir(char *subdir);
char *c_recognizeIntent(char *profile); // For better JSON & subprocess APIs.

char *c_lastVisited(char*);



@@ 219,11 221,12 @@ char *select_link(char **links, const char *command) {

struct termios stored_settings, no_echo;
int read_keyboard = 1;
int speak_finalize(char *ssml, char **links, char **out_link) {
int speak_finalize(char *ssml, char *v2j_profile, char **links, char **out_link) {
    while (read_keyboard) {
        if (out_link != NULL && *out_link != NULL) return 0;

        if (getc(stdin) == '\033') {
        char ch = getc(stdin);
        if (ch == '\033') {
            char mark[200];
            char fallback[200];
            espeak_Cancel();


@@ 285,15 288,24 @@ int speak_finalize(char *ssml, char **links, char **out_link) {
                speak(ssml, paragraph_no > 0 ? mark : NULL, NULL);
                break;
            }
        } else if (ch == ' ' && v2j_profile != NULL && *v2j_profile != '\0') {
            char *line = c_recognizeIntent(v2j_profile);
            if (out_link != NULL) *out_link = select_link(links, line);
            free(line);
        } else {
            // Read in a line
            tcsetattr(0, TCSANOW, &stored_settings);
            char *line = NULL;
            size_t len = 0;
            char buffer[512];

            buffer[0] = ch;
            putchar(ch);

            char *line = buffer + 1;
            size_t len = 512;
            if (getline(&line, &len, stdin) < 0)
                fprintf(stderr, "Failed to read stdin line!\n");
            else if (out_link != NULL)
                *out_link = select_link(links, line);
                *out_link = select_link(links, buffer);
            tcsetattr(0, TCSANOW, &no_echo);
        }
    }


@@ 510,7 522,7 @@ read_uri:
        speak_err = espeak_ng_Synchronize();
        if (speak_err == 0) select_link(links, "");
    }
    if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, links, &uri);
    if (use_espeak & speak_err == 0) speak_err = speak_finalize(ssml, v2j_profile, links, &uri);
    if (uri != NULL) goto read_uri;

    #ifdef WITH_SPEECHD