cmusphinx · dhdaines · Oct 21, 2022 · Oct 19, 2022 · Oct 19, 2022 · Oct 19, 2022
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -2,12 +2,58 @@ set(EXAMPLES
   live
   simple
   )
+
 foreach(EXAMPLE ${EXAMPLES})
   add_executable(${EXAMPLE} EXCLUDE_FROM_ALL ${EXAMPLE}.c)
   target_link_libraries(${EXAMPLE} pocketsphinx)
   target_include_directories(
     ${EXAMPLE} PRIVATE ${CMAKE_BINARY_DIR}
     )
 endforeach()
+
 add_custom_target(examples DEPENDS ${EXAMPLES})
 
+# Build the PulseAudio and PortAudio live examples when pkg-config can find them
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_FOUND)
+  pkg_check_modules(PULSEAUDIO libpulse-simple)
+  if(PULSEAUDIO_FOUND)
+    add_executable(live_pulseaudio EXCLUDE_FROM_ALL live_pulseaudio.c)
+    target_link_libraries(live_pulseaudio pocketsphinx ${PULSEAUDIO_LIBRARIES})
+    target_include_directories(live_pulseaudio PRIVATE ${CMAKE_BINARY_DIR}
+      ${PULSEAUDIO_INCLUDE_DIRS})  # everything after the first argument is a path
+  endif()
+
+  pkg_check_modules(PORTAUDIO portaudio-2.0)
+  if(PORTAUDIO_FOUND)
+    add_executable(live_portaudio EXCLUDE_FROM_ALL live_portaudio.c)
+    target_link_libraries(live_portaudio pocketsphinx ${PORTAUDIO_LIBRARIES})
+    target_include_directories(live_portaudio PRIVATE ${CMAKE_BINARY_DIR}
+      ${PORTAUDIO_INCLUDE_DIRS})  # everything after the first argument is a path
+  endif()
+endif()
+
+# Fall back to PortAudio's older CMake package, which provides portaudio_static
+if(NOT PORTAUDIO_FOUND)
+  find_package(portaudio QUIET)
+  if(TARGET portaudio_static)
+    add_executable(live_portaudio EXCLUDE_FROM_ALL live_portaudio.c)
+    target_link_libraries(live_portaudio pocketsphinx portaudio_static)
+    set(PORTAUDIO_FOUND 1)
+  endif()
+endif()
+
+# Fall back to PortAudio's newer CMake package, which provides PortAudio::PortAudio
+if(NOT PORTAUDIO_FOUND)
+  find_package(PortAudio QUIET)
+  if(TARGET PortAudio::PortAudio)
+    add_executable(live_portaudio EXCLUDE_FROM_ALL live_portaudio.c)
+    target_link_libraries(live_portaudio pocketsphinx PortAudio::PortAudio)
+    set(PORTAUDIO_FOUND 1)
+  endif()
+endif()
+
+if(WIN32)  # Windows-only example, linked against winmm
+  add_executable(live_win32 EXCLUDE_FROM_ALL live_win32.c)
+  target_link_libraries(live_win32 pocketsphinx winmm)
+endif()
diff --git a/examples/live.c b/examples/live.c
@@ -21,7 +21,10 @@
  *
  *     gcc -o live live.c $(pkg-config --libs --cflags pocketsphinx)
  *
- *
+ * Sadly, this example does *not* seem to work on Windows, even if you
+ * manage to get `sox` in your `PATH` (which is not easy), because it
+ * seems that it can't actually read from the microphone.  Try
+ * live_win32.c or live_portaudio.c instead.
  */
 #include <pocketsphinx.h>
 #include <signal.h>
@@ -34,6 +37,11 @@ catch_sig(int signum)
     global_done = 1;
 }
 
+#ifdef _WIN32 /* always predefined when targeting Windows; plain WIN32 may not be */
+#define popen _popen
+#define pclose _pclose
+#endif
+
 static FILE *
 popen_sox(int sample_rate)
 {

diff --git a/examples/live_portaudio.c b/examples/live_portaudio.c
@@ -0,0 +1,122 @@
+/* Example of live speech recognition with PocketSphinx and PortAudio.
+ *
+ * MIT license (c) 2022, see LICENSE for more information.
+ *
+ * Author: David Huggins-Daines <[email protected]>
+ */
+/**
+ * @example live_portaudio.c
+ * @brief Speech recognition with live audio input and endpointing.
+ *
+ * This file shows how to use PocketSphinx with microphone input using
+ * PortAudio (v19 and above).
+ *
+ * To compile it, assuming you have built the library as in
+ * \ref unix_install "these directions", you can run:
+ *
+ *     cmake --build build --target live_portaudio
+ *
+ * Alternately, if PocketSphinx is installed system-wide, you can run:
+ *
+ *     gcc -o live_portaudio live_portaudio.c \
+ *         $(pkg-config --libs --cflags pocketsphinx portaudio-2.0)
+ *
+ *
+ */
+#include <portaudio.h>
+#include <pocketsphinx.h>
+#include <signal.h>
+
+static volatile sig_atomic_t global_done = 0; /* written from the SIGINT handler */
+static void
+catch_sig(int signum)
+{
+    (void)signum; /* required by the handler signature, otherwise unused */
+    global_done = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+    /* Recognize speech from the default PortAudio input until SIGINT or a read error. */
+    PaStream *stream;
+    PaError err;
+    ps_decoder_t *decoder;
+    ps_config_t *config;
+    ps_endpointer_t *ep;
+    short *frame;
+    size_t frame_size;
+
+    (void)argc; (void)argv;
+
+    config = ps_config_init(NULL);
+    ps_default_search_args(config);
+    if ((err = Pa_Initialize()) != paNoError)
+        E_FATAL("Failed to initialize PortAudio: %s\n",
+                Pa_GetErrorText(err));
+    if ((decoder = ps_init(config)) == NULL)
+        E_FATAL("PocketSphinx decoder init failed\n");
+    if ((ep = ps_endpointer_init(0, 0.0, 0, 0, 0)) == NULL)
+        E_FATAL("PocketSphinx endpointer init failed\n");
+    frame_size = ps_endpointer_frame_size(ep);
+    if ((frame = malloc(frame_size * sizeof(frame[0]))) == NULL)
+        E_FATAL_SYSTEM("Failed to allocate frame");
+    if ((err = Pa_OpenDefaultStream(&stream, 1, 0, paInt16,
+                                    ps_config_int(config, "samprate"),
+                                    frame_size, NULL, NULL)) != paNoError)
+        E_FATAL("Failed to open PortAudio stream: %s\n",
+                Pa_GetErrorText(err));
+    if ((err = Pa_StartStream(stream)) != paNoError)
+        E_FATAL("Failed to start PortAudio stream: %s\n",
+                Pa_GetErrorText(err));
+    if (signal(SIGINT, catch_sig) == SIG_ERR)
+        E_FATAL_SYSTEM("Failed to set SIGINT handler");
+    while (!global_done) {
+        const int16 *speech;
+        int prev_in_speech = ps_endpointer_in_speech(ep);
+        /* paInputOverflowed only reports audio dropped before this read; the frame is usable. */
+        err = Pa_ReadStream(stream, frame, frame_size);
+        if (err != paNoError && err != paInputOverflowed) {
+            E_ERROR("Error in PortAudio read: %s\n", Pa_GetErrorText(err));
+            break;
+        }
+        speech = ps_endpointer_process(ep, frame);
+        if (speech != NULL) {
+            const char *hyp;
+            if (!prev_in_speech) {
+                fprintf(stderr, "Speech start at %.2f\n",
+                        ps_endpointer_speech_start(ep));
+                fflush(stderr); /* For broken MSYS2 terminal */
+                ps_start_utt(decoder);
+            }
+            if (ps_process_raw(decoder, speech, frame_size, FALSE, FALSE) < 0)
+                E_FATAL("ps_process_raw() failed\n");
+            if ((hyp = ps_get_hyp(decoder, NULL)) != NULL) {
+                fprintf(stderr, "PARTIAL RESULT: %s\n", hyp);
+                fflush(stderr);
+            }
+            if (!ps_endpointer_in_speech(ep)) {
+                fprintf(stderr, "Speech end at %.2f\n",
+                        ps_endpointer_speech_end(ep));
+                fflush(stderr);
+                ps_end_utt(decoder);
+                if ((hyp = ps_get_hyp(decoder, NULL)) != NULL) {
+                    printf("%s\n", hyp);
+                    fflush(stdout);
+                }
+            }
+        }
+    }
+    if ((err = Pa_StopStream(stream)) != paNoError)
+        E_FATAL("Failed to stop PortAudio stream: %s\n",
+                Pa_GetErrorText(err));
+    if ((err = Pa_Terminate()) != paNoError)
+        E_FATAL("Failed to terminate PortAudio: %s\n",
+                Pa_GetErrorText(err));
+    free(frame);
+    ps_endpointer_free(ep);
+    ps_free(decoder);
+    ps_config_free(config);
+
+    return 0;
+}
diff --git a/examples/live_pulseaudio.c b/examples/live_pulseaudio.c
@@ -0,0 +1,110 @@
+/* Example of live speech recognition with PocketSphinx and PulseAudio.
+ *
+ * MIT license (c) 2022, see LICENSE for more information.
+ *
+ * Author: David Huggins-Daines <[email protected]>
+ */
+/**
+ * @example live_pulseaudio.c
+ * @brief Speech recognition with live audio input and endpointing.
+ *
+ * This file shows how to use PocketSphinx with microphone input using
+ * PulseAudio.
+ *
+ * To compile it, assuming you have built the library as in
+ * \ref unix_install "these directions", you can run:
+ *
+ *     cmake --build build --target live_pulseaudio
+ *
+ * Alternately, if PocketSphinx is installed system-wide, you can run:
+ *
+ *     gcc -o live_pulseaudio live_pulseaudio.c \
+ *         $(pkg-config --libs --cflags pocketsphinx libpulse-simple)
+ *
+ *
+ */
+#include <pulse/simple.h>
+#include <pulse/error.h>
+#include <pocketsphinx.h>
+#include <signal.h>
+
+static volatile sig_atomic_t global_done = 0; /* written from the SIGINT handler */
+static void
+catch_sig(int signum)
+{
+    (void)signum; /* required by the handler signature, otherwise unused */
+    global_done = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+    /* Endpoint and decode PulseAudio capture until SIGINT or a read error. */
+    ps_config_t *config;
+    ps_decoder_t *decoder;
+    ps_endpointer_t *ep;
+    pa_sample_spec spec;
+    pa_simple *capture;
+    size_t frame_size, frame_bytes;
+    short *frame;
+    int pa_err;
+
+    (void)argv; (void)argc;
+    config = ps_config_init(NULL);
+    ps_default_search_args(config);
+    decoder = ps_init(config);
+    if (decoder == NULL)
+        E_FATAL("PocketSphinx decoder init failed\n");
+    ep = ps_endpointer_init(0, 0.0, 0, 0, 0);
+    if (ep == NULL)
+        E_FATAL("PocketSphinx endpointer init failed\n");
+    frame_size = ps_endpointer_frame_size(ep);
+    frame_bytes = frame_size * sizeof(frame[0]);
+    frame = malloc(frame_bytes);
+    if (frame == NULL)
+        E_FATAL_SYSTEM("Failed to allocate frame");
+
+    /* One-channel capture stream at the configured "samprate". */
+    spec.rate = ps_config_int(config, "samprate");
+    spec.channels = 1;
+    spec.format = PA_SAMPLE_S16NE;
+    capture = pa_simple_new(NULL, "live_pulseaudio", PA_STREAM_RECORD, NULL,
+                            "live", &spec, NULL, NULL, &pa_err);
+    if (capture == NULL)
+        E_FATAL("Failed to connect to PulseAudio: %s\n", pa_strerror(pa_err));
+    if (signal(SIGINT, catch_sig) == SIG_ERR)
+        E_FATAL_SYSTEM("Failed to set SIGINT handler");
+    while (!global_done) {
+        const int was_in_speech = ps_endpointer_in_speech(ep);
+        const int16 *speech;
+        const char *hyp;
+        if (pa_simple_read(capture, frame, frame_bytes, &pa_err) < 0) {
+            E_ERROR("Error in pa_simple_read: %s\n", pa_strerror(pa_err));
+            break;
+        }
+        speech = ps_endpointer_process(ep, frame);
+        if (speech == NULL)
+            continue; /* no speech data returned for this frame */
+        if (!was_in_speech) {
+            fprintf(stderr, "Speech start at %.2f\n", ps_endpointer_speech_start(ep));
+            ps_start_utt(decoder);
+        }
+        if (ps_process_raw(decoder, speech, frame_size, FALSE, FALSE) < 0)
+            E_FATAL("ps_process_raw() failed\n");
+        if ((hyp = ps_get_hyp(decoder, NULL)) != NULL)
+            fprintf(stderr, "PARTIAL RESULT: %s\n", hyp);
+        if (ps_endpointer_in_speech(ep))
+            continue; /* utterance still open; keep feeding frames */
+        fprintf(stderr, "Speech end at %.2f\n", ps_endpointer_speech_end(ep));
+        ps_end_utt(decoder);
+        if ((hyp = ps_get_hyp(decoder, NULL)) != NULL)
+            printf("%s\n", hyp);
+    }
+    pa_simple_free(capture);
+    free(frame);
+    ps_endpointer_free(ep);
+    ps_free(decoder);
+    ps_config_free(config);
+
+    return 0;
+}