Merge pull request #39 from nalbion/feature/lex-ai

nalbion · web-flow · commit 2e0f5c48d967 · 2017-07-11T00:07:38.000+10:00
Feature/lex ai
diff --git a/src/main/java/org/openasr/idear/asr/ASRControlLoop.kt b/src/main/java/org/openasr/idear/asr/ASRControlLoop.kt
@@ -7,13 +7,13 @@ import com.intellij.openapi.application.ApplicationInfo
 import com.intellij.openapi.application.ApplicationManager
 import com.intellij.openapi.util.Pair
 import com.intellij.util.Consumer
+import org.openasr.idear.GoogleHelper
 import org.openasr.idear.GoogleHelper.getBestTextForUtterance
 import org.openasr.idear.WordToNumberConverter
 import org.openasr.idear.actions.ExecuteVoiceCommandAction
 import org.openasr.idear.actions.recognition.SurroundWithNoNullCheckRecognizer
 import org.openasr.idear.ide.IDEService
 import org.openasr.idear.ide.IDEService.invokeAction
-import org.openasr.idear.recognizer.CustomLiveSpeechRecognizer
 import org.openasr.idear.recognizer.CustomMicrophone
 import org.openasr.idear.tts.TTSService.say
 import java.awt.EventQueue
@@ -25,11 +25,11 @@ import java.util.logging.Logger
 import java.util.regex.Pattern
 import javax.sound.sampled.AudioSystem
 
-class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runnable {
+class ASRControlLoop(private val asrProvider: ASRProvider) : Runnable {
     override fun run() {
         while (!ListeningState.isTerminated) {
             // This blocks on a recognition result
-            val result = resultFromRecognizer
+            val result = asrProvider.waitForUtterance()
 
             if (ListeningState.isInit) {
                 if (result == HI_IDEA) {
@@ -45,23 +45,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
         }
     }
 
-    private val resultFromRecognizer: String
-        get() {
-            val result = recognizer.result
-
-            println("Recognized: ")
-            println("\tTop H:       " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
-            println("\tTop 3H:      " + result.getNbest(3))
-
-            logger.info("Recognized:    ")
-            logger.info("\tTop H:       " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
-            logger.info("\tTop 3H:      " + result.getNbest(3))
-
-            return result.hypothesis
-        }
-
     private fun applyAction(c: String) {
-
         if (c == HI_IDEA) {
             // Greet some more
             say("Hi, again!")
@@ -291,7 +275,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
 
         var result: String? = null
         while ("who is there" != result) {
-            result = resultFromRecognizer
+            result = asrProvider.waitForUtterance()
         }
 
         say("Hang on, I will be right back")
@@ -305,7 +289,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
         say("Jah, jah, jav, jav, jav, a, a, a, va, va, va, va, va")
 
         while (!result!!.contains("wait who") && !result.contains("who are you")) {
-            result = resultFromRecognizer
+            result = asrProvider.waitForUtterance()
         }
 
         say("It is me, Jah java va va, va, va. Open up already!")
@@ -342,15 +326,15 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
         val searchQueryTuple = webSpeechResult ?: return
         say("I think you said " + searchQueryTuple.first + ", searching Google now")
 
-        org.openasr.idear.GoogleHelper.searchGoogle(searchQueryTuple.first)
+        GoogleHelper.searchGoogle(searchQueryTuple.first)
     }
 
     private /* || searchQuery.second < CONFIDENCE_LEVEL_THRESHOLD */ val webSpeechResult: Pair<String, Double>?
         get() {
             var searchQueryTuple: Pair<String, Double>? = null
             beep()
             try {
-                searchQueryTuple = org.openasr.idear.GoogleHelper.getBestTextForUtterance(CustomMicrophone.recordFromMic(GOOGLE_QUERY_DURATION))
+                searchQueryTuple = GoogleHelper.getBestTextForUtterance(CustomMicrophone.recordFromMic(GOOGLE_QUERY_DURATION))
             } catch (e: IOException) {
                 logger.log(Level.SEVERE, "Panic! Failed to dump WAV", e)
             }
@@ -366,7 +350,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
         beep()
         var result: String
         while (ListeningState.isActive) {
-            result = resultFromRecognizer
+            result = asrProvider.waitForUtterance()
             if (result == "speech resume") {
                 beep()
                 break
@@ -378,7 +362,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
         var result: String
         logger.info("Recognizing number...")
         while (true) {
-            result = resultFromRecognizer
+            result = asrProvider.waitForUtterance()
             if (result.startsWith("jump ")) {
                 val number = WordToNumberConverter.getNumber(result.substring(5))
                 logger.info("Recognized number: " + number)
diff --git a/src/main/java/org/openasr/idear/asr/ASRProvider.kt b/src/main/java/org/openasr/idear/asr/ASRProvider.kt
@@ -0,0 +1,18 @@
+package org.openasr.idear.asr
+
+/**
+ * Abstraction over a speech recognition back-end.
+ *
+ * [ASRService] starts and stops an implementation, and [ASRControlLoop] pulls recognised
+ * utterances from it, so neither of them depends on a concrete recognizer.
+ */
+interface ASRProvider {
+    /** Starts the recognition process. [ASRService] calls this before starting the control-loop thread. */
+    fun startRecognition()
+
+    /** Stops the recognition process. Called from [ASRService.terminate]. */
+    fun stopRecognition()
+
+    /** Blocks until we recognise something from the user. Called from [ASRControlLoop.run] */
+    fun waitForUtterance(): String
+}
diff --git a/src/main/java/org/openasr/idear/asr/ASRService.kt b/src/main/java/org/openasr/idear/asr/ASRService.kt
@@ -1,28 +1,21 @@
 package org.openasr.idear.asr
 
-import edu.cmu.sphinx.api.Configuration
-import org.openasr.idear.recognizer.CustomLiveSpeechRecognizer
+import org.openasr.idear.asr.cmusphinx.CMUSphinxASR
 import java.io.IOException
 import java.util.logging.Level
 import java.util.logging.Logger
 
 class ASRService {
     private lateinit var speechThread: Thread
-    private lateinit var recognizer: CustomLiveSpeechRecognizer
-
-    fun init() {
-        val configuration = Configuration()
-        configuration.acousticModelPath = ACOUSTIC_MODEL
-        configuration.dictionaryPath = DICTIONARY_PATH
-        configuration.grammarPath = GRAMMAR_PATH
-        configuration.useGrammar = true
-        configuration.grammarName = "command"
+    private lateinit var recognizer: ASRProvider
 
+    init {
         try {
-            recognizer = CustomLiveSpeechRecognizer(configuration)
-            //            recognizer.setMasterGain(MASTER_GAIN);
+            recognizer = CMUSphinxASR()
+//            recognizer = LexASR()
+
             speechThread = Thread(ASRControlLoop(recognizer), "ASR Thread")
-            recognizer.startRecognition(true)
+            recognizer.startRecognition()
             // Fire up control-loop
             speechThread.start()
         } catch (e: IOException) {
@@ -31,42 +24,28 @@ class ASRService {
     }
 
     fun activate(): Boolean {
-        //        if (getStatus() == Status.INIT) {
-        //            // Cold start prune cache
-        //            recognizer.startRecognition(true);
-        //        }
-
         return ListeningState.activate()
     }
 
     fun deactivate(): Boolean {
         return ListeningState.standBy()
     }
 
+    fun terminate() = recognizer.stopRecognition()
+
     fun dispose() {
         // Deactivate in the first place, therefore actually
         // prevent activation upon the user-input
         deactivate()
         terminate()
     }
 
-    private fun terminate() = recognizer.stopRecognition()
-
     companion object {
-        val MASTER_GAIN = 0.85
-        val CONFIDENCE_LEVEL_THRESHOLD = 0.5
-
-        private val ACOUSTIC_MODEL = "resource:/edu.cmu.sphinx.models.en-us/en-us"
-        private val DICTIONARY_PATH = "resource:/edu.cmu.sphinx.models.en-us/cmudict-en-us.dict"
-        private val GRAMMAR_PATH = "resource:/org.openasr.idear/grammars"
-
         private val logger = Logger.getLogger(ASRService::class.java.simpleName)
     }
 }
 
 // This is for testing purposes solely
 fun main(args: Array<String>) {
-    val asrService = ASRService()
-    asrService.init()
-    ListeningState.activate()
+    ASRService().activate()
 }
diff --git a/src/main/java/org/openasr/idear/asr/cmusphinx/CMUSphinxASR.kt b/src/main/java/org/openasr/idear/asr/cmusphinx/CMUSphinxASR.kt
@@ -0,0 +1,70 @@
+package org.openasr.idear.asr.cmusphinx
+
+import org.openasr.idear.asr.ASRProvider
+import edu.cmu.sphinx.api.Configuration
+import java.io.IOException
+import java.util.logging.Level
+import java.util.logging.Logger
+
+/**
+ * [ASRProvider] backed by CMU Sphinx through [CustomLiveSpeechRecognizer].
+ *
+ * The recognizer is configured with the en-us acoustic model and dictionary resources and
+ * the "command" grammar.
+ *
+ * Construction throws [IOException] when the recognizer cannot be created: the failure is
+ * logged and rethrown instead of being swallowed, because every method here needs the recognizer.
+ */
+class CMUSphinxASR @Throws(IOException::class) constructor() : ASRProvider {
+    private val recognizer: CustomLiveSpeechRecognizer = try {
+        CustomLiveSpeechRecognizer(Configuration().apply {
+            acousticModelPath = ACOUSTIC_MODEL
+            dictionaryPath = DICTIONARY_PATH
+            grammarPath = GRAMMAR_PATH
+            useGrammar = true
+            grammarName = "command"
+        })
+    } catch (e: IOException) {
+        logger.log(Level.SEVERE, "Couldn't initialize speech recognizer:", e)
+        throw e
+    }
+
+    /**
+     * Blocks until the recognizer yields a result, reports it on stdout and to the logger,
+     * and returns its hypothesis.
+     */
+    override fun waitForUtterance(): String {
+        val result = recognizer.result
+        val top = result.result
+        val topLine = "\tTop H:       " + top + " / " + top.bestToken + " / " + top.bestPronunciationResult
+        val nBestLine = "\tTop 3H:      " + result.getNbest(3)
+
+        println("Recognized: ")
+        println(topLine)
+        println(nBestLine)
+
+        logger.info("Recognized:    ")
+        logger.info(topLine)
+        logger.info(nBestLine)
+
+        return result.hypothesis
+    }
+
+    /** Delegates to [CustomLiveSpeechRecognizer.startRecognition]. */
+    override fun startRecognition() = recognizer.startRecognition()
+
+    /** Delegates to [CustomLiveSpeechRecognizer.stopRecognition]. */
+    override fun stopRecognition() = recognizer.stopRecognition()
+
+    companion object {
+        // The two tuning constants below are not referenced by the code in this class.
+        val MASTER_GAIN = 0.85
+        val CONFIDENCE_LEVEL_THRESHOLD = 0.5
+
+        private val ACOUSTIC_MODEL = "resource:/edu.cmu.sphinx.models.en-us/en-us"
+        private val DICTIONARY_PATH = "resource:/edu.cmu.sphinx.models.en-us/cmudict-en-us.dict"
+        private val GRAMMAR_PATH = "resource:/org.openasr.idear/grammars"
+
+        private val logger = Logger.getLogger(CMUSphinxASR::class.java.simpleName)
+    }
+}
diff --git a/src/main/java/org/openasr/idear/asr/cmusphinx/CustomLiveSpeechRecognizer.kt b/src/main/java/org/openasr/idear/asr/cmusphinx/CustomLiveSpeechRecognizer.kt
@@ -0,0 +1,74 @@
+package org.openasr.idear.asr.cmusphinx
+
+import org.openasr.idear.recognizer.CustomMicrophone
+import edu.cmu.sphinx.api.AbstractSpeechRecognizer
+import edu.cmu.sphinx.api.Configuration
+import edu.cmu.sphinx.decoder.ResultListener
+import edu.cmu.sphinx.frontend.endpoint.SpeechClassifier
+import edu.cmu.sphinx.frontend.util.StreamDataSource
+
+import java.io.IOException
+
+/**
+ * High-level class for live speech recognition.
+ *
+ * Feeds the [CustomMicrophone] stream into the recognizer's [StreamDataSource] and sets the
+ * speech classifier threshold to `SPEECH_SENSITIVITY`.
+ *
+ * @constructor Constructs new live recognition object.
+ * @param configuration common configuration
+ * @throws IOException if model IO went wrong
+ */
+class CustomLiveSpeechRecognizer
+@Throws(IOException::class)
+constructor(configuration: Configuration) : AbstractSpeechRecognizer(configuration) {
+    // Arguments are, in order: sampleRate, sampleSize, signed, bigEndian.
+    private val microphone: CustomMicrophone = CustomMicrophone(16000f, 16, true, false)
+
+    // sphinx4 default sensitivity is 13.
+    private val SPEECH_SENSITIVITY = 20
+
+    init {
+        // Wire the microphone in as the audio source, then override the classifier threshold.
+        context.getInstance(StreamDataSource::class.java).setInputStream(microphone.stream)
+        context.setLocalProperty(String.format("speechClassifier->%s", SpeechClassifier.PROP_THRESHOLD), SPEECH_SENSITIVITY)
+    }
+
+    /**
+     * Starts recognition process: allocates the recognizer, then starts microphone recording.
+     * @see CustomLiveSpeechRecognizer.stopRecognition
+     */
+    fun startRecognition() {
+        recognizer.allocate()
+        microphone.startRecording()
+    }
+
+    /**
+     * Stops recognition process: stops microphone recording, then deallocates the recognizer.
+     * Recognition process is paused until the next call to startRecognition.
+     * @see CustomLiveSpeechRecognizer.startRecognition
+     */
+    fun stopRecognition() {
+        microphone.stopRecording()
+        recognizer.deallocate()
+    }
+
+    /** Registers [listener] on the underlying recognizer. */
+    fun addResultListener(listener: ResultListener) {
+        recognizer.addResultListener(listener)
+    }
+
+    /** Removes [listener] from the underlying recognizer. */
+    fun removeResultListener(listener: ResultListener) {
+        recognizer.removeResultListener(listener)
+    }
+
+
+    //    public void setMasterGain(double mg) {
+    //        microphone.setMasterGain(mg);
+    //    }
+    //
+    //    public void setNoiseLevel(double mg) {
+    //        microphone.setNoiseLevel(mg);
+    //    }
+}
diff --git a/src/main/java/org/openasr/idear/ide/IDEService.kt b/src/main/java/org/openasr/idear/ide/IDEService.kt
@@ -10,6 +10,9 @@ object IDEService {
         AnActionEvent(null, dataContext, ActionPlaces.UNKNOWN, Presentation(), ActionManager.getInstance(), 0)
     }
 
+    /**
+     * @param action - see [com.intellij.openapi.actionSystem.IdeActions]
+     */
     fun invokeAction(action: String, actionFactory: (DataContext) -> AnActionEvent = defaultActionFactory) =
             DataManager.getInstance().dataContextFromFocus.doWhenDone(Consumer<DataContext> { dataContext: DataContext ->
                 EventQueue.invokeLater {
diff --git a/src/main/java/org/openasr/idear/recognizer/CustomMicrophone.kt b/src/main/java/org/openasr/idear/recognizer/CustomMicrophone.kt
@@ -24,7 +24,6 @@ class CustomMicrophone(sampleRate: Float, sampleSize: Int, signed: Boolean, bigE
     val stream: AudioInputStream
 
     init {
-
         val format = AudioFormat(sampleRate, sampleSize, 1, signed, bigEndian)
 
         try {
@@ -38,13 +37,11 @@ class CustomMicrophone(sampleRate: Float, sampleSize: Int, signed: Boolean, bigE
                 logger.warning("Microphone: MASTER_GAIN NOT supported")
 
             //masterGainControl = findMGControl(line);
-
         } catch (e: LineUnavailableException) {
             throw IllegalStateException(e)
         }
 
-        stream = org.openasr.idear.recognizer.AudioInputStreamWithAdjustableGain(
-            line)
+        stream = AudioInputStreamWithAdjustableGain(line)
     }
 
 
diff --git a/src/main/java/org/openasr/idear/tts/TTSService.kt b/src/main/java/org/openasr/idear/tts/TTSService.kt

Original file line number	Diff line number	Diff line change
`@@ -10,6 +10,9 @@ object IDEService {`
`10`	`10`	`AnActionEvent(null, dataContext, ActionPlaces.UNKNOWN, Presentation(), ActionManager.getInstance(), 0)`
`11`	`11`	`}`
`12`	`12`
	`13`	`+ /**`
	`14`	`+ * @param action - see [com.intellij.openapi.actionSystem.IdeActions]`
	`15`	`+ */`
`13`	`16`	`fun invokeAction(action: String, actionFactory: (DataContext) -> AnActionEvent = defaultActionFactory) =`
`14`	`17`	`DataManager.getInstance().dataContextFromFocus.doWhenDone(Consumer<DataContext> { dataContext: DataContext ->`
`15`	`18`	`EventQueue.invokeLater {`