Skip to content

Commit 2e0f5c4

Browse files
authored
Merge pull request #39 from nalbion/feature/lex-ai
Feature/lex ai
2 parents a2907e5 + 3fdce07 commit 2e0f5c4

File tree

8 files changed

+216
-69
lines changed

8 files changed

+216
-69
lines changed

src/main/java/org/openasr/idear/asr/ASRControlLoop.kt

+9-25
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,13 @@ import com.intellij.openapi.application.ApplicationInfo
77
import com.intellij.openapi.application.ApplicationManager
88
import com.intellij.openapi.util.Pair
99
import com.intellij.util.Consumer
10+
import org.openasr.idear.GoogleHelper
1011
import org.openasr.idear.GoogleHelper.getBestTextForUtterance
1112
import org.openasr.idear.WordToNumberConverter
1213
import org.openasr.idear.actions.ExecuteVoiceCommandAction
1314
import org.openasr.idear.actions.recognition.SurroundWithNoNullCheckRecognizer
1415
import org.openasr.idear.ide.IDEService
1516
import org.openasr.idear.ide.IDEService.invokeAction
16-
import org.openasr.idear.recognizer.CustomLiveSpeechRecognizer
1717
import org.openasr.idear.recognizer.CustomMicrophone
1818
import org.openasr.idear.tts.TTSService.say
1919
import java.awt.EventQueue
@@ -25,11 +25,11 @@ import java.util.logging.Logger
2525
import java.util.regex.Pattern
2626
import javax.sound.sampled.AudioSystem
2727

28-
class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runnable {
28+
class ASRControlLoop(private val asrProvider: ASRProvider) : Runnable {
2929
override fun run() {
3030
while (!ListeningState.isTerminated) {
3131
// This blocks on a recognition result
32-
val result = resultFromRecognizer
32+
val result = asrProvider.waitForUtterance()
3333

3434
if (ListeningState.isInit) {
3535
if (result == HI_IDEA) {
@@ -45,23 +45,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
4545
}
4646
}
4747

48-
private val resultFromRecognizer: String
49-
get() {
50-
val result = recognizer.result
51-
52-
println("Recognized: ")
53-
println("\tTop H: " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
54-
println("\tTop 3H: " + result.getNbest(3))
55-
56-
logger.info("Recognized: ")
57-
logger.info("\tTop H: " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
58-
logger.info("\tTop 3H: " + result.getNbest(3))
59-
60-
return result.hypothesis
61-
}
62-
6348
private fun applyAction(c: String) {
64-
6549
if (c == HI_IDEA) {
6650
// Greet some more
6751
say("Hi, again!")
@@ -291,7 +275,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
291275

292276
var result: String? = null
293277
while ("who is there" != result) {
294-
result = resultFromRecognizer
278+
result = asrProvider.waitForUtterance()
295279
}
296280

297281
say("Hang on, I will be right back")
@@ -305,7 +289,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
305289
say("Jah, jah, jav, jav, jav, a, a, a, va, va, va, va, va")
306290

307291
while (!result!!.contains("wait who") && !result.contains("who are you")) {
308-
result = resultFromRecognizer
292+
result = asrProvider.waitForUtterance()
309293
}
310294

311295
say("It is me, Jah java va va, va, va. Open up already!")
@@ -342,15 +326,15 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
342326
val searchQueryTuple = webSpeechResult ?: return
343327
say("I think you said " + searchQueryTuple.first + ", searching Google now")
344328

345-
org.openasr.idear.GoogleHelper.searchGoogle(searchQueryTuple.first)
329+
GoogleHelper.searchGoogle(searchQueryTuple.first)
346330
}
347331

348332
private /* || searchQuery.second < CONFIDENCE_LEVEL_THRESHOLD */ val webSpeechResult: Pair<String, Double>?
349333
get() {
350334
var searchQueryTuple: Pair<String, Double>? = null
351335
beep()
352336
try {
353-
searchQueryTuple = org.openasr.idear.GoogleHelper.getBestTextForUtterance(CustomMicrophone.recordFromMic(GOOGLE_QUERY_DURATION))
337+
searchQueryTuple = GoogleHelper.getBestTextForUtterance(CustomMicrophone.recordFromMic(GOOGLE_QUERY_DURATION))
354338
} catch (e: IOException) {
355339
logger.log(Level.SEVERE, "Panic! Failed to dump WAV", e)
356340
}
@@ -366,7 +350,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
366350
beep()
367351
var result: String
368352
while (ListeningState.isActive) {
369-
result = resultFromRecognizer
353+
result = asrProvider.waitForUtterance()
370354
if (result == "speech resume") {
371355
beep()
372356
break
@@ -378,7 +362,7 @@ class ASRControlLoop(private val recognizer: CustomLiveSpeechRecognizer) : Runna
378362
var result: String
379363
logger.info("Recognizing number...")
380364
while (true) {
381-
result = resultFromRecognizer
365+
result = asrProvider.waitForUtterance()
382366
if (result.startsWith("jump ")) {
383367
val number = WordToNumberConverter.getNumber(result.substring(5))
384368
logger.info("Recognized number: " + number)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
package com.jetbrains.idear.asr
2+
3+
interface ASRProvider {
4+
fun startRecognition()
5+
fun stopRecognition()
6+
7+
/** Blocks until we recognise something from the user. Called from [ASRControlLoop.run] */
8+
fun waitForUtterance(): String
9+
}
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,21 @@
11
package org.openasr.idear.asr
22

3-
import edu.cmu.sphinx.api.Configuration
4-
import org.openasr.idear.recognizer.CustomLiveSpeechRecognizer
3+
import org.openasr.idear.asr.cmusphinx.CMUSphinxASR
54
import java.io.IOException
65
import java.util.logging.Level
76
import java.util.logging.Logger
87

98
class ASRService {
109
private lateinit var speechThread: Thread
11-
private lateinit var recognizer: CustomLiveSpeechRecognizer
12-
13-
fun init() {
14-
val configuration = Configuration()
15-
configuration.acousticModelPath = ACOUSTIC_MODEL
16-
configuration.dictionaryPath = DICTIONARY_PATH
17-
configuration.grammarPath = GRAMMAR_PATH
18-
configuration.useGrammar = true
19-
configuration.grammarName = "command"
10+
private lateinit var recognizer: ASRProvider
2011

12+
init {
2113
try {
22-
recognizer = CustomLiveSpeechRecognizer(configuration)
23-
// recognizer.setMasterGain(MASTER_GAIN);
14+
recognizer = CMUSphinxASR()
15+
// recognizer = LexASR()
16+
2417
speechThread = Thread(ASRControlLoop(recognizer), "ASR Thread")
25-
recognizer.startRecognition(true)
18+
recognizer.startRecognition()
2619
// Fire up control-loop
2720
speechThread.start()
2821
} catch (e: IOException) {
@@ -31,42 +24,28 @@ class ASRService {
3124
}
3225

3326
fun activate(): Boolean {
34-
// if (getStatus() == Status.INIT) {
35-
// // Cold start prune cache
36-
// recognizer.startRecognition(true);
37-
// }
38-
3927
return ListeningState.activate()
4028
}
4129

4230
fun deactivate(): Boolean {
4331
return ListeningState.standBy()
4432
}
4533

34+
fun terminate() = recognizer.stopRecognition()
35+
4636
fun dispose() {
4737
// Deactivate first, so that user input can no longer
4838
// trigger activation
4939
deactivate()
5040
terminate()
5141
}
5242

53-
private fun terminate() = recognizer.stopRecognition()
54-
5543
companion object {
56-
val MASTER_GAIN = 0.85
57-
val CONFIDENCE_LEVEL_THRESHOLD = 0.5
58-
59-
private val ACOUSTIC_MODEL = "resource:/edu.cmu.sphinx.models.en-us/en-us"
60-
private val DICTIONARY_PATH = "resource:/edu.cmu.sphinx.models.en-us/cmudict-en-us.dict"
61-
private val GRAMMAR_PATH = "resource:/org.openasr.idear/grammars"
62-
6344
private val logger = Logger.getLogger(ASRService::class.java.simpleName)
6445
}
6546
}
6647

6748
// This is solely for testing purposes
6849
fun main(args: Array<String>) {
69-
val asrService = ASRService()
70-
asrService.init()
71-
ListeningState.activate()
50+
ASRService().activate()
7251
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
package org.openasr.idear.asr.cmusphinx
2+
3+
import org.openasr.idear.asr.ASRProvider
4+
import edu.cmu.sphinx.api.Configuration
5+
import java.io.IOException
6+
import java.util.logging.Level
7+
import java.util.logging.Logger
8+
9+
class CMUSphinxASR : ASRProvider {
10+
private lateinit var recognizer: CustomLiveSpeechRecognizer
11+
12+
init {
13+
val configuration = Configuration()
14+
configuration.acousticModelPath = ACOUSTIC_MODEL
15+
configuration.dictionaryPath = DICTIONARY_PATH
16+
configuration.grammarPath = GRAMMAR_PATH
17+
configuration.useGrammar = true
18+
configuration.grammarName = "command"
19+
20+
try {
21+
recognizer = CustomLiveSpeechRecognizer(configuration)
22+
} catch (e: IOException) {
23+
logger.log(Level.SEVERE, "Couldn't initialize speech recognizer:", e)
24+
}
25+
}
26+
27+
override fun waitForUtterance(): String {
28+
val result = recognizer.result
29+
30+
println("Recognized: ")
31+
println("\tTop H: " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
32+
println("\tTop 3H: " + result.getNbest(3))
33+
34+
logger.info("Recognized: ")
35+
logger.info("\tTop H: " + result.result + " / " + result.result.bestToken + " / " + result.result.bestPronunciationResult)
36+
logger.info("\tTop 3H: " + result.getNbest(3))
37+
38+
return result.hypothesis
39+
}
40+
41+
override fun startRecognition() = recognizer.startRecognition()
42+
43+
override fun stopRecognition() = recognizer.stopRecognition()
44+
45+
companion object {
46+
val MASTER_GAIN = 0.85
47+
val CONFIDENCE_LEVEL_THRESHOLD = 0.5
48+
49+
private val ACOUSTIC_MODEL = "resource:/edu.cmu.sphinx.models.en-us/en-us"
50+
private val DICTIONARY_PATH = "resource:/edu.cmu.sphinx.models.en-us/cmudict-en-us.dict"
51+
private val GRAMMAR_PATH = "resource:/org.openasr.idear/grammars"
52+
53+
private val logger = Logger.getLogger(CMUSphinxASR::class.java.simpleName)
54+
}
55+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
package com.jetbrains.idear.asr.cmusphinx
2+
3+
import com.jetbrains.idear.recognizer.CustomMicrophone
4+
import edu.cmu.sphinx.api.AbstractSpeechRecognizer
5+
import edu.cmu.sphinx.api.Configuration
6+
import edu.cmu.sphinx.decoder.ResultListener
7+
import edu.cmu.sphinx.frontend.endpoint.SpeechClassifier
8+
import edu.cmu.sphinx.frontend.util.StreamDataSource
9+
10+
import java.io.IOException
11+
12+
/**
13+
* High-level class for live speech recognition.
14+
*/
15+
class CustomLiveSpeechRecognizer
16+
17+
/**
18+
* Constructs a new live recognition object.
19+
20+
* @param configuration common configuration
21+
* *
22+
* @throws IOException if model IO went wrong
23+
*/
24+
@Throws(IOException::class)
25+
constructor(configuration: Configuration) : AbstractSpeechRecognizer(configuration) {
26+
private val microphone: CustomMicrophone = CustomMicrophone(16000f, 16, true, false)
27+
28+
// sphinx4 default sensitivity is 13.
29+
private val SPEECH_SENSITIVITY = 20
30+
31+
init {
32+
context.getInstance(StreamDataSource::class.java).setInputStream(microphone.stream)
33+
context.setLocalProperty(String.format("speechClassifier->%s", SpeechClassifier.PROP_THRESHOLD), SPEECH_SENSITIVITY)
34+
}
35+
36+
/**
37+
* Starts recognition process.
38+
* @see CustomLiveSpeechRecognizer.stopRecognition
39+
*/
40+
fun startRecognition() {
41+
recognizer.allocate()
42+
microphone.startRecording()
43+
}
44+
45+
/**
46+
* Stops recognition process.
47+
* Recognition process is paused until the next call to startRecognition.
48+
* @see CustomLiveSpeechRecognizer.startRecognition
49+
*/
50+
fun stopRecognition() {
51+
microphone.stopRecording()
52+
recognizer.deallocate()
53+
}
54+
55+
fun addResultListener(listener: ResultListener) {
56+
recognizer.addResultListener(listener)
57+
}
58+
59+
fun removeResultListener(listener: ResultListener) {
60+
recognizer.removeResultListener(listener)
61+
}
62+
63+
64+
// public void setMasterGain(double mg) {
65+
// microphone.setMasterGain(mg);
66+
// }
67+
//
68+
// public void setNoiseLevel(double mg) {
69+
// microphone.setNoiseLevel(mg);
70+
// }
71+
}

src/main/java/org/openasr/idear/ide/IDEService.kt

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ object IDEService {
1010
AnActionEvent(null, dataContext, ActionPlaces.UNKNOWN, Presentation(), ActionManager.getInstance(), 0)
1111
}
1212

13+
/**
14+
* @param action - see [com.intellij.openapi.actionSystem.IdeActions]
15+
*/
1316
fun invokeAction(action: String, actionFactory: (DataContext) -> AnActionEvent = defaultActionFactory) =
1417
DataManager.getInstance().dataContextFromFocus.doWhenDone(Consumer<DataContext> { dataContext: DataContext ->
1518
EventQueue.invokeLater {

src/main/java/org/openasr/idear/recognizer/CustomMicrophone.kt

+1-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ class CustomMicrophone(sampleRate: Float, sampleSize: Int, signed: Boolean, bigE
2424
val stream: AudioInputStream
2525

2626
init {
27-
2827
val format = AudioFormat(sampleRate, sampleSize, 1, signed, bigEndian)
2928

3029
try {
@@ -38,13 +37,11 @@ class CustomMicrophone(sampleRate: Float, sampleSize: Int, signed: Boolean, bigE
3837
logger.warning("Microphone: MASTER_GAIN NOT supported")
3938

4039
//masterGainControl = findMGControl(line);
41-
4240
} catch (e: LineUnavailableException) {
4341
throw IllegalStateException(e)
4442
}
4543

46-
stream = org.openasr.idear.recognizer.AudioInputStreamWithAdjustableGain(
47-
line)
44+
stream = AudioInputStreamWithAdjustableGain(line)
4845
}
4946

5047

0 commit comments

Comments
 (0)