00001
00012 #include "WinBaseLib.h"
00013 #include "NiSpeech.h"
00014
00015
00016
00017 #ifdef ENABLE_NI_SPEECH
00018
00019
00020 using namespace jbxl;
00021 using namespace jbxwl;
00022
00023
00025
00026
00027 CNiSpeech::CNiSpeech(IStream* stream, WAVEFORMATEX* format)
00028 {
00029 m_pSpeechStream = NULL;
00030 m_pSpeechRecognizer = NULL;
00031 m_pSpeechContext = NULL;
00032 m_pSpeechGrammar = NULL;
00033
00034 m_hSpeechEvent = NULL;
00035 m_hStopEvent = NULL;
00036 m_speechThread = NULL;
00037
00038 m_confidence = 0.1;
00039
00040 if (stream!=NULL && format!=NULL) init(stream, format);
00041 }
00042
00043
00044
00045 BOOL CNiSpeech::init(IStream* stream, WAVEFORMATEX* format)
00046 {
00047 DEBUG_INFO("CNiSpeech::init(): START\n");
00048 free();
00049
00050 if (stream==NULL || format==NULL) return FALSE;
00051
00052 HRESULT hr = CoCreateInstance(CLSID_SpStream, NULL, CLSCTX_INPROC_SERVER, __uuidof(ISpStream), (void**)&m_pSpeechStream);
00053 if (FAILED(hr)) {
00054 DEBUG_INFO("CNiSpeech::init(): ERR 1\n");
00055 return FALSE;
00056 }
00057
00058 hr = m_pSpeechStream->SetBaseStream(stream, SPDFID_WaveFormatEx, format);
00059 if (FAILED(hr)) {
00060 DEBUG_INFO("CNiSpeech::init(): ERR 2\n");
00061 releaseNull(m_pSpeechStream);
00062 return FALSE;
00063 }
00064
00065 DEBUG_INFO("CNiSpeech::init(): END\n");
00066 return TRUE;
00067 }
00068
00069
00070
00071 void CNiSpeech::free(void)
00072 {
00073 DEBUG_INFO("CNiSpeech::free(): START\n");
00074 releaseNull(m_pSpeechGrammar);
00075 releaseNull(m_pSpeechContext);
00076 releaseNull(m_pSpeechRecognizer);
00077
00078 DEBUG_INFO("CNiSpeech::free(): END\n");
00079 }
00080
00081
00082
00083 BOOL CNiSpeech::create(LPCTSTR lang)
00084 {
00085 DEBUG_INFO("CNiSpeech::create(): START\n");
00086 ISpObjectToken *pEngineToken = NULL;
00087
00088 releaseNull(m_pSpeechContext);
00089 releaseNull(m_pSpeechRecognizer);
00090
00091 HRESULT hr = CoCreateInstance(CLSID_SpInprocRecognizer, NULL, CLSCTX_INPROC_SERVER, __uuidof(ISpRecognizer), (void**)&m_pSpeechRecognizer);
00092 if (FAILED(hr)) {
00093 DEBUG_INFO("CNiSpeech::create(): ERR 1\n");
00094 return FALSE;
00095 }
00096
00097 m_pSpeechRecognizer->SetInput(m_pSpeechStream, FALSE);
00098 hr = SpFindBestToken(SPCAT_RECOGNIZERS, lang, NULL, &pEngineToken);
00099
00100 if (SUCCEEDED(hr)) {
00101 m_pSpeechRecognizer->SetRecognizer(pEngineToken);
00102 hr = m_pSpeechRecognizer->CreateRecoContext(&m_pSpeechContext);
00103 releaseNull(pEngineToken);
00104 }
00105
00106 if (FAILED(hr)) {
00107 DEBUG_INFO("CNiSpeech::create(): ERR 2\n");
00108 releaseNull(m_pSpeechContext);
00109 releaseNull(m_pSpeechRecognizer);
00110 releaseNull(m_pSpeechStream);
00111 return FALSE;
00112 }
00113
00114 DEBUG_INFO("CNiSpeech::create(): END\n");
00115 return TRUE;
00116 }
00117
00118
00119
00120 BOOL CNiSpeech::load(LPCTSTR file)
00121 {
00122 DEBUG_INFO("CNiSpeech::load(): START\n");
00123
00124 if (m_pSpeechContext==NULL) {
00125 DEBUG_INFO("CNiSpeech::load(): SpeechContext ERR\n");
00126 return FALSE;
00127 }
00128
00129 releaseNull(m_pSpeechGrammar);
00130
00131 HRESULT hr = m_pSpeechContext->CreateGrammar(1, &m_pSpeechGrammar);
00132
00133 if (SUCCEEDED(hr)) hr = m_pSpeechGrammar->LoadCmdFromFile(file, SPLO_STATIC);
00134 if (FAILED(hr)) {
00135 DEBUG_INFO("CNiSpeech::load: ERR 2\n");
00136 releaseNull(m_pSpeechGrammar);
00137 return FALSE;
00138 }
00139
00140 DEBUG_INFO("CNiSpeech::load(): END\n");
00141 return TRUE;
00142 }
00143
00144
00145
00146 BOOL CNiSpeech::start(double confidence=0.1)
00147 {
00148 if (m_pSpeechGrammar==NULL || m_pSpeechRecognizer==NULL || m_pSpeechContext==NULL) return FALSE;
00149 DEBUG_INFO("CNiSpeech::start(): START\n");
00150
00151
00152
00153 m_confidence = confidence;
00154
00155 m_pSpeechGrammar->SetRuleState(NULL, NULL, SPRS_ACTIVE);
00156 m_pSpeechRecognizer->SetRecoState(SPRST_ACTIVE_ALWAYS);
00157 DEBUG_INFO("CNiSpeech::start(): SetRecoState OK\n");
00158 ::DisPatcher();
00159 ::Sleep(100);
00160
00161 m_pSpeechContext->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION));
00162 HRESULT hr = m_pSpeechContext->Resume(0);
00163 if (FAILED(hr)) {
00164 DEBUG_INFO("CNiSpeech::start(): Resume ERR\n");
00165 free();
00166 return FALSE;
00167 }
00168 DEBUG_INFO("CNiSpeech::start(): Resume OK\n");
00169
00170 m_hStopEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
00171 m_hSpeechEvent = m_pSpeechContext->GetNotifyEventHandle();
00172
00173 m_speechThread = AfxBeginThread(speechThread, (LPVOID)this, THREAD_PRIORITY_NORMAL, 0, CREATE_SUSPENDED);
00174 m_speechThread->m_bAutoDelete = FALSE;
00175 m_speechThread->ResumeThread();
00176 ::Sleep(10);
00177
00178 DEBUG_INFO("CNiSpeech::start(): END");
00179 return TRUE;
00180 }
00181
00182
00183
00184 void CNiSpeech::stop(void)
00185 {
00186 DEBUG_INFO("CNiSpeech::stop(): START\n");
00187
00188 if (m_hStopEvent!=NULL) {
00189 SetEvent(m_hStopEvent);
00190
00191 if (m_speechThread!=NULL) {
00192 WaitForSingleObject(m_speechThread->m_hThread, INFINITE);
00193 CloseHandle(m_speechThread->m_hThread);
00194 delete m_speechThread;
00195 m_speechThread = NULL;
00196
00197
00198 DEBUG_INFO("CNiSpeech::stop(): Pause OK\n");
00199 }
00200 CloseHandle(m_hStopEvent);
00201 m_hStopEvent = NULL;
00202 }
00203
00204
00205 if (m_hSpeechEvent!=NULL) {
00206 CloseHandle(m_hSpeechEvent);
00207 m_hSpeechEvent = NULL;
00208 }
00209
00210 DEBUG_INFO("CNiSpeech::stop(): END\n");
00211 return;
00212 }
00213
00214
00215
00216 void CNiSpeech::process(void)
00217 {
00218 SPEVENT curEvent;
00219 ULONG fetched = 0;
00220
00221 m_pSpeechContext->GetEvents(1, &curEvent, &fetched);
00222
00223 while (fetched>0) {
00224 if (curEvent.eEventId==SPEI_RECOGNITION && curEvent.elParamType==SPET_LPARAM_IS_OBJECT) {
00225
00226 ISpRecoResult* result = reinterpret_cast<ISpRecoResult*>(curEvent.lParam);
00227 SPPHRASE* pPhrase = NULL;
00228 result->GetPhrase(&pPhrase);
00229
00230 if (pPhrase!=NULL) {
00231 if (pPhrase->pProperties!=NULL && pPhrase->pProperties->pFirstChild!=NULL) {
00232 const SPPHRASEPROPERTY* pSemanticTag = pPhrase->pProperties->pFirstChild;
00233
00234 map2action(pSemanticTag->pszValue, pSemanticTag->SREngineConfidence);
00235 }
00236 ::CoTaskMemFree(pPhrase);
00237 }
00238 }
00239
00240 m_pSpeechContext->GetEvents(1, &curEvent, &fetched);
00241 }
00242
00243 return;
00244 }
00245
00246
00247
00248 void CNiSpeech::setConfidence(double confd)
00249 {
00250 if (confd>1.0) confd = 1.0;
00251 else if (confd<0.0) confd = 0.0;
00252
00253 m_confidence = confd;
00254 }
00255
00256
00257
00258
00260
00261
00262 UINT CNiSpeech::speechThread(LPVOID pParam)
00263 {
00264 CNiSpeech* pthis = (CNiSpeech*)pParam;
00265 return pthis->speechThread();
00266 }
00267
00268
00269
00270 UINT CNiSpeech::speechThread(void)
00271 {
00272 DEBUG_INFO("CNiSpeech::speechThread(): START\n");
00273
00274 bool bContinue = true;
00275
00276 while(bContinue) {
00277
00278 if (WaitForSingleObject(m_hStopEvent, 0)==WAIT_OBJECT_0) {
00279 bContinue = false;
00280 continue;
00281 }
00282
00283 if (WaitForSingleObject(m_hSpeechEvent, 0)==WAIT_OBJECT_0) {
00284 process();
00285 }
00286
00287 Sleep(10);
00288 }
00289
00290 DEBUG_INFO("CNiSpeech::speechThread(): END\n");
00291 return TRUE;
00292 }
00293
00294
00295
00296 void CNiSpeech::map2action(LPCTSTR tag, double confd)
00297 {
00298 WORD param;
00299
00300 if (confd>=m_confidence) {
00301 param = TRUE;
00302 SendWinMessage(JBXWL_WM_SPEECH_EVENT, (WPARAM)¶m, (LPARAM)tag);
00303 }
00304 else {
00305 param = FALSE;
00306 SendWinMessage(JBXWL_WM_SPEECH_EVENT, (WPARAM)¶m, (LPARAM)_T("N/A"));
00307 }
00308 }
00309
00310
00311 #endif // ENABLE_NI_SPEECH
00312