1
0
mirror of https://github.com/wine-mirror/wine synced 2024-07-01 07:14:31 +00:00
wine/dlls/windows.media.speech/recognizer.c

1251 lines
40 KiB
C

/* WinRT Windows.Media.SpeechRecognition implementation
*
* Copyright 2022 Bernhard Kölbl
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "private.h"
#include "initguid.h"
#include "audioclient.h"
#include "mmdeviceapi.h"
#include "wine/debug.h"
WINE_DEFAULT_DEBUG_CHANNEL(speech);
/*
*
* ISpeechRecognitionCompilationResult
*
*/
/* Backing object for the ISpeechRecognitionCompilationResult interface. */
struct compilation_result
{
ISpeechRecognitionCompilationResult ISpeechRecognitionCompilationResult_iface;
LONG ref; /* reference count, adjusted with Interlocked* in AddRef/Release */
SpeechRecognitionResultStatus status; /* value handed out by get_Status */
};
/* Map an ISpeechRecognitionCompilationResult interface pointer back to its containing object. */
static inline struct compilation_result *impl_from_ISpeechRecognitionCompilationResult( ISpeechRecognitionCompilationResult *iface )
{
    struct compilation_result *result;

    result = CONTAINING_RECORD(iface, struct compilation_result, ISpeechRecognitionCompilationResult_iface);
    return result;
}
/*
 * IUnknown::QueryInterface for the compilation result.
 *
 * Answers IUnknown, IInspectable, IAgileObject and ISpeechRecognitionCompilationResult
 * with the single interface of the object (adding a reference); any other IID yields
 * E_NOINTERFACE with *out cleared.
 */
static HRESULT WINAPI compilation_result_QueryInterface( ISpeechRecognitionCompilationResult *iface, REFIID iid, void **out )
{
    struct compilation_result *result = impl_from_ISpeechRecognitionCompilationResult(iface);

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    if (!IsEqualGUID(iid, &IID_IUnknown) &&
        !IsEqualGUID(iid, &IID_IInspectable) &&
        !IsEqualGUID(iid, &IID_IAgileObject) &&
        !IsEqualGUID(iid, &IID_ISpeechRecognitionCompilationResult))
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));
        *out = NULL;
        return E_NOINTERFACE;
    }

    IInspectable_AddRef((*out = &result->ISpeechRecognitionCompilationResult_iface));
    return S_OK;
}
/* IUnknown::AddRef: atomically bump the reference count and return the new value. */
static ULONG WINAPI compilation_result_AddRef( ISpeechRecognitionCompilationResult *iface )
{
    struct compilation_result *result = impl_from_ISpeechRecognitionCompilationResult(iface);
    ULONG refcount;

    refcount = InterlockedIncrement(&result->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    return refcount;
}
/* IUnknown::Release: atomically drop one reference and free the object when none remain. */
static ULONG WINAPI compilation_result_Release( ISpeechRecognitionCompilationResult *iface )
{
    struct compilation_result *result = impl_from_ISpeechRecognitionCompilationResult(iface);
    ULONG refcount;

    refcount = InterlockedDecrement(&result->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    if (refcount == 0)
        free(result);

    return refcount;
}
/* IInspectable::GetIids is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI compilation_result_GetIids( ISpeechRecognitionCompilationResult *iface, ULONG *iid_count, IID **iids )
{
    FIXME("iface %p, iid_count %p, iids %p stub!\n", iface, iid_count, iids);

    return E_NOTIMPL;
}
/* IInspectable::GetRuntimeClassName is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI compilation_result_GetRuntimeClassName( ISpeechRecognitionCompilationResult *iface, HSTRING *class_name )
{
    FIXME("iface %p, class_name %p stub!\n", iface, class_name);

    return E_NOTIMPL;
}
/* IInspectable::GetTrustLevel is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI compilation_result_GetTrustLevel( ISpeechRecognitionCompilationResult *iface, TrustLevel *trust_level )
{
    FIXME("iface %p, trust_level %p stub!\n", iface, trust_level);

    return E_NOTIMPL;
}
/* Copy the status recorded at creation time into *value; always returns S_OK. */
static HRESULT WINAPI compilation_result_get_Status( ISpeechRecognitionCompilationResult *iface, SpeechRecognitionResultStatus *value )
{
    const struct compilation_result *result = impl_from_ISpeechRecognitionCompilationResult(iface);

    TRACE("iface %p, value %p.\n", iface, value);

    *value = result->status;

    return S_OK;
}
/* Method table installed by compilation_result_create; entries are positional, keep their order. */
static const struct ISpeechRecognitionCompilationResultVtbl compilation_result_vtbl =
{
/* IUnknown methods */
compilation_result_QueryInterface,
compilation_result_AddRef,
compilation_result_Release,
/* IInspectable methods */
compilation_result_GetIids,
compilation_result_GetRuntimeClassName,
compilation_result_GetTrustLevel,
/* ISpeechRecognitionCompilationResult methods */
compilation_result_get_Status
};
/*
 * Allocate a compilation result carrying the given status.
 *
 * On success *out receives the new object with a reference count of 1 and S_OK is
 * returned. On allocation failure *out is set to NULL and E_OUTOFMEMORY is returned.
 */
static HRESULT compilation_result_create( SpeechRecognitionResultStatus status, ISpeechRecognitionCompilationResult **out )
{
    struct compilation_result *result;

    TRACE("out %p.\n", out);

    result = calloc(1, sizeof(*result));
    if (!result)
    {
        *out = NULL;
        return E_OUTOFMEMORY;
    }

    result->ISpeechRecognitionCompilationResult_iface.lpVtbl = &compilation_result_vtbl;
    result->status = status;
    result->ref = 1;

    *out = &result->ISpeechRecognitionCompilationResult_iface;
    TRACE("created %p\n", *out);

    return S_OK;
}
/*
*
* SpeechContinuousRecognitionSession
*
*/
/* State of a continuous recognition session, including its audio capture worker. */
struct session
{
ISpeechContinuousRecognitionSession ISpeechContinuousRecognitionSession_iface;
LONG ref; /* reference count, adjusted with Interlocked* in AddRef/Release */
IVector_ISpeechRecognitionConstraint *constraints; /* handed out by recognizer_get_Constraints */
SpeechRecognizerState recognizer_state; /* written by Start/Stop/Pause/Resume, read by recognizer2_get_State under cs */
struct list completed_handlers; /* handlers registered through add_Completed */
struct list result_handlers; /* handlers registered through add_ResultGenerated */
IAudioClient *audio_client;
IAudioCaptureClient *capture_client;
WAVEFORMATEX capture_wfx; /* format passed to IAudioClient_Initialize */
/* worker_control_event wakes the worker so it re-reads the flags below;
 * audio_buf_event is the event registered with IAudioClient_SetEventHandle. */
HANDLE worker_thread, worker_control_event, audio_buf_event;
BOOLEAN worker_running, worker_paused; /* requested worker state, accessed under cs */
CRITICAL_SECTION cs;
};
/*
*
* ISpeechContinuousRecognitionSession
*
*/
/* Map an ISpeechContinuousRecognitionSession interface pointer back to its containing session. */
static inline struct session *impl_from_ISpeechContinuousRecognitionSession( ISpeechContinuousRecognitionSession *iface )
{
    struct session *session;

    session = CONTAINING_RECORD(iface, struct session, ISpeechContinuousRecognitionSession_iface);
    return session;
}
/*
 * Worker thread of a continuous recognition session.
 *
 * args is the pointer handed to CreateThread by session_StartAsync. The thread starts
 * the capture stream and then loops, waiting on worker_control_event (a state change
 * was requested through Stop/Pause/Resume/Release) and, while not paused, on
 * audio_buf_event (capture data is ready). Captured packets are copied into a scratch
 * buffer; passing them to a recognizer is still a TODO.
 *
 * Returns 0 after a regular shutdown and 1 when the worker had to abort. On every exit
 * path the capture stream is stopped again, so that a later worker can call
 * IAudioClient_Start on the same client.
 */
static DWORD CALLBACK session_worker_thread_cb( void *args )
{
    ISpeechContinuousRecognitionSession *iface = args;
    struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
    BOOLEAN running = TRUE, paused = FALSE;
    UINT32 frame_count, tmp_buf_size;
    BYTE *audio_buf, *tmp_buf = NULL;
    DWORD flags, status;
    HANDLE events[2];
    HRESULT hr;

    SetThreadDescription(GetCurrentThread(), L"wine_speech_recognition_session_worker");

    if (FAILED(hr = IAudioClient_Start(impl->audio_client)))
        goto error;
    if (FAILED(hr = IAudioClient_GetBufferSize(impl->audio_client, &frame_count)))
        goto error;

    tmp_buf_size = sizeof(*tmp_buf) * frame_count * impl->capture_wfx.nBlockAlign;
    if (!(tmp_buf = malloc(tmp_buf_size)))
    {
        ERR("Memory allocation failed.\n");
        /* The stream was started above; don't leave it running without a consumer. */
        IAudioClient_Stop(impl->audio_client);
        IAudioClient_Reset(impl->audio_client);
        return 1;
    }

    while (running)
    {
        BOOLEAN old_paused = paused;
        UINT32 count = 0;

        /* The audio event is only waited on while capturing, so index 1 implies !paused. */
        events[count++] = impl->worker_control_event;
        if (!paused) events[count++] = impl->audio_buf_event;

        status = WaitForMultipleObjects(count, events, FALSE, INFINITE);
        if (status == 0) /* worker_control_event signaled */
        {
            EnterCriticalSection(&impl->cs);
            paused = impl->worker_paused;
            running = impl->worker_running;
            LeaveCriticalSection(&impl->cs);

            if (old_paused < paused)
            {
                if (FAILED(hr = IAudioClient_Stop(impl->audio_client))) goto error;
                if (FAILED(hr = IAudioClient_Reset(impl->audio_client))) goto error;
                TRACE("session worker paused.\n");
            }
            else if (old_paused > paused)
            {
                if (FAILED(hr = IAudioClient_Start(impl->audio_client))) goto error;
                TRACE("session worker resumed.\n");
            }
        }
        else if (status == 1) /* audio_buf_event signaled */
        {
            SIZE_T packet_size = 0, tmp_buf_offset = 0;
            UINT32 frames_available = 0;

            while (tmp_buf_offset < tmp_buf_size
                   && IAudioCaptureClient_GetBuffer(impl->capture_client, &audio_buf, &frames_available, &flags, NULL, NULL) == S_OK)
            {
                packet_size = frames_available * impl->capture_wfx.nBlockAlign;
                if (tmp_buf_offset + packet_size > tmp_buf_size)
                {
                    /* Defer processing until the next iteration of the worker loop. */
                    IAudioCaptureClient_ReleaseBuffer(impl->capture_client, 0);
                    SetEvent(impl->audio_buf_event);
                    break;
                }

                memcpy(tmp_buf + tmp_buf_offset, audio_buf, packet_size);
                tmp_buf_offset += packet_size;

                IAudioCaptureClient_ReleaseBuffer(impl->capture_client, frames_available);
            }

            /* TODO: Send mic data to recognizer and handle results. */
        }
        else
        {
            ERR("Unexpected state entered. Aborting worker.\n");
            break;
        }
    }

    if (FAILED(hr = IAudioClient_Stop(impl->audio_client)))
        ERR("IAudioClient_Stop failed with %#lx.\n", hr);
    if (FAILED(hr = IAudioClient_Reset(impl->audio_client)))
        ERR("IAudioClient_Reset failed with %#lx.\n", hr);

    free(tmp_buf);
    return 0;

error:
    ERR("The recognition session worker encountered a serious error and needs to stop. hr: %lx.\n", hr);
    /* Best effort: the stream may still be started (e.g. GetBufferSize failed right after
     * Start). Stopping an already stopped stream is harmless, so do it unconditionally. */
    IAudioClient_Stop(impl->audio_client);
    IAudioClient_Reset(impl->audio_client);
    free(tmp_buf);
    return 1;
}
/*
 * IUnknown::QueryInterface for the session.
 *
 * Answers IUnknown, IInspectable and ISpeechContinuousRecognitionSession with the
 * session interface (adding a reference); any other IID yields E_NOINTERFACE with
 * *out cleared.
 */
static HRESULT WINAPI session_QueryInterface( ISpeechContinuousRecognitionSession *iface, REFIID iid, void **out )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    if (!IsEqualGUID(iid, &IID_IUnknown) &&
        !IsEqualGUID(iid, &IID_IInspectable) &&
        !IsEqualGUID(iid, &IID_ISpeechContinuousRecognitionSession))
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));
        *out = NULL;
        return E_NOINTERFACE;
    }

    IInspectable_AddRef((*out = &session->ISpeechContinuousRecognitionSession_iface));
    return S_OK;
}
/* IUnknown::AddRef: atomically bump the reference count and return the new value. */
static ULONG WINAPI session_AddRef( ISpeechContinuousRecognitionSession *iface )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    ULONG refcount;

    refcount = InterlockedIncrement(&session->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    return refcount;
}
/*
 * IUnknown::Release for the session.
 *
 * When the last reference goes away the worker thread (if any) is asked to exit and
 * joined, then all resources owned by the session are released: event handlers, the
 * audio clients, both event handles, the critical section, the constraints vector and
 * finally the object itself.
 *
 * Returns the new reference count.
 */
static ULONG WINAPI session_Release( ISpeechContinuousRecognitionSession *iface )
{
    struct session *impl = impl_from_ISpeechContinuousRecognitionSession(iface);
    ULONG ref = InterlockedDecrement(&impl->ref);

    TRACE("iface %p, ref %lu.\n", iface, ref);

    if (!ref)
    {
        HANDLE thread;

        EnterCriticalSection(&impl->cs);
        thread = impl->worker_thread;
        impl->worker_running = FALSE;
        impl->worker_thread = INVALID_HANDLE_VALUE;
        LeaveCriticalSection(&impl->cs);

        SetEvent(impl->worker_control_event);

        /* worker_thread is NULL when the session was never started (or was stopped
         * again); only join and close a real thread handle. INVALID_HANDLE_VALUE must
         * not be waited on either, it aliases the current-process pseudo handle. */
        if (thread && thread != INVALID_HANDLE_VALUE)
        {
            WaitForSingleObject(thread, INFINITE);
            CloseHandle(thread);
        }

        typed_event_handlers_clear(&impl->completed_handlers);
        typed_event_handlers_clear(&impl->result_handlers);

        IAudioCaptureClient_Release(impl->capture_client);
        IAudioClient_Release(impl->audio_client);

        /* The events are owned by the session; close them only after the worker has
         * exited and the audio client that was given audio_buf_event is released. */
        if (impl->audio_buf_event) CloseHandle(impl->audio_buf_event);
        if (impl->worker_control_event) CloseHandle(impl->worker_control_event);

        impl->cs.DebugInfo->Spare[0] = 0;
        DeleteCriticalSection(&impl->cs);

        IVector_ISpeechRecognitionConstraint_Release(impl->constraints);
        free(impl);
    }

    return ref;
}
/* IInspectable::GetIids is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_GetIids( ISpeechContinuousRecognitionSession *iface, ULONG *iid_count, IID **iids )
{
    FIXME("iface %p, iid_count %p, iids %p stub!\n", iface, iid_count, iids);

    return E_NOTIMPL;
}
/* IInspectable::GetRuntimeClassName is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_GetRuntimeClassName( ISpeechContinuousRecognitionSession *iface, HSTRING *class_name )
{
    FIXME("iface %p, class_name %p stub!\n", iface, class_name);

    return E_NOTIMPL;
}
/* IInspectable::GetTrustLevel is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_GetTrustLevel( ISpeechContinuousRecognitionSession *iface, TrustLevel *trust_level )
{
    FIXME("iface %p, trust_level %p stub!\n", iface, trust_level);

    return E_NOTIMPL;
}
/* AutoStopSilenceTimeout getter is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_get_AutoStopSilenceTimeout( ISpeechContinuousRecognitionSession *iface, TimeSpan *value )
{
    FIXME("iface %p, value %p stub!\n", iface, value);

    return E_NOTIMPL;
}
/* AutoStopSilenceTimeout setter is not implemented: logs the requested duration and returns E_NOTIMPL. */
static HRESULT WINAPI session_set_AutoStopSilenceTimeout( ISpeechContinuousRecognitionSession *iface, TimeSpan value )
{
    FIXME("iface %p, value %#I64x stub!\n", iface, value.Duration);

    return E_NOTIMPL;
}
/* Callback passed to async_action_create by session_StartAsync; does no work and reports S_OK. */
static HRESULT session_start_async( IInspectable *invoker )
{
    HRESULT hr = S_OK;

    return hr;
}
/*
 * Start continuous recognition.
 *
 * Creates the async action returned through *action and, under the session lock,
 * spawns the worker thread. Returns COR_E_INVALIDOPERATION if a worker is already
 * running or a worker handle is still set, or the CreateThread error converted to
 * an HRESULT. On any failure after the action was created, the action is released
 * and *action is set to NULL.
 */
static HRESULT WINAPI session_StartAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    HRESULT hr;

    TRACE("iface %p, action %p.\n", iface, action);

    hr = async_action_create(NULL, session_start_async, action);
    if (FAILED(hr))
        return hr;

    EnterCriticalSection(&session->cs);
    if (session->worker_running || session->worker_thread)
    {
        hr = COR_E_INVALIDOPERATION;
    }
    else
    {
        session->worker_thread = CreateThread(NULL, 0, session_worker_thread_cb, session, 0, NULL);
        if (session->worker_thread)
        {
            session->worker_running = TRUE;
            session->recognizer_state = SpeechRecognizerState_Capturing;
        }
        else
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
            session->worker_running = FALSE;
        }
    }
    LeaveCriticalSection(&session->cs);

    if (SUCCEEDED(hr))
        return hr;

    IAsyncAction_Release(*action);
    *action = NULL;
    return hr;
}
/* StartWithModeAsync is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_StartWithModeAsync( ISpeechContinuousRecognitionSession *iface,
                                                  SpeechContinuousRecognitionMode mode,
                                                  IAsyncAction **action )
{
    FIXME("iface %p, mode %u, action %p stub!\n", iface, mode, action);

    return E_NOTIMPL;
}
/* Callback passed to async_action_create by session_StopAsync; does no work and reports S_OK. */
static HRESULT session_stop_async( IInspectable *invoker )
{
    HRESULT hr = S_OK;

    return hr;
}
/*
 * Stop continuous recognition.
 *
 * Creates the async action returned through *action. If a worker is running, it is
 * flagged to exit under the session lock, woken through worker_control_event and
 * joined before this function returns; the stored thread handle is then reset to
 * NULL. Without a running worker the action is released, *action is set to NULL and
 * COR_E_INVALIDOPERATION is returned.
 */
static HRESULT WINAPI session_StopAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    HANDLE worker = NULL;
    HRESULT hr;

    TRACE("iface %p, action %p.\n", iface, action);

    hr = async_action_create(NULL, session_stop_async, action);
    if (FAILED(hr))
        return hr;

    EnterCriticalSection(&session->cs);
    if (!session->worker_running || !session->worker_thread)
    {
        hr = COR_E_INVALIDOPERATION;
    }
    else
    {
        /* Take over the handle; the placeholder keeps StartAsync from racing the join below. */
        worker = session->worker_thread;
        session->worker_thread = INVALID_HANDLE_VALUE;
        session->worker_running = FALSE;
        session->worker_paused = FALSE;
        session->recognizer_state = SpeechRecognizerState_Idle;
    }
    LeaveCriticalSection(&session->cs);

    if (FAILED(hr))
    {
        IAsyncAction_Release(*action);
        *action = NULL;
        return hr;
    }

    SetEvent(session->worker_control_event);
    WaitForSingleObject(worker, INFINITE);
    CloseHandle(worker);

    EnterCriticalSection(&session->cs);
    session->worker_thread = NULL;
    LeaveCriticalSection(&session->cs);

    return hr;
}
/* CancelAsync is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI session_CancelAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action )
{
    FIXME("iface %p, action %p stub!\n", iface, action);

    return E_NOTIMPL;
}
/* Callback passed to async_action_create by session_PauseAsync; does no work and reports S_OK. */
static HRESULT session_pause_async( IInspectable *invoker )
{
    HRESULT hr = S_OK;

    return hr;
}
/*
 * Pause continuous recognition.
 *
 * Creates the async action returned through *action. If a worker is running, the
 * paused flag and the Paused recognizer state are set under the session lock. The
 * control event is signaled in either case. Returns the result of
 * async_action_create.
 */
static HRESULT WINAPI session_PauseAsync( ISpeechContinuousRecognitionSession *iface, IAsyncAction **action )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    HRESULT hr;

    TRACE("iface %p, action %p.\n", iface, action);

    *action = NULL;

    hr = async_action_create(NULL, session_pause_async, action);
    if (FAILED(hr))
        return hr;

    EnterCriticalSection(&session->cs);
    if (session->worker_running)
    {
        session->worker_paused = TRUE;
        session->recognizer_state = SpeechRecognizerState_Paused;
    }
    LeaveCriticalSection(&session->cs);

    SetEvent(session->worker_control_event);

    return hr;
}
/*
 * Resume a paused session.
 *
 * If a worker is running, the paused flag is cleared and the recognizer state set back
 * to Capturing under the session lock. The control event is signaled in either case.
 * Always returns S_OK.
 */
static HRESULT WINAPI session_Resume( ISpeechContinuousRecognitionSession *iface )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);

    TRACE("iface %p.\n", iface);

    EnterCriticalSection(&session->cs);
    if (session->worker_running)
    {
        session->worker_paused = FALSE;
        session->recognizer_state = SpeechRecognizerState_Capturing;
    }
    LeaveCriticalSection(&session->cs);

    SetEvent(session->worker_control_event);

    return S_OK;
}
/* Register a Completed handler; a NULL handler is rejected with E_INVALIDARG. */
static HRESULT WINAPI session_add_Completed( ISpeechContinuousRecognitionSession *iface,
                                             ITypedEventHandler_SpeechContinuousRecognitionSession_SpeechContinuousRecognitionCompletedEventArgs *handler,
                                             EventRegistrationToken *token )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    struct list *handlers = &session->completed_handlers;

    TRACE("iface %p, handler %p, token %p.\n", iface, handler, token);

    if (handler == NULL)
        return E_INVALIDARG;

    return typed_event_handlers_append(handlers, (ITypedEventHandler_IInspectable_IInspectable *)handler, token);
}
/* Unregister the Completed handler identified by token. */
static HRESULT WINAPI session_remove_Completed( ISpeechContinuousRecognitionSession *iface, EventRegistrationToken token )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    struct list *handlers = &session->completed_handlers;

    TRACE("iface %p, token.value %#I64x.\n", iface, token.value);

    return typed_event_handlers_remove(handlers, &token);
}
/* Register a ResultGenerated handler; a NULL handler is rejected with E_INVALIDARG. */
static HRESULT WINAPI session_add_ResultGenerated( ISpeechContinuousRecognitionSession *iface,
                                                   ITypedEventHandler_SpeechContinuousRecognitionSession_SpeechContinuousRecognitionResultGeneratedEventArgs *handler,
                                                   EventRegistrationToken *token)
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    struct list *handlers = &session->result_handlers;

    TRACE("iface %p, handler %p, token %p.\n", iface, handler, token);

    if (handler == NULL)
        return E_INVALIDARG;

    return typed_event_handlers_append(handlers, (ITypedEventHandler_IInspectable_IInspectable *)handler, token);
}
/* Unregister the ResultGenerated handler identified by token. */
static HRESULT WINAPI session_remove_ResultGenerated( ISpeechContinuousRecognitionSession *iface, EventRegistrationToken token )
{
    struct session *session = impl_from_ISpeechContinuousRecognitionSession(iface);
    struct list *handlers = &session->result_handlers;

    TRACE("iface %p, token.value %#I64x.\n", iface, token.value);

    return typed_event_handlers_remove(handlers, &token);
}
/* Method table for session objects; entries are positional, keep their order. */
static const struct ISpeechContinuousRecognitionSessionVtbl session_vtbl =
{
/* IUnknown methods */
session_QueryInterface,
session_AddRef,
session_Release,
/* IInspectable methods */
session_GetIids,
session_GetRuntimeClassName,
session_GetTrustLevel,
/* ISpeechContinuousRecognitionSession methods */
session_get_AutoStopSilenceTimeout,
session_set_AutoStopSilenceTimeout,
session_StartAsync,
session_StartWithModeAsync,
session_StopAsync,
session_CancelAsync,
session_PauseAsync,
session_Resume,
session_add_Completed,
session_remove_Completed,
session_add_ResultGenerated,
session_remove_ResultGenerated
};
/*
*
* SpeechRecognizer
*
*/
/* SpeechRecognizer object: exposes three interfaces sharing one reference count. */
struct recognizer
{
ISpeechRecognizer ISpeechRecognizer_iface;
IClosable IClosable_iface;
ISpeechRecognizer2 ISpeechRecognizer2_iface;
LONG ref; /* reference count, adjusted with Interlocked* in AddRef/Release */
ISpeechContinuousRecognitionSession *session; /* released in recognizer_Release */
};
/*
*
* ISpeechRecognizer
*
*/
/* Map an ISpeechRecognizer interface pointer back to its containing recognizer. */
static inline struct recognizer *impl_from_ISpeechRecognizer( ISpeechRecognizer *iface )
{
    struct recognizer *recognizer;

    recognizer = CONTAINING_RECORD(iface, struct recognizer, ISpeechRecognizer_iface);
    return recognizer;
}
/*
 * IUnknown::QueryInterface for the recognizer.
 *
 * IUnknown, IInspectable, IAgileObject and ISpeechRecognizer map to the
 * ISpeechRecognizer interface; IClosable and ISpeechRecognizer2 map to their own
 * interface members. A reference is added on success. Any other IID yields
 * E_NOINTERFACE with *out cleared.
 */
static HRESULT WINAPI recognizer_QueryInterface( ISpeechRecognizer *iface, REFIID iid, void **out )
{
    struct recognizer *recognizer = impl_from_ISpeechRecognizer(iface);

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    if (IsEqualGUID(iid, &IID_IUnknown) ||
        IsEqualGUID(iid, &IID_IInspectable) ||
        IsEqualGUID(iid, &IID_IAgileObject) ||
        IsEqualGUID(iid, &IID_ISpeechRecognizer))
    {
        IInspectable_AddRef((*out = &recognizer->ISpeechRecognizer_iface));
    }
    else if (IsEqualGUID(iid, &IID_IClosable))
    {
        IInspectable_AddRef((*out = &recognizer->IClosable_iface));
    }
    else if (IsEqualGUID(iid, &IID_ISpeechRecognizer2))
    {
        IInspectable_AddRef((*out = &recognizer->ISpeechRecognizer2_iface));
    }
    else
    {
        WARN("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));
        *out = NULL;
        return E_NOINTERFACE;
    }

    return S_OK;
}
/* IUnknown::AddRef: atomically bump the reference count and return the new value. */
static ULONG WINAPI recognizer_AddRef( ISpeechRecognizer *iface )
{
    struct recognizer *recognizer = impl_from_ISpeechRecognizer(iface);
    ULONG refcount;

    refcount = InterlockedIncrement(&recognizer->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    return refcount;
}
/* IUnknown::Release: drop one reference; on the last one release the session and free the object. */
static ULONG WINAPI recognizer_Release( ISpeechRecognizer *iface )
{
    struct recognizer *recognizer = impl_from_ISpeechRecognizer(iface);
    ULONG refcount;

    refcount = InterlockedDecrement(&recognizer->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    if (refcount != 0)
        return refcount;

    ISpeechContinuousRecognitionSession_Release(recognizer->session);
    free(recognizer);

    return refcount;
}
/* IInspectable::GetIids is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_GetIids( ISpeechRecognizer *iface, ULONG *iid_count, IID **iids )
{
    FIXME("iface %p, iid_count %p, iids %p stub!\n", iface, iid_count, iids);

    return E_NOTIMPL;
}
/* IInspectable::GetRuntimeClassName is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_GetRuntimeClassName( ISpeechRecognizer *iface, HSTRING *class_name )
{
    FIXME("iface %p, class_name %p stub!\n", iface, class_name);

    return E_NOTIMPL;
}
/* IInspectable::GetTrustLevel is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_GetTrustLevel( ISpeechRecognizer *iface, TrustLevel *trust_level )
{
    FIXME("iface %p, trust_level %p stub!\n", iface, trust_level);

    return E_NOTIMPL;
}
/* Hand out the session's constraints vector with an added reference; always returns S_OK. */
static HRESULT WINAPI recognizer_get_Constraints( ISpeechRecognizer *iface, IVector_ISpeechRecognitionConstraint **vector )
{
    struct recognizer *recognizer = impl_from_ISpeechRecognizer(iface);
    struct session *owner = impl_from_ISpeechContinuousRecognitionSession(recognizer->session);

    TRACE("iface %p, operation %p.\n", iface, vector);

    *vector = owner->constraints;
    IVector_ISpeechRecognitionConstraint_AddRef(*vector);

    return S_OK;
}
/* CurrentLanguage getter is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_get_CurrentLanguage( ISpeechRecognizer *iface, ILanguage **language )
{
    FIXME("iface %p, operation %p stub!\n", iface, language);

    return E_NOTIMPL;
}
/* Timeouts getter is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_get_Timeouts( ISpeechRecognizer *iface, ISpeechRecognizerTimeouts **timeouts )
{
    FIXME("iface %p, operation %p stub!\n", iface, timeouts);

    return E_NOTIMPL;
}
/* UIOptions getter is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_get_UIOptions( ISpeechRecognizer *iface, ISpeechRecognizerUIOptions **options )
{
    FIXME("iface %p, operation %p stub!\n", iface, options);

    return E_NOTIMPL;
}
/* Async callback for CompileConstraintsAsync: produces a compilation result with Success status. */
static HRESULT recognizer_compile_constraints_async( IInspectable *invoker, IInspectable **result )
{
    ISpeechRecognitionCompilationResult **compilation = (ISpeechRecognitionCompilationResult **) result;

    return compilation_result_create(SpeechRecognitionResultStatus_Success, compilation);
}
/*
 * Semi-stub: creates an async operation whose callback is
 * recognizer_compile_constraints_async and returns the creation result.
 */
static HRESULT WINAPI recognizer_CompileConstraintsAsync( ISpeechRecognizer *iface,
                                                          IAsyncOperation_SpeechRecognitionCompilationResult **operation )
{
    FIXME("iface %p, operation %p semi-stub!\n", iface, operation);

    return async_operation_inspectable_create(&IID_IAsyncOperation_SpeechRecognitionCompilationResult, NULL,
                                              recognizer_compile_constraints_async,
                                              (IAsyncOperation_IInspectable **)operation);
}
/* RecognizeAsync is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_RecognizeAsync( ISpeechRecognizer *iface,
                                                 IAsyncOperation_SpeechRecognitionResult **operation )
{
    FIXME("iface %p, operation %p stub!\n", iface, operation);

    return E_NOTIMPL;
}
/* RecognizeWithUIAsync is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_RecognizeWithUIAsync( ISpeechRecognizer *iface,
                                                       IAsyncOperation_SpeechRecognitionResult **operation )
{
    FIXME("iface %p, operation %p stub!\n", iface, operation);

    return E_NOTIMPL;
}
/* RecognitionQualityDegrading registration is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_add_RecognitionQualityDegrading( ISpeechRecognizer *iface,
                                                                  ITypedEventHandler_SpeechRecognizer_SpeechRecognitionQualityDegradingEventArgs *handler,
                                                                  EventRegistrationToken *token )
{
    FIXME("iface %p, operation %p, token %p, stub!\n", iface, handler, token);

    return E_NOTIMPL;
}
/* RecognitionQualityDegrading removal is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_remove_RecognitionQualityDegrading( ISpeechRecognizer *iface, EventRegistrationToken token )
{
    FIXME("iface %p, token.value %#I64x, stub!\n", iface, token.value);

    return E_NOTIMPL;
}
/* StateChanged registration is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_add_StateChanged( ISpeechRecognizer *iface,
                                                   ITypedEventHandler_SpeechRecognizer_SpeechRecognizerStateChangedEventArgs *handler,
                                                   EventRegistrationToken *token )
{
    FIXME("iface %p, operation %p, token %p, stub!\n", iface, handler, token);

    return E_NOTIMPL;
}
/* StateChanged removal is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer_remove_StateChanged( ISpeechRecognizer *iface, EventRegistrationToken token )
{
    FIXME("iface %p, token.value %#I64x, stub!\n", iface, token.value);

    return E_NOTIMPL;
}
/* Method table for the ISpeechRecognizer interface; entries are positional, keep their order. */
static const struct ISpeechRecognizerVtbl speech_recognizer_vtbl =
{
/* IUnknown methods */
recognizer_QueryInterface,
recognizer_AddRef,
recognizer_Release,
/* IInspectable methods */
recognizer_GetIids,
recognizer_GetRuntimeClassName,
recognizer_GetTrustLevel,
/* ISpeechRecognizer methods */
recognizer_get_CurrentLanguage,
recognizer_get_Constraints,
recognizer_get_Timeouts,
recognizer_get_UIOptions,
recognizer_CompileConstraintsAsync,
recognizer_RecognizeAsync,
recognizer_RecognizeWithUIAsync,
recognizer_add_RecognitionQualityDegrading,
recognizer_remove_RecognitionQualityDegrading,
recognizer_add_StateChanged,
recognizer_remove_StateChanged,
};
/*
*
* IClosable
*
*/
/* NOTE(review): the macro is defined outside this file. The closable_QueryInterface ..
 * closable_GetTrustLevel entries used by closable_vtbl are not defined anywhere else
 * here, so they presumably come from this expansion and forward to
 * ISpeechRecognizer_iface - confirm against the macro definition. */
DEFINE_IINSPECTABLE(closable, IClosable, struct recognizer, ISpeechRecognizer_iface)
/* IClosable::Close is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI closable_Close( IClosable *iface )
{
    FIXME("iface %p stub.\n", iface);

    return E_NOTIMPL;
}
/* Method table for the recognizer's IClosable interface; entries are positional, keep their order. */
static const struct IClosableVtbl closable_vtbl =
{
/* IUnknown methods */
closable_QueryInterface,
closable_AddRef,
closable_Release,
/* IInspectable methods */
closable_GetIids,
closable_GetRuntimeClassName,
closable_GetTrustLevel,
/* IClosable methods */
closable_Close,
};
/*
*
* ISpeechRecognizer2
*
*/
/* NOTE(review): the macro is defined outside this file. impl_from_ISpeechRecognizer2 and
 * the recognizer2_QueryInterface .. recognizer2_GetTrustLevel entries used below are not
 * defined anywhere else here, so they presumably come from this expansion - confirm
 * against the macro definition. */
DEFINE_IINSPECTABLE(recognizer2, ISpeechRecognizer2, struct recognizer, ISpeechRecognizer_iface)
/*
 * Return the recognizer's continuous recognition session.
 *
 * The session is obtained through QueryInterface, so *session receives its own
 * reference. The HRESULT of that call is returned to the caller instead of being
 * discarded in favour of an unconditional S_OK.
 */
static HRESULT WINAPI recognizer2_get_ContinuousRecognitionSession( ISpeechRecognizer2 *iface,
                                                                    ISpeechContinuousRecognitionSession **session )
{
    struct recognizer *impl = impl_from_ISpeechRecognizer2(iface);

    TRACE("iface %p, session %p.\n", iface, session);

    return ISpeechContinuousRecognitionSession_QueryInterface(impl->session, &IID_ISpeechContinuousRecognitionSession, (void **)session);
}
/*
 * Report the recognizer state tracked by the session.
 *
 * Returns E_POINTER when state is NULL; otherwise copies recognizer_state under the
 * session lock and returns S_OK.
 */
static HRESULT WINAPI recognizer2_get_State( ISpeechRecognizer2 *iface, SpeechRecognizerState *state )
{
    struct recognizer *recognizer = impl_from_ISpeechRecognizer2(iface);
    struct session *owner = impl_from_ISpeechContinuousRecognitionSession(recognizer->session);

    FIXME("iface %p, state %p not all states are supported, yet.\n", iface, state);

    if (state == NULL)
        return E_POINTER;

    EnterCriticalSection(&owner->cs);
    *state = owner->recognizer_state;
    LeaveCriticalSection(&owner->cs);

    return S_OK;
}
/* StopRecognitionAsync is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer2_StopRecognitionAsync( ISpeechRecognizer2 *iface, IAsyncAction **action )
{
    FIXME("iface %p, action %p stub!\n", iface, action);

    return E_NOTIMPL;
}
/* HypothesisGenerated registration is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer2_add_HypothesisGenerated( ISpeechRecognizer2 *iface,
                                                           ITypedEventHandler_SpeechRecognizer_SpeechRecognitionHypothesisGeneratedEventArgs *handler,
                                                           EventRegistrationToken *token )
{
    FIXME("iface %p, operation %p, token %p, stub!\n", iface, handler, token);

    return E_NOTIMPL;
}
/* HypothesisGenerated removal is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI recognizer2_remove_HypothesisGenerated( ISpeechRecognizer2 *iface, EventRegistrationToken token )
{
    FIXME("iface %p, token.value %#I64x, stub!\n", iface, token.value);

    return E_NOTIMPL;
}
/* Method table for the ISpeechRecognizer2 interface; entries are positional, keep their order. */
static const struct ISpeechRecognizer2Vtbl speech_recognizer2_vtbl =
{
/* IUnknown methods */
recognizer2_QueryInterface,
recognizer2_AddRef,
recognizer2_Release,
/* IInspectable methods */
recognizer2_GetIids,
recognizer2_GetRuntimeClassName,
recognizer2_GetTrustLevel,
/* ISpeechRecognizer2 methods */
recognizer2_get_ContinuousRecognitionSession,
recognizer2_get_State,
recognizer2_StopRecognitionAsync,
recognizer2_add_HypothesisGenerated,
recognizer2_remove_HypothesisGenerated,
};
/*
*
* Statics for SpeechRecognizer
*
*/
/* Activation factory object for SpeechRecognizer: four interfaces sharing one reference count. */
struct recognizer_statics
{
IActivationFactory IActivationFactory_iface;
ISpeechRecognizerFactory ISpeechRecognizerFactory_iface;
ISpeechRecognizerStatics ISpeechRecognizerStatics_iface;
ISpeechRecognizerStatics2 ISpeechRecognizerStatics2_iface;
LONG ref; /* counted in AddRef/Release; Release never frees the object */
};
/*
*
* IActivationFactory
*
*/
/* Map an IActivationFactory interface pointer back to its containing factory object. */
static inline struct recognizer_statics *impl_from_IActivationFactory( IActivationFactory *iface )
{
    struct recognizer_statics *statics;

    statics = CONTAINING_RECORD(iface, struct recognizer_statics, IActivationFactory_iface);
    return statics;
}
/*
 * IUnknown::QueryInterface for the activation factory.
 *
 * IUnknown, IInspectable, IAgileObject and IActivationFactory map to the
 * IActivationFactory interface; ISpeechRecognizerFactory, ISpeechRecognizerStatics and
 * ISpeechRecognizerStatics2 map to their own interface members. A reference is added
 * on success. Any other IID yields E_NOINTERFACE with *out cleared.
 *
 * The trace no longer says "stub!": the method is fully implemented and the marker was
 * inconsistent with the other QueryInterface traces in this file.
 */
static HRESULT WINAPI activation_factory_QueryInterface( IActivationFactory *iface, REFIID iid, void **out )
{
    struct recognizer_statics *impl = impl_from_IActivationFactory(iface);

    TRACE("iface %p, iid %s, out %p.\n", iface, debugstr_guid(iid), out);

    if (IsEqualGUID(iid, &IID_IUnknown) ||
        IsEqualGUID(iid, &IID_IInspectable) ||
        IsEqualGUID(iid, &IID_IAgileObject) ||
        IsEqualGUID(iid, &IID_IActivationFactory))
    {
        IInspectable_AddRef((*out = &impl->IActivationFactory_iface));
        return S_OK;
    }

    if (IsEqualGUID(iid, &IID_ISpeechRecognizerFactory))
    {
        IInspectable_AddRef((*out = &impl->ISpeechRecognizerFactory_iface));
        return S_OK;
    }

    if (IsEqualGUID(iid, &IID_ISpeechRecognizerStatics))
    {
        IInspectable_AddRef((*out = &impl->ISpeechRecognizerStatics_iface));
        return S_OK;
    }

    if (IsEqualGUID(iid, &IID_ISpeechRecognizerStatics2))
    {
        IInspectable_AddRef((*out = &impl->ISpeechRecognizerStatics2_iface));
        return S_OK;
    }

    FIXME("%s not implemented, returning E_NOINTERFACE.\n", debugstr_guid(iid));
    *out = NULL;
    return E_NOINTERFACE;
}
/* IUnknown::AddRef: atomically bump the reference count and return the new value. */
static ULONG WINAPI activation_factory_AddRef( IActivationFactory *iface )
{
    struct recognizer_statics *statics = impl_from_IActivationFactory(iface);
    ULONG refcount;

    refcount = InterlockedIncrement(&statics->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    return refcount;
}
/* IUnknown::Release: atomically drop one reference; the factory object is not freed here. */
static ULONG WINAPI activation_factory_Release( IActivationFactory *iface )
{
    struct recognizer_statics *statics = impl_from_IActivationFactory(iface);
    ULONG refcount;

    refcount = InterlockedDecrement(&statics->ref);
    TRACE("iface %p, ref %lu.\n", iface, refcount);

    return refcount;
}
/* IInspectable::GetIids is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI activation_factory_GetIids( IActivationFactory *iface, ULONG *iid_count, IID **iids )
{
    FIXME("iface %p, iid_count %p, iids %p stub!\n", iface, iid_count, iids);

    return E_NOTIMPL;
}
/* IInspectable::GetRuntimeClassName is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI activation_factory_GetRuntimeClassName( IActivationFactory *iface, HSTRING *class_name )
{
    FIXME("iface %p, class_name %p stub!\n", iface, class_name);

    return E_NOTIMPL;
}
/* IInspectable::GetTrustLevel is not implemented: logs a FIXME and returns E_NOTIMPL. */
static HRESULT WINAPI activation_factory_GetTrustLevel( IActivationFactory *iface, TrustLevel *trust_level )
{
    FIXME("iface %p, trust_level %p stub!\n", iface, trust_level);

    return E_NOTIMPL;
}
/* Default activation: delegates to ISpeechRecognizerFactory::Create with a NULL language. */
static HRESULT WINAPI activation_factory_ActivateInstance( IActivationFactory *iface, IInspectable **instance )
{
    struct recognizer_statics *statics = impl_from_IActivationFactory(iface);
    ISpeechRecognizerFactory *factory = &statics->ISpeechRecognizerFactory_iface;

    TRACE("iface %p, instance %p.\n", iface, instance);

    return ISpeechRecognizerFactory_Create(factory, NULL, (ISpeechRecognizer **)instance);
}
/* Method table for the IActivationFactory interface; entries are positional, keep their order. */
static const struct IActivationFactoryVtbl activation_factory_vtbl =
{
/* IUnknown methods */
activation_factory_QueryInterface,
activation_factory_AddRef,
activation_factory_Release,
/* IInspectable methods */
activation_factory_GetIids,
activation_factory_GetRuntimeClassName,
activation_factory_GetTrustLevel,
/* IActivationFactory methods */
activation_factory_ActivateInstance,
};
/*
*
* ISpeechRecognizerFactory
*
*/
/* NOTE(review): the macro is defined outside this file; presumably it generates the
 * recognizer_factory_* IUnknown/IInspectable methods for ISpeechRecognizerFactory,
 * forwarding to IActivationFactory_iface - confirm against the macro definition. */
DEFINE_IINSPECTABLE(recognizer_factory, ISpeechRecognizerFactory, struct recognizer_statics, IActivationFactory_iface)
/*
 * Set up audio capture for a session.
 *
 * Creates audio_buf_event, activates an IAudioClient on the default multimedia capture
 * endpoint, initializes it in shared, event-driven mode with 16 kHz / mono / 16-bit PCM,
 * registers the event and fetches the IAudioCaptureClient service. The format is stored
 * in session->capture_wfx.
 *
 * Returns the first failing HRESULT, or the result of IAudioClient_GetService. Objects
 * already stored in the session are not released here on failure.
 */
static HRESULT recognizer_factory_create_audio_capture(struct session *session)
{
    const REFERENCE_TIME buffer_duration = 5000000; /* 0.5 second */
    IMMDeviceEnumerator *enumerator = NULL;
    IMMDevice *device = NULL;
    WAVEFORMATEX wfx = { 0 };
    WCHAR *device_id = NULL;
    HRESULT hr = S_OK;

    session->audio_buf_event = CreateEventW(NULL, FALSE, FALSE, NULL);
    if (!session->audio_buf_event)
        return HRESULT_FROM_WIN32(GetLastError());

    hr = CoCreateInstance(&CLSID_MMDeviceEnumerator, NULL, CLSCTX_INPROC_SERVER, &IID_IMMDeviceEnumerator, (void **)&enumerator);
    if (FAILED(hr))
        goto cleanup;

    hr = IMMDeviceEnumerator_GetDefaultAudioEndpoint(enumerator, eCapture, eMultimedia, &device);
    if (FAILED(hr))
        goto cleanup;

    hr = IMMDevice_Activate(device, &IID_IAudioClient, CLSCTX_INPROC_SERVER, NULL, (void **)&session->audio_client);
    if (FAILED(hr))
        goto cleanup;

    /* The device id is only used for tracing; a failure here is not fatal. */
    hr = IMMDevice_GetId(device, &device_id);
    TRACE("selected capture device ID: %s, hr %#lx\n", debugstr_w(device_id), hr);

    wfx.wFormatTag = WAVE_FORMAT_PCM;
    wfx.nChannels = 1;
    wfx.nSamplesPerSec = 16000;
    wfx.wBitsPerSample = 16;
    wfx.nBlockAlign = (wfx.wBitsPerSample + 7) / 8 * wfx.nChannels;
    wfx.nAvgBytesPerSec = wfx.nSamplesPerSec * wfx.nBlockAlign;
    TRACE("wfx tag %u, channels %u, samples %lu, bits %u, align %u.\n", wfx.wFormatTag, wfx.nChannels, wfx.nSamplesPerSec, wfx.wBitsPerSample, wfx.nBlockAlign);

    hr = IAudioClient_Initialize(session->audio_client, AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_EVENTCALLBACK, buffer_duration, 0, &wfx, NULL);
    if (FAILED(hr))
        goto cleanup;

    hr = IAudioClient_SetEventHandle(session->audio_client, session->audio_buf_event);
    if (FAILED(hr))
        goto cleanup;

    hr = IAudioClient_GetService(session->audio_client, &IID_IAudioCaptureClient, (void **)&session->capture_client);
    session->capture_wfx = wfx;

cleanup:
    if (device) IMMDevice_Release(device);
    if (enumerator) IMMDeviceEnumerator_Release(enumerator);
    CoTaskMemFree(device_id);
    return hr;
}
/*
 * ISpeechRecognizerFactory::Create
 *
 * Allocates a speech recognizer object together with the continuous
 * recognition session it refers to, sets up the session's worker control
 * event, constraint vector and audio capture objects, and returns the
 * recognizer's ISpeechRecognizer interface.
 *
 * iface            - the factory interface (only used for tracing).
 * language         - currently ignored; a FIXME is emitted when non-NULL.
 * speechrecognizer - receives the new object; set to NULL on failure.
 *
 * Returns S_OK on success, E_OUTOFMEMORY if an allocation fails, or the
 * failure code of the step that could not be completed. On failure every
 * resource created so far is released.
 */
static HRESULT WINAPI recognizer_factory_Create( ISpeechRecognizerFactory *iface, ILanguage *language, ISpeechRecognizer **speechrecognizer )
{
    struct recognizer *impl;
    struct session *session;
    struct vector_iids constraints_iids =
    {
        .iterable = &IID_IIterable_ISpeechRecognitionConstraint,
        .iterator = &IID_IIterator_ISpeechRecognitionConstraint,
        .vector = &IID_IVector_ISpeechRecognitionConstraint,
        .view = &IID_IVectorView_ISpeechRecognitionConstraint,
    };
    HRESULT hr;

    TRACE("iface %p, language %p, speechrecognizer %p.\n", iface, language, speechrecognizer);

    *speechrecognizer = NULL;

    if (!(impl = calloc(1, sizeof(*impl)))) return E_OUTOFMEMORY;
    if (!(session = calloc(1, sizeof(*session))))
    {
        /* The shared error path below reads members of *session, so it must
         * not be entered with a NULL session. Only impl exists at this point. */
        free(impl);
        return E_OUTOFMEMORY;
    }

    if (language)
        FIXME("language parameter unused. Stub!\n");

    /* Init ISpeechContinuousRecognitionSession */
    session->ISpeechContinuousRecognitionSession_iface.lpVtbl = &session_vtbl;
    session->ref = 1;

    list_init(&session->completed_handlers);
    list_init(&session->result_handlers);

    if (!(session->worker_control_event = CreateEventW(NULL, FALSE, FALSE, NULL)))
    {
        hr = HRESULT_FROM_WIN32(GetLastError());
        goto error;
    }

    if (FAILED(hr = vector_inspectable_create(&constraints_iids, (IVector_IInspectable**)&session->constraints)))
        goto error;

    if (FAILED(hr = recognizer_factory_create_audio_capture(session)))
        goto error;

    /* The critical section is set up last, after every step that can fail,
     * so the error path never has to delete it. */
    InitializeCriticalSectionEx(&session->cs, 0, RTL_CRITICAL_SECTION_FLAG_FORCE_DEBUG_INFO);
    session->cs.DebugInfo->Spare[0] = (DWORD_PTR)(__FILE__ ": recognition_session.cs");

    /* Init ISpeechRecognizer */
    impl->ISpeechRecognizer_iface.lpVtbl = &speech_recognizer_vtbl;
    impl->IClosable_iface.lpVtbl = &closable_vtbl;
    impl->ISpeechRecognizer2_iface.lpVtbl = &speech_recognizer2_vtbl;
    impl->session = &session->ISpeechContinuousRecognitionSession_iface;
    impl->ref = 1;

    *speechrecognizer = &impl->ISpeechRecognizer_iface;
    TRACE("created SpeechRecognizer %p.\n", *speechrecognizer);
    return S_OK;

error:
    /* session is zero-initialized by calloc, so members that were never
     * created are NULL and skipped here. */
    if (session->capture_client) IAudioCaptureClient_Release(session->capture_client);
    if (session->audio_client) IAudioClient_Release(session->audio_client);
    if (session->audio_buf_event) CloseHandle(session->audio_buf_event);
    if (session->constraints) IVector_ISpeechRecognitionConstraint_Release(session->constraints);
    if (session->worker_control_event) CloseHandle(session->worker_control_event);
    free(session);
    free(impl);

    return hr;
}
/* Method table for the ISpeechRecognizerFactory interface of the static
 * recognizer_statics object. Entries are positional and grouped by the
 * interface that declares them. */
static const struct ISpeechRecognizerFactoryVtbl speech_recognizer_factory_vtbl =
{
/* IUnknown methods */
recognizer_factory_QueryInterface,
recognizer_factory_AddRef,
recognizer_factory_Release,
/* IInspectable methods */
recognizer_factory_GetIids,
recognizer_factory_GetRuntimeClassName,
recognizer_factory_GetTrustLevel,
/* ISpeechRecognizerFactory methods */
recognizer_factory_Create
};
/*
*
* ISpeechRecognizerStatics
*
*/
/* NOTE(review): DEFINE_IINSPECTABLE is defined outside this part of the file.
 * It presumably generates the statics_QueryInterface / statics_AddRef /
 * statics_Release / statics_GetIids / statics_GetRuntimeClassName /
 * statics_GetTrustLevel functions used by speech_recognizer_statics_vtbl,
 * forwarding them to the IActivationFactory_iface member of
 * struct recognizer_statics - confirm against the macro definition. */
DEFINE_IINSPECTABLE(statics, ISpeechRecognizerStatics, struct recognizer_statics, IActivationFactory_iface)
/* ISpeechRecognizerStatics::get_SystemSpeechLanguage - not implemented.
 * Logs the call and reports E_NOTIMPL; *language is left untouched. */
static HRESULT WINAPI statics_get_SystemSpeechLanguage( ISpeechRecognizerStatics *iface, ILanguage **language )
{
    const HRESULT result = E_NOTIMPL;

    FIXME("iface %p, language %p stub!\n", iface, language);

    return result;
}
/* ISpeechRecognizerStatics::get_SupportedTopicLanguages - not implemented.
 * Logs the call and reports E_NOTIMPL; *languages is left untouched. */
static HRESULT WINAPI statics_get_SupportedTopicLanguages( ISpeechRecognizerStatics *iface, IVectorView_Language **languages )
{
    const HRESULT result = E_NOTIMPL;

    FIXME("iface %p, languages %p stub!\n", iface, languages);

    return result;
}
/* ISpeechRecognizerStatics::get_SupportedGrammarLanguages - not implemented.
 * Logs the call and reports E_NOTIMPL; *languages is left untouched. */
static HRESULT WINAPI statics_get_SupportedGrammarLanguages( ISpeechRecognizerStatics *iface, IVectorView_Language **languages )
{
    const HRESULT result = E_NOTIMPL;

    FIXME("iface %p, languages %p stub!\n", iface, languages);

    return result;
}
/* Method table for the ISpeechRecognizerStatics interface of the static
 * recognizer_statics object. All three interface-specific getters are
 * currently stubs that return E_NOTIMPL. */
static const struct ISpeechRecognizerStaticsVtbl speech_recognizer_statics_vtbl =
{
/* IUnknown methods */
statics_QueryInterface,
statics_AddRef,
statics_Release,
/* IInspectable methods */
statics_GetIids,
statics_GetRuntimeClassName,
statics_GetTrustLevel,
/* ISpeechRecognizerStatics methods */
statics_get_SystemSpeechLanguage,
statics_get_SupportedTopicLanguages,
statics_get_SupportedGrammarLanguages
};
/*
*
* ISpeechRecognizerStatics2
*
*/
/* NOTE(review): DEFINE_IINSPECTABLE is defined outside this part of the file.
 * It presumably generates the statics2_QueryInterface / statics2_AddRef /
 * statics2_Release / statics2_GetIids / statics2_GetRuntimeClassName /
 * statics2_GetTrustLevel functions used by speech_recognizer_statics2_vtbl,
 * forwarding them to the IActivationFactory_iface member of
 * struct recognizer_statics - confirm against the macro definition. */
DEFINE_IINSPECTABLE(statics2, ISpeechRecognizerStatics2, struct recognizer_statics, IActivationFactory_iface)
/* ISpeechRecognizerStatics2::TrySetSystemSpeechLanguageAsync - not implemented.
 * Logs every argument (including the requested language) and reports
 * E_NOTIMPL; *operation is left untouched. */
static HRESULT WINAPI statics2_TrySetSystemSpeechLanguageAsync( ISpeechRecognizerStatics2 *iface,
                                                                ILanguage *language,
                                                                IAsyncOperation_boolean **operation)
{
    FIXME("iface %p, language %p, operation %p stub!\n", iface, language, operation);
    return E_NOTIMPL;
}
/* Method table for the ISpeechRecognizerStatics2 interface of the static
 * recognizer_statics object. Its single interface-specific method is
 * currently a stub that returns E_NOTIMPL. */
static const struct ISpeechRecognizerStatics2Vtbl speech_recognizer_statics2_vtbl =
{
/* IUnknown methods */
statics2_QueryInterface,
statics2_AddRef,
statics2_Release,
/* IInspectable methods */
statics2_GetIids,
statics2_GetRuntimeClassName,
statics2_GetTrustLevel,
/* ISpeechRecognizerStatics2 methods */
statics2_TrySetSystemSpeechLanguageAsync,
};
/* The single, statically allocated activation factory object. Each
 * interface member is wired to its method table, and the reference count
 * starts at 1. */
static struct recognizer_statics recognizer_statics =
{
.IActivationFactory_iface = {&activation_factory_vtbl},
.ISpeechRecognizerFactory_iface = {&speech_recognizer_factory_vtbl},
.ISpeechRecognizerStatics_iface = {&speech_recognizer_statics_vtbl},
.ISpeechRecognizerStatics2_iface = {&speech_recognizer_statics2_vtbl},
.ref = 1
};
/* Non-static pointer to the IActivationFactory interface of the static
 * recognizer_statics object, visible to other translation units. */
IActivationFactory *recognizer_factory = &recognizer_statics.IActivationFactory_iface;