/* * Speech API (SAPI) text-to-speech implementation. * * Copyright 2019 Jactry Zeng for CodeWeavers * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA */ #include #define COBJMACROS #include "windef.h" #include "winbase.h" #include "objbase.h" #include "sapiddk.h" #include "sperror.h" #include "wine/debug.h" #include "sapi_private.h" WINE_DEFAULT_DEBUG_CHANNEL(sapi); struct speech_voice { ISpeechVoice ISpeechVoice_iface; ISpVoice ISpVoice_iface; IConnectionPointContainer IConnectionPointContainer_iface; LONG ref; ISpStreamFormat *output; ISpTTSEngine *engine; LONG cur_stream_num; DWORD actions; USHORT volume; LONG rate; struct async_queue queue; CRITICAL_SECTION cs; }; static inline struct speech_voice *impl_from_ISpeechVoice(ISpeechVoice *iface) { return CONTAINING_RECORD(iface, struct speech_voice, ISpeechVoice_iface); } static inline struct speech_voice *impl_from_ISpVoice(ISpVoice *iface) { return CONTAINING_RECORD(iface, struct speech_voice, ISpVoice_iface); } static inline struct speech_voice *impl_from_IConnectionPointContainer(IConnectionPointContainer *iface) { return CONTAINING_RECORD(iface, struct speech_voice, IConnectionPointContainer_iface); } struct tts_engine_site { ISpTTSEngineSite ISpTTSEngineSite_iface; LONG ref; struct speech_voice *voice; ULONG stream_num; }; static inline struct tts_engine_site *impl_from_ISpTTSEngineSite(ISpTTSEngineSite *iface) { return CONTAINING_RECORD(iface, struct tts_engine_site, ISpTTSEngineSite_iface); } static HRESULT create_default_token(const WCHAR *cat_id, ISpObjectToken **token) { ISpObjectTokenCategory *cat; WCHAR *default_token_id = NULL; HRESULT hr; TRACE("(%s, %p).\n", debugstr_w(cat_id), token); if (FAILED(hr = CoCreateInstance(&CLSID_SpObjectTokenCategory, NULL, CLSCTX_INPROC_SERVER, &IID_ISpObjectTokenCategory, (void **)&cat))) return hr; if (FAILED(hr = ISpObjectTokenCategory_SetId(cat, cat_id, FALSE)) || FAILED(hr = ISpObjectTokenCategory_GetDefaultTokenId(cat, &default_token_id))) { ISpObjectTokenCategory_Release(cat); return hr; } ISpObjectTokenCategory_Release(cat); if (FAILED(hr = CoCreateInstance(&CLSID_SpObjectToken, NULL, CLSCTX_INPROC_SERVER, &IID_ISpObjectToken, (void **)token))) goto done; if (FAILED(hr = ISpObjectToken_SetId(*token, NULL, default_token_id, FALSE))) { ISpObjectToken_Release(*token); *token = NULL; } done: CoTaskMemFree(default_token_id); return hr; } /* ISpeechVoice interface */ static HRESULT WINAPI speech_voice_QueryInterface(ISpeechVoice *iface, REFIID iid, void **obj) { struct speech_voice *This = impl_from_ISpeechVoice(iface); TRACE("(%p, %s %p).\n", iface, debugstr_guid(iid), obj); if (IsEqualIID(iid, &IID_IUnknown) || IsEqualIID(iid, &IID_IDispatch) || IsEqualIID(iid, &IID_ISpeechVoice)) *obj = &This->ISpeechVoice_iface; else if (IsEqualIID(iid, &IID_ISpVoice)) *obj = &This->ISpVoice_iface; else if (IsEqualIID(iid, &IID_IConnectionPointContainer)) *obj = &This->IConnectionPointContainer_iface; else { *obj = NULL; FIXME("interface %s not implemented.\n", debugstr_guid(iid)); return E_NOINTERFACE; } IUnknown_AddRef((IUnknown *)*obj); return S_OK; } static ULONG WINAPI speech_voice_AddRef(ISpeechVoice *iface) { struct speech_voice *This = impl_from_ISpeechVoice(iface); ULONG ref = InterlockedIncrement(&This->ref); TRACE("(%p): ref=%lu.\n", iface, ref); return ref; } static ULONG WINAPI speech_voice_Release(ISpeechVoice *iface) { struct speech_voice *This = impl_from_ISpeechVoice(iface); ULONG ref = InterlockedDecrement(&This->ref); TRACE("(%p): ref=%lu.\n", iface, ref); if (!ref) { async_cancel_queue(&This->queue); if (This->output) ISpStreamFormat_Release(This->output); if (This->engine) ISpTTSEngine_Release(This->engine); DeleteCriticalSection(&This->cs); heap_free(This); } return ref; } static HRESULT WINAPI speech_voice_GetTypeInfoCount(ISpeechVoice *iface, UINT *info) { FIXME("(%p, %p): stub.\n", iface, info); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_GetTypeInfo(ISpeechVoice *iface, UINT info, LCID lcid, ITypeInfo **type_info) { FIXME("(%p, %u, %lu, %p): stub.\n", iface, info, lcid, type_info); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_GetIDsOfNames(ISpeechVoice *iface, REFIID riid, LPOLESTR *names, UINT count, LCID lcid, DISPID *dispid) { FIXME("(%p, %s, %p, %u, %lu, %p): stub.\n", iface, debugstr_guid(riid), names, count, lcid, dispid); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_Invoke(ISpeechVoice *iface, DISPID dispid, REFIID riid, LCID lcid, WORD flags, DISPPARAMS *params, VARIANT *result, EXCEPINFO *excepinfo, UINT *argerr) { FIXME("(%p, %ld, %s, %#lx, %#x, %p, %p, %p, %p): stub.\n", iface, dispid, debugstr_guid(riid), lcid, flags, params, result, excepinfo, argerr); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_Status(ISpeechVoice *iface, ISpeechVoiceStatus **status) { FIXME("(%p, %p): stub.\n", iface, status); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_Voice(ISpeechVoice *iface, ISpeechObjectToken **voice) { FIXME("(%p, %p): stub.\n", iface, voice); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_putref_Voice(ISpeechVoice *iface, ISpeechObjectToken *voice) { FIXME("(%p, %p): stub.\n", iface, voice); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_AudioOutput(ISpeechVoice *iface, ISpeechObjectToken **output) { FIXME("(%p, %p): stub.\n", iface, output); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_putref_AudioOutput(ISpeechVoice *iface, ISpeechObjectToken *output) { FIXME("(%p, %p): stub.\n", iface, output); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_AudioOutputStream(ISpeechVoice *iface, ISpeechBaseStream **output) { FIXME("(%p, %p): stub.\n", iface, output); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_putref_AudioOutputStream(ISpeechVoice *iface, ISpeechBaseStream *output) { FIXME("(%p, %p): stub.\n", iface, output); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_Rate(ISpeechVoice *iface, LONG *rate) { FIXME("(%p, %p): stub.\n", iface, rate); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_Rate(ISpeechVoice *iface, LONG rate) { FIXME("(%p, %ld): stub.\n", iface, rate); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_Volume(ISpeechVoice *iface, LONG *volume) { FIXME("(%p, %p): stub.\n", iface, volume); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_Volume(ISpeechVoice *iface, LONG volume) { FIXME("(%p, %ld): stub.\n", iface, volume); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_AllowAudioOutputFormatChangesOnNextSet(ISpeechVoice *iface, VARIANT_BOOL allow) { FIXME("(%p, %d): stub.\n", iface, allow); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_AllowAudioOutputFormatChangesOnNextSet(ISpeechVoice *iface, VARIANT_BOOL *allow) { FIXME("(%p, %p): stub.\n", iface, allow); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_EventInterests(ISpeechVoice *iface, SpeechVoiceEvents *flags) { FIXME("(%p, %p): stub.\n", iface, flags); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_EventInterests(ISpeechVoice *iface, SpeechVoiceEvents flags) { FIXME("(%p, %#x): stub.\n", iface, flags); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_Priority(ISpeechVoice *iface, SpeechVoicePriority priority) { FIXME("(%p, %d): stub.\n", iface, priority); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_Priority(ISpeechVoice *iface, SpeechVoicePriority *priority) { FIXME("(%p, %p): stub.\n", iface, priority); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_AlertBoundary(ISpeechVoice *iface, SpeechVoiceEvents boundary) { FIXME("(%p, %#x): stub.\n", iface, boundary); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_AlertBoundary(ISpeechVoice *iface, SpeechVoiceEvents *boundary) { FIXME("(%p, %p): stub.\n", iface, boundary); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_put_SynchronousSpeakTimeout(ISpeechVoice *iface, LONG timeout) { FIXME("(%p, %ld): stub.\n", iface, timeout); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_get_SynchronousSpeakTimeout(ISpeechVoice *iface, LONG *timeout) { FIXME("(%p, %p): stub.\n", iface, timeout); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_Speak(ISpeechVoice *iface, BSTR text, SpeechVoiceSpeakFlags flags, LONG *number) { FIXME("(%p, %s, %#x, %p): stub.\n", iface, debugstr_w(text), flags, number); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_SpeakStream(ISpeechVoice *iface, ISpeechBaseStream *stream, SpeechVoiceSpeakFlags flags, LONG *number) { FIXME("(%p, %p, %#x, %p): stub.\n", iface, stream, flags, number); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_Pause(ISpeechVoice *iface) { FIXME("(%p): stub.\n", iface); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_Resume(ISpeechVoice *iface) { FIXME("(%p): stub.\n", iface); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_Skip(ISpeechVoice *iface, const BSTR type, LONG items, LONG *skipped) { FIXME("(%p, %s, %ld, %p): stub.\n", iface, debugstr_w(type), items, skipped); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_GetVoices(ISpeechVoice *iface, BSTR required, BSTR optional, ISpeechObjectTokens **tokens) { FIXME("(%p, %s, %s, %p): stub.\n", iface, debugstr_w(required), debugstr_w(optional), tokens); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_GetAudioOutputs(ISpeechVoice *iface, BSTR required, BSTR optional, ISpeechObjectTokens **tokens) { FIXME("(%p, %s, %s, %p): stub.\n", iface, debugstr_w(required), debugstr_w(optional), tokens); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_WaitUntilDone(ISpeechVoice *iface, LONG timeout, VARIANT_BOOL *done) { FIXME("(%p, %ld, %p): stub.\n", iface, timeout, done); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_SpeakCompleteEvent(ISpeechVoice *iface, LONG *handle) { FIXME("(%p, %p): stub.\n", iface, handle); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_IsUISupported(ISpeechVoice *iface, const BSTR typeui, const VARIANT *data, VARIANT_BOOL *supported) { FIXME("(%p, %s, %p, %p): stub.\n", iface, debugstr_w(typeui), data, supported); return E_NOTIMPL; } static HRESULT WINAPI speech_voice_DisplayUI(ISpeechVoice *iface, LONG hwnd, BSTR title, const BSTR typeui, const VARIANT *data) { FIXME("(%p, %ld, %s, %s, %p): stub.\n", iface, hwnd, debugstr_w(title), debugstr_w(typeui), data); return E_NOTIMPL; } static const ISpeechVoiceVtbl speech_voice_vtbl = { speech_voice_QueryInterface, speech_voice_AddRef, speech_voice_Release, speech_voice_GetTypeInfoCount, speech_voice_GetTypeInfo, speech_voice_GetIDsOfNames, speech_voice_Invoke, speech_voice_get_Status, speech_voice_get_Voice, speech_voice_putref_Voice, speech_voice_get_AudioOutput, speech_voice_putref_AudioOutput, speech_voice_get_AudioOutputStream, speech_voice_putref_AudioOutputStream, speech_voice_get_Rate, speech_voice_put_Rate, speech_voice_get_Volume, speech_voice_put_Volume, speech_voice_put_AllowAudioOutputFormatChangesOnNextSet, speech_voice_get_AllowAudioOutputFormatChangesOnNextSet, speech_voice_get_EventInterests, speech_voice_put_EventInterests, speech_voice_put_Priority, speech_voice_get_Priority, speech_voice_put_AlertBoundary, speech_voice_get_AlertBoundary, speech_voice_put_SynchronousSpeakTimeout, speech_voice_get_SynchronousSpeakTimeout, speech_voice_Speak, speech_voice_SpeakStream, speech_voice_Pause, speech_voice_Resume, speech_voice_Skip, speech_voice_GetVoices, speech_voice_GetAudioOutputs, speech_voice_WaitUntilDone, speech_voice_SpeakCompleteEvent, speech_voice_IsUISupported, speech_voice_DisplayUI, }; /* ISpVoice interface */ static HRESULT WINAPI spvoice_QueryInterface(ISpVoice *iface, REFIID iid, void **obj) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p, %s %p).\n", iface, debugstr_guid(iid), obj); return ISpeechVoice_QueryInterface(&This->ISpeechVoice_iface, iid, obj); } static ULONG WINAPI spvoice_AddRef(ISpVoice *iface) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p).\n", iface); return ISpeechVoice_AddRef(&This->ISpeechVoice_iface); } static ULONG WINAPI spvoice_Release(ISpVoice *iface) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p).\n", iface); return ISpeechVoice_Release(&This->ISpeechVoice_iface); } static HRESULT WINAPI spvoice_SetNotifySink(ISpVoice *iface, ISpNotifySink *sink) { FIXME("(%p, %p): stub.\n", iface, sink); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetNotifyWindowMessage(ISpVoice *iface, HWND hwnd, UINT msg, WPARAM wparam, LPARAM lparam) { FIXME("(%p, %p, %u, %Ix, %Ix): stub.\n", iface, hwnd, msg, wparam, lparam); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetNotifyCallbackFunction(ISpVoice *iface, SPNOTIFYCALLBACK *callback, WPARAM wparam, LPARAM lparam) { FIXME("(%p, %p, %Ix, %Ix): stub.\n", iface, callback, wparam, lparam); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetNotifyCallbackInterface(ISpVoice *iface, ISpNotifyCallback *callback, WPARAM wparam, LPARAM lparam) { FIXME("(%p, %p, %Ix, %Ix): stub.\n", iface, callback, wparam, lparam); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetNotifyWin32Event(ISpVoice *iface) { FIXME("(%p): stub.\n", iface); return E_NOTIMPL; } static HRESULT WINAPI spvoice_WaitForNotifyEvent(ISpVoice *iface, DWORD milliseconds) { FIXME("(%p, %ld): stub.\n", iface, milliseconds); return E_NOTIMPL; } static HANDLE WINAPI spvoice_GetNotifyEventHandle(ISpVoice *iface) { FIXME("(%p): stub.\n", iface); return NULL; } static HRESULT WINAPI spvoice_SetInterest(ISpVoice *iface, ULONGLONG event, ULONGLONG queued) { FIXME("(%p, %s, %s): stub.\n", iface, wine_dbgstr_longlong(event), wine_dbgstr_longlong(queued)); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetEvents(ISpVoice *iface, ULONG count, SPEVENT *array, ULONG *fetched) { FIXME("(%p, %lu, %p, %p): stub.\n", iface, count, array, fetched); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetInfo(ISpVoice *iface, SPEVENTSOURCEINFO *info) { FIXME("(%p, %p): stub.\n", iface, info); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetOutput(ISpVoice *iface, IUnknown *unk, BOOL allow_format_changes) { struct speech_voice *This = impl_from_ISpVoice(iface); ISpStreamFormat *stream = NULL; ISpObjectToken *token = NULL; HRESULT hr; TRACE("(%p, %p, %d).\n", iface, unk, allow_format_changes); if (!allow_format_changes) FIXME("ignoring allow_format_changes = FALSE.\n"); if (FAILED(hr = async_start_queue(&This->queue))) return hr; if (!unk) { /* TODO: Create the default SpAudioOut token here once SpMMAudioEnum is implemented. */ if (FAILED(hr = CoCreateInstance(&CLSID_SpMMAudioOut, NULL, CLSCTX_INPROC_SERVER, &IID_ISpStreamFormat, (void **)&stream))) return hr; } else { if (FAILED(IUnknown_QueryInterface(unk, &IID_ISpStreamFormat, (void **)&stream))) { if (FAILED(IUnknown_QueryInterface(unk, &IID_ISpObjectToken, (void **)&token))) return E_INVALIDARG; } } if (!stream) { hr = ISpObjectToken_CreateInstance(token, NULL, CLSCTX_ALL, &IID_ISpStreamFormat, (void **)&stream); ISpObjectToken_Release(token); if (FAILED(hr)) return hr; } EnterCriticalSection(&This->cs); if (This->output) ISpStreamFormat_Release(This->output); This->output = stream; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_GetOutputObjectToken(ISpVoice *iface, ISpObjectToken **token) { FIXME("(%p, %p): stub.\n", iface, token); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetOutputStream(ISpVoice *iface, ISpStreamFormat **stream) { FIXME("(%p, %p): stub.\n", iface, stream); return E_NOTIMPL; } static HRESULT WINAPI spvoice_Pause(ISpVoice *iface) { FIXME("(%p): stub.\n", iface); return E_NOTIMPL; } static HRESULT WINAPI spvoice_Resume(ISpVoice *iface) { FIXME("(%p): stub.\n", iface); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetVoice(ISpVoice *iface, ISpObjectToken *token) { struct speech_voice *This = impl_from_ISpVoice(iface); ISpTTSEngine *engine; HRESULT hr; TRACE("(%p, %p).\n", iface, token); if (!token) { if (FAILED(hr = create_default_token(SPCAT_VOICES, &token))) return hr; } else ISpObjectToken_AddRef(token); hr = ISpObjectToken_CreateInstance(token, NULL, CLSCTX_ALL, &IID_ISpTTSEngine, (void **)&engine); ISpObjectToken_Release(token); if (FAILED(hr)) return hr; EnterCriticalSection(&This->cs); if (This->engine) ISpTTSEngine_Release(This->engine); This->engine = engine; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_GetVoice(ISpVoice *iface, ISpObjectToken **token) { struct speech_voice *This = impl_from_ISpVoice(iface); ISpObjectWithToken *engine_token_iface; HRESULT hr; TRACE("(%p, %p).\n", iface, token); if (!token) return E_POINTER; EnterCriticalSection(&This->cs); if (!This->engine) { LeaveCriticalSection(&This->cs); return create_default_token(SPCAT_VOICES, token); } if (SUCCEEDED(hr = ISpTTSEngine_QueryInterface(This->engine, &IID_ISpObjectWithToken, (void **)&engine_token_iface))) { hr = ISpObjectWithToken_GetObjectToken(engine_token_iface, token); ISpObjectWithToken_Release(engine_token_iface); } LeaveCriticalSection(&This->cs); return hr; } struct async_result { HANDLE done; HRESULT hr; }; struct speak_task { struct async_task task; struct async_result *result; struct speech_voice *voice; SPVTEXTFRAG *frag_list; ISpTTSEngineSite *site; DWORD flags; }; static HRESULT set_output_format(ISpStreamFormat *output, ISpTTSEngine *engine, GUID *fmtid, WAVEFORMATEX **wfx) { GUID output_fmtid; WAVEFORMATEX *output_wfx = NULL; ISpAudio *audio = NULL; HRESULT hr; if (FAILED(hr = ISpStreamFormat_GetFormat(output, &output_fmtid, &output_wfx))) return hr; if (FAILED(hr = ISpTTSEngine_GetOutputFormat(engine, &output_fmtid, output_wfx, fmtid, wfx))) goto done; if (!IsEqualGUID(fmtid, &SPDFID_WaveFormatEx)) { hr = E_INVALIDARG; goto done; } if (memcmp(output_wfx, *wfx, sizeof(WAVEFORMATEX)) || memcmp(output_wfx + 1, *wfx + 1, output_wfx->cbSize)) { if (FAILED(hr = ISpStreamFormat_QueryInterface(output, &IID_ISpAudio, (void **)&audio)) || FAILED(hr = ISpAudio_SetFormat(audio, &SPDFID_WaveFormatEx, *wfx))) goto done; } done: CoTaskMemFree(output_wfx); if (audio) ISpAudio_Release(audio); return hr; } static void speak_proc(struct async_task *task) { struct speak_task *speak_task = (struct speak_task *)task; struct speech_voice *This = speak_task->voice; GUID fmtid; WAVEFORMATEX *wfx = NULL; ISpTTSEngine *engine = NULL; ISpAudio *audio = NULL; HRESULT hr; TRACE("(%p).\n", task); EnterCriticalSection(&This->cs); if (This->actions & SPVES_ABORT) { LeaveCriticalSection(&This->cs); hr = S_OK; goto done; } if (FAILED(hr = set_output_format(This->output, This->engine, &fmtid, &wfx))) { LeaveCriticalSection(&This->cs); ERR("failed setting output format: %#lx.\n", hr); goto done; } engine = This->engine; ISpTTSEngine_AddRef(engine); if (SUCCEEDED(ISpStreamFormat_QueryInterface(This->output, &IID_ISpAudio, (void **)&audio))) ISpAudio_SetState(audio, SPAS_RUN, 0); This->actions = SPVES_RATE | SPVES_VOLUME; LeaveCriticalSection(&This->cs); hr = ISpTTSEngine_Speak(engine, speak_task->flags, &fmtid, wfx, speak_task->frag_list, speak_task->site); if (SUCCEEDED(hr)) { ISpStreamFormat_Commit(This->output, STGC_DEFAULT); if (audio) WaitForSingleObject(ISpAudio_EventHandle(audio), INFINITE); } else WARN("ISpTTSEngine_Speak failed: %#lx.\n", hr); done: if (audio) { ISpAudio_SetState(audio, SPAS_CLOSED, 0); ISpAudio_Release(audio); } CoTaskMemFree(wfx); if (engine) ISpTTSEngine_Release(engine); heap_free(speak_task->frag_list); ISpTTSEngineSite_Release(speak_task->site); if (speak_task->result) { speak_task->result->hr = hr; SetEvent(speak_task->result->done); } } static HRESULT ttsenginesite_create(struct speech_voice *voice, ULONG stream_num, ISpTTSEngineSite **site); static HRESULT WINAPI spvoice_Speak(ISpVoice *iface, const WCHAR *contents, DWORD flags, ULONG *stream_num_out) { struct speech_voice *This = impl_from_ISpVoice(iface); ISpTTSEngineSite *site = NULL; SPVTEXTFRAG *frag; struct speak_task *speak_task = NULL; struct async_result *result = NULL; size_t contents_len, contents_size; ULONG stream_num; HRESULT hr; TRACE("(%p, %p, %#lx, %p).\n", iface, contents, flags, stream_num_out); flags &= ~SPF_IS_NOT_XML; if (flags & ~(SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_NLP_SPEAK_PUNC)) { FIXME("flags %#lx not implemented.\n", flags & ~(SPF_ASYNC | SPF_PURGEBEFORESPEAK | SPF_NLP_SPEAK_PUNC)); return E_NOTIMPL; } if (flags & SPF_PURGEBEFORESPEAK) { ISpAudio *audio; EnterCriticalSection(&This->cs); This->actions = SPVES_ABORT; if (This->output && SUCCEEDED(ISpStreamFormat_QueryInterface(This->output, &IID_ISpAudio, (void **)&audio))) { ISpAudio_SetState(audio, SPAS_CLOSED, 0); ISpAudio_Release(audio); } LeaveCriticalSection(&This->cs); async_empty_queue(&This->queue); EnterCriticalSection(&This->cs); This->actions = SPVES_CONTINUE; LeaveCriticalSection(&This->cs); if (!contents || !*contents) return S_OK; } else if (!contents) return E_POINTER; contents_len = wcslen(contents); contents_size = sizeof(WCHAR) * (contents_len + 1); if (!This->output) { /* Create a new output stream with the default output. */ if (FAILED(hr = ISpVoice_SetOutput(iface, NULL, TRUE))) return hr; } if (!This->engine) { /* Create a new engine with the default voice. */ if (FAILED(hr = ISpVoice_SetVoice(iface, NULL))) return hr; } if (!(frag = heap_alloc(sizeof(*frag) + contents_size))) return E_OUTOFMEMORY; memset(frag, 0, sizeof(*frag)); memcpy(frag + 1, contents, contents_size); frag->State.eAction = SPVA_Speak; frag->State.Volume = 100; frag->pTextStart = (WCHAR *)(frag + 1); frag->ulTextLen = contents_len; frag->ulTextSrcOffset = 0; stream_num = InterlockedIncrement(&This->cur_stream_num); if (FAILED(hr = ttsenginesite_create(This, stream_num, &site))) { FIXME("Failed to create ttsenginesite: %#lx.\n", hr); goto fail; } speak_task = heap_alloc(sizeof(*speak_task)); speak_task->task.proc = speak_proc; speak_task->result = NULL; speak_task->voice = This; speak_task->frag_list = frag; speak_task->site = site; speak_task->flags = flags & SPF_NLP_SPEAK_PUNC; if (!(flags & SPF_ASYNC)) { if (!(result = heap_alloc(sizeof(*result)))) { hr = E_OUTOFMEMORY; goto fail; } result->hr = E_FAIL; result->done = CreateEventW(NULL, FALSE, FALSE, NULL); speak_task->result = result; } if (FAILED(hr = async_queue_task(&This->queue, (struct async_task *)speak_task))) { WARN("Failed to queue task: %#lx.\n", hr); goto fail; } if (stream_num_out) *stream_num_out = stream_num; if (flags & SPF_ASYNC) return S_OK; else { WaitForSingleObject(result->done, INFINITE); hr = result->hr; CloseHandle(result->done); heap_free(result); return hr; } fail: if (site) ISpTTSEngineSite_Release(site); heap_free(frag); heap_free(speak_task); if (result) { CloseHandle(result->done); heap_free(result); } return hr; } static HRESULT WINAPI spvoice_SpeakStream(ISpVoice *iface, IStream *stream, DWORD flags, ULONG *number) { FIXME("(%p, %p, %#lx, %p): stub.\n", iface, stream, flags, number); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetStatus(ISpVoice *iface, SPVOICESTATUS *status, WCHAR **bookmark) { FIXME("(%p, %p, %p): stub.\n", iface, status, bookmark); return E_NOTIMPL; } static HRESULT WINAPI spvoice_Skip(ISpVoice *iface, const WCHAR *type, LONG items, ULONG *skipped) { FIXME("(%p, %s, %ld, %p): stub.\n", iface, debugstr_w(type), items, skipped); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetPriority(ISpVoice *iface, SPVPRIORITY priority) { FIXME("(%p, %d): stub.\n", iface, priority); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetPriority(ISpVoice *iface, SPVPRIORITY *priority) { FIXME("(%p, %p): stub.\n", iface, priority); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetAlertBoundary(ISpVoice *iface, SPEVENTENUM boundary) { FIXME("(%p, %d): stub.\n", iface, boundary); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetAlertBoundary(ISpVoice *iface, SPEVENTENUM *boundary) { FIXME("(%p, %p): stub.\n", iface, boundary); return E_NOTIMPL; } static HRESULT WINAPI spvoice_SetRate(ISpVoice *iface, LONG rate) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p, %ld).\n", iface, rate); EnterCriticalSection(&This->cs); This->rate = rate; This->actions |= SPVES_RATE; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_GetRate(ISpVoice *iface, LONG *rate) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p, %p).\n", iface, rate); EnterCriticalSection(&This->cs); *rate = This->rate; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_SetVolume(ISpVoice *iface, USHORT volume) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p, %d).\n", iface, volume); if (volume > 100) return E_INVALIDARG; EnterCriticalSection(&This->cs); This->volume = volume; This->actions |= SPVES_VOLUME; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_GetVolume(ISpVoice *iface, USHORT *volume) { struct speech_voice *This = impl_from_ISpVoice(iface); TRACE("(%p, %p).\n", iface, volume); EnterCriticalSection(&This->cs); *volume = This->volume; LeaveCriticalSection(&This->cs); return S_OK; } static HRESULT WINAPI spvoice_WaitUntilDone(ISpVoice *iface, ULONG timeout) { struct speech_voice *This = impl_from_ISpVoice(iface); HRESULT hr; TRACE("(%p, %ld).\n", iface, timeout); hr = async_wait_queue_empty(&This->queue, timeout); if (hr == WAIT_OBJECT_0) return S_OK; else if (hr == WAIT_TIMEOUT) return S_FALSE; return hr; } static HRESULT WINAPI spvoice_SetSyncSpeakTimeout(ISpVoice *iface, ULONG timeout) { FIXME("(%p, %ld): stub.\n", iface, timeout); return E_NOTIMPL; } static HRESULT WINAPI spvoice_GetSyncSpeakTimeout(ISpVoice *iface, ULONG *timeout) { FIXME("(%p, %p): stub.\n", iface, timeout); return E_NOTIMPL; } static HANDLE WINAPI spvoice_SpeakCompleteEvent(ISpVoice *iface) { FIXME("(%p): stub.\n", iface); return NULL; } static HRESULT WINAPI spvoice_IsUISupported(ISpVoice *iface, const WCHAR *type, void *extra, ULONG count, BOOL *supported) { FIXME("(%p, %p, %p, %ld, %p): stub.\n", iface, type, extra, count, supported); return E_NOTIMPL; } static HRESULT WINAPI spvoice_DisplayUI(ISpVoice *iface, HWND parent, const WCHAR *title, const WCHAR *type, void *extra, ULONG count) { FIXME("(%p, %p, %p, %p, %p, %ld): stub.\n", iface, parent, title, type, extra, count); return E_NOTIMPL; } static const ISpVoiceVtbl spvoice_vtbl = { spvoice_QueryInterface, spvoice_AddRef, spvoice_Release, spvoice_SetNotifySink, spvoice_SetNotifyWindowMessage, spvoice_SetNotifyCallbackFunction, spvoice_SetNotifyCallbackInterface, spvoice_SetNotifyWin32Event, spvoice_WaitForNotifyEvent, spvoice_GetNotifyEventHandle, spvoice_SetInterest, spvoice_GetEvents, spvoice_GetInfo, spvoice_SetOutput, spvoice_GetOutputObjectToken, spvoice_GetOutputStream, spvoice_Pause, spvoice_Resume, spvoice_SetVoice, spvoice_GetVoice, spvoice_Speak, spvoice_SpeakStream, spvoice_GetStatus, spvoice_Skip, spvoice_SetPriority, spvoice_GetPriority, spvoice_SetAlertBoundary, spvoice_GetAlertBoundary, spvoice_SetRate, spvoice_GetRate, spvoice_SetVolume, spvoice_GetVolume, spvoice_WaitUntilDone, spvoice_SetSyncSpeakTimeout, spvoice_GetSyncSpeakTimeout, spvoice_SpeakCompleteEvent, spvoice_IsUISupported, spvoice_DisplayUI }; /* ISpTTSEngineSite interface */ static HRESULT WINAPI ttsenginesite_QueryInterface(ISpTTSEngineSite *iface, REFIID iid, void **obj) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); TRACE("(%p, %s %p).\n", iface, debugstr_guid(iid), obj); if (IsEqualIID(iid, &IID_IUnknown) || IsEqualIID(iid, &IID_ISpTTSEngineSite)) *obj = &This->ISpTTSEngineSite_iface; else { *obj = NULL; FIXME("interface %s not implemented.\n", debugstr_guid(iid)); return E_NOINTERFACE; } IUnknown_AddRef((IUnknown *)*obj); return S_OK; } static ULONG WINAPI ttsenginesite_AddRef(ISpTTSEngineSite *iface) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); ULONG ref = InterlockedIncrement(&This->ref); TRACE("(%p): ref=%lu.\n", iface, ref); return ref; } static ULONG WINAPI ttsenginesite_Release(ISpTTSEngineSite *iface) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); ULONG ref = InterlockedDecrement(&This->ref); TRACE("(%p): ref=%lu.\n", iface, ref); if (!ref) { if (This->voice) ISpeechVoice_Release(&This->voice->ISpeechVoice_iface); heap_free(This); } return ref; } static HRESULT WINAPI ttsenginesite_AddEvents(ISpTTSEngineSite *iface, const SPEVENT *events, ULONG count) { FIXME("(%p, %p, %ld): stub.\n", iface, events, count); return S_OK; } static HRESULT WINAPI ttsenginesite_GetEventInterest(ISpTTSEngineSite *iface, ULONGLONG *interest) { FIXME("(%p, %p): stub.\n", iface, interest); return E_NOTIMPL; } static DWORD WINAPI ttsenginesite_GetActions(ISpTTSEngineSite *iface) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); DWORD actions; TRACE("(%p).\n", iface); EnterCriticalSection(&This->voice->cs); actions = This->voice->actions; LeaveCriticalSection(&This->voice->cs); return actions; } static HRESULT WINAPI ttsenginesite_Write(ISpTTSEngineSite *iface, const void *buf, ULONG cb, ULONG *cb_written) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); TRACE("(%p, %p, %ld, %p).\n", iface, buf, cb, cb_written); if (!This->voice->output) return SPERR_UNINITIALIZED; return ISpStreamFormat_Write(This->voice->output, buf, cb, cb_written); } static HRESULT WINAPI ttsenginesite_GetRate(ISpTTSEngineSite *iface, LONG *rate) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); TRACE("(%p, %p).\n", iface, rate); EnterCriticalSection(&This->voice->cs); *rate = This->voice->rate; This->voice->actions &= ~SPVES_RATE; LeaveCriticalSection(&This->voice->cs); return S_OK; } static HRESULT WINAPI ttsenginesite_GetVolume(ISpTTSEngineSite *iface, USHORT *volume) { struct tts_engine_site *This = impl_from_ISpTTSEngineSite(iface); TRACE("(%p, %p).\n", iface, volume); EnterCriticalSection(&This->voice->cs); *volume = This->voice->volume; This->voice->actions &= ~SPVES_VOLUME; LeaveCriticalSection(&This->voice->cs); return S_OK; } static HRESULT WINAPI ttsenginesite_GetSkipInfo(ISpTTSEngineSite *iface, SPVSKIPTYPE *type, LONG *skip_count) { FIXME("(%p, %p, %p): stub.\n", iface, type, skip_count); return E_NOTIMPL; } static HRESULT WINAPI ttsenginesite_CompleteSkip(ISpTTSEngineSite *iface, LONG num_skipped) { FIXME("(%p, %ld): stub.\n", iface, num_skipped); return E_NOTIMPL; } const static ISpTTSEngineSiteVtbl ttsenginesite_vtbl = { ttsenginesite_QueryInterface, ttsenginesite_AddRef, ttsenginesite_Release, ttsenginesite_AddEvents, ttsenginesite_GetEventInterest, ttsenginesite_GetActions, ttsenginesite_Write, ttsenginesite_GetRate, ttsenginesite_GetVolume, ttsenginesite_GetSkipInfo, ttsenginesite_CompleteSkip }; static HRESULT ttsenginesite_create(struct speech_voice *voice, ULONG stream_num, ISpTTSEngineSite **site) { struct tts_engine_site *This = heap_alloc(sizeof(*This)); if (!This) return E_OUTOFMEMORY; This->ISpTTSEngineSite_iface.lpVtbl = &ttsenginesite_vtbl; This->ref = 1; This->voice = voice; This->stream_num = stream_num; ISpeechVoice_AddRef(&This->voice->ISpeechVoice_iface); *site = &This->ISpTTSEngineSite_iface; return S_OK; } /* IConnectionPointContainer interface */ static HRESULT WINAPI container_QueryInterface(IConnectionPointContainer *iface, REFIID iid, void **obj) { struct speech_voice *This = impl_from_IConnectionPointContainer(iface); TRACE("(%p, %s %p).\n", iface, debugstr_guid(iid), obj); return ISpeechVoice_QueryInterface(&This->ISpeechVoice_iface, iid, obj); } static ULONG WINAPI container_AddRef(IConnectionPointContainer *iface) { struct speech_voice *This = impl_from_IConnectionPointContainer(iface); TRACE("(%p).\n", iface); return ISpeechVoice_AddRef(&This->ISpeechVoice_iface); } static ULONG WINAPI container_Release(IConnectionPointContainer *iface) { struct speech_voice *This = impl_from_IConnectionPointContainer(iface); TRACE("(%p).\n", iface); return ISpeechVoice_Release(&This->ISpeechVoice_iface); } static HRESULT WINAPI container_EnumConnectionPoints(IConnectionPointContainer *iface, IEnumConnectionPoints **enum_cp) { FIXME("(%p, %p): stub.\n", iface, enum_cp); return E_NOTIMPL; } static HRESULT WINAPI container_FindConnectionPoint(IConnectionPointContainer *iface, REFIID riid, IConnectionPoint **cp) { FIXME("(%p, %s, %p): stub.\n", iface, debugstr_guid(riid), cp); return E_NOTIMPL; } const static IConnectionPointContainerVtbl container_vtbl = { container_QueryInterface, container_AddRef, container_Release, container_EnumConnectionPoints, container_FindConnectionPoint }; HRESULT speech_voice_create(IUnknown *outer, REFIID iid, void **obj) { struct speech_voice *This = heap_alloc(sizeof(*This)); HRESULT hr; if (!This) return E_OUTOFMEMORY; This->ISpeechVoice_iface.lpVtbl = &speech_voice_vtbl; This->ISpVoice_iface.lpVtbl = &spvoice_vtbl; This->IConnectionPointContainer_iface.lpVtbl = &container_vtbl; This->ref = 1; This->output = NULL; This->engine = NULL; This->cur_stream_num = 0; This->actions = SPVES_CONTINUE; This->volume = 100; This->rate = 0; memset(&This->queue, 0, sizeof(This->queue)); InitializeCriticalSection(&This->cs); hr = ISpeechVoice_QueryInterface(&This->ISpeechVoice_iface, iid, obj); ISpeechVoice_Release(&This->ISpeechVoice_iface); return hr; }