voice - render markdown as string before synthesizing (#212796)

This commit is contained in:
Benjamin Pasero 2024-05-15 13:09:02 +02:00 committed by GitHub
parent 942d81c5b1
commit 8daa0c10bf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 95 additions and 35 deletions

View file

@ -45,7 +45,7 @@ import { IExtensionsWorkbenchService } from 'vs/workbench/contrib/extensions/com
import { InlineChatController } from 'vs/workbench/contrib/inlineChat/browser/inlineChatController';
import { CTX_INLINE_CHAT_FOCUSED, CTX_INLINE_CHAT_HAS_ACTIVE_REQUEST } from 'vs/workbench/contrib/inlineChat/common/inlineChat';
import { NOTEBOOK_EDITOR_FOCUSED } from 'vs/workbench/contrib/notebook/common/notebookContextKeys';
import { HasSpeechProvider, ISpeechService, ITextToSpeechSession, KeywordRecognitionStatus, SpeechToTextInProgress, SpeechToTextStatus, TextToSpeechStatus, TextToSpeechInProgress as GlobalTextToSpeechInProgress } from 'vs/workbench/contrib/speech/common/speechService';
import { HasSpeechProvider, ISpeechService, KeywordRecognitionStatus, SpeechToTextInProgress, SpeechToTextStatus, TextToSpeechStatus, TextToSpeechInProgress as GlobalTextToSpeechInProgress } from 'vs/workbench/contrib/speech/common/speechService';
import { ITerminalService } from 'vs/workbench/contrib/terminal/browser/terminal';
import { TerminalChatContextKeys, TerminalChatController } from 'vs/workbench/contrib/terminal/browser/terminalContribExports';
import { IEditorService } from 'vs/workbench/services/editor/common/editorService';
@ -55,6 +55,7 @@ import { IStatusbarEntry, IStatusbarEntryAccessor, IStatusbarService, StatusbarA
import { IViewsService } from 'vs/workbench/services/views/common/viewsService';
import { IChatResponseModel } from 'vs/workbench/contrib/chat/common/chatModel';
import { IAccessibilityService } from 'vs/platform/accessibility/common/accessibility';
import { renderStringAsPlaintext } from 'vs/base/browser/markdownRenderer';
//#region Speech to Text
@ -749,24 +750,12 @@ class ChatSynthesizerSessions {
}
}));
if (controller.response.isComplete) {
return this.synthesizeCompletedResponse(session, controller.response);
} else {
return this.synthesizePendingResponse(session, controller.response, activeSession.token);
}
}
private synthesizeCompletedResponse(session: ITextToSpeechSession, response: IChatResponseModel): Promise<void> {
return session.synthesize(response.response.asString());
}
private async synthesizePendingResponse(session: ITextToSpeechSession, response: IChatResponseModel, token: CancellationToken): Promise<void> {
for await (const chunk of this.nextChatResponseChunk(response, token)) {
if (token.isCancellationRequested) {
for await (const chunk of this.nextChatResponseChunk(controller.response, activeSession.token)) {
if (activeSession.token.isCancellationRequested) {
return;
}
await raceCancellation(session.synthesize(chunk), token);
await raceCancellation(session.synthesize(chunk), activeSession.token);
}
}
@ -774,39 +763,43 @@ class ChatSynthesizerSessions {
let totalOffset = 0;
let complete = false;
do {
const text = response.response.asString();
const { chunks, offset, tail } = this.toChunks(text, totalOffset);
const responseLength = response.response.asString().length;
const { chunk, offset } = this.parseNextChatResponseChunk(response, totalOffset);
totalOffset = offset;
complete = response.isComplete;
for (const chunk of chunks) {
if (chunk) {
yield chunk;
if (token.isCancellationRequested) {
return;
}
}
if (complete) {
yield tail;
} else if (text === response.response.asString()) {
if (token.isCancellationRequested) {
return;
}
if (!complete && responseLength === response.response.asString().length) {
await raceCancellation(Event.toPromise(response.onDidChange), token); // wait for the response to change
}
} while (!token.isCancellationRequested && !complete);
}
private toChunks(text: string, offset: number): { readonly chunks: string[]; readonly offset: number; readonly tail: string } {
const chunks: string[] = [];
private parseNextChatResponseChunk(response: IChatResponseModel, offset: number): { readonly chunk: string | undefined; readonly offset: number } {
let chunk: string | undefined = undefined;
for (let i = offset; i < text.length; i++) {
const char = text[i];
if (char === '.' || char === '!' || char === '?' || char === ':') {
chunks.push(text.substring(offset, i + 1));
offset = i + 1;
}
const text = response.response.asString();
if (response.isComplete) {
chunk = text.substring(offset);
offset = text.length + 1;
} else {
const res = parseNextChatResponseChunk(text, offset);
chunk = res.chunk;
offset = res.offset;
}
return { chunks, offset, tail: text.substring(offset) };
return {
chunk: chunk ? renderStringAsPlaintext({ value: chunk }) : chunk, // convert markdown to plain text
offset
};
}
stop(): void {
@ -815,6 +808,29 @@ class ChatSynthesizerSessions {
}
}
// Characters that end a sentence when they are directly followed by a space.
const sentenceDelimiter = ['.', '!', '?', ':'];
// A line break always ends a chunk, regardless of what follows it.
const lineDelimiter = '\n';
// The character that must follow a sentence delimiter for it to count.
const wordDelimiter = ' ';

/**
 * Extracts the largest chunk of `text` starting at `offset` that ends on a
 * chunk boundary.
 *
 * A boundary is either a sentence delimiter (`.`, `!`, `?`, `:`) immediately
 * followed by a space, or a newline character. The text is scanned from its
 * end towards `offset`, so the last boundary wins and the chunk is as large
 * as possible.
 *
 * @param text the full text to look into
 * @param offset index in `text` where the next chunk starts
 * @returns the trimmed chunk together with the offset right after the
 * boundary that ended it. When no boundary exists at or after `offset`,
 * `chunk` is `undefined` and `offset` is returned unchanged.
 */
export function parseNextChatResponseChunk(text: string, offset: number): { readonly chunk: string | undefined; readonly offset: number } {
	const endsChunkAt = (position: number): boolean => {
		const current = text[position];
		if (current === lineDelimiter) {
			return true; // end of line
		}

		// end of sentence: only when a space follows, so that a trailing or
		// embedded delimiter (e.g. "3.14" or "more...") does not split
		return text[position + 1] === wordDelimiter && sentenceDelimiter.includes(current);
	};

	// Walk backwards so that the first boundary found is the last one in the text
	for (let position = text.length - 1; position >= offset; position--) {
		if (endsChunkAt(position)) {
			const nextOffset = position + 1;

			return {
				chunk: text.substring(offset, nextOffset).trim(),
				offset: nextOffset
			};
		}
	}

	return { chunk: undefined, offset };
}
export class ReadChatResponseAloud extends Action2 {
constructor() {
super({

View file

@ -0,0 +1,44 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { ensureNoDisposablesAreLeakedInTestSuite } from 'vs/base/test/common/utils';
import { parseNextChatResponseChunk } from 'vs/workbench/contrib/chat/electron-sandbox/actions/voiceChatActions';
suite('VoiceChatActions', function () {

	/**
	 * Parses the next chunk of `text` starting at `offset`, asserts that the
	 * chunk equals `expected` and hands back the parse result so that callers
	 * can continue from the returned offset.
	 */
	function assertChunk(text: string, expected: string | undefined, offset: number): { chunk: string | undefined; offset: number } {
		const parsed = parseNextChatResponseChunk(text, offset);
		assert.strictEqual(parsed.chunk, expected);

		return parsed;
	}

	test('parseNextChatResponseChunk', function () {

		// Simple inputs parsed from the start: a chunk is only produced
		// once a sentence delimiter is followed by a space
		const simpleCases: ReadonlyArray<readonly [string, string | undefined]> = [
			['Hello World', undefined],
			['Hello World.', undefined],
			['Hello World. ', 'Hello World.'],
			['Hello World? ', 'Hello World?'],
			['Hello World! ', 'Hello World!'],
			['Hello World: ', 'Hello World:']
		];
		for (const [text, expected] of simpleCases) {
			assertChunk(text, expected, 0);
		}

		// The last boundary wins, so the largest possible chunk is returned
		assertChunk('Hello World. How is your day? And more...', 'Hello World. How is your day?', 0);

		// Same, but continuing from the offset of the previous parse as the text grows
		let offset = 0;
		({ offset } = assertChunk('Hello World. How is your ', 'Hello World.', offset));
		({ offset } = assertChunk('Hello World. How is your day? And more...', 'How is your day?', offset));
		({ offset } = assertChunk('Hello World. How is your day? And more to come! ', 'And more to come!', offset));
		assertChunk('Hello World. How is your day? And more to come! ', undefined, offset);

		// Separated by newlines
		({ offset } = assertChunk('Hello World.\nHow is your', 'Hello World.', 0));
		assertChunk('Hello World.\nHow is your day?\n', 'How is your day?', offset);
	});

	ensureNoDisposablesAreLeakedInTestSuite();
});