Render html as plaintext when html not supported (#213265)

* Support rendering unsupported html tags as plaintext for chat

* Render html as plaintext when html not supported

* Add comment and test for trusted domains
This commit is contained in:
Rob Lourens 2024-05-23 13:16:18 -07:00 committed by GitHub
parent 81e568cf86
commit fd7c7bda0f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 280 additions and 14 deletions

View file

@ -36,6 +36,12 @@ export interface MarkdownRenderOptions extends FormattedTextRenderOptions {
readonly asyncRenderCallback?: () => void;
readonly fillInIncompleteTokens?: boolean;
readonly remoteImageIsAllowed?: (uri: URI) => boolean;
readonly sanitizerOptions?: ISanitizerOptions;
}
/**
 * Optional knobs that change how rendered markdown HTML is sanitized.
 */
export interface ISanitizerOptions {
/**
 * When true, HTML that the sanitizer would otherwise remove is kept as escaped plaintext
 * so that the original markup stays visible as text.
 */
replaceWithPlaintext?: boolean;
/**
 * Tag names handed to the sanitizer as its allow-list. When omitted, the default
 * basic-markup tag list is used.
 */
allowedTags?: string[];
}
const defaultMarkedRenderers = Object.freeze({
@ -221,6 +227,10 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
// We always pass the output through dompurify after this so that we don't rely on
// marked for sanitization.
markedOptions.sanitizer = (html: string): string => {
	// Plaintext mode: keep the raw markup visible by escaping it rather than dropping it.
	if (options.sanitizerOptions?.replaceWithPlaintext) {
		return escape(html);
	}

	// Untrusted markdown never gets to keep inline HTML.
	if (!markdown.isTrusted) {
		return '';
	}

	// Trusted markdown may keep text matching the span open/close pattern; everything else is removed.
	const isSpanTag = /^(<span[^>]+>)|(<\/\s*span>)$/.test(html);
	return isSpanTag ? html : '';
};
@ -261,7 +271,7 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
}
const htmlParser = new DOMParser();
const markdownHtmlDoc = htmlParser.parseFromString(sanitizeRenderedMarkdown(markdown, renderedMarkdown) as unknown as string, 'text/html');
const markdownHtmlDoc = htmlParser.parseFromString(sanitizeRenderedMarkdown({ isTrusted: markdown.isTrusted, ...options.sanitizerOptions }, renderedMarkdown) as unknown as string, 'text/html');
markdownHtmlDoc.body.querySelectorAll('img, audio, video, source')
.forEach(img => {
@ -306,7 +316,7 @@ export function renderMarkdown(markdown: IMarkdownString, options: MarkdownRende
}
});
element.innerHTML = sanitizeRenderedMarkdown(markdown, markdownHtmlDoc.body.innerHTML) as unknown as string;
element.innerHTML = sanitizeRenderedMarkdown({ isTrusted: markdown.isTrusted, ...options.sanitizerOptions }, markdownHtmlDoc.body.innerHTML) as unknown as string;
if (codeBlocks.length > 0) {
Promise.all(codeBlocks).then((tuples) => {
@ -378,8 +388,14 @@ function resolveWithBaseUri(baseUri: URI, href: string): string {
}
}
/**
 * Sanitizer options as used inside this module: the public {@link ISanitizerOptions}
 * plus the trust setting taken from the markdown string being rendered.
 */
interface IInternalSanitizerOptions extends ISanitizerOptions {
/** Trust setting of the markdown being sanitized (copied from `markdown.isTrusted` by the callers in this file). */
isTrusted?: boolean | MarkdownStringTrustedOptions;
}
// Tags that are written without an end tag. When such an element is replaced by
// plaintext, only its start tag text is emitted.
const selfClosingTags = [
	'area', 'base', 'br', 'col',
	'command', 'embed', 'hr', 'img',
	'input', 'keygen', 'link', 'meta',
	'param', 'source', 'track', 'wbr',
];
function sanitizeRenderedMarkdown(
options: { isTrusted?: boolean | MarkdownStringTrustedOptions },
options: IInternalSanitizerOptions,
renderedMarkdown: string,
): TrustedHTML {
const { config, allowedSchemes } = getSanitizerOptions(options);
@ -410,10 +426,45 @@ function sanitizeRenderedMarkdown(
if (e.tagName === 'input') {
if (element.attributes.getNamedItem('type')?.value === 'checkbox') {
element.setAttribute('disabled', '');
} else {
} else if (!options.replaceWithPlaintext) {
element.parentElement?.removeChild(element);
}
}
if (options.replaceWithPlaintext && !e.allowedTags[e.tagName] && e.tagName !== 'body') {
if (element.parentElement) {
let startTagText: string;
let endTagText: string | undefined;
if (e.tagName === '#comment') {
startTagText = `<!--${element.textContent}-->`;
} else {
const isSelfClosing = selfClosingTags.includes(e.tagName);
const attrString = element.attributes.length ?
' ' + Array.from(element.attributes)
.map(attr => `${attr.name}="${attr.value}"`)
.join(' ')
: '';
startTagText = `<${e.tagName}${attrString}>`;
if (!isSelfClosing) {
endTagText = `</${e.tagName}>`;
}
}
const fragment = document.createDocumentFragment();
const textNode = element.parentElement.ownerDocument.createTextNode(startTagText);
fragment.appendChild(textNode);
const endTagTextNode = endTagText ? element.parentElement.ownerDocument.createTextNode(endTagText) : undefined;
while (element.firstChild) {
fragment.appendChild(element.firstChild);
}
if (endTagTextNode) {
fragment.appendChild(endTagTextNode);
}
element.parentElement.replaceChild(fragment, element);
}
}
}));
store.add(DOM.hookDomPurifyHrefAndSrcSanitizer(allowedSchemes));
@ -451,7 +502,7 @@ export const allowedMarkdownAttr = [
'start',
];
function getSanitizerOptions(options: { readonly isTrusted?: boolean | MarkdownStringTrustedOptions }): { config: dompurify.Config; allowedSchemes: string[] } {
function getSanitizerOptions(options: IInternalSanitizerOptions): { config: dompurify.Config; allowedSchemes: string[] } {
const allowedSchemes = [
Schemas.http,
Schemas.https,
@ -473,7 +524,7 @@ function getSanitizerOptions(options: { readonly isTrusted?: boolean | MarkdownS
// Since we have our own sanitize function for marked, it's possible we missed some tag so let dompurify make sure.
// HTML tags that can result from markdown are from reading https://spec.commonmark.org/0.29/
// HTML table tags that can result from markdown are from https://github.github.com/gfm/#tables-extension-
ALLOWED_TAGS: [...DOM.basicMarkupHtmlTags],
ALLOWED_TAGS: options.allowedTags ?? [...DOM.basicMarkupHtmlTags],
ALLOWED_ATTR: allowedMarkdownAttr,
ALLOW_UNKNOWN_PROTOCOLS: true,
},

View file

@ -4,6 +4,8 @@
*--------------------------------------------------------------------------------------------*/
import * as dom from 'vs/base/browser/dom';
import { renderFormattedText } from 'vs/base/browser/formattedTextRenderer';
import { StandardKeyboardEvent } from 'vs/base/browser/keyboardEvent';
import { IActionViewItemOptions } from 'vs/base/browser/ui/actionbar/actionViewItems';
import { alert } from 'vs/base/browser/ui/aria/aria';
import { Button } from 'vs/base/browser/ui/button/button';
@ -21,6 +23,7 @@ import { Codicon } from 'vs/base/common/codicons';
import { Emitter, Event } from 'vs/base/common/event';
import { FuzzyScore } from 'vs/base/common/filters';
import { IMarkdownString, MarkdownString } from 'vs/base/common/htmlContent';
import { KeyCode } from 'vs/base/common/keyCodes';
import { Disposable, DisposableStore, IDisposable, toDisposable } from 'vs/base/common/lifecycle';
import { ResourceMap } from 'vs/base/common/map';
import { FileAccess, Schemas, matchesSomeScheme } from 'vs/base/common/network';
@ -68,19 +71,16 @@ import { ChatAgentLocation, IChatAgentMetadata } from 'vs/workbench/contrib/chat
import { CONTEXT_CHAT_RESPONSE_SUPPORT_ISSUE_REPORTING, CONTEXT_REQUEST, CONTEXT_RESPONSE, CONTEXT_RESPONSE_DETECTED_AGENT_COMMAND, CONTEXT_RESPONSE_FILTERED, CONTEXT_RESPONSE_VOTE } from 'vs/workbench/contrib/chat/common/chatContextKeys';
import { IChatProgressRenderableResponseContent, IChatTextEditGroup } from 'vs/workbench/contrib/chat/common/chatModel';
import { chatSubcommandLeader } from 'vs/workbench/contrib/chat/common/chatParserTypes';
import { IChatCommandButton, IChatConfirmation, IChatContentReference, IChatFollowup, IChatProgressMessage, IChatResponseProgressFileTreeData, IChatSendRequestOptions, IChatService, IChatTask, IChatWarningMessage, ChatAgentVoteDirection } from 'vs/workbench/contrib/chat/common/chatService';
import { ChatAgentVoteDirection, IChatCommandButton, IChatConfirmation, IChatContentReference, IChatFollowup, IChatProgressMessage, IChatResponseProgressFileTreeData, IChatSendRequestOptions, IChatService, IChatTask, IChatWarningMessage } from 'vs/workbench/contrib/chat/common/chatService';
import { IChatVariablesService } from 'vs/workbench/contrib/chat/common/chatVariables';
import { IChatProgressMessageRenderData, IChatRenderData, IChatResponseMarkdownRenderData, IChatResponseViewModel, IChatTaskRenderData, IChatWelcomeMessageViewModel, isRequestVM, isResponseVM, isWelcomeVM } from 'vs/workbench/contrib/chat/common/chatViewModel';
import { IWordCountResult, getNWords } from 'vs/workbench/contrib/chat/common/chatWordCounter';
import { createFileIconThemableTreeContainerScope } from 'vs/workbench/contrib/files/browser/views/explorerView';
import { IFilesConfiguration } from 'vs/workbench/contrib/files/common/files';
import { ITrustedDomainService } from 'vs/workbench/contrib/url/browser/trustedDomainService';
import { IMarkdownVulnerability, annotateSpecialMarkdownContent } from '../common/annotations';
import { CodeBlockModelCollection } from '../common/codeBlockModelCollection';
import { IChatListItemRendererOptions } from './chat';
import { renderFormattedText } from 'vs/base/browser/formattedTextRenderer';
import { StandardKeyboardEvent } from 'vs/base/browser/keyboardEvent';
import { KeyCode } from 'vs/base/common/keyCodes';
import { ChatMarkdownRenderer } from 'vs/workbench/contrib/chat/browser/chatMarkdownRenderer';
const $ = dom.$;
@ -160,13 +160,12 @@ export class ChatListItemRenderer extends Disposable implements ITreeRenderer<Ch
@ICommandService private readonly commandService: ICommandService,
@ITextModelService private readonly textModelService: ITextModelService,
@IModelService private readonly modelService: IModelService,
@ITrustedDomainService private readonly trustedDomainService: ITrustedDomainService,
@IHoverService private readonly hoverService: IHoverService,
@IChatService private readonly chatService: IChatService,
) {
super();
this.renderer = this._register(this.instantiationService.createInstance(MarkdownRenderer, {}));
this.renderer = this._register(this.instantiationService.createInstance(ChatMarkdownRenderer, undefined));
this.markdownDecorationsRenderer = this.instantiationService.createInstance(ChatMarkdownDecorationsRenderer);
this._editorPool = this._register(this.instantiationService.createInstance(EditorPool, editorOptions, delegate, overflowWidgetsDomNode));
this._diffEditorPool = this._register(this.instantiationService.createInstance(DiffEditorPool, editorOptions, delegate, overflowWidgetsDomNode));
@ -1104,7 +1103,6 @@ export class ChatListItemRenderer extends Disposable implements ITreeRenderer<Ch
const codeblocks: IChatCodeBlockInfo[] = [];
let codeBlockIndex = 0;
const result = this.renderer.render(markdown, {
remoteImageIsAllowed: (uri) => this.trustedDomainService.isValid(uri),
fillInIncompleteTokens,
codeBlockRendererSync: (languageId, text) => {
const index = codeBlockIndex++;

View file

@ -0,0 +1,79 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { MarkdownRenderOptions, MarkedOptions } from 'vs/base/browser/markdownRenderer';
import { IMarkdownString } from 'vs/base/common/htmlContent';
import { IMarkdownRendererOptions, IMarkdownRenderResult, MarkdownRenderer } from 'vs/editor/browser/widget/markdownRenderer/browser/markdownRenderer';
import { ILanguageService } from 'vs/editor/common/languages/language';
import { IOpenerService } from 'vs/platform/opener/common/opener';
import { ITrustedDomainService } from 'vs/workbench/contrib/url/browser/trustedDomainService';
// Tags that Chat markdown is allowed to render as real HTML. Any other tag is
// rendered as plaintext by the Chat renderer's sanitizer options.
const allowedHtmlTags = [
	// Inline and block text markup
	'b', 'blockquote', 'br', 'code', 'em',
	// Headings
	'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
	// Rules, lists, paragraphs and preformatted text
	'hr', 'i', 'li', 'ol', 'p', 'pre', 'strong',
	// Tables
	'table', 'tbody', 'td', 'th', 'thead', 'tr',
	'ul',
	// Links and images
	'a', 'img',
	// Not in the official list, but used for codicons and other vscode markdown extensions
	'span',
];
/**
 * A MarkdownRenderer preconfigured for Chat. Every call to `render` uses the Chat sanitizer
 * options (the Chat tag allow-list, with plaintext replacement turned on) and a
 * `remoteImageIsAllowed` check backed by the trusted domain service. Caller-supplied values
 * for those two options are overridden.
 */
export class ChatMarkdownRenderer extends MarkdownRenderer {
	constructor(
		options: IMarkdownRendererOptions | undefined,
		@ILanguageService languageService: ILanguageService,
		@IOpenerService openerService: IOpenerService,
		@ITrustedDomainService private readonly trustedDomainService: ITrustedDomainService,
	) {
		super(options ?? {}, languageService, openerService);
	}

	/**
	 * Renders `markdown` with the Chat-specific options layered over `options`.
	 *
	 * @param markdown The markdown to render; passed through unchanged unless it supports HTML.
	 * @param options Render options; `remoteImageIsAllowed` and `sanitizerOptions` are replaced.
	 * @param markedOptions Forwarded to the base renderer as-is.
	 * @returns The render result produced by the base renderer.
	 */
	override render(markdown: IMarkdownString | undefined, options?: MarkdownRenderOptions, markedOptions?: MarkedOptions): IMarkdownRenderResult {
		const chatOptions: MarkdownRenderOptions = {
			...options,
			remoteImageIsAllowed: (uri) => this.trustedDomainService.isValid(uri),
			sanitizerOptions: {
				replaceWithPlaintext: true,
				allowedTags: allowedHtmlTags,
			}
		};

		// Nothing to wrap when there is no markdown or it does not support HTML.
		if (!markdown || !markdown.supportHtml) {
			return super.render(markdown, chatOptions, markedOptions);
		}

		// dompurify uses DOMParser, which strips leading comments. Wrapping it all in 'body' prevents this.
		const wrappedInBody: IMarkdownString = {
			...markdown,
			value: `<body>${markdown.value}</body>`,
		};
		return super.render(wrappedInBody, chatOptions, markedOptions);
	}
}

View file

@ -0,0 +1 @@
<div class="rendered-markdown">&lt;!--[CDATA[&lt;div--&gt;content]]&gt;</div>

View file

@ -0,0 +1 @@
<div class="rendered-markdown">&lt;!-- comment1 &lt;div&gt;&lt;/div&gt; --&gt;&lt;div&gt;content&lt;/div&gt;&lt;!-- comment2 --&gt;</div>

View file

@ -0,0 +1 @@
<div class="rendered-markdown">1&lt;canvas&gt;2&lt;div&gt;3&lt;/div&gt;&lt;/canvas&gt;4</div>

View file

@ -0,0 +1 @@
<div class="rendered-markdown">1&lt;div id="id1" style="display: none"&gt;2&lt;div id="my id 2"&gt;3&lt;/div&gt;&lt;/div&gt;4</div>

View file

@ -0,0 +1,8 @@
<div class="rendered-markdown"><h1>heading</h1>
&lt;div&gt;
<ul>
<li><span>&lt;div&gt;<i>1</i>&lt;/div&gt;</span></li>
<li><b>hi</b></li>
</ul>
&lt;/div&gt;
<pre>&lt;canvas&gt;canvas here&lt;/canvas&gt;</pre>&lt;details&gt;&lt;/details&gt;</div>

View file

@ -0,0 +1 @@
<div class="rendered-markdown"><img src="http://allowed.com/image.jpg"> &lt;div&gt;&lt;img src="http://disallowed.com/image.jpg"&gt;&lt;/div&gt;</div>

View file

@ -0,0 +1 @@
<div class="rendered-markdown">&lt;area&gt;<hr><br>&lt;input type="text" value="test"&gt;</div>

View file

@ -0,0 +1,6 @@
<div class="rendered-markdown"><h1>heading</h1>
<ul>
<li>1</li>
<li><b>hi</b></li>
</ul>
<pre><code>code here</code></pre></div>

View file

@ -0,0 +1,99 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { MarkdownString } from 'vs/base/common/htmlContent';
import { assertSnapshot } from 'vs/base/test/common/snapshot';
import { ensureNoDisposablesAreLeakedInTestSuite } from 'vs/base/test/common/utils';
import { ChatMarkdownRenderer } from 'vs/workbench/contrib/chat/browser/chatMarkdownRenderer';
import { ITrustedDomainService } from 'vs/workbench/contrib/url/browser/trustedDomainService';
import { MockTrustedDomainService } from 'vs/workbench/contrib/url/test/browser/mockTrustedDomainService';
import { workbenchInstantiationService } from 'vs/workbench/test/browser/workbenchTestServices';
// Snapshot tests for ChatMarkdownRenderer: each case renders a markdown string and
// compares the rendered output against a stored snapshot.
suite('ChatMarkdownRenderer', () => {
const store = ensureNoDisposablesAreLeakedInTestSuite();
let testRenderer: ChatMarkdownRenderer;
setup(() => {
const instantiationService = store.add(workbenchInstantiationService(undefined, store));
// Only http://allowed.com is treated as a trusted domain in these tests.
instantiationService.stub(ITrustedDomainService, new MockTrustedDomainService(['http://allowed.com']));
testRenderer = instantiationService.createInstance(ChatMarkdownRenderer, {});
});
// Plain text without HTML support enabled; only the text content is snapshotted.
test('simple', async () => {
const md = new MarkdownString('a');
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.textContent);
});
// canvas and div are not in the Chat tag allow-list.
test('invalid HTML', async () => {
const md = new MarkdownString('1<canvas>2<div>3</div></canvas>4');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// Disallowed tags that carry attributes, including an attribute value containing spaces.
test('invalid HTML with attributes', async () => {
const md = new MarkdownString('1<div id="id1" style="display: none">2<div id="my id 2">3</div></div>4');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// Every tag used here is in the Chat tag allow-list.
test('valid HTML', async () => {
const md = new MarkdownString(`
<h1>heading</h1>
<ul>
<li>1</li>
<li><b>hi</b></li>
</ul>
<pre><code>code here</code></pre>`);
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// Allowed tags nested inside disallowed ones (div, canvas, details) and vice versa.
test('mixed valid and invalid HTML', async () => {
const md = new MarkdownString(`
<h1>heading</h1>
<div>
<ul>
<li><span><div><i>1</i></div></span></li>
<li><b>hi</b></li>
</ul>
</div>
<pre><canvas>canvas here</canvas></pre><details></details>`);
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// Tags without end tags: hr and br are in the allow-list, area and input are not.
test('self-closing elements', async () => {
const md = new MarkdownString('<area><hr><br><input type="text" value="test">');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// HTML comments before and after content, one of them containing markup.
test('html comments', async () => {
const md = new MarkdownString('<!-- comment1 <div></div> --><div>content</div><!-- comment2 -->');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// A CDATA section wrapping a disallowed tag.
test('CDATA', async () => {
const md = new MarkdownString('<![CDATA[<div>content</div>]]>');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
// One image from the trusted domain configured in setup and one from a different domain.
test('remote images', async () => {
const md = new MarkdownString('<img src="http://allowed.com/image.jpg"> <img src="http://disallowed.com/image.jpg">');
md.supportHtml = true;
const result = store.add(testRenderer.render(md));
await assertSnapshot(result.element.outerHTML);
});
});

View file

@ -0,0 +1,18 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { URI } from 'vs/base/common/uri';
import { isURLDomainTrusted, ITrustedDomainService } from 'vs/workbench/contrib/url/browser/trustedDomainService';
/**
 * Test implementation of ITrustedDomainService that checks resources against a fixed
 * list of trusted domains given at construction time.
 */
export class MockTrustedDomainService implements ITrustedDomainService {
	_serviceBrand: undefined;

	private readonly _trustedDomains: string[];

	/**
	 * @param _trustedDomains Domains to treat as trusted; defaults to none.
	 */
	constructor(_trustedDomains: string[] = []) {
		this._trustedDomains = _trustedDomains;
	}

	/**
	 * @param resource The URI to check.
	 * @returns The result of `isURLDomainTrusted` for `resource` against the configured list.
	 */
	isValid(resource: URI): boolean {
		const trusted = isURLDomainTrusted(resource, this._trustedDomains);
		return trusted;
	}
}