From bf7dc6646f063898682b30ab48c42d1c2961f697 Mon Sep 17 00:00:00 2001 From: Yuki Ito Date: Thu, 24 Mar 2022 13:45:23 +0000 Subject: [PATCH 1/2] Use maxTokenizationLineLength in monarch --- .../standalone/browser/standaloneLanguages.ts | 5 +- .../standalone/common/monarch/monarchLexer.ts | 19 ++++++- .../standalone/test/browser/monarch.test.ts | 57 ++++++++++++++++--- 3 files changed, 68 insertions(+), 13 deletions(-) diff --git a/src/vs/editor/standalone/browser/standaloneLanguages.ts b/src/vs/editor/standalone/browser/standaloneLanguages.ts index f738b97ed24..decc60790f5 100644 --- a/src/vs/editor/standalone/browser/standaloneLanguages.ts +++ b/src/vs/editor/standalone/browser/standaloneLanguages.ts @@ -23,6 +23,7 @@ import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneT import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers'; import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures'; import { LanguageSelector } from 'vs/editor/common/languageSelector'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; /** * Register information about a new language. @@ -374,7 +375,7 @@ export function registerTokensProviderFactory(languageId: string, factory: Token if (isATokensProvider(result)) { return createTokenizationSupportAdapter(languageId, result); } - return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result)); + return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result), StandaloneServices.get(IConfigurationService)); } }; return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory); @@ -405,7 +406,7 @@ export function setTokensProvider(languageId: string, provider: TokensProvider | */ export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable): IDisposable { const create = (languageDef: IMonarchLanguage) => { - return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef)); + return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef), StandaloneServices.get(IConfigurationService)); }; if (isThenable(languageDef)) { return registerTokensProviderFactory(languageId, { create: () => languageDef }); diff --git a/src/vs/editor/standalone/common/monarch/monarchLexer.ts b/src/vs/editor/standalone/common/monarch/monarchLexer.ts index bdc490fdb41..dde538afac9 100644 --- a/src/vs/editor/standalone/common/monarch/monarchLexer.ts +++ b/src/vs/editor/standalone/common/monarch/monarchLexer.ts @@ -10,11 +10,12 @@ import { IDisposable } from 'vs/base/common/lifecycle'; import * as languages from 'vs/editor/common/languages'; -import { NullState } from 'vs/editor/common/languages/nullTokenize'; +import { NullState, nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize'; import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization'; import { ILanguageService } from 'vs/editor/common/languages/language'; import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon'; import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; const CACHE_STACK_DEPTH = 5; @@ -394,8 +395,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport { private readonly _embeddedLanguages: { [languageId: string]: boolean }; public embeddedLoaded: Promise; private readonly _tokenizationRegistryListener: IDisposable; + private _maxTokenizationLineLength: number; - constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer) { + constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) { this._languageService = languageService; this._standaloneThemeService = standaloneThemeService; this._languageId = languageId; @@ -423,6 +425,16 @@ export class MonarchTokenizer implements languages.ITokenizationSupport { emitting = false; } }); + this._maxTokenizationLineLength = this._configurationService.getValue('editor.maxTokenizationLineLength', { + overrideIdentifier: this._languageId + }); + this._configurationService.onDidChangeConfiguration(e => { + if (e.affectsConfiguration('editor.maxTokenizationLineLength')) { + this._maxTokenizationLineLength = this._configurationService.getValue('editor.maxTokenizationLineLength', { + overrideIdentifier: this._languageId + }); + } + }); } public dispose(): void { @@ -473,6 +485,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport { } public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult { + if (line.length >= this._maxTokenizationLineLength) { + return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState); + } const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme); const endLineState = this._tokenize(line, hasEOL, lineState, tokensCollector); return tokensCollector.finalize(endLineState); diff --git a/src/vs/editor/standalone/test/browser/monarch.test.ts b/src/vs/editor/standalone/test/browser/monarch.test.ts index 5e67bcb4cba..22487d563aa 100644 --- a/src/vs/editor/standalone/test/browser/monarch.test.ts +++ b/src/vs/editor/standalone/test/browser/monarch.test.ts @@ -11,11 +11,13 @@ import { compile } from 'vs/editor/standalone/common/monarch/monarchCompile'; import { Token, TokenizationRegistry } from 'vs/editor/common/languages'; import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes'; import { DisposableStore } from 'vs/base/common/lifecycle'; +import { IConfigurationService } from 'vs/platform/configuration/common/configuration'; +import { StandaloneConfigurationService } from 'vs/editor/standalone/browser/standaloneServices'; suite('Monarch', () => { - function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage): MonarchTokenizer { - return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language)); + function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer { + return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService); } function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] { @@ -32,6 +34,7 @@ suite('Monarch', () => { test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => { const disposables = new DisposableStore(); const languageService = disposables.add(new LanguageService()); + const configurationService = new StandaloneConfigurationService(); disposables.add(languageService.registerLanguage({ id: 'sql' })); disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', { tokenizer: { @@ -39,7 +42,7 @@ suite('Monarch', () => { [/./, 'token'] ] } - }))); + }, configurationService))); const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)'; const tokenizer = createMonarchTokenizer(languageService, 'test1', { tokenizer: { @@ -63,7 +66,7 @@ suite('Monarch', () => { ], endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]], } - }); + }, configurationService); const lines = [ `mysql_query("""SELECT * FROM table_name WHERE ds = ''""")`, @@ -106,6 +109,7 @@ suite('Monarch', () => { }); test('microsoft/monaco-editor#1235: Empty Line Handling', () => { + const configurationService = new StandaloneConfigurationService(); const languageService = new LanguageService(); const tokenizer = createMonarchTokenizer(languageService, 'test', { tokenizer: { @@ -125,7 +129,7 @@ suite('Monarch', () => { // No possible rule to detect an empty line and @pop? ], }, - }); + }, configurationService); const lines = [ `// This comment \\`, @@ -163,6 +167,7 @@ suite('Monarch', () => { }); test('microsoft/monaco-editor#2265: Exit a state at end of line', () => { + const configurationService = new StandaloneConfigurationService(); const languageService = new LanguageService(); const tokenizer = createMonarchTokenizer(languageService, 'test', { includeLF: true, @@ -179,7 +184,7 @@ suite('Monarch', () => { [/[^\d]+/, ''] ] } - }); + }, configurationService); const lines = [ `PRINT 10 * 20`, @@ -211,6 +216,7 @@ suite('Monarch', () => { }); test('issue #115662: monarchCompile function need an extra option which can control replacement', () => { + const configurationService = new StandaloneConfigurationService(); const languageService = new LanguageService(); const tokenizer1 = createMonarchTokenizer(languageService, 'test', { @@ -230,7 +236,7 @@ suite('Monarch', () => { }, ], }, - }); + }, configurationService); const tokenizer2 = createMonarchTokenizer(languageService, 'test', { ignoreCase: false, @@ -242,7 +248,7 @@ suite('Monarch', () => { }, ], }, - }); + }, configurationService); const lines = [ `@ham` @@ -265,6 +271,7 @@ suite('Monarch', () => { }); test('microsoft/monaco-editor#2424: Allow to target @@', () => { + const configurationService = new StandaloneConfigurationService(); const languageService = new LanguageService(); const tokenizer = createMonarchTokenizer(languageService, 'test', { @@ -277,7 +284,7 @@ suite('Monarch', () => { }, ], }, - }); + }, configurationService); const lines = [ `@@` @@ -292,4 +299,36 @@ suite('Monarch', () => { languageService.dispose(); }); + test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', () => { + const configurationService = new StandaloneConfigurationService(); + const languageService = new LanguageService(); + + const tokenizer = createMonarchTokenizer(languageService, 'test', { + maxTokenizationLineLength: 4, + tokenizer: { + root: [ + { + regex: /ham/, + action: { token: 'ham' } + }, + ], + }, + }, configurationService); + + const lines = [ + 'ham', // length 3, should be tokenized + 'hamham' // length 6, should NOT be tokenized + ]; + + const actualTokens = getTokens(tokenizer, lines); + assert.deepStrictEqual(actualTokens, [ + [ + new Token(0, 'ham.test', 'test'), + ], [ + new Token(0, '', 'test') + ] + ]); + languageService.dispose(); + }); + }); From d1fcde7fdd1d80195fae5d7a162c42e3c1d233f3 Mon Sep 17 00:00:00 2001 From: Yuki Ito Date: Thu, 14 Apr 2022 17:40:41 +0100 Subject: [PATCH 2/2] fix bad rebase --- src/vs/editor/standalone/common/monarch/monarchLexer.ts | 5 ++++- src/vs/editor/standalone/test/browser/monarch.test.ts | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/vs/editor/standalone/common/monarch/monarchLexer.ts b/src/vs/editor/standalone/common/monarch/monarchLexer.ts index dde538afac9..72988e4212f 100644 --- a/src/vs/editor/standalone/common/monarch/monarchLexer.ts +++ b/src/vs/editor/standalone/common/monarch/monarchLexer.ts @@ -10,7 +10,7 @@ import { IDisposable } from 'vs/base/common/lifecycle'; import * as languages from 'vs/editor/common/languages'; -import { NullState, nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize'; +import { NullState, nullTokenizeEncoded, nullTokenize } from 'vs/editor/common/languages/nullTokenize'; import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization'; import { ILanguageService } from 'vs/editor/common/languages/language'; import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon'; @@ -479,6 +479,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport { } public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult { + if (line.length >= this._maxTokenizationLineLength) { + return nullTokenize(this._languageId, lineState); + } const tokensCollector = new MonarchClassicTokensCollector(); const endLineState = this._tokenize(line, hasEOL, lineState, tokensCollector); return tokensCollector.finalize(endLineState); diff --git a/src/vs/editor/standalone/test/browser/monarch.test.ts b/src/vs/editor/standalone/test/browser/monarch.test.ts index 22487d563aa..da034e57b4e 100644 --- a/src/vs/editor/standalone/test/browser/monarch.test.ts +++ b/src/vs/editor/standalone/test/browser/monarch.test.ts @@ -299,12 +299,14 @@ suite('Monarch', () => { languageService.dispose(); }); - test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', () => { + test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => { const configurationService = new StandaloneConfigurationService(); const languageService = new LanguageService(); + // Set maxTokenizationLineLength to 4 so that "ham" works but "hamham" would fail + await configurationService.updateValue('editor.maxTokenizationLineLength', 4); + const tokenizer = createMonarchTokenizer(languageService, 'test', { - maxTokenizationLineLength: 4, tokenizer: { root: [ {