Merge pull request #145979 from Lazyuki/fix-maxTokenizationLineLength-for-monaco

This commit is contained in:
Alexandru Dima 2022-05-30 22:47:07 +02:00 committed by GitHub
commit acb156d6fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 13 deletions

View file

@ -23,6 +23,7 @@ import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneT
import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers'; import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers';
import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures'; import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures';
import { LanguageSelector } from 'vs/editor/common/languageSelector'; import { LanguageSelector } from 'vs/editor/common/languageSelector';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
/** /**
* Register information about a new language. * Register information about a new language.
@ -374,7 +375,7 @@ export function registerTokensProviderFactory(languageId: string, factory: Token
if (isATokensProvider(result)) { if (isATokensProvider(result)) {
return createTokenizationSupportAdapter(languageId, result); return createTokenizationSupportAdapter(languageId, result);
} }
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result)); return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result), StandaloneServices.get(IConfigurationService));
} }
}; };
return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory); return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory);
@ -405,7 +406,7 @@ export function setTokensProvider(languageId: string, provider: TokensProvider |
*/ */
export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable<IMonarchLanguage>): IDisposable { export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable<IMonarchLanguage>): IDisposable {
const create = (languageDef: IMonarchLanguage) => { const create = (languageDef: IMonarchLanguage) => {
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef)); return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef), StandaloneServices.get(IConfigurationService));
}; };
if (isThenable<IMonarchLanguage>(languageDef)) { if (isThenable<IMonarchLanguage>(languageDef)) {
return registerTokensProviderFactory(languageId, { create: () => languageDef }); return registerTokensProviderFactory(languageId, { create: () => languageDef });

View file

@ -10,11 +10,12 @@
import { IDisposable } from 'vs/base/common/lifecycle'; import { IDisposable } from 'vs/base/common/lifecycle';
import * as languages from 'vs/editor/common/languages'; import * as languages from 'vs/editor/common/languages';
import { NullState } from 'vs/editor/common/languages/nullTokenize'; import { NullState, nullTokenizeEncoded, nullTokenize } from 'vs/editor/common/languages/nullTokenize';
import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization'; import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/languages/language'; import { ILanguageService } from 'vs/editor/common/languages/language';
import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon'; import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon';
import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme'; import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes'; import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
const CACHE_STACK_DEPTH = 5; const CACHE_STACK_DEPTH = 5;
@ -395,8 +396,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
private readonly _embeddedLanguages: { [languageId: string]: boolean }; private readonly _embeddedLanguages: { [languageId: string]: boolean };
public embeddedLoaded: Promise<void>; public embeddedLoaded: Promise<void>;
private readonly _tokenizationRegistryListener: IDisposable; private readonly _tokenizationRegistryListener: IDisposable;
private _maxTokenizationLineLength: number;
constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer) { constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
this._languageService = languageService; this._languageService = languageService;
this._standaloneThemeService = standaloneThemeService; this._standaloneThemeService = standaloneThemeService;
this._languageId = languageId; this._languageId = languageId;
@ -424,6 +426,16 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
emitting = false; emitting = false;
} }
}); });
// Read the initial value of 'editor.maxTokenizationLineLength', using this
// tokenizer's language id as the override identifier for the lookup.
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
// Refresh the cached limit whenever a configuration change affects the setting.
// NOTE(review): the return value of onDidChangeConfiguration is not kept here.
// If it is a subscription handle that must be released, it should probably be
// stored and released when this tokenizer is disposed — TODO confirm.
this._configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
}
});
} }
public dispose(): void { public dispose(): void {
@ -468,12 +480,18 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
} }
/**
 * Tokenizes a single line and returns the result in the classic
 * (non-encoded) token format.
 *
 * A line whose length is greater than or equal to the cached
 * `_maxTokenizationLineLength` is not run through the Monarch lexer; the
 * result of `nullTokenize` for this language and the incoming state is
 * returned instead.
 */
public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult { public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
// Guard added by this change: skip lexing for lines at or above the limit.
if (line.length >= this._maxTokenizationLineLength) {
return nullTokenize(this._languageId, lineState);
}
const tokensCollector = new MonarchClassicTokensCollector(); const tokensCollector = new MonarchClassicTokensCollector();
const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector); const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
return tokensCollector.finalize(endLineState); return tokensCollector.finalize(endLineState);
} }
public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult { public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
if (line.length >= this._maxTokenizationLineLength) {
return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
}
const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme); const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector); const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
return tokensCollector.finalize(endLineState); return tokensCollector.finalize(endLineState);

View file

@ -11,11 +11,13 @@ import { compile } from 'vs/editor/standalone/common/monarch/monarchCompile';
import { Token, TokenizationRegistry } from 'vs/editor/common/languages'; import { Token, TokenizationRegistry } from 'vs/editor/common/languages';
import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes'; import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes';
import { DisposableStore } from 'vs/base/common/lifecycle'; import { DisposableStore } from 'vs/base/common/lifecycle';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { StandaloneConfigurationService } from 'vs/editor/standalone/browser/standaloneServices';
suite('Monarch', () => { suite('Monarch', () => {
/**
 * Test helper: compiles `language` for `languageId` and wraps the compiled
 * lexer in a `MonarchTokenizer`.
 *
 * The theme service argument is passed as `null!`. After this change the
 * caller-supplied `configurationService` is forwarded as the last
 * constructor argument.
 */
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage): MonarchTokenizer { function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language)); return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService);
} }
function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] { function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
@ -32,6 +34,7 @@ suite('Monarch', () => {
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => { test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
const disposables = new DisposableStore(); const disposables = new DisposableStore();
const languageService = disposables.add(new LanguageService()); const languageService = disposables.add(new LanguageService());
const configurationService = new StandaloneConfigurationService();
disposables.add(languageService.registerLanguage({ id: 'sql' })); disposables.add(languageService.registerLanguage({ id: 'sql' }));
disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', { disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', {
tokenizer: { tokenizer: {
@ -39,7 +42,7 @@ suite('Monarch', () => {
[/./, 'token'] [/./, 'token']
] ]
} }
}))); }, configurationService)));
const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)'; const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
const tokenizer = createMonarchTokenizer(languageService, 'test1', { const tokenizer = createMonarchTokenizer(languageService, 'test1', {
tokenizer: { tokenizer: {
@ -63,7 +66,7 @@ suite('Monarch', () => {
], ],
endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]], endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
} }
}); }, configurationService);
const lines = [ const lines = [
`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`, `mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
@ -106,6 +109,7 @@ suite('Monarch', () => {
}); });
test('microsoft/monaco-editor#1235: Empty Line Handling', () => { test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService(); const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', { const tokenizer = createMonarchTokenizer(languageService, 'test', {
tokenizer: { tokenizer: {
@ -125,7 +129,7 @@ suite('Monarch', () => {
// No possible rule to detect an empty line and @pop? // No possible rule to detect an empty line and @pop?
], ],
}, },
}); }, configurationService);
const lines = [ const lines = [
`// This comment \\`, `// This comment \\`,
@ -163,6 +167,7 @@ suite('Monarch', () => {
}); });
test('microsoft/monaco-editor#2265: Exit a state at end of line', () => { test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService(); const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', { const tokenizer = createMonarchTokenizer(languageService, 'test', {
includeLF: true, includeLF: true,
@ -179,7 +184,7 @@ suite('Monarch', () => {
[/[^\d]+/, ''] [/[^\d]+/, '']
] ]
} }
}); }, configurationService);
const lines = [ const lines = [
`PRINT 10 * 20`, `PRINT 10 * 20`,
@ -211,6 +216,7 @@ suite('Monarch', () => {
}); });
test('issue #115662: monarchCompile function need an extra option which can control replacement', () => { test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService(); const languageService = new LanguageService();
const tokenizer1 = createMonarchTokenizer(languageService, 'test', { const tokenizer1 = createMonarchTokenizer(languageService, 'test', {
@ -230,7 +236,7 @@ suite('Monarch', () => {
}, },
], ],
}, },
}); }, configurationService);
const tokenizer2 = createMonarchTokenizer(languageService, 'test', { const tokenizer2 = createMonarchTokenizer(languageService, 'test', {
ignoreCase: false, ignoreCase: false,
@ -242,7 +248,7 @@ suite('Monarch', () => {
}, },
], ],
}, },
}); }, configurationService);
const lines = [ const lines = [
`@ham` `@ham`
@ -265,6 +271,7 @@ suite('Monarch', () => {
}); });
test('microsoft/monaco-editor#2424: Allow to target @@', () => { test('microsoft/monaco-editor#2424: Allow to target @@', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService(); const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', { const tokenizer = createMonarchTokenizer(languageService, 'test', {
@ -277,7 +284,7 @@ suite('Monarch', () => {
}, },
], ],
}, },
}); }, configurationService);
const lines = [ const lines = [
`@@` `@@`
@ -292,4 +299,38 @@ suite('Monarch', () => {
languageService.dispose(); languageService.dispose();
}); });
test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
	const configurationService = new StandaloneConfigurationService();
	const languageService = new LanguageService();

	// The limit must be in place before the tokenizer is created. With a limit
	// of 4, the 3-character line stays below it while the 6-character line
	// reaches it and is expected to come back as a single untyped token.
	await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

	const hamGrammar: IMonarchLanguage = {
		tokenizer: {
			root: [
				{ regex: /ham/, action: { token: 'ham' } },
			],
		},
	};
	const tokenizer = createMonarchTokenizer(languageService, 'test', hamGrammar, configurationService);

	const shortLine = 'ham'; // length 3: expected to be tokenized by the grammar
	const longLine = 'hamham'; // length 6: expected NOT to be tokenized
	const tokensPerLine = getTokens(tokenizer, [shortLine, longLine]);

	const expectedTokens = [
		[new Token(0, 'ham.test', 'test')],
		[new Token(0, '', 'test')],
	];
	assert.deepStrictEqual(tokensPerLine, expectedTokens);

	languageService.dispose();
});
}); });