Merge pull request #145979 from Lazyuki/fix-maxTokenizationLineLength-for-monaco

This commit is contained in:
Alexandru Dima 2022-05-30 22:47:07 +02:00 committed by GitHub
commit acb156d6fb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 73 additions and 13 deletions

View file

@ -23,6 +23,7 @@ import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneT
import { IMarkerData, IMarkerService } from 'vs/platform/markers/common/markers';
import { ILanguageFeaturesService } from 'vs/editor/common/services/languageFeatures';
import { LanguageSelector } from 'vs/editor/common/languageSelector';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
/**
* Register information about a new language.
@ -374,7 +375,7 @@ export function registerTokensProviderFactory(languageId: string, factory: Token
if (isATokensProvider(result)) {
return createTokenizationSupportAdapter(languageId, result);
}
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, result), StandaloneServices.get(IConfigurationService));
}
};
return languages.TokenizationRegistry.registerFactory(languageId, adaptedFactory);
@ -405,7 +406,7 @@ export function setTokensProvider(languageId: string, provider: TokensProvider |
*/
export function setMonarchTokensProvider(languageId: string, languageDef: IMonarchLanguage | Thenable<IMonarchLanguage>): IDisposable {
const create = (languageDef: IMonarchLanguage) => {
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef));
return new MonarchTokenizer(StandaloneServices.get(ILanguageService), StandaloneServices.get(IStandaloneThemeService), languageId, compile(languageId, languageDef), StandaloneServices.get(IConfigurationService));
};
if (isThenable<IMonarchLanguage>(languageDef)) {
return registerTokensProviderFactory(languageId, { create: () => languageDef });

View file

@ -10,11 +10,12 @@
import { IDisposable } from 'vs/base/common/lifecycle';
import * as languages from 'vs/editor/common/languages';
import { NullState } from 'vs/editor/common/languages/nullTokenize';
import { NullState, nullTokenizeEncoded, nullTokenize } from 'vs/editor/common/languages/nullTokenize';
import { TokenTheme } from 'vs/editor/common/languages/supports/tokenization';
import { ILanguageService } from 'vs/editor/common/languages/language';
import * as monarchCommon from 'vs/editor/standalone/common/monarch/monarchCommon';
import { IStandaloneThemeService } from 'vs/editor/standalone/common/standaloneTheme';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
const CACHE_STACK_DEPTH = 5;
@ -395,8 +396,9 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
private readonly _embeddedLanguages: { [languageId: string]: boolean };
public embeddedLoaded: Promise<void>;
private readonly _tokenizationRegistryListener: IDisposable;
private _maxTokenizationLineLength: number;
constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer) {
constructor(languageService: ILanguageService, standaloneThemeService: IStandaloneThemeService, languageId: string, lexer: monarchCommon.ILexer, @IConfigurationService private readonly _configurationService: IConfigurationService) {
this._languageService = languageService;
this._standaloneThemeService = standaloneThemeService;
this._languageId = languageId;
@ -424,6 +426,16 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
emitting = false;
}
});
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
this._configurationService.onDidChangeConfiguration(e => {
if (e.affectsConfiguration('editor.maxTokenizationLineLength')) {
this._maxTokenizationLineLength = this._configurationService.getValue<number>('editor.maxTokenizationLineLength', {
overrideIdentifier: this._languageId
});
}
});
}
public dispose(): void {
@ -468,12 +480,18 @@ export class MonarchTokenizer implements languages.ITokenizationSupport {
}
/**
 * Tokenizes one line into classic (non-encoded) tokens.
 *
 * A line whose length is greater than or equal to the cached
 * `editor.maxTokenizationLineLength` value is not run through the lexer;
 * the result of `nullTokenize` for this language and the incoming state
 * is returned instead.
 *
 * @param line The text of the line.
 * @param hasEOL Forwarded unchanged to `_tokenize`.
 * @param lineState The state at the start of the line.
 * @returns The result produced by the collector's `finalize` for the end state.
 */
public tokenize(line: string, hasEOL: boolean, lineState: languages.IState): languages.TokenizationResult {
	const isTooLong = line.length >= this._maxTokenizationLineLength;
	if (isTooLong) {
		return nullTokenize(this._languageId, lineState);
	}

	const collector = new MonarchClassicTokensCollector();
	const finalState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, collector);
	return collector.finalize(finalState);
}
public tokenizeEncoded(line: string, hasEOL: boolean, lineState: languages.IState): languages.EncodedTokenizationResult {
if (line.length >= this._maxTokenizationLineLength) {
return nullTokenizeEncoded(this._languageService.languageIdCodec.encodeLanguageId(this._languageId), lineState);
}
const tokensCollector = new MonarchModernTokensCollector(this._languageService, this._standaloneThemeService.getColorTheme().tokenTheme);
const endLineState = this._tokenize(line, hasEOL, <MonarchLineState>lineState, tokensCollector);
return tokensCollector.finalize(endLineState);

View file

@ -11,11 +11,13 @@ import { compile } from 'vs/editor/standalone/common/monarch/monarchCompile';
import { Token, TokenizationRegistry } from 'vs/editor/common/languages';
import { IMonarchLanguage } from 'vs/editor/standalone/common/monarch/monarchTypes';
import { DisposableStore } from 'vs/base/common/lifecycle';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { StandaloneConfigurationService } from 'vs/editor/standalone/browser/standaloneServices';
suite('Monarch', () => {
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language));
function createMonarchTokenizer(languageService: ILanguageService, languageId: string, language: IMonarchLanguage, configurationService: IConfigurationService): MonarchTokenizer {
return new MonarchTokenizer(languageService, null!, languageId, compile(languageId, language), configurationService);
}
function getTokens(tokenizer: MonarchTokenizer, lines: string[]): Token[][] {
@ -32,6 +34,7 @@ suite('Monarch', () => {
test('Ensure @rematch and nextEmbedded can be used together in Monarch grammar', () => {
const disposables = new DisposableStore();
const languageService = disposables.add(new LanguageService());
const configurationService = new StandaloneConfigurationService();
disposables.add(languageService.registerLanguage({ id: 'sql' }));
disposables.add(TokenizationRegistry.register('sql', createMonarchTokenizer(languageService, 'sql', {
tokenizer: {
@ -39,7 +42,7 @@ suite('Monarch', () => {
[/./, 'token']
]
}
})));
}, configurationService)));
const SQL_QUERY_START = '(SELECT|INSERT|UPDATE|DELETE|CREATE|REPLACE|ALTER|WITH)';
const tokenizer = createMonarchTokenizer(languageService, 'test1', {
tokenizer: {
@ -63,7 +66,7 @@ suite('Monarch', () => {
],
endStringWithSQL: [[/"""/, { token: 'string.quote', next: '@popall', nextEmbedded: '@pop', },]],
}
});
}, configurationService);
const lines = [
`mysql_query("""SELECT * FROM table_name WHERE ds = '<DATEID>'""")`,
@ -106,6 +109,7 @@ suite('Monarch', () => {
});
test('microsoft/monaco-editor#1235: Empty Line Handling', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
tokenizer: {
@ -125,7 +129,7 @@ suite('Monarch', () => {
// No possible rule to detect an empty line and @pop?
],
},
});
}, configurationService);
const lines = [
`// This comment \\`,
@ -163,6 +167,7 @@ suite('Monarch', () => {
});
test('microsoft/monaco-editor#2265: Exit a state at end of line', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
includeLF: true,
@ -179,7 +184,7 @@ suite('Monarch', () => {
[/[^\d]+/, '']
]
}
});
}, configurationService);
const lines = [
`PRINT 10 * 20`,
@ -211,6 +216,7 @@ suite('Monarch', () => {
});
test('issue #115662: monarchCompile function need an extra option which can control replacement', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer1 = createMonarchTokenizer(languageService, 'test', {
@ -230,7 +236,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);
const tokenizer2 = createMonarchTokenizer(languageService, 'test', {
ignoreCase: false,
@ -242,7 +248,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);
const lines = [
`@ham`
@ -265,6 +271,7 @@ suite('Monarch', () => {
});
test('microsoft/monaco-editor#2424: Allow to target @@', () => {
const configurationService = new StandaloneConfigurationService();
const languageService = new LanguageService();
const tokenizer = createMonarchTokenizer(languageService, 'test', {
@ -277,7 +284,7 @@ suite('Monarch', () => {
},
],
},
});
}, configurationService);
const lines = [
`@@`
@ -292,4 +299,38 @@ suite('Monarch', () => {
languageService.dispose();
});
test('microsoft/monaco-editor#3025: Check maxTokenizationLineLength before tokenizing', async () => {
	const configurationService = new StandaloneConfigurationService();
	const languageService = new LanguageService();

	// A limit of 4 sits between the two sample lines: 'ham' (length 3) is below it,
	// 'hamham' (length 6) is above it.
	await configurationService.updateValue('editor.maxTokenizationLineLength', 4);

	const hamLanguage: IMonarchLanguage = {
		tokenizer: {
			root: [
				{
					regex: /ham/,
					action: { token: 'ham' }
				},
			],
		},
	};
	const tokenizer = createMonarchTokenizer(languageService, 'test', hamLanguage, configurationService);

	const shortLine = 'ham'; // below the limit: expected to produce a 'ham.test' token
	const longLine = 'hamham'; // above the limit: expected to produce a single token with an empty type

	const actualTokens = getTokens(tokenizer, [shortLine, longLine]);
	const expectedTokens = [
		[new Token(0, 'ham.test', 'test')],
		[new Token(0, '', 'test')],
	];

	assert.deepStrictEqual(actualTokens, expectedTokens);
	languageService.dispose();
});
});