Merge pull request #174364 from microsoft/hediet/async-tokenization

Refactors tokenization to support custom background tokenizer.
This commit is contained in:
Henning Dieterichs 2023-02-15 15:14:47 +01:00 committed by GitHub
commit 8066a8c455
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 375 additions and 285 deletions

View file

@ -19,6 +19,7 @@ import { Selection } from 'vs/editor/common/core/selection';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
import * as model from 'vs/editor/common/model';
import { TokenizationRegistry as TokenizationRegistryImpl } from 'vs/editor/common/tokenizationRegistry';
import { ContiguousMultilineTokens } from 'vs/editor/common/tokens/contiguousMultilineTokens';
import { ExtensionIdentifier } from 'vs/platform/extensions/common/extensions';
import { IMarkerData } from 'vs/platform/markers/common/markers';
@ -33,14 +34,11 @@ export interface ILanguageIdCodec {
export class Token {
_tokenBrand: void = undefined;
public readonly offset: number;
public readonly type: string;
public readonly language: string;
constructor(offset: number, type: string, language: string) {
this.offset = offset;
this.type = type;
this.language = language;
constructor(
public readonly offset: number,
public readonly type: string,
public readonly language: string,
) {
}
public toString(): string {
@ -54,12 +52,10 @@ export class Token {
export class TokenizationResult {
_tokenizationResultBrand: void = undefined;
public readonly tokens: Token[];
public readonly endState: IState;
constructor(tokens: Token[], endState: IState) {
this.tokens = tokens;
this.endState = endState;
constructor(
public readonly tokens: Token[],
public readonly endState: IState,
) {
}
}
@ -69,21 +65,33 @@ export class TokenizationResult {
export class EncodedTokenizationResult {
_encodedTokenizationResultBrand: void = undefined;
/**
* The tokens in binary format. Each token occupies two array indices. For token i:
* - at offset 2*i => startIndex
* - at offset 2*i + 1 => metadata
*
*/
public readonly tokens: Uint32Array;
public readonly endState: IState;
constructor(tokens: Uint32Array, endState: IState) {
this.tokens = tokens;
this.endState = endState;
constructor(
/**
* The tokens in binary format. Each token occupies two array indices. For token i:
* - at offset 2*i => startIndex
* - at offset 2*i + 1 => metadata
*
*/
public readonly tokens: Uint32Array,
public readonly endState: IState,
) {
}
}
/**
 * A tokenizer that computes tokens in the background and reports them through an
 * `IBackgroundTokenizationStore` (see `ITokenizationSupport.createBackgroundTokenizer`).
 * @internal
 */
export interface IBackgroundTokenizer extends IDisposable {
	/**
	 * Instructs the background tokenizer to set the tokens for the given range again.
	 *
	 * This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport,
	 * when the change does not even propagate to that viewport.
	 *
	 * @param startLineNumber First line to re-tokenize (1-based, inclusive).
	 * @param endLineNumberExclusive End of the range (1-based, exclusive).
	 */
	requestTokens(startLineNumber: number, endLineNumberExclusive: number): void;
}
/**
* @internal
*/
@ -94,6 +102,26 @@ export interface ITokenizationSupport {
tokenize(line: string, hasEOL: boolean, state: IState): TokenizationResult;
tokenizeEncoded(line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult;
/**
* Can be/return undefined if default background tokenization should be used.
*/
createBackgroundTokenizer?(textModel: model.ITextModel, store: IBackgroundTokenizationStore): IBackgroundTokenizer | undefined;
}
/**
 * Sink through which a background tokenizer pushes its results back to the text model.
 * @internal
 */
export interface IBackgroundTokenizationStore {
	/**
	 * Stores the given tokens for the lines they cover.
	 */
	setTokens(tokens: ContiguousMultilineTokens[]): void;

	/**
	 * Stores the tokenization end state of the given line (1-based).
	 */
	setEndState(lineNumber: number, state: IState): void;

	/**
	 * Should be called to indicate that the background tokenization has finished for now.
	 * (This triggers bracket pair colorization to re-parse the bracket pairs with token information)
	 */
	backgroundTokenizationFinished(): void;
}
/**

View file

@ -56,8 +56,7 @@ export class BracketPairsTree extends Disposable {
) {
super();
if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.Uninitialized) {
// There is no token information yet
if (!textModel.tokenization.hasTokens) {
const brackets = this.brackets.getSingleLanguageBracketTokens(this.textModel.getLanguageId());
const tokenizer = new FastTokenizer(this.textModel.getValue(), brackets);
this.initialAstWithoutTokens = parseDocument(tokenizer, [], undefined, true);
@ -67,7 +66,8 @@ export class BracketPairsTree extends Disposable {
// Directly create the tree with token information.
this.initialAstWithoutTokens = undefined;
this.astWithTokens = this.parseDocumentFromTextBuffer([], undefined, false);
} else if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.InProgress) {
} else {
// We missed some token changes already, so we cannot use the fast tokenizer + delta increments
this.initialAstWithoutTokens = this.parseDocumentFromTextBuffer([], undefined, true);
this.astWithTokens = this.initialAstWithoutTokens;
}
@ -103,6 +103,7 @@ export class BracketPairsTree extends Disposable {
}
public handleContentChanged(change: IModelContentChangedEvent) {
// Must be sorted in ascending order
const edits = change.changes.map(c => {
const range = Range.lift(c.range);
return new TextEditInfo(

View file

@ -4,22 +4,23 @@
*--------------------------------------------------------------------------------------------*/
import * as arrays from 'vs/base/common/arrays';
import { onUnexpectedError } from 'vs/base/common/errors';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
import { Position } from 'vs/editor/common/core/position';
import { IRange } from 'vs/editor/common/core/range';
import { EncodedTokenizationResult, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
import { TextModel } from 'vs/editor/common/model/textModel';
import { Disposable } from 'vs/base/common/lifecycle';
import { IdleDeadline, runWhenIdle } from 'vs/base/common/async';
import { BugIndicatingError, onUnexpectedError } from 'vs/base/common/errors';
import { Disposable, MutableDisposable } from 'vs/base/common/lifecycle';
import { setTimeout0 } from 'vs/base/common/platform';
import { StopWatch } from 'vs/base/common/stopwatch';
import { countEOL } from 'vs/editor/common/core/eolCounter';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { runWhenIdle, IdleDeadline } from 'vs/base/common/async';
import { setTimeout0 } from 'vs/base/common/platform';
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
import { Position } from 'vs/editor/common/core/position';
import { IRange } from 'vs/editor/common/core/range';
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
import { ITextModel } from 'vs/editor/common/model';
import { TextModel } from 'vs/editor/common/model/textModel';
import { TokenizationTextModelPart } from 'vs/editor/common/model/tokenizationTextModelPart';
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
const enum Constants {
CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
@ -51,6 +52,7 @@ class ContiguousGrowingArray<T> {
this._store[index] = value;
}
// TODO have `replace` instead of `delete` and `insert`
public delete(deleteIndex: number, deleteCount: number): void {
if (deleteCount === 0 || deleteIndex >= this._store.length) {
return;
@ -72,8 +74,8 @@ class ContiguousGrowingArray<T> {
/**
* Stores the states at the start of each line and keeps track of which lines
* must be retokenized. Also uses state equality to quickly validate lines
* that don't need to be retokenized.
* must be re-tokenized. Also uses state equality to quickly validate lines
* that don't need to be re-tokenized.
*
* For example, when typing on a line, the line gets marked as needing to be tokenized.
* Once the line is tokenized, the end state is checked for equality against the begin
@ -93,7 +95,7 @@ export class TokenizationStateStore {
private readonly _lineNeedsTokenization = new ContiguousGrowingArray<boolean>(true);
/**
* `invalidLineStartIndex` indicates that line number `invalidLineStartIndex + 1`
* is the first one that needs to be retokenized.
* is the first one that needs to be re-tokenized.
*/
private _firstLineNeedsTokenization: number;
@ -118,13 +120,13 @@ export class TokenizationStateStore {
return this._lineBeginState.get(lineIndex);
}
public setEndState(linesLength: number, lineIndex: number, endState: IState): void {
public setEndState(linesLength: number, lineIndex: number, endState: IState): boolean {
this._lineNeedsTokenization.set(lineIndex, false);
this._firstLineNeedsTokenization = lineIndex + 1;
// Check if this was the last line
if (lineIndex === linesLength - 1) {
return;
return false;
}
// Check if the end state has changed
@ -132,7 +134,7 @@ export class TokenizationStateStore {
if (previousEndState === null || !endState.equals(previousEndState)) {
this._lineBeginState.set(lineIndex + 1, endState);
this.markMustBeTokenized(lineIndex + 1);
return;
return true;
}
// Perhaps we can skip tokenizing some lines...
@ -144,10 +146,9 @@ export class TokenizationStateStore {
i++;
}
this._firstLineNeedsTokenization = i;
return false;
}
//#region Editing
public applyEdits(range: IRange, eolCount: number): void {
this.markMustBeTokenized(range.startLineNumber - 1);
@ -158,13 +159,34 @@ export class TokenizationStateStore {
this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
}
//#endregion
/**
 * Tokenizes every still-invalid line up to and including `lineNumber`, adding the
 * resulting tokens to `builder` and recording each line's end state.
 */
public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
	const languageId = textModel.getLanguageId();
	const lineCount = textModel.getLineCount();
	const lastLineIndex = lineNumber - 1;

	// Validate all states up to and including `lastLineIndex`. `setEndState` may
	// advance `invalidLineStartIndex` beyond the line just tokenized (skipping lines
	// whose begin state is unchanged), so re-read it after every iteration.
	let lineIndex = this.invalidLineStartIndex;
	while (lineIndex <= lastLineIndex) {
		const lineText = textModel.getLineContent(lineIndex + 1);
		const beginState = this.getBeginState(lineIndex);

		const result = safeTokenize(languageIdCodec, languageId, this.tokenizationSupport, lineText, true, beginState!);
		builder.add(lineIndex + 1, result.tokens);
		this.setEndState(lineCount, lineIndex, result.endState);

		lineIndex = this.invalidLineStartIndex;
	}
}
/**
 * Whether every line of the given model has been tokenized (no invalid lines remain).
 */
isTokenizationComplete(textModel: ITextModel): boolean {
	const totalLines = textModel.getLineCount();
	return !(this.invalidLineStartIndex < totalLines);
}
}
export class TextModelTokenization extends Disposable {
private _tokenizationStateStore: TokenizationStateStore | null;
private _isDisposed: boolean;
private _tokenizationStateStore: TokenizationStateStore | null = null;
private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
private readonly backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
constructor(
private readonly _textModel: TextModel,
@ -172,8 +194,6 @@ export class TextModelTokenization extends Disposable {
private readonly _languageIdCodec: ILanguageIdCodec
) {
super();
this._isDisposed = false;
this._tokenizationStateStore = null;
this._register(TokenizationRegistry.onDidChange((e) => {
const languageId = this._textModel.getLanguageId();
@ -188,13 +208,6 @@ export class TextModelTokenization extends Disposable {
this._resetTokenizationState();
}
public override dispose(): void {
this._isDisposed = true;
super.dispose();
}
//#region TextModel events
public handleDidChangeContent(e: IModelContentChangedEvent): void {
if (e.isFlush) {
this._resetTokenizationState();
@ -208,11 +221,11 @@ export class TextModelTokenization extends Disposable {
}
}
this._beginBackgroundTokenization();
this._defaultBackgroundTokenizer?.handleChanges();
}
public handleDidChangeAttached(): void {
this._beginBackgroundTokenization();
this._defaultBackgroundTokenizer?.handleChanges();
}
public handleDidChangeLanguage(e: IModelLanguageChangedEvent): void {
@ -220,8 +233,6 @@ export class TextModelTokenization extends Disposable {
this._tokenizationPart.clearTokens();
}
//#endregion
private _resetTokenizationState(): void {
const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
if (tokenizationSupport && initialState) {
@ -229,81 +240,51 @@ export class TextModelTokenization extends Disposable {
} else {
this._tokenizationStateStore = null;
}
this._beginBackgroundTokenization();
}
private _isScheduled = false;
private _beginBackgroundTokenization(): void {
if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
return;
this.backgroundTokenizer.clear();
this._defaultBackgroundTokenizer = null;
if (this._tokenizationStateStore) {
const b: IBackgroundTokenizationStore = {
setTokens: (tokens) => {
this._tokenizationPart.setTokens(tokens);
},
backgroundTokenizationFinished: () => {
this._tokenizationPart.handleBackgroundTokenizationFinished();
},
setEndState: (lineNumber, state) => {
if (!state) {
throw new BugIndicatingError();
}
const invalidLineStartIndex = this._tokenizationStateStore?.invalidLineStartIndex;
if (invalidLineStartIndex !== undefined && lineNumber - 1 >= invalidLineStartIndex) {
// Don't accept end states for lines that are already definitely valid
this._tokenizationStateStore?.setEndState(this._textModel.getLineCount(), lineNumber - 1, state);
}
},
};
if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) {
this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
}
if (!this.backgroundTokenizer.value) {
this.backgroundTokenizer.value = this._defaultBackgroundTokenizer =
new DefaultBackgroundTokenizer(
this._textModel,
this._tokenizationStateStore,
b,
this._languageIdCodec
);
this._defaultBackgroundTokenizer.handleChanges();
}
}
this._isScheduled = true;
runWhenIdle((deadline) => {
this._isScheduled = false;
this._backgroundTokenizeWithDeadline(deadline);
});
}
/**
* Tokenize until the deadline occurs, but try to yield every 1-2ms.
*/
private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
// Read the time remaining from the `deadline` immediately because it is unclear
// if the `deadline` object will be valid after execution leaves this function.
const endTime = Date.now() + deadline.timeRemaining();
const execute = () => {
if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
// disposed in the meantime or detached or finished
return;
}
this._backgroundTokenizeForAtLeast1ms();
if (Date.now() < endTime) {
// There is still time before reaching the deadline, so yield to the browser and then
// continue execution
setTimeout0(execute);
} else {
// The deadline has been reached, so schedule a new idle callback if necessary
this._beginBackgroundTokenization();
}
};
execute();
}
/**
* Tokenize for at least 1ms.
*/
private _backgroundTokenizeForAtLeast1ms(): void {
const lineCount = this._textModel.getLineCount();
const builder = new ContiguousMultilineTokensBuilder();
const sw = StopWatch.create(false);
do {
if (sw.elapsed() > 1) {
// the comparison is intentionally > 1 and not >= 1 to ensure that
// a full millisecond has elapsed, given how microseconds are rounded
// to milliseconds
break;
}
const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
if (tokenizedLineNumber >= lineCount) {
break;
}
} while (this._hasLinesToTokenize());
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
}
public tokenizeViewport(startLineNumber: number, endLineNumber: number): void {
const builder = new ContiguousMultilineTokensBuilder();
this._tokenizeViewport(builder, startLineNumber, endLineNumber);
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
this._heuristicallyTokenizeViewport(builder, startLineNumber, endLineNumber);
this._tokenizationPart.setTokens(builder.finalize());
this._defaultBackgroundTokenizer?.checkFinished();
}
public reset(): void {
@ -313,8 +294,9 @@ export class TextModelTokenization extends Disposable {
public forceTokenization(lineNumber: number): void {
const builder = new ContiguousMultilineTokensBuilder();
this._updateTokensUntilLine(builder, lineNumber);
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
this._tokenizationStateStore?.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
this._tokenizationPart.setTokens(builder.finalize());
this._defaultBackgroundTokenizer?.checkFinished();
}
public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType {
@ -401,55 +383,14 @@ export class TextModelTokenization extends Disposable {
return false;
}
private _hasLinesToTokenize(): boolean {
if (!this._tokenizationStateStore) {
return false;
}
return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
}
private _isTokenizationComplete(): boolean {
if (!this._tokenizationStateStore) {
return false;
}
return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount());
}
private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) {
return this._textModel.getLineCount() + 1;
}
const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
this._updateTokensUntilLine(builder, lineNumber);
return lineNumber;
}
private _updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
if (!this._tokenizationStateStore) {
return;
}
const languageId = this._textModel.getLanguageId();
const linesLength = this._textModel.getLineCount();
const endLineIndex = lineNumber - 1;
// Validate all states up to and including endLineIndex
for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
const text = this._textModel.getLineContent(lineIndex + 1);
const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState!);
builder.add(lineIndex + 1, r.tokens);
this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
}
}
private _tokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
/**
* The result is not cached.
*/
private _heuristicallyTokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
if (!this._tokenizationStateStore) {
// nothing to do
return;
}
if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
// nothing to do
return;
@ -457,24 +398,38 @@ export class TextModelTokenization extends Disposable {
if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
// tokenization has reached the viewport start...
this._updateTokensUntilLine(builder, endLineNumber);
this._tokenizationStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, endLineNumber);
return;
}
let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
const fakeLines: string[] = [];
let initialState: IState | null = null;
for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
let state = this.guessStartState(startLineNumber);
const languageId = this._textModel.getLanguageId();
for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
const text = this._textModel.getLineContent(lineNumber);
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
builder.add(lineNumber, r.tokens);
state = r.endState;
}
// We overrode the tokens. Because old states might get reused (thus stopping invalidation),
// we have to explicitly request the tokens for this range again.
this.backgroundTokenizer.value?.requestTokens(startLineNumber, endLineNumber + 1);
}
private guessStartState(lineNumber: number): IState {
let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(lineNumber);
const likelyRelevantLines: string[] = [];
let initialState: IState | null = null;
for (let i = lineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
// Ignore lines full of whitespace
if (newNonWhitespaceIndex === 0) {
continue;
}
if (newNonWhitespaceIndex < nonWhitespaceColumn) {
fakeLines.push(this._textModel.getLineContent(i));
likelyRelevantLines.push(this._textModel.getLineContent(i));
nonWhitespaceColumn = newNonWhitespaceIndex;
initialState = this._tokenizationStateStore.getBeginState(i - 1);
initialState = this._tokenizationStateStore!.getBeginState(i - 1);
if (initialState) {
break;
}
@ -482,23 +437,17 @@ export class TextModelTokenization extends Disposable {
}
if (!initialState) {
initialState = this._tokenizationStateStore.initialState;
initialState = this._tokenizationStateStore!.initialState;
}
likelyRelevantLines.reverse();
const languageId = this._textModel.getLanguageId();
let state = initialState;
for (let i = fakeLines.length - 1; i >= 0; i--) {
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state);
state = r.endState;
}
for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
const text = this._textModel.getLineContent(lineNumber);
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
builder.add(lineNumber, r.tokens);
this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
for (const line of likelyRelevantLines) {
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore!.tokenizationSupport, line, false, state);
state = r.endState;
}
return state;
}
}
@ -538,3 +487,123 @@ function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tok
LineTokens.convertToEndOffset(r.tokens, text.length);
return r;
}
/**
 * Default `IBackgroundTokenizer`: tokenizes invalid lines during idle time
 * (via `runWhenIdle`), yielding back to the browser every 1-2ms, and pushes the
 * resulting tokens and completion signal into an `IBackgroundTokenizationStore`.
 */
class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
	private _isDisposed = false;

	constructor(
		private readonly _textModel: ITextModel,
		private readonly _stateStore: TokenizationStateStore,
		private readonly _backgroundTokenStore: IBackgroundTokenizationStore,
		private readonly _languageIdCodec: ILanguageIdCodec,
	) {
	}

	public dispose(): void {
		this._isDisposed = true;
	}

	/**
	 * Must be called when the model content (or attachment state) changes, so that
	 * background tokenization gets (re-)scheduled for the now-invalid lines.
	 */
	public handleChanges(): void {
		this._beginBackgroundTokenization();
	}

	private _isScheduled = false;
	private _beginBackgroundTokenization(): void {
		// Only one idle callback in flight at a time; nothing to do for detached
		// models or when all lines are already valid.
		if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
			return;
		}

		this._isScheduled = true;
		runWhenIdle((deadline) => {
			this._isScheduled = false;

			this._backgroundTokenizeWithDeadline(deadline);
		});
	}

	/**
	 * Tokenize until the deadline occurs, but try to yield every 1-2ms.
	 */
	private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
		// Read the time remaining from the `deadline` immediately because it is unclear
		// if the `deadline` object will be valid after execution leaves this function.
		const endTime = Date.now() + deadline.timeRemaining();

		const execute = () => {
			if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
				// disposed in the meantime or detached or finished
				return;
			}

			this._backgroundTokenizeForAtLeast1ms();

			if (Date.now() < endTime) {
				// There is still time before reaching the deadline, so yield to the browser and then
				// continue execution
				setTimeout0(execute);
			} else {
				// The deadline has been reached, so schedule a new idle callback if necessary
				this._beginBackgroundTokenization();
			}
		};
		execute();
	}

	/**
	 * Tokenize for at least 1ms.
	 */
	private _backgroundTokenizeForAtLeast1ms(): void {
		const lineCount = this._textModel.getLineCount();
		const builder = new ContiguousMultilineTokensBuilder();
		const sw = StopWatch.create(false);

		do {
			if (sw.elapsed() > 1) {
				// the comparison is intentionally > 1 and not >= 1 to ensure that
				// a full millisecond has elapsed, given how microseconds are rounded
				// to milliseconds
				break;
			}

			const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);

			if (tokenizedLineNumber >= lineCount) {
				break;
			}
		} while (this._hasLinesToTokenize());

		this._backgroundTokenStore.setTokens(builder.finalize());
		this.checkFinished();
	}

	private _hasLinesToTokenize(): boolean {
		// NOTE: `_stateStore` is a non-nullable readonly parameter property, so the
		// previous `!this._stateStore` guard here was dead code (and inconsistent
		// with `checkFinished`, which dereferences it unguarded).
		return this._stateStore.invalidLineStartIndex < this._textModel.getLineCount();
	}

	/**
	 * Tokenizes the first invalid line (which may validate further lines as a side
	 * effect) and returns its 1-based line number, or `lineCount + 1` when nothing
	 * is left to tokenize.
	 */
	private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
		if (!this._hasLinesToTokenize()) {
			return this._textModel.getLineCount() + 1;
		}
		const lineNumber = this._stateStore.invalidLineStartIndex + 1;
		this._stateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
		return lineNumber;
	}

	/**
	 * Reports `backgroundTokenizationFinished` to the store once all lines are valid.
	 */
	public checkFinished(): void {
		if (this._isDisposed) {
			return;
		}
		if (this._stateStore.isTokenizationComplete(this._textModel)) {
			this._backgroundTokenStore.backgroundTokenizationFinished();
		}
	}

	requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {
		// Mark each line in the (1-based, end-exclusive) range as needing tokenization.
		for (let lineNumber = startLineNumber; lineNumber < endLineNumberExclusive; lineNumber++) {
			this._stateStore.markMustBeTokenized(lineNumber - 1);
		}
	}
}

View file

@ -5,7 +5,6 @@
import { Emitter, Event } from 'vs/base/common/event';
import { CharCode } from 'vs/base/common/charCode';
import { IDisposable } from 'vs/base/common/lifecycle';
import { IPosition, Position } from 'vs/editor/common/core/position';
import { IRange, Range } from 'vs/editor/common/core/range';
import { getWordAtText, IWordAtPosition } from 'vs/editor/common/core/wordHelper';
@ -34,7 +33,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent> = this._register(new Emitter<IModelTokensChangedEvent>());
public readonly onDidChangeTokens: Event<IModelTokensChangedEvent> = this._onDidChangeTokens.event;
private readonly _languageRegistryListener: IDisposable;
private readonly _tokens: ContiguousTokensStore;
private readonly _semanticTokens: SparseTokensStore;
private readonly _tokenization: TextModelTokenization;
@ -54,19 +52,19 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
this._semanticTokens = new SparseTokensStore(
this._languageService.languageIdCodec
);
this._tokenization = new TextModelTokenization(
this._tokenization = this._register(new TextModelTokenization(
_textModel,
this,
this._languageService.languageIdCodec
);
));
this._languageRegistryListener = this._languageConfigurationService.onDidChange(
this._register(this._languageConfigurationService.onDidChange(
e => {
if (e.affects(this._languageId)) {
this._onDidChangeLanguageConfiguration.fire({});
}
}
);
));
}
_hasListeners(): boolean {
@ -74,7 +72,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
this._onDidChangeLanguage.hasListeners()
|| this._onDidChangeLanguageConfiguration.hasListeners()
|| this._onDidChangeTokens.hasListeners()
|| this._onBackgroundTokenizationStateChanged.hasListeners()
);
}
@ -104,35 +101,15 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
this._semanticTokens.flush();
}
// TODO@hediet TODO@alexdima what is the difference between this and acceptEdit?
public handleDidChangeContent(change: IModelContentChangedEvent): void {
this._tokenization.handleDidChangeContent(change);
}
public override dispose(): void {
this._languageRegistryListener.dispose();
this._tokenization.dispose();
super.dispose();
}
private _backgroundTokenizationState = BackgroundTokenizationState.Uninitialized;
private _backgroundTokenizationState = BackgroundTokenizationState.InProgress;
public get backgroundTokenizationState(): BackgroundTokenizationState {
return this._backgroundTokenizationState;
}
private handleTokenizationProgress(completed: boolean) {
if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
// We already did a full tokenization and don't go back to progressing.
return;
}
const newState = completed ? BackgroundTokenizationState.Completed : BackgroundTokenizationState.InProgress;
if (this._backgroundTokenizationState !== newState) {
this._backgroundTokenizationState = newState;
this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
this._onBackgroundTokenizationStateChanged.fire();
}
}
private readonly _onBackgroundTokenizationStateChanged = this._register(new Emitter<void>());
public readonly onBackgroundTokenizationStateChanged: Event<void> = this._onBackgroundTokenizationStateChanged.event;
public setLineTokens(
lineNumber: number,
@ -151,64 +128,76 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
);
}
public setTokens(
tokens: ContiguousMultilineTokens[],
backgroundTokenizationCompleted: boolean = false
): void {
if (tokens.length !== 0) {
const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
/**
 * Marks background tokenization as completed (a terminal state) and notifies the
 * bracket-pairs part so it can re-parse with full token information.
 */
public handleBackgroundTokenizationFinished(): void {
	if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
		// We already did a full tokenization and don't go back to progressing.
		return;
	}
	// Inlined the pointless `newState` temporary left over from the earlier
	// `completed ? Completed : InProgress` form of this method.
	this._backgroundTokenizationState = BackgroundTokenizationState.Completed;
	this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
}
for (let i = 0, len = tokens.length; i < len; i++) {
const element = tokens[i];
let minChangedLineNumber = 0;
let maxChangedLineNumber = 0;
let hasChange = false;
for (
let lineNumber = element.startLineNumber;
lineNumber <= element.endLineNumber;
lineNumber++
) {
if (hasChange) {
this._tokens.setTokens(
this._languageId,
lineNumber - 1,
this._textModel.getLineLength(lineNumber),
element.getLineTokens(lineNumber),
false
);
/**
 * Whether the contiguous tokens store currently holds any tokens for this model.
 */
public get hasTokens(): boolean {
	const { hasTokens } = this._tokens;
	return hasTokens;
}
public setTokens(tokens: ContiguousMultilineTokens[]): void {
if (tokens.length === 0) {
return;
}
const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
for (let i = 0, len = tokens.length; i < len; i++) {
const element = tokens[i];
let minChangedLineNumber = 0;
let maxChangedLineNumber = 0;
let hasChange = false;
for (
let lineNumber = element.startLineNumber;
lineNumber <= element.endLineNumber;
lineNumber++
) {
if (hasChange) {
this._tokens.setTokens(
this._languageId,
lineNumber - 1,
this._textModel.getLineLength(lineNumber),
element.getLineTokens(lineNumber),
false
);
maxChangedLineNumber = lineNumber;
} else {
const lineHasChange = this._tokens.setTokens(
this._languageId,
lineNumber - 1,
this._textModel.getLineLength(lineNumber),
element.getLineTokens(lineNumber),
true
);
if (lineHasChange) {
hasChange = true;
minChangedLineNumber = lineNumber;
maxChangedLineNumber = lineNumber;
} else {
const lineHasChange = this._tokens.setTokens(
this._languageId,
lineNumber - 1,
this._textModel.getLineLength(lineNumber),
element.getLineTokens(lineNumber),
true
);
if (lineHasChange) {
hasChange = true;
minChangedLineNumber = lineNumber;
maxChangedLineNumber = lineNumber;
}
}
}
if (hasChange) {
ranges.push({
fromLineNumber: minChangedLineNumber,
toLineNumber: maxChangedLineNumber,
});
}
}
if (ranges.length > 0) {
this._emitModelTokensChangedEvent({
tokenizationSupportChanged: false,
semanticTokensApplied: false,
ranges: ranges,
if (hasChange) {
ranges.push({
fromLineNumber: minChangedLineNumber,
toLineNumber: maxChangedLineNumber,
});
}
}
this.handleTokenizationProgress(backgroundTokenizationCompleted);
if (ranges.length > 0) {
this._emitModelTokensChangedEvent({
tokenizationSupportChanged: false,
semanticTokensApplied: false,
ranges: ranges,
});
}
}
public setSemanticTokens(

View file

@ -3,7 +3,6 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Event } from 'vs/base/common/event';
import { IPosition } from 'vs/editor/common/core/position';
import { Range } from 'vs/editor/common/core/range';
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
@ -20,6 +19,8 @@ export interface ITokenizationTextModelPart {
*/
setTokens(tokens: ContiguousMultilineTokens[]): void;
readonly hasTokens: boolean;
/**
* Replaces all semantic tokens with the provided `tokens`.
* @internal
@ -98,11 +99,9 @@ export interface ITokenizationTextModelPart {
setLanguageId(languageId: string, source?: string): void;
readonly backgroundTokenizationState: BackgroundTokenizationState;
readonly onBackgroundTokenizationStateChanged: Event<void>;
}
export const enum BackgroundTokenizationState {
Uninitialized = 0,
InProgress = 1,
Completed = 2,
}

View file

@ -30,6 +30,10 @@ export class ContiguousTokensStore {
this._len = 0;
}
/**
 * Whether at least one line currently has stored token data.
 */
get hasTokens(): boolean {
	return this._lineTokens.length !== 0;
}
public getTokens(topLevelLanguageId: string, lineIndex: number, lineText: string): LineTokens {
let rawLineTokens: Uint32Array | ArrayBuffer | null = null;
if (lineIndex < this._len) {

2
src/vs/monaco.d.ts vendored
View file

@ -875,10 +875,10 @@ declare namespace monaco {
}
export class Token {
_tokenBrand: void;
readonly offset: number;
readonly type: string;
readonly language: string;
_tokenBrand: void;
constructor(offset: number, type: string, language: string);
toString(): string;
}