mirror of
https://github.com/Microsoft/vscode
synced 2024-09-13 21:55:38 +00:00
Merge pull request #174364 from microsoft/hediet/async-tokenization
Refactors tokenization to support custom background tokenizer.
This commit is contained in:
commit
8066a8c455
|
@ -19,6 +19,7 @@ import { Selection } from 'vs/editor/common/core/selection';
|
|||
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
|
||||
import * as model from 'vs/editor/common/model';
|
||||
import { TokenizationRegistry as TokenizationRegistryImpl } from 'vs/editor/common/tokenizationRegistry';
|
||||
import { ContiguousMultilineTokens } from 'vs/editor/common/tokens/contiguousMultilineTokens';
|
||||
import { ExtensionIdentifier } from 'vs/platform/extensions/common/extensions';
|
||||
import { IMarkerData } from 'vs/platform/markers/common/markers';
|
||||
|
||||
|
@ -33,14 +34,11 @@ export interface ILanguageIdCodec {
|
|||
export class Token {
|
||||
_tokenBrand: void = undefined;
|
||||
|
||||
public readonly offset: number;
|
||||
public readonly type: string;
|
||||
public readonly language: string;
|
||||
|
||||
constructor(offset: number, type: string, language: string) {
|
||||
this.offset = offset;
|
||||
this.type = type;
|
||||
this.language = language;
|
||||
constructor(
|
||||
public readonly offset: number,
|
||||
public readonly type: string,
|
||||
public readonly language: string,
|
||||
) {
|
||||
}
|
||||
|
||||
public toString(): string {
|
||||
|
@ -54,12 +52,10 @@ export class Token {
|
|||
export class TokenizationResult {
|
||||
_tokenizationResultBrand: void = undefined;
|
||||
|
||||
public readonly tokens: Token[];
|
||||
public readonly endState: IState;
|
||||
|
||||
constructor(tokens: Token[], endState: IState) {
|
||||
this.tokens = tokens;
|
||||
this.endState = endState;
|
||||
constructor(
|
||||
public readonly tokens: Token[],
|
||||
public readonly endState: IState,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -69,21 +65,33 @@ export class TokenizationResult {
|
|||
export class EncodedTokenizationResult {
|
||||
_encodedTokenizationResultBrand: void = undefined;
|
||||
|
||||
/**
|
||||
* The tokens in binary format. Each token occupies two array indices. For token i:
|
||||
* - at offset 2*i => startIndex
|
||||
* - at offset 2*i + 1 => metadata
|
||||
*
|
||||
*/
|
||||
public readonly tokens: Uint32Array;
|
||||
public readonly endState: IState;
|
||||
|
||||
constructor(tokens: Uint32Array, endState: IState) {
|
||||
this.tokens = tokens;
|
||||
this.endState = endState;
|
||||
constructor(
|
||||
/**
|
||||
* The tokens in binary format. Each token occupies two array indices. For token i:
|
||||
* - at offset 2*i => startIndex
|
||||
* - at offset 2*i + 1 => metadata
|
||||
*
|
||||
*/
|
||||
public readonly tokens: Uint32Array,
|
||||
public readonly endState: IState,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
export interface IBackgroundTokenizer extends IDisposable {
// A disposable tokenizer that computes tokens for a text model in the background.
// Instances are handed out by `ITokenizationSupport.createBackgroundTokenizer`.
|
||||
/**
|
||||
* Instructs the background tokenizer to set the tokens for the given range again.
|
||||
*
|
||||
* This might be necessary if the renderer overwrote those tokens with heuristically computed ones for some viewport,
|
||||
* when the change does not even propagate to that viewport.
*
* @param startLineNumber First line of the range (the default implementation treats line numbers as 1-based).
* @param endLineNumberExclusive Line number just past the end of the range; this line itself is not part of the range.
|
||||
*/
|
||||
requestTokens(startLineNumber: number, endLineNumberExclusive: number): void;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
|
@ -94,6 +102,26 @@ export interface ITokenizationSupport {
|
|||
tokenize(line: string, hasEOL: boolean, state: IState): TokenizationResult;
|
||||
|
||||
tokenizeEncoded(line: string, hasEOL: boolean, state: IState): EncodedTokenizationResult;
|
||||
|
||||
/**
|
||||
* Can be/return undefined if default background tokenization should be used.
|
||||
*/
|
||||
createBackgroundTokenizer?(textModel: model.ITextModel, store: IBackgroundTokenizationStore): IBackgroundTokenizer | undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* @internal
|
||||
*/
|
||||
export interface IBackgroundTokenizationStore {
// Sink through which a background tokenizer reports its results.
// It is passed to `ITokenizationSupport.createBackgroundTokenizer` together with the text model.
|
||||
/**
* Stores the given tokens.
* The store created by `TextModelTokenization` forwards them to the tokenization part of the text model.
*/
setTokens(tokens: ContiguousMultilineTokens[]): void;
|
||||
|
||||
/**
* Reports the tokenizer state reached at the end of line `lineNumber`.
* The store created by `TextModelTokenization` treats `lineNumber` as 1-based, throws a `BugIndicatingError`
* when `state` is missing, and ignores end states for lines before the first invalid line.
*/
setEndState(lineNumber: number, state: IState): void;
|
||||
|
||||
/**
|
||||
* Should be called to indicate that the background tokenization has finished for now.
|
||||
* (This triggers bracket pair colorization to re-parse the bracket pairs with token information)
|
||||
*/
|
||||
backgroundTokenizationFinished(): void;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -56,8 +56,7 @@ export class BracketPairsTree extends Disposable {
|
|||
) {
|
||||
super();
|
||||
|
||||
if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.Uninitialized) {
|
||||
// There is no token information yet
|
||||
if (!textModel.tokenization.hasTokens) {
|
||||
const brackets = this.brackets.getSingleLanguageBracketTokens(this.textModel.getLanguageId());
|
||||
const tokenizer = new FastTokenizer(this.textModel.getValue(), brackets);
|
||||
this.initialAstWithoutTokens = parseDocument(tokenizer, [], undefined, true);
|
||||
|
@ -67,7 +66,8 @@ export class BracketPairsTree extends Disposable {
|
|||
// Directly create the tree with token information.
|
||||
this.initialAstWithoutTokens = undefined;
|
||||
this.astWithTokens = this.parseDocumentFromTextBuffer([], undefined, false);
|
||||
} else if (textModel.tokenization.backgroundTokenizationState === BackgroundTokenizationState.InProgress) {
|
||||
} else {
|
||||
// We missed some token changes already, so we cannot use the fast tokenizer + delta increments
|
||||
this.initialAstWithoutTokens = this.parseDocumentFromTextBuffer([], undefined, true);
|
||||
this.astWithTokens = this.initialAstWithoutTokens;
|
||||
}
|
||||
|
@ -103,6 +103,7 @@ export class BracketPairsTree extends Disposable {
|
|||
}
|
||||
|
||||
public handleContentChanged(change: IModelContentChangedEvent) {
|
||||
// Must be sorted in ascending order
|
||||
const edits = change.changes.map(c => {
|
||||
const range = Range.lift(c.range);
|
||||
return new TextEditInfo(
|
||||
|
|
|
@ -4,22 +4,23 @@
|
|||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import * as arrays from 'vs/base/common/arrays';
|
||||
import { onUnexpectedError } from 'vs/base/common/errors';
|
||||
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
|
||||
import { Position } from 'vs/editor/common/core/position';
|
||||
import { IRange } from 'vs/editor/common/core/range';
|
||||
import { EncodedTokenizationResult, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
|
||||
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
|
||||
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
|
||||
import { TextModel } from 'vs/editor/common/model/textModel';
|
||||
import { Disposable } from 'vs/base/common/lifecycle';
|
||||
import { IdleDeadline, runWhenIdle } from 'vs/base/common/async';
|
||||
import { BugIndicatingError, onUnexpectedError } from 'vs/base/common/errors';
|
||||
import { Disposable, MutableDisposable } from 'vs/base/common/lifecycle';
|
||||
import { setTimeout0 } from 'vs/base/common/platform';
|
||||
import { StopWatch } from 'vs/base/common/stopwatch';
|
||||
import { countEOL } from 'vs/editor/common/core/eolCounter';
|
||||
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
|
||||
import { runWhenIdle, IdleDeadline } from 'vs/base/common/async';
|
||||
import { setTimeout0 } from 'vs/base/common/platform';
|
||||
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
|
||||
import { Position } from 'vs/editor/common/core/position';
|
||||
import { IRange } from 'vs/editor/common/core/range';
|
||||
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
|
||||
import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
|
||||
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
|
||||
import { ITextModel } from 'vs/editor/common/model';
|
||||
import { TextModel } from 'vs/editor/common/model/textModel';
|
||||
import { TokenizationTextModelPart } from 'vs/editor/common/model/tokenizationTextModelPart';
|
||||
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
|
||||
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
|
||||
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
|
||||
|
||||
const enum Constants {
|
||||
CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
|
||||
|
@ -51,6 +52,7 @@ class ContiguousGrowingArray<T> {
|
|||
this._store[index] = value;
|
||||
}
|
||||
|
||||
// TODO have `replace` instead of `delete` and `insert`
|
||||
public delete(deleteIndex: number, deleteCount: number): void {
|
||||
if (deleteCount === 0 || deleteIndex >= this._store.length) {
|
||||
return;
|
||||
|
@ -72,8 +74,8 @@ class ContiguousGrowingArray<T> {
|
|||
|
||||
/**
|
||||
* Stores the states at the start of each line and keeps track of which lines
|
||||
* must be retokenized. Also uses state equality to quickly validate lines
|
||||
* that don't need to be retokenized.
|
||||
* must be re-tokenized. Also uses state equality to quickly validate lines
|
||||
* that don't need to be re-tokenized.
|
||||
*
|
||||
* For example, when typing on a line, the line gets marked as needing to be tokenized.
|
||||
* Once the line is tokenized, the end state is checked for equality against the begin
|
||||
|
@ -93,7 +95,7 @@ export class TokenizationStateStore {
|
|||
private readonly _lineNeedsTokenization = new ContiguousGrowingArray<boolean>(true);
|
||||
/**
|
||||
* `invalidLineStartIndex` indicates that line number `invalidLineStartIndex + 1`
|
||||
* is the first one that needs to be retokenized.
|
||||
* is the first one that needs to be re-tokenized.
|
||||
*/
|
||||
private _firstLineNeedsTokenization: number;
|
||||
|
||||
|
@ -118,13 +120,13 @@ export class TokenizationStateStore {
|
|||
return this._lineBeginState.get(lineIndex);
|
||||
}
|
||||
|
||||
public setEndState(linesLength: number, lineIndex: number, endState: IState): void {
|
||||
public setEndState(linesLength: number, lineIndex: number, endState: IState): boolean {
|
||||
this._lineNeedsTokenization.set(lineIndex, false);
|
||||
this._firstLineNeedsTokenization = lineIndex + 1;
|
||||
|
||||
// Check if this was the last line
|
||||
if (lineIndex === linesLength - 1) {
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check if the end state has changed
|
||||
|
@ -132,7 +134,7 @@ export class TokenizationStateStore {
|
|||
if (previousEndState === null || !endState.equals(previousEndState)) {
|
||||
this._lineBeginState.set(lineIndex + 1, endState);
|
||||
this.markMustBeTokenized(lineIndex + 1);
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
|
||||
// Perhaps we can skip tokenizing some lines...
|
||||
|
@ -144,10 +146,9 @@ export class TokenizationStateStore {
|
|||
i++;
|
||||
}
|
||||
this._firstLineNeedsTokenization = i;
|
||||
return false;
|
||||
}
|
||||
|
||||
//#region Editing
|
||||
|
||||
public applyEdits(range: IRange, eolCount: number): void {
|
||||
this.markMustBeTokenized(range.startLineNumber - 1);
|
||||
|
||||
|
@ -158,13 +159,34 @@ export class TokenizationStateStore {
|
|||
this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
|
||||
}
|
||||
|
||||
//#endregion
|
||||
/**
 * Tokenizes all lines that still need tokenization, up to and including `lineNumber`,
 * and collects the produced tokens in `builder`.
 *
 * After each line the end state is recorded via `setEndState`, which may move
 * `invalidLineStartIndex` forward by more than one line; the walk always resumes
 * at the store's current first invalid line.
 *
 * @param textModel The model whose lines are tokenized.
 * @param languageIdCodec Codec passed through to the tokenizer.
 * @param builder Receives the tokens of every line tokenized here.
 * @param lineNumber 1-based number of the last line that must be valid afterwards.
 */
public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
	const language = textModel.getLanguageId();
	const totalLineCount = textModel.getLineCount();
	const lastLineIndex = lineNumber - 1;

	// Bring every state up to and including lastLineIndex into a valid shape
	let currentIndex = this.invalidLineStartIndex;
	while (currentIndex <= lastLineIndex) {
		const currentLineNumber = currentIndex + 1;
		const content = textModel.getLineContent(currentLineNumber);
		const beginState = this.getBeginState(currentIndex);

		const tokenized = safeTokenize(languageIdCodec, language, this.tokenizationSupport, content, true, beginState!);
		builder.add(currentLineNumber, tokenized.tokens);
		this.setEndState(totalLineCount, currentIndex, tokenized.endState);

		// Continue with whatever line the store now considers the first invalid one
		currentIndex = this.invalidLineStartIndex;
	}
}
|
||||
|
||||
/**
 * Tells whether no line of `textModel` is left to tokenize, i.e. whether the
 * index of the first invalid line lies at or beyond the model's line count.
 */
isTokenizationComplete(textModel: ITextModel): boolean {
	const firstInvalidLineIndex = this.invalidLineStartIndex;
	const lineCount = textModel.getLineCount();
	return firstInvalidLineIndex >= lineCount;
}
|
||||
}
|
||||
|
||||
export class TextModelTokenization extends Disposable {
|
||||
|
||||
private _tokenizationStateStore: TokenizationStateStore | null;
|
||||
private _isDisposed: boolean;
|
||||
private _tokenizationStateStore: TokenizationStateStore | null = null;
|
||||
private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
|
||||
|
||||
private readonly backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
|
||||
|
||||
constructor(
|
||||
private readonly _textModel: TextModel,
|
||||
|
@ -172,8 +194,6 @@ export class TextModelTokenization extends Disposable {
|
|||
private readonly _languageIdCodec: ILanguageIdCodec
|
||||
) {
|
||||
super();
|
||||
this._isDisposed = false;
|
||||
this._tokenizationStateStore = null;
|
||||
|
||||
this._register(TokenizationRegistry.onDidChange((e) => {
|
||||
const languageId = this._textModel.getLanguageId();
|
||||
|
@ -188,13 +208,6 @@ export class TextModelTokenization extends Disposable {
|
|||
this._resetTokenizationState();
|
||||
}
|
||||
|
||||
public override dispose(): void {
|
||||
this._isDisposed = true;
|
||||
super.dispose();
|
||||
}
|
||||
|
||||
//#region TextModel events
|
||||
|
||||
public handleDidChangeContent(e: IModelContentChangedEvent): void {
|
||||
if (e.isFlush) {
|
||||
this._resetTokenizationState();
|
||||
|
@ -208,11 +221,11 @@ export class TextModelTokenization extends Disposable {
|
|||
}
|
||||
}
|
||||
|
||||
this._beginBackgroundTokenization();
|
||||
this._defaultBackgroundTokenizer?.handleChanges();
|
||||
}
|
||||
|
||||
public handleDidChangeAttached(): void {
|
||||
this._beginBackgroundTokenization();
|
||||
this._defaultBackgroundTokenizer?.handleChanges();
|
||||
}
|
||||
|
||||
public handleDidChangeLanguage(e: IModelLanguageChangedEvent): void {
|
||||
|
@ -220,8 +233,6 @@ export class TextModelTokenization extends Disposable {
|
|||
this._tokenizationPart.clearTokens();
|
||||
}
|
||||
|
||||
//#endregion
|
||||
|
||||
private _resetTokenizationState(): void {
|
||||
const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
|
||||
if (tokenizationSupport && initialState) {
|
||||
|
@ -229,81 +240,51 @@ export class TextModelTokenization extends Disposable {
|
|||
} else {
|
||||
this._tokenizationStateStore = null;
|
||||
}
|
||||
this._beginBackgroundTokenization();
|
||||
}
|
||||
|
||||
private _isScheduled = false;
|
||||
private _beginBackgroundTokenization(): void {
|
||||
if (this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
|
||||
return;
|
||||
this.backgroundTokenizer.clear();
|
||||
|
||||
this._defaultBackgroundTokenizer = null;
|
||||
if (this._tokenizationStateStore) {
|
||||
const b: IBackgroundTokenizationStore = {
|
||||
setTokens: (tokens) => {
|
||||
this._tokenizationPart.setTokens(tokens);
|
||||
},
|
||||
backgroundTokenizationFinished: () => {
|
||||
this._tokenizationPart.handleBackgroundTokenizationFinished();
|
||||
},
|
||||
setEndState: (lineNumber, state) => {
|
||||
if (!state) {
|
||||
throw new BugIndicatingError();
|
||||
}
|
||||
const invalidLineStartIndex = this._tokenizationStateStore?.invalidLineStartIndex;
|
||||
if (invalidLineStartIndex !== undefined && lineNumber - 1 >= invalidLineStartIndex) {
|
||||
// Only accept end states for lines at or after the first invalid line; earlier lines are definitely still valid
|
||||
this._tokenizationStateStore?.setEndState(this._textModel.getLineCount(), lineNumber - 1, state);
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
if (tokenizationSupport && tokenizationSupport.createBackgroundTokenizer) {
|
||||
this.backgroundTokenizer.value = tokenizationSupport.createBackgroundTokenizer(this._textModel, b);
|
||||
}
|
||||
if (!this.backgroundTokenizer.value) {
|
||||
this.backgroundTokenizer.value = this._defaultBackgroundTokenizer =
|
||||
new DefaultBackgroundTokenizer(
|
||||
this._textModel,
|
||||
this._tokenizationStateStore,
|
||||
b,
|
||||
this._languageIdCodec
|
||||
);
|
||||
this._defaultBackgroundTokenizer.handleChanges();
|
||||
}
|
||||
}
|
||||
|
||||
this._isScheduled = true;
|
||||
runWhenIdle((deadline) => {
|
||||
this._isScheduled = false;
|
||||
|
||||
this._backgroundTokenizeWithDeadline(deadline);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Tokenize until the deadline occurs, but try to yield every 1-2ms.
|
||||
*/
|
||||
private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
|
||||
// Read the time remaining from the `deadline` immediately because it is unclear
|
||||
// if the `deadline` object will be valid after execution leaves this function.
|
||||
const endTime = Date.now() + deadline.timeRemaining();
|
||||
|
||||
const execute = () => {
|
||||
if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
|
||||
// disposed in the meantime or detached or finished
|
||||
return;
|
||||
}
|
||||
|
||||
this._backgroundTokenizeForAtLeast1ms();
|
||||
|
||||
if (Date.now() < endTime) {
|
||||
// There is still time before reaching the deadline, so yield to the browser and then
|
||||
// continue execution
|
||||
setTimeout0(execute);
|
||||
} else {
|
||||
// The deadline has been reached, so schedule a new idle callback if necessary
|
||||
this._beginBackgroundTokenization();
|
||||
}
|
||||
};
|
||||
execute();
|
||||
}
|
||||
|
||||
/**
|
||||
* Tokenize for at least 1ms.
|
||||
*/
|
||||
private _backgroundTokenizeForAtLeast1ms(): void {
|
||||
const lineCount = this._textModel.getLineCount();
|
||||
const builder = new ContiguousMultilineTokensBuilder();
|
||||
const sw = StopWatch.create(false);
|
||||
|
||||
do {
|
||||
if (sw.elapsed() > 1) {
|
||||
// the comparison is intentionally > 1 and not >= 1 to ensure that
|
||||
// a full millisecond has elapsed, given how microseconds are rounded
|
||||
// to milliseconds
|
||||
break;
|
||||
}
|
||||
|
||||
const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);
|
||||
|
||||
if (tokenizedLineNumber >= lineCount) {
|
||||
break;
|
||||
}
|
||||
} while (this._hasLinesToTokenize());
|
||||
|
||||
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
|
||||
}
|
||||
|
||||
public tokenizeViewport(startLineNumber: number, endLineNumber: number): void {
|
||||
const builder = new ContiguousMultilineTokensBuilder();
|
||||
this._tokenizeViewport(builder, startLineNumber, endLineNumber);
|
||||
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
|
||||
this._heuristicallyTokenizeViewport(builder, startLineNumber, endLineNumber);
|
||||
this._tokenizationPart.setTokens(builder.finalize());
|
||||
this._defaultBackgroundTokenizer?.checkFinished();
|
||||
}
|
||||
|
||||
public reset(): void {
|
||||
|
@ -313,8 +294,9 @@ export class TextModelTokenization extends Disposable {
|
|||
|
||||
public forceTokenization(lineNumber: number): void {
|
||||
const builder = new ContiguousMultilineTokensBuilder();
|
||||
this._updateTokensUntilLine(builder, lineNumber);
|
||||
this._tokenizationPart.setTokens(builder.finalize(), this._isTokenizationComplete());
|
||||
this._tokenizationStateStore?.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
|
||||
this._tokenizationPart.setTokens(builder.finalize());
|
||||
this._defaultBackgroundTokenizer?.checkFinished();
|
||||
}
|
||||
|
||||
public getTokenTypeIfInsertingCharacter(position: Position, character: string): StandardTokenType {
|
||||
|
@ -401,55 +383,14 @@ export class TextModelTokenization extends Disposable {
|
|||
return false;
|
||||
}
|
||||
|
||||
private _hasLinesToTokenize(): boolean {
|
||||
if (!this._tokenizationStateStore) {
|
||||
return false;
|
||||
}
|
||||
return (this._tokenizationStateStore.invalidLineStartIndex < this._textModel.getLineCount());
|
||||
}
|
||||
|
||||
private _isTokenizationComplete(): boolean {
|
||||
if (!this._tokenizationStateStore) {
|
||||
return false;
|
||||
}
|
||||
return (this._tokenizationStateStore.invalidLineStartIndex >= this._textModel.getLineCount());
|
||||
}
|
||||
|
||||
private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
|
||||
if (!this._tokenizationStateStore || !this._hasLinesToTokenize()) {
|
||||
return this._textModel.getLineCount() + 1;
|
||||
}
|
||||
const lineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
|
||||
this._updateTokensUntilLine(builder, lineNumber);
|
||||
return lineNumber;
|
||||
}
|
||||
|
||||
private _updateTokensUntilLine(builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
|
||||
if (!this._tokenizationStateStore) {
|
||||
return;
|
||||
}
|
||||
const languageId = this._textModel.getLanguageId();
|
||||
const linesLength = this._textModel.getLineCount();
|
||||
const endLineIndex = lineNumber - 1;
|
||||
|
||||
// Validate all states up to and including endLineIndex
|
||||
for (let lineIndex = this._tokenizationStateStore.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
|
||||
const text = this._textModel.getLineContent(lineIndex + 1);
|
||||
const lineStartState = this._tokenizationStateStore.getBeginState(lineIndex);
|
||||
|
||||
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, lineStartState!);
|
||||
builder.add(lineIndex + 1, r.tokens);
|
||||
this._tokenizationStateStore.setEndState(linesLength, lineIndex, r.endState);
|
||||
lineIndex = this._tokenizationStateStore.invalidLineStartIndex - 1; // -1 because the outer loop increments it
|
||||
}
|
||||
}
|
||||
|
||||
private _tokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
|
||||
/**
|
||||
* The result is not cached.
|
||||
*/
|
||||
private _heuristicallyTokenizeViewport(builder: ContiguousMultilineTokensBuilder, startLineNumber: number, endLineNumber: number): void {
|
||||
if (!this._tokenizationStateStore) {
|
||||
// nothing to do
|
||||
return;
|
||||
}
|
||||
|
||||
if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
|
||||
// nothing to do
|
||||
return;
|
||||
|
@ -457,24 +398,38 @@ export class TextModelTokenization extends Disposable {
|
|||
|
||||
if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
|
||||
// tokenization has reached the viewport start...
|
||||
this._updateTokensUntilLine(builder, endLineNumber);
|
||||
this._tokenizationStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, endLineNumber);
|
||||
return;
|
||||
}
|
||||
|
||||
let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(startLineNumber);
|
||||
const fakeLines: string[] = [];
|
||||
let initialState: IState | null = null;
|
||||
for (let i = startLineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
|
||||
const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
|
||||
let state = this.guessStartState(startLineNumber);
|
||||
const languageId = this._textModel.getLanguageId();
|
||||
|
||||
for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
|
||||
const text = this._textModel.getLineContent(lineNumber);
|
||||
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
|
||||
builder.add(lineNumber, r.tokens);
|
||||
state = r.endState;
|
||||
}
|
||||
// We overrode the tokens. Because old states might get reused (thus stopping invalidation),
|
||||
// we have to explicitly request the tokens for this range again.
|
||||
this.backgroundTokenizer.value?.requestTokens(startLineNumber, endLineNumber + 1);
|
||||
}
|
||||
|
||||
private guessStartState(lineNumber: number): IState {
|
||||
let nonWhitespaceColumn = this._textModel.getLineFirstNonWhitespaceColumn(lineNumber);
|
||||
const likelyRelevantLines: string[] = [];
|
||||
let initialState: IState | null = null;
|
||||
for (let i = lineNumber - 1; nonWhitespaceColumn > 1 && i >= 1; i--) {
|
||||
const newNonWhitespaceIndex = this._textModel.getLineFirstNonWhitespaceColumn(i);
|
||||
// Ignore lines full of whitespace
|
||||
if (newNonWhitespaceIndex === 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (newNonWhitespaceIndex < nonWhitespaceColumn) {
|
||||
fakeLines.push(this._textModel.getLineContent(i));
|
||||
likelyRelevantLines.push(this._textModel.getLineContent(i));
|
||||
nonWhitespaceColumn = newNonWhitespaceIndex;
|
||||
initialState = this._tokenizationStateStore.getBeginState(i - 1);
|
||||
initialState = this._tokenizationStateStore!.getBeginState(i - 1);
|
||||
if (initialState) {
|
||||
break;
|
||||
}
|
||||
|
@ -482,23 +437,17 @@ export class TextModelTokenization extends Disposable {
|
|||
}
|
||||
|
||||
if (!initialState) {
|
||||
initialState = this._tokenizationStateStore.initialState;
|
||||
initialState = this._tokenizationStateStore!.initialState;
|
||||
}
|
||||
likelyRelevantLines.reverse();
|
||||
|
||||
const languageId = this._textModel.getLanguageId();
|
||||
let state = initialState;
|
||||
for (let i = fakeLines.length - 1; i >= 0; i--) {
|
||||
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, fakeLines[i], false, state);
|
||||
state = r.endState;
|
||||
}
|
||||
|
||||
for (let lineNumber = startLineNumber; lineNumber <= endLineNumber; lineNumber++) {
|
||||
const text = this._textModel.getLineContent(lineNumber);
|
||||
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore.tokenizationSupport, text, true, state);
|
||||
builder.add(lineNumber, r.tokens);
|
||||
this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
|
||||
for (const line of likelyRelevantLines) {
|
||||
const r = safeTokenize(this._languageIdCodec, languageId, this._tokenizationStateStore!.tokenizationSupport, line, false, state);
|
||||
state = r.endState;
|
||||
}
|
||||
return state;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -538,3 +487,123 @@ function safeTokenize(languageIdCodec: ILanguageIdCodec, languageId: string, tok
|
|||
LineTokens.convertToEndOffset(r.tokens, text.length);
|
||||
return r;
|
||||
}
|
||||
|
||||
/**
 * Background tokenizer used when the tokenization support does not supply its own.
 *
 * While the text model is attached to an editor, it tokenizes the invalid lines
 * tracked by the state store during idle time, in slices of at least 1ms, and
 * reports the results to the background tokenization store.
 */
class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
	private _isDisposed = false;

	/** `true` while an idle callback is pending, so that at most one is queued at a time. */
	private _isScheduled = false;

	constructor(
		private readonly _textModel: ITextModel,
		private readonly _stateStore: TokenizationStateStore,
		private readonly _backgroundTokenStore: IBackgroundTokenizationStore,
		private readonly _languageIdCodec: ILanguageIdCodec,
	) {
	}

	/**
	 * Marks this tokenizer as disposed. Pending callbacks notice the flag and stop.
	 */
	public dispose(): void {
		this._isDisposed = true;
	}

	/**
	 * Should be called whenever lines might need tokenization (content changed, model attached).
	 */
	public handleChanges(): void {
		this._beginBackgroundTokenization();
	}

	/**
	 * Schedules an idle callback, unless one is already pending, the tokenizer is disposed,
	 * the model is not attached to an editor, or there is nothing left to tokenize.
	 */
	private _beginBackgroundTokenization(): void {
		// A disposed tokenizer must not queue further idle work.
		if (this._isDisposed || this._isScheduled || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
			return;
		}

		this._isScheduled = true;
		runWhenIdle((deadline) => {
			this._isScheduled = false;

			this._backgroundTokenizeWithDeadline(deadline);
		});
	}

	/**
	 * Tokenize until the deadline occurs, but try to yield every 1-2ms.
	 */
	private _backgroundTokenizeWithDeadline(deadline: IdleDeadline): void {
		// Read the time remaining from the `deadline` immediately because it is unclear
		// if the `deadline` object will be valid after execution leaves this function.
		const endTime = Date.now() + deadline.timeRemaining();

		const execute = () => {
			if (this._isDisposed || !this._textModel.isAttachedToEditor() || !this._hasLinesToTokenize()) {
				// disposed in the meantime or detached or finished
				return;
			}

			this._backgroundTokenizeForAtLeast1ms();

			if (Date.now() < endTime) {
				// There is still time before reaching the deadline, so yield to the browser and then
				// continue execution
				setTimeout0(execute);
			} else {
				// The deadline has been reached, so schedule a new idle callback if necessary
				this._beginBackgroundTokenization();
			}
		};
		execute();
	}

	/**
	 * Tokenize for at least 1ms.
	 */
	private _backgroundTokenizeForAtLeast1ms(): void {
		const lineCount = this._textModel.getLineCount();
		const builder = new ContiguousMultilineTokensBuilder();
		const sw = StopWatch.create(false);

		do {
			if (sw.elapsed() > 1) {
				// the comparison is intentionally > 1 and not >= 1 to ensure that
				// a full millisecond has elapsed, given how microseconds are rounded
				// to milliseconds
				break;
			}

			const tokenizedLineNumber = this._tokenizeOneInvalidLine(builder);

			if (tokenizedLineNumber >= lineCount) {
				break;
			}
		} while (this._hasLinesToTokenize());

		this._backgroundTokenStore.setTokens(builder.finalize());
		this.checkFinished();
	}

	/**
	 * Whether the state store still has invalid lines inside the model.
	 */
	private _hasLinesToTokenize(): boolean {
		return !this._stateStore.isTokenizationComplete(this._textModel);
	}

	/**
	 * Tokenizes up to the first invalid line and returns its 1-based line number,
	 * or `lineCount + 1` when there is nothing to tokenize.
	 */
	private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
		if (!this._hasLinesToTokenize()) {
			return this._textModel.getLineCount() + 1;
		}
		const lineNumber = this._stateStore.invalidLineStartIndex + 1;
		this._stateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
		return lineNumber;
	}

	/**
	 * Notifies the background tokenization store once all lines are tokenized.
	 * Does nothing after disposal.
	 */
	public checkFinished(): void {
		if (this._isDisposed) {
			return;
		}
		if (this._stateStore.isTokenizationComplete(this._textModel)) {
			this._backgroundTokenStore.backgroundTokenizationFinished();
		}
	}

	/**
	 * Marks the lines in `[startLineNumber, endLineNumberExclusive)` as needing tokenization again.
	 */
	public requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {
		for (let lineNumber = startLineNumber; lineNumber < endLineNumberExclusive; lineNumber++) {
			// the state store is indexed by 0-based line index
			this._stateStore.markMustBeTokenized(lineNumber - 1);
		}
	}
}
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
import { Emitter, Event } from 'vs/base/common/event';
|
||||
import { CharCode } from 'vs/base/common/charCode';
|
||||
import { IDisposable } from 'vs/base/common/lifecycle';
|
||||
import { IPosition, Position } from 'vs/editor/common/core/position';
|
||||
import { IRange, Range } from 'vs/editor/common/core/range';
|
||||
import { getWordAtText, IWordAtPosition } from 'vs/editor/common/core/wordHelper';
|
||||
|
@ -34,7 +33,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
|
|||
private readonly _onDidChangeTokens: Emitter<IModelTokensChangedEvent> = this._register(new Emitter<IModelTokensChangedEvent>());
|
||||
public readonly onDidChangeTokens: Event<IModelTokensChangedEvent> = this._onDidChangeTokens.event;
|
||||
|
||||
private readonly _languageRegistryListener: IDisposable;
|
||||
private readonly _tokens: ContiguousTokensStore;
|
||||
private readonly _semanticTokens: SparseTokensStore;
|
||||
private readonly _tokenization: TextModelTokenization;
|
||||
|
@ -54,19 +52,19 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
|
|||
this._semanticTokens = new SparseTokensStore(
|
||||
this._languageService.languageIdCodec
|
||||
);
|
||||
this._tokenization = new TextModelTokenization(
|
||||
this._tokenization = this._register(new TextModelTokenization(
|
||||
_textModel,
|
||||
this,
|
||||
this._languageService.languageIdCodec
|
||||
);
|
||||
));
|
||||
|
||||
this._languageRegistryListener = this._languageConfigurationService.onDidChange(
|
||||
this._register(this._languageConfigurationService.onDidChange(
|
||||
e => {
|
||||
if (e.affects(this._languageId)) {
|
||||
this._onDidChangeLanguageConfiguration.fire({});
|
||||
}
|
||||
}
|
||||
);
|
||||
));
|
||||
}
|
||||
|
||||
_hasListeners(): boolean {
|
||||
|
@ -74,7 +72,6 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
|
|||
this._onDidChangeLanguage.hasListeners()
|
||||
|| this._onDidChangeLanguageConfiguration.hasListeners()
|
||||
|| this._onDidChangeTokens.hasListeners()
|
||||
|| this._onBackgroundTokenizationStateChanged.hasListeners()
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -104,35 +101,15 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
|
|||
this._semanticTokens.flush();
|
||||
}
|
||||
|
||||
// TODO@hediet TODO@alexdima what is the difference between this and acceptEdit?
|
||||
/**
 * Forwards a model content change to the tokenization component
 * (`this._tokenization.handleDidChangeContent`).
 *
 * @param change The content-changed event to forward.
 */
public handleDidChangeContent(change: IModelContentChangedEvent): void {
|
||||
this._tokenization.handleDidChangeContent(change);
|
||||
}
|
||||
|
||||
public override dispose(): void {
|
||||
this._languageRegistryListener.dispose();
|
||||
this._tokenization.dispose();
|
||||
super.dispose();
|
||||
}
|
||||
|
||||
private _backgroundTokenizationState = BackgroundTokenizationState.Uninitialized;
|
||||
private _backgroundTokenizationState = BackgroundTokenizationState.InProgress;
|
||||
/**
 * The current background tokenization state, as stored in
 * `_backgroundTokenizationState`.
 */
public get backgroundTokenizationState(): BackgroundTokenizationState {
|
||||
return this._backgroundTokenizationState;
|
||||
}
|
||||
private handleTokenizationProgress(completed: boolean) {
|
||||
if (this._backgroundTokenizationState === BackgroundTokenizationState.Completed) {
|
||||
// We already did a full tokenization and don't go back to progressing.
|
||||
return;
|
||||
}
|
||||
const newState = completed ? BackgroundTokenizationState.Completed : BackgroundTokenizationState.InProgress;
|
||||
if (this._backgroundTokenizationState !== newState) {
|
||||
this._backgroundTokenizationState = newState;
|
||||
this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
|
||||
this._onBackgroundTokenizationStateChanged.fire();
|
||||
}
|
||||
}
|
||||
|
||||
private readonly _onBackgroundTokenizationStateChanged = this._register(new Emitter<void>());
|
||||
public readonly onBackgroundTokenizationStateChanged: Event<void> = this._onBackgroundTokenizationStateChanged.event;
|
||||
|
||||
public setLineTokens(
|
||||
lineNumber: number,
|
||||
|
@ -151,64 +128,76 @@ export class TokenizationTextModelPart extends TextModelPart implements ITokeniz
|
|||
);
|
||||
}
|
||||
|
||||
public setTokens(
|
||||
tokens: ContiguousMultilineTokens[],
|
||||
backgroundTokenizationCompleted: boolean = false
|
||||
): void {
|
||||
if (tokens.length !== 0) {
|
||||
const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
|
||||
/**
 * Switches the background tokenization state to `Completed` and informs the
 * bracket-pairs model part about the state change.
 *
 * Calling this again after the state is already `Completed` has no effect:
 * a finished tokenization never goes back to "in progress".
 */
public handleBackgroundTokenizationFinished(): void {
	if (this._backgroundTokenizationState !== BackgroundTokenizationState.Completed) {
		this._backgroundTokenizationState = BackgroundTokenizationState.Completed;
		this.bracketPairsTextModelPart.handleDidChangeBackgroundTokenizationState();
	}
}
|
||||
|
||||
for (let i = 0, len = tokens.length; i < len; i++) {
|
||||
const element = tokens[i];
|
||||
let minChangedLineNumber = 0;
|
||||
let maxChangedLineNumber = 0;
|
||||
let hasChange = false;
|
||||
for (
|
||||
let lineNumber = element.startLineNumber;
|
||||
lineNumber <= element.endLineNumber;
|
||||
lineNumber++
|
||||
) {
|
||||
if (hasChange) {
|
||||
this._tokens.setTokens(
|
||||
this._languageId,
|
||||
lineNumber - 1,
|
||||
this._textModel.getLineLength(lineNumber),
|
||||
element.getLineTokens(lineNumber),
|
||||
false
|
||||
);
|
||||
/**
 * Whether the underlying token store (`this._tokens`) reports having tokens.
 */
public get hasTokens(): boolean {
|
||||
return this._tokens.hasTokens;
|
||||
}
|
||||
|
||||
public setTokens(tokens: ContiguousMultilineTokens[]): void {
|
||||
if (tokens.length === 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const ranges: { fromLineNumber: number; toLineNumber: number }[] = [];
|
||||
|
||||
for (let i = 0, len = tokens.length; i < len; i++) {
|
||||
const element = tokens[i];
|
||||
let minChangedLineNumber = 0;
|
||||
let maxChangedLineNumber = 0;
|
||||
let hasChange = false;
|
||||
for (
|
||||
let lineNumber = element.startLineNumber;
|
||||
lineNumber <= element.endLineNumber;
|
||||
lineNumber++
|
||||
) {
|
||||
if (hasChange) {
|
||||
this._tokens.setTokens(
|
||||
this._languageId,
|
||||
lineNumber - 1,
|
||||
this._textModel.getLineLength(lineNumber),
|
||||
element.getLineTokens(lineNumber),
|
||||
false
|
||||
);
|
||||
maxChangedLineNumber = lineNumber;
|
||||
} else {
|
||||
const lineHasChange = this._tokens.setTokens(
|
||||
this._languageId,
|
||||
lineNumber - 1,
|
||||
this._textModel.getLineLength(lineNumber),
|
||||
element.getLineTokens(lineNumber),
|
||||
true
|
||||
);
|
||||
if (lineHasChange) {
|
||||
hasChange = true;
|
||||
minChangedLineNumber = lineNumber;
|
||||
maxChangedLineNumber = lineNumber;
|
||||
} else {
|
||||
const lineHasChange = this._tokens.setTokens(
|
||||
this._languageId,
|
||||
lineNumber - 1,
|
||||
this._textModel.getLineLength(lineNumber),
|
||||
element.getLineTokens(lineNumber),
|
||||
true
|
||||
);
|
||||
if (lineHasChange) {
|
||||
hasChange = true;
|
||||
minChangedLineNumber = lineNumber;
|
||||
maxChangedLineNumber = lineNumber;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (hasChange) {
|
||||
ranges.push({
|
||||
fromLineNumber: minChangedLineNumber,
|
||||
toLineNumber: maxChangedLineNumber,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (ranges.length > 0) {
|
||||
this._emitModelTokensChangedEvent({
|
||||
tokenizationSupportChanged: false,
|
||||
semanticTokensApplied: false,
|
||||
ranges: ranges,
|
||||
if (hasChange) {
|
||||
ranges.push({
|
||||
fromLineNumber: minChangedLineNumber,
|
||||
toLineNumber: maxChangedLineNumber,
|
||||
});
|
||||
}
|
||||
}
|
||||
this.handleTokenizationProgress(backgroundTokenizationCompleted);
|
||||
|
||||
if (ranges.length > 0) {
|
||||
this._emitModelTokensChangedEvent({
|
||||
tokenizationSupportChanged: false,
|
||||
semanticTokensApplied: false,
|
||||
ranges: ranges,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
public setSemanticTokens(
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
* Licensed under the MIT License. See License.txt in the project root for license information.
|
||||
*--------------------------------------------------------------------------------------------*/
|
||||
|
||||
import { Event } from 'vs/base/common/event';
|
||||
import { IPosition } from 'vs/editor/common/core/position';
|
||||
import { Range } from 'vs/editor/common/core/range';
|
||||
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
|
||||
|
@ -20,6 +19,8 @@ export interface ITokenizationTextModelPart {
|
|||
*/
|
||||
setTokens(tokens: ContiguousMultilineTokens[]): void;
|
||||
|
||||
readonly hasTokens: boolean;
|
||||
|
||||
/**
|
||||
* Replaces all semantic tokens with the provided `tokens`.
|
||||
* @internal
|
||||
|
@ -98,11 +99,9 @@ export interface ITokenizationTextModelPart {
|
|||
setLanguageId(languageId: string, source?: string): void;
|
||||
|
||||
readonly backgroundTokenizationState: BackgroundTokenizationState;
|
||||
readonly onBackgroundTokenizationStateChanged: Event<void>;
|
||||
}
|
||||
|
||||
export const enum BackgroundTokenizationState {
|
||||
Uninitialized = 0,
|
||||
InProgress = 1,
|
||||
Completed = 2,
|
||||
}
|
||||
|
|
|
@ -30,6 +30,10 @@ export class ContiguousTokensStore {
|
|||
this._len = 0;
|
||||
}
|
||||
|
||||
/**
 * `true` when `_lineTokens` holds at least one entry.
 */
get hasTokens(): boolean {
|
||||
return this._lineTokens.length > 0;
|
||||
}
|
||||
|
||||
public getTokens(topLevelLanguageId: string, lineIndex: number, lineText: string): LineTokens {
|
||||
let rawLineTokens: Uint32Array | ArrayBuffer | null = null;
|
||||
if (lineIndex < this._len) {
|
||||
|
|
2
src/vs/monaco.d.ts
vendored
2
src/vs/monaco.d.ts
vendored
|
@ -875,10 +875,10 @@ declare namespace monaco {
|
|||
}
|
||||
|
||||
export class Token {
|
||||
_tokenBrand: void;
|
||||
readonly offset: number;
|
||||
readonly type: string;
|
||||
readonly language: string;
|
||||
_tokenBrand: void;
|
||||
constructor(offset: number, type: string, language: string);
|
||||
toString(): string;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue