Merge pull request #178683 from microsoft/hediet/tokenization-store-refactoring

Refactors TokenizationStateStore
This commit is contained in:
Henning Dieterichs 2023-03-30 15:17:49 +02:00 committed by GitHub
commit fdc9c7cccb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 510 additions and 321 deletions

View file

@ -9,6 +9,24 @@ import { BugIndicatingError } from 'vs/base/common/errors';
* A range of offsets (0-based).
*/
export class OffsetRange {
/**
 * Inserts `range` into `sortedRanges` (sorted, pairwise non-overlapping), in place,
 * merging it with every existing range it overlaps or touches so the list stays
 * sorted and non-overlapping.
 */
public static addRange(range: OffsetRange, sortedRanges: OffsetRange[]): void {
// `i` is the index of the first range that is not entirely before `range`
// (i.e. the first candidate for overlapping/touching it).
let i = 0;
while (i < sortedRanges.length && sortedRanges[i].endExclusive < range.start) {
i++;
}
// `j` is the index of the first range that starts strictly after `range` ends,
// so every range in [i, j) overlaps or touches `range`.
let j = i;
while (j < sortedRanges.length && sortedRanges[j].start <= range.endExclusive) {
j++;
}
if (i === j) {
// No existing range overlaps or touches `range`: insert it as-is.
sortedRanges.splice(i, 0, range);
} else {
// Collapse `range` and the ranges in [i, j) into one covering range.
const start = Math.min(range.start, sortedRanges[i].start);
const end = Math.max(range.endExclusive, sortedRanges[j - 1].endExclusive);
sortedRanges.splice(i, j - i, new OffsetRange(start, end));
}
}
public static tryCreate(start: number, endExclusive: number): OffsetRange | undefined {
if (start > endExclusive) {
return undefined;
@ -64,9 +82,9 @@ export class OffsetRange {
* The resulting range is empty if the ranges do not intersect, but touch.
* If the ranges don't even touch, the result is undefined.
*/
public intersect(seq1Range: OffsetRange): OffsetRange | undefined {
const start = Math.max(this.start, seq1Range.start);
const end = Math.min(this.endExclusive, seq1Range.endExclusive);
public intersect(other: OffsetRange): OffsetRange | undefined {
const start = Math.max(this.start, other.start);
const end = Math.min(this.endExclusive, other.endExclusive);
if (start <= end) {
return new OffsetRange(start, end);
}

View file

@ -0,0 +1,77 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { arrayInsert } from 'vs/base/common/arrays';
/**
* An array that avoids being sparse by always
* filling up unused indices with a default value.
*/
/**
 * An array that avoids being sparse by always
 * filling up unused indices with a default value.
 */
export class FixedArray<T> {
	private _store: T[] = [];

	constructor(
		private readonly _default: T
	) { }

	/** Reads the value at `index`; positions never written read as the default. */
	public get(index: number): T {
		return index < this._store.length ? this._store[index] : this._default;
	}

	/** Writes `value` at `index`, padding any gap with the default so the store never becomes sparse. */
	public set(index: number, value: T): void {
		for (let k = this._store.length; k <= index; k++) {
			this._store.push(this._default);
		}
		this._store[index] = value;
	}

	/** Replaces `oldLength` entries starting at `index` with `newLength` default entries. */
	public replace(index: number, oldLength: number, newLength: number): void {
		if (index >= this._store.length) {
			// Everything past the store already reads as the default.
			return;
		}
		if (oldLength === 0) {
			this.insert(index, newLength);
		} else if (newLength === 0) {
			this.delete(index, oldLength);
		} else {
			const defaults: T[] = new Array<T>(newLength).fill(this._default);
			this._store.splice(index, oldLength, ...defaults);
		}
	}

	/** Removes `deleteCount` entries starting at `deleteIndex` (later entries shift left). */
	public delete(deleteIndex: number, deleteCount: number): void {
		if (deleteCount === 0 || deleteIndex >= this._store.length) {
			return;
		}
		this._store.splice(deleteIndex, deleteCount);
	}

	/** Inserts `insertCount` default entries at `insertIndex` (later entries shift right). */
	public insert(insertIndex: number, insertCount: number): void {
		if (insertCount === 0 || insertIndex >= this._store.length) {
			// Inserting defaults at/past the end is a semantic no-op.
			return;
		}
		const defaults: T[] = new Array<T>(insertCount).fill(this._default);
		this._store.splice(insertIndex, 0, ...defaults);
	}
}
/** Creates a fresh array of `length` entries, each referencing `value`. */
function arrayFill<T>(length: number, value: T): T[] {
	const result: T[] = [];
	for (let i = 0; i < length; i++) {
		result.push(value);
	}
	return result;
}

View file

@ -3,22 +3,23 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as arrays from 'vs/base/common/arrays';
import { IdleDeadline, runWhenIdle } from 'vs/base/common/async';
import { BugIndicatingError, onUnexpectedError } from 'vs/base/common/errors';
import { Disposable, MutableDisposable } from 'vs/base/common/lifecycle';
import { setTimeout0 } from 'vs/base/common/platform';
import { StopWatch } from 'vs/base/common/stopwatch';
import { countEOL } from 'vs/editor/common/core/eolCounter';
import { LineRange } from 'vs/editor/common/core/lineRange';
import { OffsetRange } from 'vs/editor/common/core/offsetRange';
import { Position } from 'vs/editor/common/core/position';
import { IRange } from 'vs/editor/common/core/range';
import { StandardTokenType } from 'vs/editor/common/encodedTokenAttributes';
import { EncodedTokenizationResult, IBackgroundTokenizationStore, IBackgroundTokenizer, ILanguageIdCodec, IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/languages';
import { nullTokenizeEncoded } from 'vs/editor/common/languages/nullTokenize';
import { ITextModel } from 'vs/editor/common/model';
import { FixedArray } from 'vs/editor/common/model/fixedArray';
import { TextModel } from 'vs/editor/common/model/textModel';
import { TokenizationTextModelPart } from 'vs/editor/common/model/tokenizationTextModelPart';
import { IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
import { IModelContentChange, IModelContentChangedEvent, IModelLanguageChangedEvent } from 'vs/editor/common/textModelEvents';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
@ -26,162 +27,8 @@ const enum Constants {
CHEAP_TOKENIZATION_LENGTH_LIMIT = 2048
}
/**
* An array that avoids being sparse by always
* filling up unused indices with a default value.
*/
/**
 * An array that avoids being sparse by always
 * filling up unused indices with a default value.
 */
export class ContiguousGrowingArray<T> {
	private _store: T[] = [];

	constructor(
		private readonly _default: T
	) { }

	/** Reads the value at `index`; unwritten positions read as the default. */
	public get(index: number): T {
		return index < this._store.length ? this._store[index] : this._default;
	}

	/** Writes `value` at `index`, padding any gap with defaults so the store stays dense. */
	public set(index: number, value: T): void {
		for (let k = this._store.length; k <= index; k++) {
			this._store.push(this._default);
		}
		this._store[index] = value;
	}

	// TODO have `replace` instead of `delete` and `insert`

	/** Removes `deleteCount` entries starting at `deleteIndex` (later entries shift left). */
	public delete(deleteIndex: number, deleteCount: number): void {
		if (deleteCount === 0 || deleteIndex >= this._store.length) {
			return;
		}
		this._store.splice(deleteIndex, deleteCount);
	}

	/** Inserts `insertCount` default entries at `insertIndex` (later entries shift right). */
	public insert(insertIndex: number, insertCount: number): void {
		if (insertCount === 0 || insertIndex >= this._store.length) {
			// Positions at/past the end already read as the default.
			return;
		}
		const padding: T[] = [];
		for (let i = 0; i < insertCount; i++) {
			padding.push(this._default);
		}
		this._store.splice(insertIndex, 0, ...padding);
	}
}
/**
* Stores the states at the start of each line and keeps track of which lines
* must be re-tokenized. Also uses state equality to quickly validate lines
* that don't need to be re-tokenized.
*
* For example, when typing on a line, the line gets marked as needing to be tokenized.
* Once the line is tokenized, the end state is checked for equality against the begin
* state of the next line. If the states are equal, tokenization doesn't need to run
* again over the rest of the file. If the states are not equal, the next line gets marked
* as needing to be tokenized.
*/
export class TokenizationStateStore {
/**
* `lineBeginState[i]` contains the begin state used to tokenize line number `i + 1`.
*/
private readonly _lineBeginState = new ContiguousGrowingArray<IState | null>(null);
/**
* `lineNeedsTokenization[i]` describes if line number `i + 1` needs to be tokenized.
*/
private readonly _lineNeedsTokenization = new ContiguousGrowingArray<boolean>(true);
/**
* `invalidLineStartIndex` indicates that line number `invalidLineStartIndex + 1`
* is the first one that needs to be re-tokenized.
*/
private _firstLineNeedsTokenization: number;
public get invalidLineStartIndex() {
return this._firstLineNeedsTokenization;
}
constructor(
public readonly tokenizationSupport: ITokenizationSupport,
public readonly initialState: IState
) {
// Initially every line is dirty; line 1 starts tokenizing from the initial state.
this._firstLineNeedsTokenization = 0;
this._lineBeginState.set(0, this.initialState);
}
/**
* Marks line number `lineIndex + 1` as needing re-tokenization and pulls the
* invalid pointer back to it if it currently points past that line.
*/
public markMustBeTokenized(lineIndex: number): void {
this._lineNeedsTokenization.set(lineIndex, true);
this._firstLineNeedsTokenization = Math.min(this._firstLineNeedsTokenization, lineIndex);
}
/** The state line number `lineIndex + 1` starts with, or `null` if not yet computed. */
public getBeginState(lineIndex: number): IState | null {
return this._lineBeginState.get(lineIndex);
}
/**
* Returns `false` if the end state equals the previous end state.
*/
public setEndState(linesLength: number, lineIndex: number, endState: IState): boolean {
this._lineNeedsTokenization.set(lineIndex, false);
this._firstLineNeedsTokenization = lineIndex + 1;
// Check if the end state has changed
const previousEndState = this._lineBeginState.get(lineIndex + 1);
if (previousEndState === null || !endState.equals(previousEndState)) {
// The next line's begin state changed, so it must be re-tokenized.
this._lineBeginState.set(lineIndex + 1, endState);
this.markMustBeTokenized(lineIndex + 1);
return true;
}
// Perhaps we can skip tokenizing some lines...
// The end state is unchanged, so every following clean line is still valid:
// advance the invalid pointer to the next dirty line (or past the end).
let i = lineIndex + 1;
while (i < linesLength) {
if (this._lineNeedsTokenization.get(i)) {
break;
}
i++;
}
this._firstLineNeedsTokenization = i;
return false;
}
/**
* Adjusts the per-line bookkeeping after a text edit: the edited lines' entries
* are dropped and `eolCount` fresh (dirty, state-unknown) lines take their place.
*/
public applyEdits(range: IRange, eolCount: number): void {
this.markMustBeTokenized(range.startLineNumber - 1);
this._lineBeginState.delete(range.startLineNumber, range.endLineNumber - range.startLineNumber);
this._lineNeedsTokenization.delete(range.startLineNumber, range.endLineNumber - range.startLineNumber);
this._lineBeginState.insert(range.startLineNumber, eolCount);
this._lineNeedsTokenization.insert(range.startLineNumber, eolCount);
}
/**
* Tokenizes every invalid line up to and including `lineNumber`, adding the
* produced tokens to `builder` and recording each line's end state.
*/
public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
const languageId = textModel.getLanguageId();
const linesLength = textModel.getLineCount();
const endLineIndex = lineNumber - 1;
// Validate all states up to and including endLineIndex
for (let lineIndex = this.invalidLineStartIndex; lineIndex <= endLineIndex; lineIndex++) {
const text = textModel.getLineContent(lineIndex + 1);
const lineStartState = this.getBeginState(lineIndex);
const r = safeTokenize(languageIdCodec, languageId, this.tokenizationSupport, text, true, lineStartState!);
builder.add(lineIndex + 1, r.tokens);
this.setEndState(linesLength, lineIndex, r.endState);
// setEndState may have advanced the invalid pointer past several clean lines;
// resume iteration from there instead of re-tokenizing them.
lineIndex = this.invalidLineStartIndex - 1; // -1 because the outer loop increments it
}
}
/** Complete once no line of the model is left invalid. */
isTokenizationComplete(textModel: ITextModel): boolean {
return this.invalidLineStartIndex >= textModel.getLineCount();
}
}
export class TextModelTokenization extends Disposable {
private _tokenizationStateStore: TokenizationStateStore | null = null;
private _tokenizationStateStore: TokenizerWithStateStore | null = null;
private _defaultBackgroundTokenizer: DefaultBackgroundTokenizer | null = null;
private readonly backgroundTokenizer = this._register(new MutableDisposable<IBackgroundTokenizer>());
@ -212,11 +59,7 @@ export class TextModelTokenization extends Disposable {
return;
}
if (this._tokenizationStateStore) {
for (let i = 0, len = e.changes.length; i < len; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
}
this._tokenizationStateStore.store.acceptChanges(e.changes);
}
this._defaultBackgroundTokenizer?.handleChanges();
@ -234,7 +77,7 @@ export class TextModelTokenization extends Disposable {
private _resetTokenizationState(): void {
const [tokenizationSupport, initialState] = initializeTokenization(this._textModel, this._tokenizationPart);
if (tokenizationSupport && initialState) {
this._tokenizationStateStore = new TokenizationStateStore(tokenizationSupport, initialState);
this._tokenizationStateStore = new TokenizerWithStateStore(this._textModel.getLineCount(), tokenizationSupport);
} else {
this._tokenizationStateStore = null;
}
@ -254,10 +97,10 @@ export class TextModelTokenization extends Disposable {
if (!state) {
throw new BugIndicatingError();
}
const invalidLineStartIndex = this._tokenizationStateStore?.invalidLineStartIndex;
if (invalidLineStartIndex !== undefined && lineNumber - 1 >= invalidLineStartIndex) {
const firstInvalidEndStateLineNumber = this._tokenizationStateStore?.store.getFirstInvalidEndStateLineNumber() ?? undefined;
if (firstInvalidEndStateLineNumber !== undefined && lineNumber >= firstInvalidEndStateLineNumber) {
// Don't accept states for definitely valid states
this._tokenizationStateStore?.setEndState(this._textModel.getLineCount(), lineNumber - 1, state);
this._tokenizationStateStore?.store.setEndState(lineNumber, state);
}
},
};
@ -303,7 +146,7 @@ export class TextModelTokenization extends Disposable {
}
this.forceTokenization(position.lineNumber);
const lineStartState = this._tokenizationStateStore.getBeginState(position.lineNumber - 1);
const lineStartState = this._tokenizationStateStore.getStartState(position.lineNumber);
if (!lineStartState) {
return StandardTokenType.Other;
}
@ -337,7 +180,7 @@ export class TextModelTokenization extends Disposable {
}
this.forceTokenization(lineNumber);
const lineStartState = this._tokenizationStateStore.getBeginState(lineNumber - 1);
const lineStartState = this._tokenizationStateStore.getStartState(lineNumber);
if (!lineStartState) {
return null;
}
@ -365,16 +208,12 @@ export class TextModelTokenization extends Disposable {
return true;
}
const firstInvalidLineNumber = this._tokenizationStateStore.invalidLineStartIndex + 1;
if (lineNumber > firstInvalidLineNumber) {
return false;
}
const firstInvalidLineNumber = this._tokenizationStateStore.store.getFirstInvalidEndStateLineNumberOrMax();
if (lineNumber < firstInvalidLineNumber) {
return true;
}
if (this._textModel.getLineLength(lineNumber) < Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT) {
if (lineNumber === firstInvalidLineNumber
&& this._textModel.getLineLength(lineNumber) < Constants.CHEAP_TOKENIZATION_LENGTH_LIMIT) {
return true;
}
@ -389,12 +228,12 @@ export class TextModelTokenization extends Disposable {
// nothing to do
return;
}
if (endLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
if (endLineNumber <= this._tokenizationStateStore.store.getFirstInvalidEndStateLineNumberOrMax()) {
// nothing to do
return;
}
if (startLineNumber <= this._tokenizationStateStore.invalidLineStartIndex) {
if (startLineNumber <= this._tokenizationStateStore.store.getFirstInvalidEndStateLineNumberOrMax()) {
// tokenization has reached the viewport start...
this._tokenizationStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, endLineNumber);
return;
@ -427,7 +266,7 @@ export class TextModelTokenization extends Disposable {
if (newNonWhitespaceIndex < nonWhitespaceColumn) {
likelyRelevantLines.push(this._textModel.getLineContent(i));
nonWhitespaceColumn = newNonWhitespaceIndex;
initialState = this._tokenizationStateStore!.getBeginState(i - 1);
initialState = this._tokenizationStateStore!.getStartState(i);
if (initialState) {
break;
}
@ -435,7 +274,7 @@ export class TextModelTokenization extends Disposable {
}
if (!initialState) {
initialState = this._tokenizationStateStore!.initialState;
initialState = this._tokenizationStateStore!.tokenizationSupport.getInitialState();
}
likelyRelevantLines.reverse();
@ -449,6 +288,220 @@ export class TextModelTokenization extends Disposable {
}
}
/**
 * Pairs a tokenizer with a store of per-line end states, so that any line can be
 * (re-)tokenized from its predecessor's end state.
 */
export class TokenizerWithStateStore<TState extends IState = IState> {
private readonly initialState = this.tokenizationSupport.getInitialState();
public readonly store: TrackingTokenizationStateStore<TState>;
constructor(
lineCount: number,
public readonly tokenizationSupport: ITokenizationSupport
) {
this.store = new TrackingTokenizationStateStore<TState>(lineCount);
}
/**
* The state line `lineNumber` starts with: the tokenizer's initial state for
* line 1, otherwise the previous line's stored end state (`null` if unknown).
*/
public getStartState(lineNumber: number): TState | null {
if (lineNumber === 1) {
return this.initialState as TState;
}
return this.store.getEndState(lineNumber - 1);
}
/**
* Tokenizes all lines whose end state is invalid, up to and including
* `lineNumber`, adding tokens to `builder` and storing fresh end states.
*/
public updateTokensUntilLine(textModel: ITextModel, languageIdCodec: ILanguageIdCodec, builder: ContiguousMultilineTokensBuilder, lineNumber: number): void {
const languageId = textModel.getLanguageId();
while (true) {
// Always resume at the first line whose end state is invalid; setEndState
// below moves that pointer forward, so this loop terminates.
const nextLineNumber = this.store.getFirstInvalidEndStateLineNumber();
if (!nextLineNumber || nextLineNumber > lineNumber) {
break;
}
const text = textModel.getLineContent(nextLineNumber);
const lineStartState = this.getStartState(nextLineNumber);
const r = safeTokenize(languageIdCodec, languageId, this.tokenizationSupport, text, true, lineStartState!);
builder.add(nextLineNumber, r.tokens);
this.store.setEndState(nextLineNumber, r.endState as TState);
}
}
}
/**
 * Stores per-line end states and additionally tracks, as a set of line-number
 * ranges, which lines have an invalid (not yet computed / outdated) end state.
 * Line numbers are 1-based.
 */
export class TrackingTokenizationStateStore<TState extends IState> {
private readonly tokenizationStateStore = new TokenizationStateStore<TState>();
// Priority queue of 1-based line numbers whose end state is invalid.
private readonly _invalidEndStatesLineNumbers = new RangePriorityQueueImpl();
constructor(private lineCount: number) {
// Initially every line's end state is invalid.
this._invalidEndStatesLineNumbers.addRange(new OffsetRange(1, lineCount + 1));
}
public getEndState(lineNumber: number): TState | null {
return this.tokenizationStateStore.getEndState(lineNumber);
}
/**
* Records the end state for `lineNumber` and marks it (and any earlier queued
* lines) as valid. Returns `true` iff the stored end state actually changed.
*/
public setEndState(lineNumber: number, state: TState): boolean {
// Drain every queued invalid line up to and including `lineNumber`.
// NOTE(review): this also validates earlier queued lines without tokenizing
// them — presumably callers always tokenize in order; confirm against callers.
while (true) {
const min = this._invalidEndStatesLineNumbers.min;
if (min !== null && min <= lineNumber) {
this._invalidEndStatesLineNumbers.removeMin();
} else {
break;
}
}
const r = this.tokenizationStateStore.setEndState(lineNumber, state);
if (r && lineNumber < this.lineCount) {
// because the state changed, we cannot trust the next state anymore and have to invalidate it.
this._invalidEndStatesLineNumbers.addRange(new OffsetRange(lineNumber + 1, lineNumber + 2));
}
return r;
}
/**
* Applies a single edit: `range` (old lines) is replaced by `newLineCount`
* fresh lines, which are all marked invalid.
*/
public acceptChange(range: LineRange, newLineCount: number): void {
this.lineCount += newLineCount - range.length;
this.tokenizationStateStore.acceptChange(range, newLineCount);
this._invalidEndStatesLineNumbers.addRangeAndResize(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive), newLineCount);
}
/** Applies a batch of model content changes (one `acceptChange` per change). */
public acceptChanges(changes: IModelContentChange[]) {
for (const c of changes) {
// countEOL yields the number of line breaks in the inserted text,
// so the change produces eolCount + 1 lines.
const [eolCount] = countEOL(c.text);
this.acceptChange(new LineRange(c.range.startLineNumber, c.range.endLineNumber + 1), eolCount + 1);
}
}
/** Marks every line in `range` as having an invalid end state. */
public invalidateEndStateRange(range: LineRange): void {
this._invalidEndStatesLineNumbers.addRange(new OffsetRange(range.startLineNumber, range.endLineNumberExclusive));
}
/** First 1-based line number with an invalid end state, or `null` if none. */
public getFirstInvalidEndStateLineNumber(): number | null {
return this._invalidEndStatesLineNumbers.min;
}
// `||` is safe here (not `??`) only because line numbers are always >= 1.
public getFirstInvalidEndStateLineNumberOrMax(): number {
return this._invalidEndStatesLineNumbers.min || Number.MAX_SAFE_INTEGER;
}
public isTokenizationComplete(): boolean {
return this._invalidEndStatesLineNumbers.min === null;
}
}
/**
 * Plain storage of per-line tokenization end states (1-based line numbers),
 * with no validity tracking of its own.
 */
export class TokenizationStateStore<TState extends IState> {
// `null` marks a line whose end state has not been computed.
private readonly _lineEndStates = new FixedArray<TState | null>(null);
public getEndState(lineNumber: number): TState | null {
return this._lineEndStates.get(lineNumber);
}
/**
* Stores the end state for `lineNumber`.
* Returns `false` if it equals the previously stored state (by `equals`).
*/
public setEndState(lineNumber: number, state: TState): boolean {
const oldState = this._lineEndStates.get(lineNumber);
if (oldState && oldState.equals(state)) {
return false;
}
this._lineEndStates.set(lineNumber, state);
return true;
}
/**
* Applies a single edit: the end states of `range` (old lines) are replaced
* by `newLineCount` unset (`null`) entries.
*/
public acceptChange(range: LineRange, newLineCount: number): void {
let length = range.length;
if (newLineCount > 0 && length > 0) {
// Keep the last state, even though it is unrelated.
// But if the new state happens to agree with this last state, then we know we can stop tokenizing.
length--;
newLineCount--;
}
this._lineEndStates.replace(range.startLineNumber, length, newLineCount);
}
/** Applies a batch of model content changes (one `acceptChange` per change). */
public acceptChanges(changes: IModelContentChange[]) {
for (const c of changes) {
// countEOL yields the number of line breaks, so the inserted text spans eolCount + 1 lines.
const [eolCount] = countEOL(c.text);
this.acceptChange(new LineRange(c.range.startLineNumber, c.range.endLineNumber + 1), eolCount + 1);
}
}
}
/**
 * A priority queue of integers, stored compactly as a sorted list of
 * non-overlapping ranges.
 */
interface RangePriorityQueue {
// Smallest contained value, or `null` when the queue is empty.
get min(): number | null;
// Removes and returns the smallest contained value, or `null` when empty.
removeMin(): number | null;
// Adds every value in `range` to the queue.
addRange(range: OffsetRange): void;
// Replaces the span covered by `range` with a fully-contained span of `newLength`
// values, shifting all later values by the length difference.
addRangeAndResize(range: OffsetRange, newLength: number): void;
}
export class RangePriorityQueueImpl implements RangePriorityQueue {
// Sorted, non-overlapping ranges; invariant maintained by OffsetRange.addRange.
private readonly _ranges: OffsetRange[] = [];
public getRanges(): OffsetRange[] {
return this._ranges;
}
public get min(): number | null {
if (this._ranges.length === 0) {
return null;
}
// Ranges are sorted, so the first range's start is the global minimum.
return this._ranges[0].start;
}
public removeMin(): number | null {
if (this._ranges.length === 0) {
return null;
}
const range = this._ranges[0];
if (range.start + 1 === range.endExclusive) {
// Range held a single value: drop it entirely.
this._ranges.shift();
} else {
// Shrink the range from the left (OffsetRange is immutable).
this._ranges[0] = new OffsetRange(range.start + 1, range.endExclusive);
}
return range.start;
}
public addRange(range: OffsetRange): void {
OffsetRange.addRange(range, this._ranges);
}
public addRangeAndResize(range: OffsetRange, newLength: number): void {
// First range that might overlap or touch `range` (everything before ends strictly before it).
let idxFirstMightBeIntersecting = 0;
while (!(idxFirstMightBeIntersecting >= this._ranges.length || range.start <= this._ranges[idxFirstMightBeIntersecting].endExclusive)) {
idxFirstMightBeIntersecting++;
}
// First range that lies strictly after `range`; [idxFirstMightBeIntersecting, idxFirstIsAfter) overlap/touch it.
let idxFirstIsAfter = idxFirstMightBeIntersecting;
while (!(idxFirstIsAfter >= this._ranges.length || range.endExclusive < this._ranges[idxFirstIsAfter].start)) {
idxFirstIsAfter++;
}
// Shift everything after the edited span by the length difference.
const delta = newLength - range.length;
for (let i = idxFirstIsAfter; i < this._ranges.length; i++) {
this._ranges[i] = this._ranges[i].delta(delta);
}
if (idxFirstMightBeIntersecting === idxFirstIsAfter) {
// Nothing overlapped: insert the resized span as a new range (unless empty).
const newRange = new OffsetRange(range.start, range.start + newLength);
if (!newRange.isEmpty) {
this._ranges.splice(idxFirstMightBeIntersecting, 0, newRange);
}
} else {
// Merge all overlapping/touching ranges with the resized span into one.
const start = Math.min(range.start, this._ranges[idxFirstMightBeIntersecting].start);
const endEx = Math.max(range.endExclusive, this._ranges[idxFirstIsAfter - 1].endExclusive);
const newRange = new OffsetRange(start, endEx + delta);
if (!newRange.isEmpty) {
this._ranges.splice(idxFirstMightBeIntersecting, idxFirstIsAfter - idxFirstMightBeIntersecting, newRange);
} else {
this._ranges.splice(idxFirstMightBeIntersecting, idxFirstIsAfter - idxFirstMightBeIntersecting);
}
}
}
toString() {
return this._ranges.map(r => r.toString()).join(' + ');
}
}
function initializeTokenization(textModel: TextModel, tokenizationPart: TokenizationTextModelPart): [ITokenizationSupport, IState] | [null, null] {
if (textModel.isTooLargeForTokenization()) {
return [null, null];
@ -491,7 +544,7 @@ class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
constructor(
private readonly _textModel: ITextModel,
private readonly _stateStore: TokenizationStateStore,
private readonly _tokenizerWithStateStore: TokenizerWithStateStore,
private readonly _backgroundTokenStore: IBackgroundTokenizationStore,
private readonly _languageIdCodec: ILanguageIdCodec,
) {
@ -575,18 +628,18 @@ class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
}
private _hasLinesToTokenize(): boolean {
if (!this._stateStore) {
if (!this._tokenizerWithStateStore) {
return false;
}
return this._stateStore.invalidLineStartIndex < this._textModel.getLineCount();
return !this._tokenizerWithStateStore.store.isTokenizationComplete();
}
private _tokenizeOneInvalidLine(builder: ContiguousMultilineTokensBuilder): number {
if (!this._stateStore || !this._hasLinesToTokenize()) {
if (!this._tokenizerWithStateStore || !this._hasLinesToTokenize()) {
return this._textModel.getLineCount() + 1;
}
const lineNumber = this._stateStore.invalidLineStartIndex + 1;
this._stateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
const lineNumber = this._tokenizerWithStateStore.store.getFirstInvalidEndStateLineNumber()!;
this._tokenizerWithStateStore.updateTokensUntilLine(this._textModel, this._languageIdCodec, builder, lineNumber);
return lineNumber;
}
@ -594,14 +647,12 @@ class DefaultBackgroundTokenizer implements IBackgroundTokenizer {
if (this._isDisposed) {
return;
}
if (this._stateStore.isTokenizationComplete(this._textModel)) {
if (this._tokenizerWithStateStore.store.isTokenizationComplete()) {
this._backgroundTokenStore.backgroundTokenizationFinished();
}
}
requestTokens(startLineNumber: number, endLineNumberExclusive: number): void {
for (let lineNumber = startLineNumber; lineNumber < endLineNumberExclusive; lineNumber++) {
this._stateStore.markMustBeTokenized(lineNumber - 1);
}
this._tokenizerWithStateStore.store.invalidateEndStateRange(new LineRange(startLineNumber, endLineNumberExclusive));
}
}

View file

@ -19,9 +19,10 @@ suite('Editor Model - Model Modes 1', () => {
let calledFor: string[] = [];
function checkAndClear(arr: string[]) {
assert.deepStrictEqual(calledFor, arr);
function getAndClear(): string[] {
const result = calledFor;
calledFor = [];
return result;
}
const tokenizationSupport: languages.ITokenizationSupport = {
@ -57,98 +58,98 @@ suite('Editor Model - Model Modes 1', () => {
test('model calls syntax highlighter 1', () => {
thisModel.tokenization.forceTokenization(1);
checkAndClear(['1']);
assert.deepStrictEqual(getAndClear(), ['1']);
});
test('model calls syntax highlighter 2', () => {
thisModel.tokenization.forceTokenization(2);
checkAndClear(['1', '2']);
assert.deepStrictEqual(getAndClear(), ['1', '2']);
thisModel.tokenization.forceTokenization(2);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
test('model caches states', () => {
thisModel.tokenization.forceTokenization(1);
checkAndClear(['1']);
assert.deepStrictEqual(getAndClear(), ['1']);
thisModel.tokenization.forceTokenization(2);
checkAndClear(['2']);
assert.deepStrictEqual(getAndClear(), ['2']);
thisModel.tokenization.forceTokenization(3);
checkAndClear(['3']);
assert.deepStrictEqual(getAndClear(), ['3']);
thisModel.tokenization.forceTokenization(4);
checkAndClear(['4']);
assert.deepStrictEqual(getAndClear(), ['4']);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['5']);
assert.deepStrictEqual(getAndClear(), ['5']);
thisModel.tokenization.forceTokenization(5);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
test('model invalidates states for one line insert', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', '2', '3', '4', '5']);
assert.deepStrictEqual(getAndClear(), ['1', '2', '3', '4', '5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 1), '-')]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['-']);
assert.deepStrictEqual(getAndClear(), ['-']);
thisModel.tokenization.forceTokenization(5);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
test('model invalidates states for many lines insert', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', '2', '3', '4', '5']);
assert.deepStrictEqual(getAndClear(), ['1', '2', '3', '4', '5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 1), '0\n-\n+')]);
assert.strictEqual(thisModel.getLineCount(), 7);
thisModel.tokenization.forceTokenization(7);
checkAndClear(['0', '-', '+']);
assert.deepStrictEqual(getAndClear(), ['0', '-', '+']);
thisModel.tokenization.forceTokenization(7);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
test('model invalidates states for one new line', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', '2', '3', '4', '5']);
assert.deepStrictEqual(getAndClear(), ['1', '2', '3', '4', '5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 2), '\n')]);
thisModel.applyEdits([EditOperation.insert(new Position(2, 1), 'a')]);
thisModel.tokenization.forceTokenization(6);
checkAndClear(['1', 'a']);
assert.deepStrictEqual(getAndClear(), ['1', 'a']);
});
test('model invalidates states for one line delete', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', '2', '3', '4', '5']);
assert.deepStrictEqual(getAndClear(), ['1', '2', '3', '4', '5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 2), '-')]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1']);
assert.deepStrictEqual(getAndClear(), ['1']);
thisModel.applyEdits([EditOperation.delete(new Range(1, 1, 1, 2))]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['-']);
assert.deepStrictEqual(getAndClear(), ['-']);
thisModel.tokenization.forceTokenization(5);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
test('model invalidates states for many lines delete', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', '2', '3', '4', '5']);
assert.deepStrictEqual(getAndClear(), ['1', '2', '3', '4', '5']);
thisModel.applyEdits([EditOperation.delete(new Range(1, 1, 3, 1))]);
thisModel.tokenization.forceTokenization(3);
checkAndClear(['3']);
assert.deepStrictEqual(getAndClear(), ['3']);
thisModel.tokenization.forceTokenization(3);
checkAndClear([]);
assert.deepStrictEqual(getAndClear(), []);
});
});
@ -172,9 +173,10 @@ suite('Editor Model - Model Modes 2', () => {
let calledFor: string[] = [];
function checkAndClear(arr: string[]): void {
assert.deepStrictEqual(calledFor, arr);
function getAndClear(): string[] {
const actual = calledFor;
calledFor = [];
return actual;
}
const tokenizationSupport: languages.ITokenizationSupport = {
@ -209,54 +211,54 @@ suite('Editor Model - Model Modes 2', () => {
test('getTokensForInvalidLines one text insert', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 6), '-')]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1-', 'Line2']);
assert.deepStrictEqual(getAndClear(), ['Line1-', 'Line2']);
});
test('getTokensForInvalidLines two text insert', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([
EditOperation.insert(new Position(1, 6), '-'),
EditOperation.insert(new Position(3, 6), '-')
]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1-', 'Line2', 'Line3-', 'Line4']);
assert.deepStrictEqual(getAndClear(), ['Line1-', 'Line2', 'Line3-', 'Line4']);
});
test('getTokensForInvalidLines one multi-line text insert, one small text insert', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([EditOperation.insert(new Position(1, 6), '\nNew line\nAnother new line')]);
thisModel.applyEdits([EditOperation.insert(new Position(5, 6), '-')]);
thisModel.tokenization.forceTokenization(7);
checkAndClear(['Line1', 'New line', 'Another new line', 'Line2', 'Line3-', 'Line4']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'New line', 'Another new line', 'Line2', 'Line3-', 'Line4']);
});
test('getTokensForInvalidLines one delete text', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([EditOperation.delete(new Range(1, 1, 1, 5))]);
thisModel.tokenization.forceTokenization(5);
checkAndClear(['1', 'Line2']);
assert.deepStrictEqual(getAndClear(), ['1', 'Line2']);
});
test('getTokensForInvalidLines one line delete text', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([EditOperation.delete(new Range(1, 1, 2, 1))]);
thisModel.tokenization.forceTokenization(4);
checkAndClear(['Line2']);
assert.deepStrictEqual(getAndClear(), ['Line2']);
});
test('getTokensForInvalidLines multiple lines delete text', () => {
thisModel.tokenization.forceTokenization(5);
checkAndClear(['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
assert.deepStrictEqual(getAndClear(), ['Line1', 'Line2', 'Line3', 'Line4', 'Line5']);
thisModel.applyEdits([EditOperation.delete(new Range(1, 1, 3, 3))]);
thisModel.tokenization.forceTokenization(3);
checkAndClear(['ne3', 'Line4']);
assert.deepStrictEqual(getAndClear(), ['ne3', 'Line4']);
});
});

View file

@ -0,0 +1,96 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { OffsetRange } from 'vs/editor/common/core/offsetRange';
import { RangePriorityQueueImpl } from 'vs/editor/common/model/textModelTokens';
suite('RangePriorityQueueImpl', () => {
	test('addRange', () => {
		const sorted: OffsetRange[] = [];
		// Assert the current state of the sorted range list as strings.
		const expectRanges = (expected: string[]): void => {
			assert.deepStrictEqual(sorted.map(r => r.toString()), expected);
		};

		// Disjoint ranges are inserted in order.
		OffsetRange.addRange(new OffsetRange(0, 2), sorted);
		OffsetRange.addRange(new OffsetRange(10, 13), sorted);
		OffsetRange.addRange(new OffsetRange(20, 24), sorted);
		expectRanges(['[0, 2)', '[10, 13)', '[20, 24)']);

		// A range touching two neighbours merges them into one.
		OffsetRange.addRange(new OffsetRange(2, 10), sorted);
		expectRanges(['[0, 13)', '[20, 24)']);

		// A range that only fills a gap (no touch-merge) stays separate.
		OffsetRange.addRange(new OffsetRange(14, 19), sorted);
		expectRanges(['[0, 13)', '[14, 19)', '[20, 24)']);

		// Overlapping several entries collapses them all.
		OffsetRange.addRange(new OffsetRange(10, 22), sorted);
		expectRanges(['[0, 24)']);

		// A superset replaces everything it covers.
		OffsetRange.addRange(new OffsetRange(-1, 29), sorted);
		expectRanges(['[-1, 29)']);

		// A disjoint negative range is inserted before the rest.
		OffsetRange.addRange(new OffsetRange(-10, -5), sorted);
		expectRanges(['[-10, -5)', '[-1, 29)']);
	});

	test('addRangeAndResize', () => {
		const queue = new RangePriorityQueueImpl();
		// Assert the queue's current ranges as strings.
		const expectQueue = (expected: string[]): void => {
			assert.deepStrictEqual(queue.getRanges().map(r => r.toString()), expected);
		};

		queue.addRange(new OffsetRange(0, 20));
		queue.addRange(new OffsetRange(100, 120));
		queue.addRange(new OffsetRange(200, 220));

		// disjoint
		queue.addRangeAndResize(new OffsetRange(25, 27), 0);
		expectQueue(['[0, 20)', '[98, 118)', '[198, 218)']);

		queue.addRangeAndResize(new OffsetRange(19, 20), 0);
		expectQueue(['[0, 19)', '[97, 117)', '[197, 217)']);

		queue.addRangeAndResize(new OffsetRange(19, 97), 0);
		expectQueue(['[0, 39)', '[119, 139)']);

		queue.addRangeAndResize(new OffsetRange(-1000, 1000), 0);
		expectQueue([]);
	});
});

View file

@ -4,7 +4,6 @@
*--------------------------------------------------------------------------------------------*/
import { compareBy, numberComparator } from 'vs/base/common/arrays';
import { ContiguousGrowingArray } from 'vs/editor/common/model/textModelTokens';
export class ArrayEdit {
public readonly edits: readonly SingleArrayEdit[];
@ -18,14 +17,6 @@ export class ArrayEdit {
this.edits = edits.slice().sort(compareBy(c => c.offset, numberComparator));
}
applyTo(array: ContiguousGrowingArray<any>): void {
for (let i = this.edits.length - 1; i >= 0; i--) {
const c = this.edits[i];
array.delete(c.offset, c.length);
array.insert(c.offset, c.newLength);
}
}
applyToArray(array: any[]): void {
for (let i = this.edits.length - 1; i >= 0; i--) {
const c = this.edits[i];

View file

@ -6,10 +6,9 @@
import { URI } from 'vs/base/common/uri';
import { LanguageId } from 'vs/editor/common/encodedTokenAttributes';
import { IModelChangedEvent, MirrorTextModel } from 'vs/editor/common/model/mirrorTextModel';
import { TokenizationStateStore } from 'vs/editor/common/model/textModelTokens';
import { TokenizerWithStateStore } from 'vs/editor/common/model/textModelTokens';
import { diffStateStacksRefEq, StateStack, StackDiff } from 'vscode-textmate';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { countEOL } from 'vs/editor/common/core/eolCounter';
import { LineTokens } from 'vs/editor/common/tokens/lineTokens';
import { TextMateTokenizationSupport } from 'vs/workbench/services/textMate/browser/tokenizationSupport/textMateTokenizationSupport';
import { StateDeltas } from 'vs/workbench/services/textMate/browser/workerHost/textMateWorkerHost';
@ -17,13 +16,11 @@ import { RunOnceScheduler } from 'vs/base/common/async';
import { TextMateTokenizationWorker } from './textMate.worker';
import { observableValue } from 'vs/base/common/observable';
import { TokenizationSupportWithLineLimit } from 'vs/workbench/services/textMate/browser/tokenizationSupport/tokenizationSupportWithLineLimit';
import { LineRange } from 'vs/editor/common/core/lineRange';
export class TextMateWorkerModel extends MirrorTextModel {
private _tokenizationStateStore: TokenizationStateStore | null;
private readonly _worker: TextMateTokenizationWorker;
private _languageId: string;
private _encodedLanguageId: LanguageId;
private _isDisposed: boolean;
private _tokenizationStateStore: TokenizerWithStateStore<StateStack> | null = null;
private _isDisposed: boolean = false;
private readonly _maxTokenizationLineLength = observableValue(
'_maxTokenizationLineLength',
-1
@ -34,17 +31,12 @@ export class TextMateWorkerModel extends MirrorTextModel {
lines: string[],
eol: string,
versionId: number,
worker: TextMateTokenizationWorker,
languageId: string,
encodedLanguageId: LanguageId,
private readonly _worker: TextMateTokenizationWorker,
private _languageId: string,
private _encodedLanguageId: LanguageId,
maxTokenizationLineLength: number,
) {
super(uri, lines, eol, versionId);
this._tokenizationStateStore = null;
this._worker = worker;
this._languageId = languageId;
this._encodedLanguageId = encodedLanguageId;
this._isDisposed = false;
this._maxTokenizationLineLength.set(maxTokenizationLineLength, undefined);
this._resetTokenization();
}
@ -67,32 +59,20 @@ export class TextMateWorkerModel extends MirrorTextModel {
override onEvents(e: IModelChangedEvent): void {
super.onEvents(e);
if (this._tokenizationStateStore) {
// Changes are sorted in descending order
for (let i = 0; i < e.changes.length; i++) {
const change = e.changes[i];
const [eolCount] = countEOL(change.text);
this._tokenizationStateStore.applyEdits(change.range, eolCount);
}
this._tokenizationStateStore.store.acceptChanges(e.changes);
}
this.tokenizeDebouncer.schedule();
}
public acceptMaxTokenizationLineLength(
maxTokenizationLineLength: number
): void {
public acceptMaxTokenizationLineLength(maxTokenizationLineLength: number): void {
this._maxTokenizationLineLength.set(maxTokenizationLineLength, undefined);
}
public retokenize(startLineNumber: number, endLineNumberExclusive: number) {
if (this._tokenizationStateStore) {
for (
let lineNumber = startLineNumber;
lineNumber < endLineNumberExclusive;
lineNumber++
) {
this._tokenizationStateStore.markMustBeTokenized(lineNumber - 1);
}
this._tokenizationStateStore.store.invalidateEndStateRange(new LineRange(startLineNumber, endLineNumberExclusive));
this.tokenizeDebouncer.schedule();
}
}
@ -118,10 +98,7 @@ export class TextMateWorkerModel extends MirrorTextModel {
new TextMateTokenizationSupport(r.grammar, r.initialState, false),
this._maxTokenizationLineLength
);
this._tokenizationStateStore = new TokenizationStateStore(
tokenizationSupport,
tokenizationSupport.getInitialState()
);
this._tokenizationStateStore = new TokenizerWithStateStore(this._lines.length, tokenizationSupport);
} else {
this._tokenizationStateStore = null;
}
@ -137,52 +114,30 @@ export class TextMateWorkerModel extends MirrorTextModel {
const startTime = new Date().getTime();
while (true) {
const builder = new ContiguousMultilineTokensBuilder();
const lineCount = this._lines.length;
let tokenizedLines = 0;
const tokenBuilder = new ContiguousMultilineTokensBuilder();
const stateDeltaBuilder = new StateDeltaBuilder();
// Validate all states up to and including endLineIndex
while (this._tokenizationStateStore.invalidLineStartIndex < lineCount) {
const lineIndex = this._tokenizationStateStore.invalidLineStartIndex;
tokenizedLines++;
// TODO don't spam the renderer
if (tokenizedLines > 200) {
while (true) {
const lineNumberToTokenize = this._tokenizationStateStore.store.getFirstInvalidEndStateLineNumber();
if (lineNumberToTokenize === null || tokenizedLines > 200) {
break;
}
const text = this._lines[lineIndex];
tokenizedLines++;
const lineStartState = this._tokenizationStateStore.getBeginState(
lineIndex
) as StateStack;
const tokenizeResult =
this._tokenizationStateStore.tokenizationSupport.tokenizeEncoded(
text,
true,
lineStartState
);
if (
this._tokenizationStateStore.setEndState(
lineCount,
lineIndex,
tokenizeResult.endState
)
) {
const delta = diffStateStacksRefEq(
lineStartState,
tokenizeResult.endState as StateStack
);
stateDeltaBuilder.setState(lineIndex + 1, delta);
const text = this._lines[lineNumberToTokenize - 1];
const lineStartState = this._tokenizationStateStore.getStartState(lineNumberToTokenize)!;
const r = this._tokenizationStateStore.tokenizationSupport.tokenizeEncoded(text, true, lineStartState);
if (this._tokenizationStateStore.store.setEndState(lineNumberToTokenize, r.endState as StateStack)) {
const delta = diffStateStacksRefEq(lineStartState, r.endState as StateStack);
stateDeltaBuilder.setState(lineNumberToTokenize, delta);
} else {
stateDeltaBuilder.setState(lineIndex + 1, null);
stateDeltaBuilder.setState(lineNumberToTokenize, null);
}
LineTokens.convertToEndOffset(tokenizeResult.tokens, text.length);
builder.add(lineIndex + 1, tokenizeResult.tokens);
LineTokens.convertToEndOffset(r.tokens, text.length);
tokenBuilder.add(lineNumberToTokenize, r.tokens);
const deltaMs = new Date().getTime() - startTime;
if (deltaMs > 20) {
@ -199,7 +154,7 @@ export class TextMateWorkerModel extends MirrorTextModel {
this._worker.setTokensAndStates(
this._uri,
this._versionId,
builder.serialize(),
tokenBuilder.serialize(),
stateDeltas
);

View file

@ -10,7 +10,7 @@ import { LineRange } from 'vs/editor/common/core/lineRange';
import { Range } from 'vs/editor/common/core/range';
import { IBackgroundTokenizationStore, ILanguageIdCodec } from 'vs/editor/common/languages';
import { ITextModel } from 'vs/editor/common/model';
import { ContiguousGrowingArray } from 'vs/editor/common/model/textModelTokens';
import { TokenizationStateStore } from 'vs/editor/common/model/textModelTokens';
import { IModelContentChange, IModelContentChangedEvent } from 'vs/editor/common/textModelEvents';
import { ContiguousMultilineTokensBuilder } from 'vs/editor/common/tokens/contiguousMultilineTokensBuilder';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
@ -26,7 +26,7 @@ export class TextMateWorkerTokenizerController extends Disposable {
* These states will eventually equal the worker states.
* _states[i] stores the state at the end of line number i+1.
*/
private readonly _states = new ContiguousGrowingArray<StateStack | null>(null);
private readonly _states = new TokenizationStateStore<StateStack>();
private readonly _loggingEnabled = observableConfigValue('editor.experimental.asyncTokenizationLogging', false, this._configurationService);
@ -123,8 +123,7 @@ export class TextMateWorkerTokenizerController extends Disposable {
this._pendingChanges[0].versionId <= versionId
) {
const change = this._pendingChanges.shift()!;
const op = lineArrayEditFromModelContentChange(change.changes);
op.applyTo(this._states);
this._states.acceptChanges(change.changes);
}
if (this._pendingChanges.length > 0) {
@ -180,15 +179,15 @@ export class TextMateWorkerTokenizerController extends Disposable {
// Apply state deltas to _states and _backgroundTokenizationStore
for (const d of stateDeltas) {
let prevState = d.startLineNumber <= 1 ? this._initialState : this._states.get(d.startLineNumber - 1 - 1);
let prevState = d.startLineNumber <= 1 ? this._initialState : this._states.getEndState(d.startLineNumber - 1);
for (let i = 0; i < d.stateDeltas.length; i++) {
const delta = d.stateDeltas[i];
let state: StateStack;
if (delta) {
state = applyStateStackDiff(prevState, delta)!;
this._states.set(d.startLineNumber + i - 1, state);
this._states.setEndState(d.startLineNumber + i, state);
} else {
state = this._states.get(d.startLineNumber + i - 1)!;
state = this._states.getEndState(d.startLineNumber + i)!;
}
const offset = curToFutureTransformerStates.transform(d.startLineNumber + i - 1);