Add tokenization API

This commit is contained in:
Alex Dima 2016-12-30 23:39:52 +02:00
parent 3767bd1f46
commit 3b45a32253
4 changed files with 69 additions and 2 deletions

View file

@ -33,6 +33,8 @@ import { IContextKeyService } from 'vs/platform/contextkey/common/contextkey';
import { ICodeEditorService } from 'vs/editor/common/services/codeEditorService';
import { IEditorWorkerService } from 'vs/editor/common/services/editorWorkerService';
import { ITextModelResolverService } from 'vs/editor/common/services/resolverService';
import { IState, ITokenizationSupport, TokenizationRegistry } from 'vs/editor/common/modes';
import { NULL_STATE, nullTokenize } from 'vs/editor/common/modes/nullMode';
/**
* @internal
@ -255,6 +257,53 @@ export function colorizeModelLine(model: IModel, lineNumber: number, tabSize: nu
return Colorizer.colorizeModelLine(model, lineNumber, tabSize);
}
/**
 * A token as handed out by `tokenize`: a type name paired with the offset
 * recorded for it.
 */
export class Token {
	/**
	 * @param offset Offset stored on the token.
	 * @param type Type name stored on the token.
	 */
	constructor(
		public readonly offset: number,
		public readonly type: string
	) { }
}
/**
 * Look up the tokenization support registered for `languageId`. When the
 * registry has nothing for that language, a stand-in built on `NULL_STATE`
 * and `nullTokenize` is returned instead, so callers always get an object
 * they can call.
 * @internal
 */
function getSafeTokenizationSupport(languageId: string): ITokenizationSupport {
	const registered = TokenizationRegistry.get(languageId);
	if (!registered) {
		// Nothing registered: delegate to the null mode helpers.
		const fallback: ITokenizationSupport = {
			getInitialState: () => NULL_STATE,
			tokenize: (line: string, state: IState, deltaOffset: number) => nullTokenize(languageId, line, state, deltaOffset),
			tokenize3: undefined,
		};
		return fallback;
	}
	return registered;
}
/**
 * Tokenize `text` using language `languageId`.
 *
 * The text is split into lines at `\r\n`, `\r` or `\n`. Lines are tokenized
 * in order, and the end state of each line is used as the start state of the
 * following one.
 * @returns One array of tokens for every line of `text`.
 */
export function tokenize(text: string, languageId: string): Token[][] {
	// Needed in order to get the mode registered for subsequent look-ups
	StaticServices.modeService.get().getOrCreateMode(languageId);

	const support = getSafeTokenizationSupport(languageId);
	const lineTexts = text.split(/\r\n|\r|\n/);
	let currentState = support.getInitialState();

	return lineTexts.map((lineText) => {
		const lineResult = support.tokenize(lineText, currentState, 0);
		const lineTokens = lineResult.tokens.map((token) => new Token(token.startIndex, token.type));
		// Carry the tokenizer state over to the next line.
		currentState = lineResult.endState;
		return lineTokens;
	});
}
/**
* @internal
*/
@ -279,6 +328,7 @@ export function createMonacoEditorAPI(): typeof monaco.editor {
colorizeElement: colorizeElement,
colorize: colorize,
colorizeModelLine: colorizeModelLine,
tokenize: tokenize,
// enums
ScrollbarVisibility: ScrollbarVisibility,
@ -306,6 +356,7 @@ export function createMonacoEditorAPI(): typeof monaco.editor {
BareFontInfo: <any>editorCommon.BareFontInfo,
FontInfo: <any>editorCommon.FontInfo,
TextModelResolvedOptions: <any>editorCommon.TextModelResolvedOptions,
Token: Token,
// vars
EditorType: editorCommon.EditorType,

View file

@ -451,7 +451,7 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
if (!rules) {
rules = monarchCommon.findRules(this._lexer, state.stack.state); // do parent matching
if (!rules) {
monarchCommon.throwError(this._lexer, 'tokenizer state is not defined: ' + state);
monarchCommon.throwError(this._lexer, 'tokenizer state is not defined: ' + state.stack.state);
}
}
@ -485,7 +485,7 @@ export class MonarchTokenizer implements modes.ITokenizationSupport {
}
if (!hasEmbeddedPopRule) {
monarchCommon.throwError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state);
monarchCommon.throwError(this._lexer, 'no rule containing nextEmbedded: "@pop" in tokenizer embedded state: ' + state.stack.state);
}
return popOffset;

View file

@ -11,6 +11,7 @@ import * as strings from 'vs/base/common/strings';
import { ModesRegistry } from 'vs/editor/common/modes/modesRegistry';
import { ILanguageExtensionPoint } from 'vs/editor/common/services/modeService';
import { LanguageId, LanguageIdentifier } from 'vs/editor/common/modes';
import { NULL_MODE_ID, NULL_LANGUAGE_IDENTIFIER } from 'vs/editor/common/modes/nullMode';
var hasOwnProperty = Object.prototype.hasOwnProperty;
@ -254,6 +255,10 @@ export class LanguagesRegistry {
}
public getLanguageIdentifier(_modeId: string | LanguageId): LanguageIdentifier {
if (_modeId === NULL_MODE_ID || _modeId === LanguageId.Null) {
return NULL_LANGUAGE_IDENTIFIER;
}
let modeId: string;
if (typeof _modeId === 'string') {
modeId = _modeId;

11
src/vs/monaco.d.ts vendored
View file

@ -853,6 +853,17 @@ declare module monaco.editor {
*/
export function colorizeModelLine(model: IModel, lineNumber: number, tabSize?: number): string;
/**
 * A token returned by `tokenize`.
 */
export class Token {
/**
 * The offset recorded for this token (presumably the index within its line at which the token starts).
 */
readonly offset: number;
/**
 * The type name of this token.
 */
readonly type: string;
constructor(offset: number, type: string);
}
/**
 * Tokenize `text` using language `languageId`.
 * The result holds one array of tokens for each line of `text`.
 */
export function tokenize(text: string, languageId: string): Token[][];
/**
* A web worker that can provide a proxy to an arbitrary file.
*/