Initial support for language detection (#128708)

* initial language detection

* add configuration and extra event on UntitledTextEditorService

* check isWeb, use better mode API, include model in unpacked

* typo in path
This commit is contained in:
Tyler James Leonhardt 2021-07-16 16:43:23 -07:00 committed by GitHub
parent 27858a0faf
commit 8f88009816
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 218 additions and 2 deletions

View file

@ -577,7 +577,8 @@
"vscode-textmate",
"vscode-oniguruma",
"iconv-lite-umd",
"jschardet"
"jschardet",
"@vscode/vscode-languagedetection"
]
},
{

View file

@ -232,7 +232,10 @@ function packageTask(platform, arch, sourceFolderName, destinationFolderName, op
'**/node-pty/build/Release/*',
'**/node-pty/lib/worker/conoutSocketWorker.js',
'**/node-pty/lib/shared/conout.js',
'**/*.wasm'
'**/*.wasm',
// For language detection
'**/model.json',
'**/group1-shard1of1.bin'
], 'node_modules.asar'));
let all = es.merge(

View file

@ -254,6 +254,10 @@
"name": "vs/workbench/contrib/views",
"project": "vscode-workbench"
},
{
"name": "vs/workbench/contrib/languageDetection",
"project": "vscode-workbench"
},
{
"name": "vs/workbench/services/actions",
"project": "vscode-workbench"

View file

@ -57,6 +57,7 @@
"extensions-ci": "node --max_old_space_size=4095 ./node_modules/gulp/bin/gulp.js extensions-ci"
},
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"applicationinsights": "1.0.8",
"chokidar": "3.5.1",
"eslint-plugin-header": "3.1.1",

View file

@ -3,6 +3,7 @@
"version": "0.0.0",
"private": true,
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"applicationinsights": "1.0.8",
"chokidar": "3.5.1",
"cookie": "^0.4.0",

View file

@ -3,6 +3,7 @@
"version": "0.0.0",
"private": true,
"dependencies": {
"@vscode/vscode-languagedetection": "1.0.12",
"iconv-lite-umd": "0.6.8",
"jschardet": "3.0.0",
"tas-client-umd": "0.1.4",

View file

@ -2,6 +2,11 @@
# yarn lockfile v1
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
iconv-lite-umd@0.6.8:
version "0.6.8"
resolved "https://registry.yarnpkg.com/iconv-lite-umd/-/iconv-lite-umd-0.6.8.tgz#5ad310ec126b260621471a2d586f7f37b9958ec0"

View file

@ -7,6 +7,11 @@
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82"
integrity sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
agent-base@4:
version "4.2.0"
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-4.2.0.tgz#9838b5c3392b962bad031e6a4c5e1024abec45ce"

View file

@ -41,6 +41,7 @@
}
}),
paths: {
'@vscode/vscode-languagedetection': `${window.location.origin}/static/remote/web/node_modules/@vscode/vscode-languagedetection/dist/lib/index.js`,
'vscode-textmate': `${window.location.origin}/static/remote/web/node_modules/vscode-textmate/release/main`,
'vscode-oniguruma': `${window.location.origin}/static/remote/web/node_modules/vscode-oniguruma/release/main`,
'xterm': `${window.location.origin}/static/remote/web/node_modules/xterm/lib/xterm.js`,

View file

@ -40,6 +40,7 @@
}
}),
paths: {
'@vscode/vscode-languagedetection': `${window.location.origin}/static/node_modules/@vscode/vscode-languagedetection/dist/lib/index.js`,
'vscode-textmate': `${window.location.origin}/static/node_modules/vscode-textmate/release/main`,
'vscode-oniguruma': `${window.location.origin}/static/node_modules/vscode-oniguruma/release/main`,
'xterm': `${window.location.origin}/static/node_modules/xterm/lib/xterm.js`,

View file

@ -0,0 +1,31 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { localize } from 'vs/nls';
import { IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions } from 'vs/workbench/common/contributions';
import { Registry } from 'vs/platform/registry/common/platform';
import { IConfigurationRegistry, Extensions as ConfigurationExtensions, ConfigurationScope } from 'vs/platform/configuration/common/configurationRegistry';
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
import { LanguageDetectionService } from 'vs/workbench/services/languageDetection/browser/languageDetectionService';
// Contribute the `languageDetection.enabled` setting to the configuration registry
const settingsRegistry = Registry.as<IConfigurationRegistry>(ConfigurationExtensions.Configuration);
const sectionTitle = localize('languageDetectionConfigurationTitle', "Language Detection");
const enabledDescription = localize('languageDetection.enabled', "Experimental. Controls whether the language in an untitled text editor is automatically detected.");

settingsRegistry.registerConfiguration({
	id: 'languageDetection',
	order: 1025,
	title: sectionTitle,
	type: 'object',
	properties: {
		'languageDetection.enabled': {
			type: 'boolean',
			default: false,
			description: enabledDescription,
			scope: ConfigurationScope.LANGUAGE_OVERRIDABLE
		}
	}
});

// Register the detection service as a workbench contribution for the `Eventually` lifecycle phase
const workbenchContributions = Registry.as<IWorkbenchContributionsRegistry>(WorkbenchExtensions.Workbench);
workbenchContributions.registerWorkbenchContribution(LanguageDetectionService, LifecyclePhase.Eventually);

View file

@ -0,0 +1,131 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Disposable } from 'vs/base/common/lifecycle';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetection';
import { IUntitledTextEditorService } from 'vs/workbench/services/untitled/common/untitledTextEditorService';
import { FileAccess } from 'vs/base/common/network';
import type { ModelOperations } from '@vscode/vscode-languagedetection';
import { IWorkbenchEnvironmentService } from 'vs/workbench/services/environment/common/environmentService';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { IModeService } from 'vs/editor/common/services/modeService';
import { URI } from 'vs/base/common/uri';
import { isWeb } from 'vs/base/common/platform';
/**
 * Guesses the language of untitled text editors from their content, using the model
 * shipped with `@vscode/vscode-languagedetection`.
 *
 * On every content change of an untitled editor for which the `languageDetection.enabled`
 * setting is `true`, the content is run through the model and, if a language is detected
 * with enough confidence, the editor's mode is updated.
 */
export class LanguageDetectionService extends Disposable implements ILanguageDetectionService {
	/** Minimum (possibly combined) confidence a guess must reach before it is reported. */
	private static readonly expectedConfidence = 0.6;

	/**
	 * Model language ids that the model tends to confuse with each other. When the two best
	 * guesses form one of these pairs, their confidences are added up.
	 */
	private static readonly relatedLanguages = new Map<string, string>([
		['ts', 'js'],
		['js', 'ts'],
		['c', 'cpp'],
		['cpp', 'c']
	]);

	/** Set once loading the detection library failed, so that loading is not attempted again. */
	private _loadFailed = false;

	/** Lazily created by `getModelOperations`. */
	private _modelOperations: ModelOperations | undefined;

	_serviceBrand: undefined;

	constructor(
		@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
		@IModeService private readonly _modeService: IModeService,
		@IConfigurationService configurationService: IConfigurationService,
		@IUntitledTextEditorService untitledTextEditorService: IUntitledTextEditorService) {
		super();

		this._register(untitledTextEditorService.onDidChangeContent(async e => {
			// The setting is language overridable, so resolve it for the editor's current mode
			if (!configurationService.getValue<boolean>('languageDetection.enabled', { overrideIdentifier: e.getMode() })) {
				return;
			}

			const value = untitledTextEditorService.getValue(e.resource);
			if (!value) { return; }

			const lang = await this.detectLanguage(value);
			if (!lang) { return; }

			e.setMode(lang);
		}));
	}

	/**
	 * Resolves the browser URI (as a string) of a file in the `model` folder of the
	 * `@vscode/vscode-languagedetection` package.
	 *
	 * Built, non-web products read the file from `node_modules.asar.unpacked`; everything
	 * else reads it from `node_modules`.
	 */
	private _getModelFileUri(fileName: string): string {
		const nodeModulesFolder = this._environmentService.isBuilt && !isWeb
			? 'node_modules.asar.unpacked'
			: 'node_modules';

		return FileAccess.asBrowserUri(`../../../../../../${nodeModulesFolder}/@vscode/vscode-languagedetection/model/${fileName}`, require).toString(true);
	}

	/**
	 * Returns the shared `ModelOperations` instance, creating it on first use.
	 *
	 * The detection library is imported lazily. The returned promise rejects if that
	 * import fails.
	 */
	async getModelOperations(): Promise<ModelOperations> {
		if (this._modelOperations) {
			return this._modelOperations;
		}

		const { ModelOperations } = await import('@vscode/vscode-languagedetection');

		// A concurrent caller may have created the instance while the import was pending.
		// Re-check so that only one instance is ever created and registered for disposal.
		if (!this._modelOperations) {
			this._modelOperations = this._register(new ModelOperations(
				// Loader for the model description
				async () => {
					const response = await fetch(this._getModelFileUri('model.json'));
					try {
						const modelJSON = await response.json();
						return modelJSON;
					} catch (e) {
						throw new Error('Failed to parse model JSON.');
					}
				},
				// Loader for the model weights
				async () => {
					const response = await fetch(this._getModelFileUri('group1-shard1of1.bin'));
					const buffer = await response.arrayBuffer();
					return buffer;
				}
			));
		}

		return this._modelOperations;
	}

	/**
	 * Detects the language of the given text.
	 *
	 * @param content The text to analyze.
	 * @returns The mode id the mode service associates with the detected language, or
	 * `undefined` when the detection library could not be loaded, the model produced no
	 * result, the confidence is below the expected threshold, or no mode matches.
	 */
	async detectLanguage(content: string): Promise<string | undefined> {
		if (this._loadFailed) {
			return undefined;
		}

		let modelOperations: ModelOperations;
		try {
			modelOperations = await this.getModelOperations();
		} catch (e) {
			// Remember the failure so that subsequent content changes do not retry loading
			this._loadFailed = true;
			return undefined;
		}

		const modelResults = await modelOperations.runModel(content);
		// Guard against an empty result list as well: there is no best guess to read then
		if (!modelResults || modelResults.length === 0) {
			return undefined;
		}

		const bestGuess = modelResults[0];
		// May be missing when the model only produced a single result
		const runnerUp = modelResults[1];

		const languageId = bestGuess.languageId;
		let confidence = bestGuess.confidence;

		// TODO: this is the place where we can improve the results of the model with known heuristics (popular languages, etc).
		// For ts/js and c/cpp we "add" the confidence of the other language to ensure better results
		const relatedLanguageId = LanguageDetectionService.relatedLanguages.get(languageId);
		if (relatedLanguageId !== undefined && runnerUp?.languageId === relatedLanguageId) {
			confidence += runnerUp.confidence;
		}

		if (confidence < LanguageDetectionService.expectedConfidence) {
			return undefined;
		}

		// TODO: see if there's a better way to do this.
		// The model's language id is treated as a file extension to look up the matching mode
		const vscodeLanguageId = this._modeService.getModeIdByFilepathOrFirstLine(URI.file(`file.${languageId}`));
		return vscodeLanguageId ?? undefined;
	}
}

View file

@ -0,0 +1,14 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { createDecorator } from 'vs/platform/instantiation/common/instantiation';
/**
 * Service identifier for `ILanguageDetectionService`, created via `createDecorator`.
 */
export const ILanguageDetectionService = createDecorator<ILanguageDetectionService>('ILanguageDetectionService');
/**
 * A service that guesses the language of a piece of text.
 */
export interface ILanguageDetectionService {
readonly _serviceBrand: undefined;
/**
 * Detects the language of the given text.
 *
 * @param content The text to analyze.
 * @returns A promise that resolves to a language identifier, or to `undefined`
 * when no language could be determined.
 */
detectLanguage(content: string): Promise<string | undefined>;
}

View file

@ -63,6 +63,11 @@ type IInternalUntitledTextEditorOptions = IExistingUntitledTextEditorOptions & I
export interface IUntitledTextEditorModelManager {
/**
* Events for when the content of an untitled text editor changes.
*/
readonly onDidChangeContent: Event<IUntitledTextEditorModel>;
/**
* Events for when untitled text editors change (e.g. getting dirty, saved or reverted).
*/
@ -123,6 +128,9 @@ export class UntitledTextEditorService extends Disposable implements IUntitledTe
declare readonly _serviceBrand: undefined;
private readonly _onDidChangeContent = this._register(new Emitter<IUntitledTextEditorModel>());
readonly onDidChangeContent = this._onDidChangeContent.event;
private readonly _onDidChangeDirty = this._register(new Emitter<IUntitledTextEditorModel>());
readonly onDidChangeDirty = this._onDidChangeDirty.event;
@ -235,6 +243,7 @@ export class UntitledTextEditorService extends Disposable implements IUntitledTe
// Install model listeners
const modelListeners = new DisposableStore();
modelListeners.add(model.onDidChangeContent(() => this._onDidChangeContent.fire(model)));
modelListeners.add(model.onDidChangeDirty(() => this._onDidChangeDirty.fire(model)));
modelListeners.add(model.onDidChangeName(() => this._onDidChangeLabel.fire(model)));
modelListeners.add(model.onDidChangeEncoding(() => this._onDidChangeEncoding.fire(model)));

View file

@ -320,4 +320,7 @@ import 'vs/workbench/contrib/workspace/browser/workspace.contribution';
// Workspaces
import 'vs/workbench/contrib/workspaces/browser/workspaces.contribution';
// Language Detection
import 'vs/workbench/contrib/languageDetection/browser/languageDetection.contribution';
//#endregion

View file

@ -765,6 +765,11 @@
resolved "https://registry.yarnpkg.com/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz#aa58042711d6e3275dd37dc597e5d31e8c290a44"
integrity sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q==
"@vscode/vscode-languagedetection@1.0.12":
version "1.0.12"
resolved "https://registry.yarnpkg.com/@vscode/vscode-languagedetection/-/vscode-languagedetection-1.0.12.tgz#884c080257298b078fdd6dd75c35f8bd42ba83fa"
integrity sha512-tiHV6eev2TKgSdpsnVF0wD1Dtk2KqwFdk2TpPDsYdBvP5kjw2KsfSK3l6cPBWqbSdSOSkHk37XvOAhNRKzdlZg==
"@webassemblyjs/ast@1.11.0":
version "1.11.0"
resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.0.tgz#a5aa679efdc9e51707a4207139da57920555961f"