Implement language detection for notebook cells (#147537)

* Implement language detection for notebook cells

* Add lightbulb for normal text editors as well
Clean up mapping of language IDs in neural model
Add config to opt out of language detection hints
This commit is contained in:
Jackson Kearl 2022-04-20 20:38:20 -07:00 committed by GitHub
parent 8cad47446f
commit bd95a8d451
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 358 additions and 61 deletions

View file

@ -1202,7 +1202,7 @@ export class ChangeLanguageAction extends Action {
if (resource) {
// Detect languages since we are in an untitled file
let languageId: string | undefined = withNullAsUndefined(this.languageService.guessLanguageIdByFilepathOrFirstLine(resource, textModel.getLineContent(1)));
if (!languageId) {
if (!languageId || languageId === 'unknown') {
detectedLanguage = await this.languageDetectionService.detectLanguage(resource);
languageId = detectedLanguage;
}

View file

@ -111,6 +111,19 @@ const registry = Registry.as<IConfigurationRegistry>(ConfigurationExtensions.Con
tags: ['experimental'],
description: localize('workbench.editor.preferBasedLanguageDetection', "When enabled, a language detection model that takes into account editor history will be given higher precedence."),
},
// Controls where the language detection lightbulb is offered. Consumers in this change treat
// 'always' and 'textEditors' as enabling the hint for untitled text editors, and 'always' and
// 'notebookEditors' as enabling it for notebook cells.
'workbench.editor.languageDetectionHints': {
	type: 'string',
	default: 'always',
	tags: ['experimental'],
	enum: ['always', 'notebookEditors', 'textEditors', 'never'],
	description: localize('workbench.editor.showLanguageDetectionHints', "When enabled, shows a status bar quick fix when the editor language doesn't match detected content language."),
	// One description per entry of `enum`, in the same order.
	enumDescriptions: [
		localize('workbench.editor.showLanguageDetectionHints.always', "Show language detection quick fixes in both notebooks and untitled editors"),
		localize('workbench.editor.showLanguageDetectionHints.notebook', "Only show language detection quick fixes in notebooks"),
		localize('workbench.editor.showLanguageDetectionHints.editors', "Only show language detection quick fixes in untitled editors"),
		localize('workbench.editor.showLanguageDetectionHints.never', "Never show language quick fixes"),
	]
},
'workbench.editor.tabCloseButton': {
'type': 'string',
'enum': ['left', 'right', 'off'],

View file

@ -0,0 +1,148 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { DisposableStore } from 'vs/base/common/lifecycle';
import { getCodeEditor } from 'vs/editor/browser/editorBrowser';
import { localize } from 'vs/nls';
import { Registry } from 'vs/platform/registry/common/platform';
import { IWorkbenchContributionsRegistry, Extensions as WorkbenchExtensions, IWorkbenchContribution } from 'vs/workbench/common/contributions';
import { IEditorService } from 'vs/workbench/services/editor/common/editorService';
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
import { IStatusbarEntry, IStatusbarEntryAccessor, IStatusbarService, StatusbarAlignment } from 'vs/workbench/services/statusbar/browser/statusbar';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
import { ThrottledDelayer } from 'vs/base/common/async';
import { ILanguageService } from 'vs/editor/common/languages/language';
import { IKeybindingService } from 'vs/platform/keybinding/common/keybinding';
import { ServicesAccessor } from 'vs/editor/browser/editorExtensions';
import { registerAction2, Action2 } from 'vs/platform/actions/common/actions';
import { INotificationService } from 'vs/platform/notification/common/notification';
import { ContextKeyExpr } from 'vs/platform/contextkey/common/contextkey';
import { KeybindingWeight } from 'vs/platform/keybinding/common/keybindingsRegistry';
import { NOTEBOOK_EDITOR_EDITABLE } from 'vs/workbench/contrib/notebook/common/notebookContextKeys';
import { KeyCode, KeyMod } from 'vs/base/common/keyCodes';
import { EditorContextKeys } from 'vs/editor/common/editorContextKeys';
import { Schemas } from 'vs/base/common/network';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
// Command id shared by the status bar lightbulb entry and the action registered at the bottom of this file.
const detectLanguageCommandId = 'editor.detectLanguage';
/**
 * Workbench contribution that adds a lightbulb entry next to the language mode in the
 * status bar whenever the content-based language detection disagrees with the language
 * currently set on the active untitled text editor. Activating the entry runs the
 * `editor.detectLanguage` command.
 */
class LanguageDetectionStatusContribution implements IWorkbenchContribution {

	private static readonly _id = 'status.languageDetectionStatus';

	private readonly _disposables = new DisposableStore();
	/** The status bar entry currently shown, if any. */
	private _combinedEntry?: IStatusbarEntryAccessor;
	/** All update requests are funnelled through this delayer before `_doUpdate` runs. */
	private _delayer = new ThrottledDelayer(1000);
	/** Listeners attached to the active editor; cleared at the start of every `_doUpdate`. */
	private _renderDisposables = new DisposableStore();

	constructor(
		@ILanguageDetectionService private readonly _languageDetectionService: ILanguageDetectionService,
		@IStatusbarService private readonly _statusBarService: IStatusbarService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@IEditorService private readonly _editorService: IEditorService,
		@ILanguageService private readonly _languageService: ILanguageService,
		@IKeybindingService private readonly _keybindingService: IKeybindingService,
	) {
		_editorService.onDidActiveEditorChange(() => this._update(true), this, this._disposables);
		this._update(false);
	}

	dispose(): void {
		this._disposables.dispose();
		this._delayer.dispose();
		this._combinedEntry?.dispose();
		this._renderDisposables.dispose();
	}

	/**
	 * Schedules a refresh of the status bar entry.
	 *
	 * @param clear When true, the current entry is removed immediately instead of staying
	 * visible until the scheduled refresh has run.
	 */
	private _update(clear: boolean): void {
		if (clear) {
			this._clearEntry();
		}
		this._delayer.trigger(() => this._doUpdate());
	}

	/** Removes the status bar entry, if one is currently shown. */
	private _clearEntry(): void {
		this._combinedEntry?.dispose();
		this._combinedEntry = undefined;
	}

	/**
	 * Re-attaches listeners to the active code editor and shows, updates or removes the
	 * lightbulb entry depending on the configuration and the detection result.
	 */
	private async _doUpdate(): Promise<void> {
		const editor = getCodeEditor(this._editorService.activeTextEditorControl);

		this._renderDisposables.clear();

		// update when editor language or content changes
		editor?.onDidChangeModelLanguage(() => this._update(true), this, this._renderDisposables);
		editor?.onDidChangeModelContent(() => this._update(false), this, this._renderDisposables);

		const editorModel = editor?.getModel();
		const enablementConfig = this._configurationService.getValue('workbench.editor.languageDetectionHints');
		const enabled = enablementConfig === 'always' || enablementConfig === 'textEditors';

		// The hint is only offered for untitled editors that have a language id.
		if (!editorModel || !enabled || editorModel.uri.scheme !== Schemas.untitled || !editorModel.getLanguageId()) {
			this._clearEntry();
			return;
		}

		const lang = await this._languageDetectionService.detectLanguage(editorModel.uri);

		// Detection is asynchronous: if the active editor (or its model) changed while it
		// was running, the result belongs to a different document and must not be shown.
		const activeModel = getCodeEditor(this._editorService.activeTextEditorControl)?.getModel();
		if (activeModel !== editorModel) {
			this._clearEntry();
			return;
		}

		// Pairs of (current language -> detected language) that should not produce a hint.
		const skip: Record<string, string | undefined> = { 'jsonc': 'json' };
		// Read the language again after the await, it may have changed in the meantime.
		const existing = editorModel.getLanguageId();
		if (!lang || lang === existing || skip[existing] === lang) {
			this._clearEntry();
			return;
		}

		const detectedName = this._languageService.getLanguageName(lang) || lang;
		let tooltip = localize('status.autoDetectLanguage', "Accept Detected Language: {0}", detectedName);
		const label = this._keybindingService.lookupKeybinding(detectLanguageCommandId)?.getLabel();
		if (label) {
			tooltip += ` (${label})`;
		}

		const props: IStatusbarEntry = {
			name: localize('langDetection.name', "Language Detection"),
			// Use the same display name as the tooltip so both announce the language consistently.
			ariaLabel: localize('langDetection.aria', "Change to Detected Language: {0}", detectedName),
			tooltip,
			command: detectLanguageCommandId,
			text: '$(lightbulb-autofix)',
		};

		if (this._combinedEntry) {
			this._combinedEntry.update(props);
		} else {
			this._combinedEntry = this._statusBarService.addEntry(props, LanguageDetectionStatusContribution._id, StatusbarAlignment.RIGHT, { id: 'status.editor.mode', alignment: StatusbarAlignment.RIGHT, compact: true });
		}
	}
}
// Register the status bar contribution with the workbench contributions registry for the `Restored` lifecycle phase.
Registry.as<IWorkbenchContributionsRegistry>(WorkbenchExtensions.Workbench).registerWorkbenchContribution(LanguageDetectionStatusContribution, LifecyclePhase.Restored);
registerAction2(class extends Action2 {

	constructor() {
		super({
			id: detectLanguageCommandId,
			title: localize('detectlang', 'Detect Language from Content'),
			f1: true,
			precondition: ContextKeyExpr.and(NOTEBOOK_EDITOR_EDITABLE.toNegated(), EditorContextKeys.editorTextFocus),
			keybinding: { primary: KeyCode.KeyE | KeyMod.CtrlCmd, weight: KeybindingWeight.WorkbenchContrib }
		});
	}

	/**
	 * Runs language detection on the active code editor's model and applies the detected
	 * language to it. Shows a warning notification when nothing could be detected, and
	 * does nothing when there is no active code editor with a model.
	 */
	async run(accessor: ServicesAccessor): Promise<void> {
		const editorService = accessor.get(IEditorService);
		const languageDetectionService = accessor.get(ILanguageDetectionService);
		const codeEditor = getCodeEditor(editorService.activeTextEditorControl);
		const notificationService = accessor.get(INotificationService);

		if (!codeEditor) {
			return;
		}

		const resource = codeEditor.getModel()?.uri;
		if (!resource) {
			return;
		}

		const detected = await languageDetectionService.detectLanguage(resource);
		if (!detected) {
			notificationService.warn(localize('noDetection', "Unable to detect editor language"));
			return;
		}

		codeEditor.getModel()?.setMode(detected);
	}
});

View file

@ -3,18 +3,23 @@
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import { Delayer } from 'vs/base/common/async';
import { CancellationToken } from 'vs/base/common/cancellation';
import { Disposable } from 'vs/base/common/lifecycle';
import { URI } from 'vs/base/common/uri';
import { ILanguageService } from 'vs/editor/common/languages/language';
import { localize } from 'vs/nls';
import { IConfigurationService } from 'vs/platform/configuration/common/configuration';
import { IInstantiationService } from 'vs/platform/instantiation/common/instantiation';
import { IKeybindingService } from 'vs/platform/keybinding/common/keybinding';
import { Registry } from 'vs/platform/registry/common/platform';
import { Extensions as WorkbenchExtensions, IWorkbenchContributionsRegistry } from 'vs/workbench/common/contributions';
import { CHANGE_CELL_LANGUAGE } from 'vs/workbench/contrib/notebook/browser/notebookBrowser';
import { CHANGE_CELL_LANGUAGE, DETECT_CELL_LANGUAGE } from 'vs/workbench/contrib/notebook/browser/notebookBrowser';
import { INotebookCellStatusBarService } from 'vs/workbench/contrib/notebook/common/notebookCellStatusBarService';
import { CellKind, CellStatusbarAlignment, INotebookCellStatusBarItem, INotebookCellStatusBarItemList, INotebookCellStatusBarItemProvider } from 'vs/workbench/contrib/notebook/common/notebookCommon';
import { INotebookKernelService } from 'vs/workbench/contrib/notebook/common/notebookKernelService';
import { INotebookService } from 'vs/workbench/contrib/notebook/common/notebookService';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
import { LifecyclePhase } from 'vs/workbench/services/lifecycle/common/lifecycle';
class CellStatusBarLanguagePickerProvider implements INotebookCellStatusBarItemProvider {
@ -50,6 +55,72 @@ class CellStatusBarLanguagePickerProvider implements INotebookCellStatusBarItemP
}
}
/**
 * Cell status bar provider that offers a lightbulb item when the detected language of a
 * cell's content differs from the cell's current language. Detection is restricted to
 * the languages supported by the selected kernel plus markdown.
 */
class CellStatusBarLanguageDetectionProvider implements INotebookCellStatusBarItemProvider {

	readonly viewType = '*';

	/**
	 * One delayer per cell, keyed by notebook uri and cell index. A single delayer shared
	 * by all cells would hand every pending request the result computed for whichever
	 * cell was requested last, so each cell gets its own debounce window instead.
	 */
	private readonly delayers = new Map<string, Delayer<INotebookCellStatusBarItemList | undefined>>();

	constructor(
		@INotebookService private readonly _notebookService: INotebookService,
		@INotebookKernelService private readonly _notebookKernelService: INotebookKernelService,
		@ILanguageService private readonly _languageService: ILanguageService,
		@IConfigurationService private readonly _configurationService: IConfigurationService,
		@ILanguageDetectionService private readonly _languageDetectionService: ILanguageDetectionService,
		@IKeybindingService private readonly _keybindingService: IKeybindingService,
	) { }

	/**
	 * @param uri The notebook document uri.
	 * @param index The index of the cell inside the notebook.
	 * @param token Cancellation token for this request.
	 * @returns The status bar items for the cell, or `undefined` when the cell does not
	 * exist, the request was cancelled, or hints are disabled for notebooks.
	 */
	async provideCellStatusBarItems(uri: URI, index: number, token: CancellationToken): Promise<INotebookCellStatusBarItemList | undefined> {
		const key = `${uri.toString()}#${index}`;

		let delayer = this.delayers.get(key);
		if (!delayer) {
			delayer = new Delayer<INotebookCellStatusBarItemList | undefined>(500);
			this.delayers.set(key, delayer);
		}

		const ownDelayer = delayer;
		try {
			return await ownDelayer.trigger(() => this.computeItems(uri, index, token));
		} finally {
			// Drop the delayer once its request has settled so the map does not grow with
			// every cell ever seen. A request that arrives while the task is still running
			// keeps using the (now unmapped) delayer, which is harmless.
			if (this.delayers.get(key) === ownDelayer) {
				this.delayers.delete(key);
			}
		}
	}

	/** Computes the status bar items for a single cell; runs once the debounce delay has elapsed. */
	private async computeItems(uri: URI, index: number, token: CancellationToken): Promise<INotebookCellStatusBarItemList | undefined> {
		const doc = this._notebookService.getNotebookTextModel(uri);
		if (!doc) {
			return;
		}

		const cell = doc.cells[index];
		if (!cell || token.isCancellationRequested) {
			return;
		}

		const enablementConfig = this._configurationService.getValue('workbench.editor.languageDetectionHints');
		const enabled = enablementConfig === 'always' || enablementConfig === 'notebookEditors';
		if (!enabled) {
			return;
		}

		const currentLanguageId = cell.cellKind === CellKind.Markup ?
			'markdown' :
			(this._languageService.getLanguageIdByLanguageName(cell.language) || cell.language);

		const items: INotebookCellStatusBarItem[] = [];

		const selectedKernel = this._notebookKernelService.getMatchingKernel(doc).selected;
		if (!selectedKernel) {
			// Without a selected kernel there is no list of candidate languages to detect against.
			return { items };
		}

		const availableLangs = [...selectedKernel.supportedLanguages, 'markdown'];
		const detectedLanguageId = await this._languageDetectionService.detectLanguage(cell.uri, availableLangs);

		// The request may have been cancelled while detection was running.
		if (token.isCancellationRequested) {
			return;
		}

		if (detectedLanguageId && currentLanguageId !== detectedLanguageId) {
			const detectedName = this._languageService.getLanguageName(detectedLanguageId) || detectedLanguageId;
			let tooltip = localize('notebook.cell.status.autoDetectLanguage', "Accept Detected Language: {0}", detectedName);
			const label = this._keybindingService.lookupKeybinding(DETECT_CELL_LANGUAGE)?.getLabel();
			if (label) {
				tooltip += ` (${label})`;
			}

			items.push({
				text: '$(lightbulb-autofix)',
				command: DETECT_CELL_LANGUAGE,
				tooltip,
				alignment: CellStatusbarAlignment.Right,
				priority: -Number.MAX_SAFE_INTEGER + 1
			});
		}

		return { items };
	}
}
class BuiltinCellStatusBarProviders extends Disposable {
constructor(
@IInstantiationService instantiationService: IInstantiationService,
@ -58,6 +129,7 @@ class BuiltinCellStatusBarProviders extends Disposable {
const builtinProviders = [
CellStatusBarLanguagePickerProvider,
CellStatusBarLanguageDetectionProvider,
];
builtinProviders.forEach(p => {
this._register(notebookCellStatusBarService.registerCellStatusBarItemProvider(instantiationService.createInstance(p)));

View file

@ -21,12 +21,13 @@ import { IQuickInputService, IQuickPickItem, QuickPickInput } from 'vs/platform/
import { changeCellToKind, runDeleteAction } from 'vs/workbench/contrib/notebook/browser/controller/cellOperations';
import { CellToolbarOrder, CELL_TITLE_CELL_GROUP_ID, CELL_TITLE_OUTPUT_GROUP_ID, executeNotebookCondition, INotebookActionContext, INotebookCellActionContext, NotebookAction, NotebookCellAction, NOTEBOOK_EDITOR_WIDGET_ACTION_WEIGHT } from 'vs/workbench/contrib/notebook/browser/controller/coreActions';
import { NOTEBOOK_CELL_EDITABLE, NOTEBOOK_CELL_HAS_OUTPUTS, NOTEBOOK_CELL_LIST_FOCUSED, NOTEBOOK_CELL_MARKDOWN_EDIT_MODE, NOTEBOOK_CELL_TYPE, NOTEBOOK_EDITOR_EDITABLE, NOTEBOOK_EDITOR_FOCUSED, NOTEBOOK_HAS_OUTPUTS, NOTEBOOK_IS_ACTIVE_EDITOR, NOTEBOOK_USE_CONSOLIDATED_OUTPUT_BUTTON } from 'vs/workbench/contrib/notebook/common/notebookContextKeys';
import { CellEditState, CHANGE_CELL_LANGUAGE, QUIT_EDIT_CELL_COMMAND_ID } from 'vs/workbench/contrib/notebook/browser/notebookBrowser';
import { CellEditState, CHANGE_CELL_LANGUAGE, DETECT_CELL_LANGUAGE, QUIT_EDIT_CELL_COMMAND_ID } from 'vs/workbench/contrib/notebook/browser/notebookBrowser';
import * as icons from 'vs/workbench/contrib/notebook/browser/notebookIcons';
import { CellEditType, CellKind, ICellEditOperation, NotebookCellExecutionState } from 'vs/workbench/contrib/notebook/common/notebookCommon';
import { ICellRange } from 'vs/workbench/contrib/notebook/common/notebookRange';
import { ILanguageDetectionService } from 'vs/workbench/services/languageDetection/common/languageDetectionWorkerService';
import { INotebookExecutionStateService } from 'vs/workbench/contrib/notebook/common/notebookExecutionStateService';
import { INotificationService } from 'vs/platform/notification/common/notification';
const CLEAR_ALL_CELLS_OUTPUTS_COMMAND_ID = 'notebook.clearAllCellsOutputs';
const EDIT_CELL_COMMAND_ID = 'notebook.cell.edit';
@ -437,23 +438,7 @@ registerAction2(class ChangeCellLanguageAction extends NotebookCellAction<ICellR
}
private async setLanguage(context: IChangeCellContext, languageId: string) {
if (languageId === 'markdown' && context.cell?.language !== 'markdown') {
const idx = context.notebookEditor.getCellIndex(context.cell);
await changeCellToKind(CellKind.Markup, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, 'markdown', Mimes.markdown);
const newCell = context.notebookEditor.cellAt(idx);
if (newCell) {
context.notebookEditor.focusNotebookCell(newCell, 'editor');
}
} else if (languageId !== 'markdown' && context.cell?.cellKind === CellKind.Markup) {
await changeCellToKind(CellKind.Code, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, languageId);
} else {
const index = context.notebookEditor.textModel.cells.indexOf(context.cell.model);
context.notebookEditor.textModel.applyEdits(
[{ editType: CellEditType.CellLanguage, index, language: languageId }],
true, undefined, () => undefined, undefined, true
);
}
await setCellToLanguage(languageId, context);
}
/**
@ -478,3 +463,48 @@ registerAction2(class ChangeCellLanguageAction extends NotebookCellAction<ICellR
return fakeResource;
}
});
registerAction2(class DetectCellLanguageAction extends NotebookCellAction {
	constructor() {
		super({
			id: DETECT_CELL_LANGUAGE,
			title: localize('detectLanguage', 'Accept Detected Language for Cell'),
			f1: true,
			precondition: ContextKeyExpr.and(NOTEBOOK_EDITOR_EDITABLE, NOTEBOOK_CELL_EDITABLE),
			keybinding: { primary: KeyCode.KeyE | KeyMod.CtrlCmd, weight: KeybindingWeight.WorkbenchContrib }
		});
	}

	/**
	 * Detects the language of the context cell, restricted to the languages supported by
	 * the active kernel plus markdown, and switches the cell to it. Shows a warning
	 * notification when no language could be detected.
	 */
	async runWithContext(accessor: ServicesAccessor, context: INotebookCellActionContext): Promise<void> {
		const languageDetectionService = accessor.get(ILanguageDetectionService);
		const notificationService = accessor.get(INotificationService);

		const providerLanguages = [...context.notebookEditor.activeKernel?.supportedLanguages ?? []];
		providerLanguages.push('markdown');

		const detection = await languageDetectionService.detectLanguage(context.cell.uri, providerLanguages);
		if (detection) {
			// Await the change so the action only completes once the cell has been updated
			// and any failure is reported to the caller instead of becoming an unhandled rejection.
			await setCellToLanguage(detection, context);
		} else {
			notificationService.warn(localize('noDetection', "Unable to detect cell language"));
		}
	}
});
/**
 * Switches the context cell to the given language.
 *
 * - Target is markdown and the cell's language is not markdown: the cell is changed to a
 *   markup cell and the cell found at the same index afterwards is focused.
 * - Target is not markdown and the cell is a markup cell: the cell is changed to a code
 *   cell with the given language.
 * - Otherwise: a cell-language edit is applied to the notebook text model.
 *
 * @param languageId The language id to switch the cell to.
 * @param context The cell and the notebook editor that contains it.
 */
async function setCellToLanguage(languageId: string, context: IChangeCellContext) {
	const wantsMarkdown = languageId === 'markdown';

	if (wantsMarkdown && context.cell?.language !== 'markdown') {
		// Remember the position first; the cell is looked up again by index after the conversion.
		const position = context.notebookEditor.getCellIndex(context.cell);
		await changeCellToKind(CellKind.Markup, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, 'markdown', Mimes.markdown);

		const converted = context.notebookEditor.cellAt(position);
		if (converted) {
			context.notebookEditor.focusNotebookCell(converted, 'editor');
		}
		return;
	}

	if (!wantsMarkdown && context.cell?.cellKind === CellKind.Markup) {
		await changeCellToKind(CellKind.Code, { cell: context.cell, notebookEditor: context.notebookEditor, ui: true }, languageId);
		return;
	}

	const textModel = context.notebookEditor.textModel;
	const position = textModel.cells.indexOf(context.cell.model);
	textModel.applyEdits(
		[{ editType: CellEditType.CellLanguage, index: position, language: languageId }],
		true, undefined, () => undefined, undefined, true
	);
}

View file

@ -31,6 +31,7 @@ import { IEditorOptions } from 'vs/editor/common/config/editorOptions';
//#region Shared commands
export const EXPAND_CELL_INPUT_COMMAND_ID = 'notebook.cell.expandCellInput';
export const EXECUTE_CELL_COMMAND_ID = 'notebook.cell.execute';
export const DETECT_CELL_LANGUAGE = 'notebook.cell.detectLanguage';
export const CHANGE_CELL_LANGUAGE = 'notebook.cell.changeLanguage';
export const QUIT_EDIT_CELL_COMMAND_ID = 'notebook.cell.quitEdit';
export const EXPAND_CELL_OUTPUT_COMMAND_ID = 'notebook.cell.expandCellOutput';

View file

@ -9,7 +9,7 @@ import { IRequestHandler } from 'vs/base/common/worker/simpleWorker';
import { EditorSimpleWorker } from 'vs/editor/common/services/editorSimpleWorker';
import { IEditorWorkerHost } from 'vs/editor/common/services/editorWorkerHost';
type RegexpModel = { detect: (inp: string, langBiases: Record<string, number>) => string | undefined };
type RegexpModel = { detect: (inp: string, langBiases: Record<string, number>, supportedLangs?: string[]) => string | undefined };
/**
* Called on the worker side
@ -34,7 +34,9 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
private _modelOperations: ModelOperations | undefined;
private _loadFailed: boolean = false;
public async detectLanguage(uri: string, langBiases: Record<string, number> | undefined, preferHistory: boolean): Promise<string | undefined> {
private modelIdToCoreId = new Map<string, string>();
public async detectLanguage(uri: string, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
const languages: string[] = [];
const confidences: number[] = [];
const stopWatch = new StopWatch(true);
@ -43,8 +45,14 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
const neuralResolver = async () => {
for await (const language of this.detectLanguagesImpl(documentTextSample)) {
languages.push(language.languageId);
confidences.push(language.confidence);
if (!this.modelIdToCoreId.has(language.languageId)) {
this.modelIdToCoreId.set(language.languageId, await this._host.fhr('getLanguageId', [language.languageId]));
}
const coreId = this.modelIdToCoreId.get(language.languageId);
if (coreId && (!supportedLangs?.length || supportedLangs.includes(coreId))) {
languages.push(coreId);
confidences.push(language.confidence);
}
}
stopWatch.stop();
@ -55,15 +63,7 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
return undefined;
};
const historicalResolver = async () => {
if (langBiases) {
const regexpDetection = await this.runRegexpModel(documentTextSample, langBiases);
if (regexpDetection) {
return regexpDetection;
}
}
return undefined;
};
const historicalResolver = async () => this.runRegexpModel(documentTextSample, langBiases ?? {}, supportedLangs);
if (preferHistory) {
const history = await historicalResolver();
@ -112,11 +112,22 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
}
}
private async runRegexpModel(content: string, langBiases: Record<string, number>): Promise<string | undefined> {
private async runRegexpModel(content: string, langBiases: Record<string, number>, supportedLangs?: string[]): Promise<string | undefined> {
const regexpModel = await this.getRegexpModel();
if (!regexpModel) { return; }
const detected = regexpModel.detect(content, langBiases);
if (supportedLangs?.length) {
// When using supportedLangs, normally computed biases are too extreme. Just use a "bitmask" of sorts.
for (const lang of Object.keys(langBiases)) {
if (supportedLangs.includes(lang)) {
langBiases[lang] = 1;
} else {
langBiases[lang] = 0;
}
}
}
const detected = regexpModel.detect(content, langBiases, supportedLangs);
return detected;
}
@ -156,21 +167,21 @@ export class LanguageDetectionSimpleWorker extends EditorSimpleWorker {
// For the following languages, we increase the confidence because
// these are commonly used languages in VS Code and supported
// by the model.
case 'javascript':
case 'js':
case 'html':
case 'json':
case 'typescript':
case 'ts':
case 'css':
case 'python':
case 'py':
case 'xml':
case 'php':
modelResult.confidence += LanguageDetectionSimpleWorker.positiveConfidenceCorrectionBucket1;
break;
// case 'yaml': // YAML has been know to cause incorrect language detection because the language is pretty simple. We don't want to increase the confidence for this.
case 'cpp':
case 'shellscript':
case 'sh':
case 'java':
case 'csharp':
case 'cs':
case 'c':
modelResult.confidence += LanguageDetectionSimpleWorker.positiveConfidenceCorrectionBucket2;
break;

View file

@ -53,7 +53,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
constructor(
@IWorkbenchEnvironmentService private readonly _environmentService: IWorkbenchEnvironmentService,
@ILanguageService private readonly _languageService: ILanguageService,
@ILanguageService languageService: ILanguageService,
@IConfigurationService private readonly _configurationService: IConfigurationService,
@IDiagnosticsService private readonly _diagnosticsService: IDiagnosticsService,
@IWorkspaceContextService private readonly _workspaceContextService: IWorkspaceContextService,
@ -68,6 +68,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
this._languageDetectionWorkerClient = new LanguageDetectionWorkerClient(
modelService,
languageService,
telemetryService,
// TODO: See if it's possible to bundle vscode-languagedetection
this._environmentService.isBuilt && !isWeb
@ -95,7 +96,7 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
let count = 0;
for (const ext of fileExtensions.extensions) {
const langId = this.getLanguageId(ext);
const langId = this._languageDetectionWorkerClient.getLanguageId(ext);
if (langId && count < TOP_LANG_COUNTS) {
this.workspaceLanguageIds.add(langId);
count++;
@ -109,15 +110,6 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
return !!languageId && this._configurationService.getValue<boolean>(LanguageDetectionService.enablementSettingKey, { overrideIdentifier: languageId });
}
private getLanguageId(language: string | undefined): string | undefined {
if (!language) {
return undefined;
}
if (this._languageService.isRegisteredLanguageId(language)) {
return language;
}
return this._languageService.guessLanguageIdByFilepathOrFirstLine(URI.file(`file.${language}`)) ?? undefined;
}
private getLanguageBiases(): Record<string, number> {
if (!this.dirtyBiases) { return this.langBiases; }
@ -147,19 +139,14 @@ export class LanguageDetectionService extends Disposable implements ILanguageDet
return biases;
}
async detectLanguage(resource: URI): Promise<string | undefined> {
async detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined> {
const useHistory = this._configurationService.getValue<string[]>(LanguageDetectionService.historyBasedEnablementConfig);
const preferHistory = this._configurationService.getValue<boolean>(LanguageDetectionService.preferHistoryConfig);
if (useHistory) {
await this.resolveWorkspaceLanguageIds();
}
const biases = useHistory ? this.getLanguageBiases() : undefined;
const language = await this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory);
if (language) {
return this.getLanguageId(language);
}
return undefined;
return this._languageDetectionWorkerClient.detectLanguage(resource, biases, preferHistory, supportedLangs);
}
private initEditorOpenedListeners(storageService: IStorageService) {
@ -234,6 +221,7 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient {
constructor(
modelService: IModelService,
private readonly _languageService: ILanguageService,
private readonly _telemetryService: ITelemetryService,
private readonly _indexJsUri: string,
private readonly _modelJsonUri: string,
@ -260,6 +248,14 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient {
return this.workerPromise;
}
/**
 * Asks the language service to guess a language id for the given uri.
 *
 * @returns the guessed language id, or `undefined` when there is no guess or the guess is `'unknown'`.
 */
private _guessLanguageIdByUri(uri: URI): string | undefined {
	const candidate = this._languageService.guessLanguageIdByFilepathOrFirstLine(uri);
	const usable = !!candidate && candidate !== 'unknown';
	return usable ? candidate : undefined;
}
override async _getProxy(): Promise<LanguageDetectionSimpleWorker> {
return (await this._getOrCreateLanguageDetectionWorker()).getProxyObject();
}
@ -275,6 +271,8 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient {
return this.getWeightsUri();
case 'getRegexpModelUri':
return this.getRegexpModelUri();
case 'getLanguageId':
return this.getLanguageId(args[0]);
case 'sendTelemetryEvent':
return this.sendTelemetryEvent(args[0], args[1], args[2]);
default:
@ -286,6 +284,20 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient {
return this._indexJsUri;
}
/**
 * Maps a language id or a file extension to a registered language id.
 *
 * @param languageIdOrExt Either a language id or a file extension without the leading dot.
 * @returns `languageIdOrExt` itself when it is a registered language id; otherwise the
 * language guessed for a file named `file.<languageIdOrExt>`; `undefined` when the input
 * is empty or no language could be guessed.
 */
getLanguageId(languageIdOrExt: string | undefined): string | undefined {
	if (!languageIdOrExt) {
		return undefined;
	}
	if (this._languageService.isRegisteredLanguageId(languageIdOrExt)) {
		return languageIdOrExt;
	}
	// `_guessLanguageIdByUri` already turns a missing or 'unknown' guess into undefined,
	// so its result can be returned as is.
	return this._guessLanguageIdByUri(URI.file(`file.${languageIdOrExt}`));
}
async getModelJsonUri() {
return this._modelJsonUri;
}
@ -306,9 +318,15 @@ export class LanguageDetectionWorkerClient extends EditorWorkerClient {
});
}
public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean): Promise<string | undefined> {
public async detectLanguage(resource: URI, langBiases: Record<string, number> | undefined, preferHistory: boolean, supportedLangs?: string[]): Promise<string | undefined> {
const quickGuess = this._guessLanguageIdByUri(resource);
if (quickGuess) {
return quickGuess;
}
await this._withSyncedResources([resource]);
return (await this._getProxy()).detectLanguage(resource.toString(), langBiases, preferHistory);
const modelId = await (await this._getProxy()).detectLanguage(resource.toString(), langBiases, preferHistory, supportedLangs);
return this.getLanguageId(modelId);
}
}

View file

@ -19,9 +19,10 @@ export interface ILanguageDetectionService {
/**
* @param resource The resource to detect the language for.
* @param supportedLangs Optional. When populated, the model will only return languages from the provided list
* @returns the language id for the given resource or undefined if the model is not confident enough.
*/
detectLanguage(resource: URI): Promise<string | undefined>;
detectLanguage(resource: URI, supportedLangs?: string[]): Promise<string | undefined>;
}
//#region Telemetry events

View file

@ -303,6 +303,9 @@ import 'vs/workbench/contrib/typeHierarchy/browser/typeHierarchy.contribution';
import 'vs/workbench/contrib/codeEditor/browser/outline/documentSymbolsOutline';
import 'vs/workbench/contrib/outline/browser/outline.contribution';
// Language Detection
import 'vs/workbench/contrib/languageDetection/browser/languageDetection.contribution';
// Language Status
import 'vs/workbench/contrib/languageStatus/browser/languageStatus.contribution';