More perf fixes after review

This commit is contained in:
Don Jayamanne 2021-08-19 12:20:39 -07:00
parent 09f68d545d
commit 9347bc5c24
5 changed files with 49 additions and 88 deletions

View file

@ -64,7 +64,8 @@
"watch": "npx gulp watch-extension:ipynb"
},
"dependencies": {
"@enonic/fnv-plus": "^1.3.0"
"@enonic/fnv-plus": "^1.3.0",
"detect-indent": "^6.0.0"
},
"devDependencies": {
"@jupyterlab/coreutils": "^3.1.0"

View file

@ -56,34 +56,34 @@ const orderOfMimeTypes = [
/**
 * Reports whether an output item has a vendored mime type (one starting with
 * `application/vnd.`) whose data is empty.
 *
 * Items with any other mime type always yield `false`. Any error thrown while
 * inspecting the data is swallowed and also yields `false`.
 */
function isEmptyVendoredMimeType(outputItem: NotebookCellOutputItem) {
if (outputItem.mime.startsWith('application/vnd.')) {
try {
// NOTE(review): two consecutive returns are present here; as written only the first one
// executes and the `byteLength` short-circuit on the following line is unreachable.
return Buffer.from(outputItem.data).toString().length === 0;
return outputItem.data.byteLength === 0 || Buffer.from(outputItem.data).toString().length === 0;
} catch { }
}
return false;
}
/**
 * Checks whether the mime type `compareWith` matches the pattern `value`.
 *
 * A pattern ending in `.*` is treated as a prefix wildcard: the trailing `.*` is dropped and
 * what remains must be a prefix of `compareWith`. Any other pattern must itself be a prefix
 * of `compareWith`.
 *
 * @param value Mime type pattern, optionally ending in `.*`.
 * @param compareWith Mime type to test against the pattern.
 * @returns `true` when `compareWith` starts with the (wildcard-stripped) pattern.
 */
function isMimeTypeMatch(value: string, compareWith: string): boolean {
	const wildcardSuffix = '.*';
	// Strip only the trailing wildcard. Cutting at the first occurrence of `.*` would truncate
	// the pattern too early whenever that sequence also appears before the end.
	const prefix = value.endsWith(wildcardSuffix) ? value.slice(0, -wildcardSuffix.length) : value;
	return compareWith.startsWith(prefix);
}
/**
 * Orders output items by display preference.
 *
 * Each item is ranked by the index of the first entry in `orderOfMimeTypes` that matches its
 * mime type (via `isMimeTypeMatch`). Items with no matching entry, and vendored items whose
 * data is empty (`isEmptyVendoredMimeType`), are given rank 100. Items are then ordered by
 * ascending rank.
 */
function sortOutputItemsBasedOnDisplayOrder(outputItems: NotebookCellOutputItem[]): NotebookCellOutputItem[] {
return outputItems.sort((outputItemA, outputItemB) => {
const isMimeTypeMatch = (value: string, compareWith: string) => {
if (value.endsWith('.*')) {
value = value.substr(0, value.indexOf('.*'));
return outputItems
.map(item => {
let index = orderOfMimeTypes.findIndex((mime) => isMimeTypeMatch(mime, item.mime));
// Sometimes we can have mime types with empty data, e.g. when using holoview we can have `application/vnd.holoviews_load.v0+json` with empty value.
// & in these cases we have HTML/JS and those take precedence.
// https://github.com/microsoft/vscode-jupyter/issues/6109
if (isEmptyVendoredMimeType(item)) {
index = -1;
}
return compareWith.startsWith(value);
};
let indexOfMimeTypeA = orderOfMimeTypes.findIndex((mime) => isMimeTypeMatch(mime, outputItemA.mime));
let indexOfMimeTypeB = orderOfMimeTypes.findIndex((mime) => isMimeTypeMatch(mime, outputItemB.mime));
// Sometimes we can have mime types with empty data, e.g. when using holoview we can have `application/vnd.holoviews_load.v0+json` with empty value.
// & in these cases we have HTML/JS and those take precedence.
// https://github.com/microsoft/vscode-jupyter/issues/6109
if (isEmptyVendoredMimeType(outputItemA)) {
indexOfMimeTypeA = -1;
}
if (isEmptyVendoredMimeType(outputItemB)) {
indexOfMimeTypeB = -1;
}
indexOfMimeTypeA = indexOfMimeTypeA === -1 ? 100 : indexOfMimeTypeA;
indexOfMimeTypeB = indexOfMimeTypeB === -1 ? 100 : indexOfMimeTypeB;
return indexOfMimeTypeA - indexOfMimeTypeB;
});
index = index === -1 ? 100 : index;
return {
item, index
};
})
.sort((outputItemA, outputItemB) => outputItemA.index - outputItemB.index).map(item => item.item);
}

View file

@ -4,6 +4,7 @@
*--------------------------------------------------------------------------------------------*/
import type { nbformat } from '@jupyterlab/coreutils';
import * as detectIndent from 'detect-indent';
import * as vscode from 'vscode';
import { defaultNotebookFormat } from './constants';
import { getPreferredLanguage, jupyterNotebookModelToNotebookData } from './deserializers';
@ -39,8 +40,8 @@ export class NotebookSerializer implements vscode.NotebookSerializer {
}
}
// Then compute indent from the contents
const indentAmount = contents ? detectIndent(contents) : ' ';
// Then compute indent from the contents (only use first 1K characters as a perf optimization)
const indentAmount = contents ? detectIndent(contents.substring(0, 1_000)) : ' ';
const preferredCellLanguage = getPreferredLanguage(json.metadata);
// Ensure we always have a blank cell.
@ -93,17 +94,3 @@ export class NotebookSerializer implements vscode.NotebookSerializer {
return JSON.stringify(notebookContent, undefined, indentAmount);
}
}
/**
 * Guesses the indentation used by a serialized JSON object.
 *
 * The guess is the run of whitespace found between the opening `{` and the first
 * non-whitespace character after it, with line breaks (`\n` / `\r\n`) disregarded.
 * Only the nine characters following the `{` are inspected, so longer indents are truncated.
 *
 * @param jsonString Serialized JSON text.
 * @returns The detected indent, or `''` when the text does not begin with `{`.
 */
export function detectIndent(jsonString: string) {
	if (jsonString.startsWith('{')) {
		// A short window right after the brace is enough; drop the line break(s) inside it.
		const head = jsonString.slice(1, 10).replace(/\r?\n/g, '');
		// Whatever whitespace leads the window is taken as the indent.
		const leadingWhitespace = /^\s*/.exec(head);
		return leadingWhitespace ? leadingWhitespace[0] : '';
	}
	return '';
}

View file

@ -236,8 +236,8 @@ function convertStreamOutput(output: NotebookCellOutput): JupyterOutput {
if (outputs.length && lines.length && lines[0].length > 0) {
outputs[outputs.length - 1] = `${outputs[outputs.length - 1]}${lines.shift()!}`;
}
while (lines.length) {
outputs.push(lines.shift()!);
for (const line of lines) {
outputs.push(line);
}
});
// Skip last one if empty (it's the only one that could be length 0)

View file

@ -7,7 +7,6 @@ import { nbformat } from '@jupyterlab/coreutils';
import * as assert from 'assert';
import * as vscode from 'vscode';
import { jupyterCellOutputToCellOutput, jupyterNotebookModelToNotebookData } from '../deserializers';
import { detectIndent } from '../notebookSerializer';
function deepStripProperties(obj: any, props: string[]) {
for (let prop in obj) {
@ -53,32 +52,6 @@ suite('ipynb serializer', () => {
assert.deepStrictEqual(notebook.cells, [expectedCodeCell, expectedMarkdownCell]);
});
// Checks that detectIndent recovers the exact indent string that JSON.stringify was given.
suite('Indentation detection', () => {
	// Minimal notebook; only its serialized form matters for these tests.
	const ipynbNotebook: nbformat.INotebookContent = {
		cells: [{ cell_type: 'raw', metadata: {}, source: [] }],
		metadata: {
			orig_nbformat: 4
		},
		nbformat: 4,
		nbformat_minor: 0
	};
	test('JSON with no indents', () => {
		assert.deepStrictEqual(detectIndent(JSON.stringify(ipynbNotebook, undefined, '')), '');
	});
	test('JSON with 1 indent', () => {
		assert.deepStrictEqual(detectIndent(JSON.stringify(ipynbNotebook, undefined, ' ')), ' ');
	});
	// The indent strings below are spelled out so each case matches its name
	// (four spaces, one tab, three tabs) instead of repeating the single-space case.
	test('JSON with 4 indent', () => {
		assert.deepStrictEqual(detectIndent(JSON.stringify(ipynbNotebook, undefined, '    ')), '    ');
	});
	test('JSON with 1 tab indent', () => {
		assert.deepStrictEqual(detectIndent(JSON.stringify(ipynbNotebook, undefined, '\t')), '\t');
	});
	test('JSON with 3 tab indent', () => {
		assert.deepStrictEqual(detectIndent(JSON.stringify(ipynbNotebook, undefined, '\t\t\t')), '\t\t\t');
	});
});
suite('Outputs', () => {
function validateCellOutputTranslation(
outputs: nbformat.IOutput[],
@ -141,29 +114,29 @@ suite('ipynb serializer', () => {
output_type: 'stream',
text: [
'Epoch 1/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.2913 - accuracy: 0.9147\n',
'...\n',
'Epoch 2/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1418 - accuracy: 0.9584\n',
'...\n',
'Epoch 3/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1058 - accuracy: 0.9681\n',
'...\n',
'Epoch 4/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0879 - accuracy: 0.9730\n',
'...\n',
'Epoch 5/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0744 - accuracy: 0.9765\n'
'...\n'
]
}
],
[
new vscode.NotebookCellOutput([vscode.NotebookCellOutputItem.stdout(['Epoch 1/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.2913 - accuracy: 0.9147\n',
'...\n',
'Epoch 2/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1418 - accuracy: 0.9584\n',
'...\n',
'Epoch 3/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1058 - accuracy: 0.9681\n',
'...\n',
'Epoch 4/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0879 - accuracy: 0.9730\n',
'...\n',
'Epoch 5/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0744 - accuracy: 0.9765\n'].join(''))], {
'...\n'].join(''))], {
outputType: 'stream'
})
]
@ -178,29 +151,29 @@ suite('ipynb serializer', () => {
output_type: 'stream',
text: [
'Epoch 1/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.2913 - accuracy: 0.9147\n',
'...\n',
'Epoch 2/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1418 - accuracy: 0.9584\n',
'...\n',
'Epoch 3/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1058 - accuracy: 0.9681\n',
'...\n',
'Epoch 4/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0879 - accuracy: 0.9730\n',
'...\n',
'Epoch 5/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0744 - accuracy: 0.9765\n'
'...\n'
]
}
],
[
new vscode.NotebookCellOutput([vscode.NotebookCellOutputItem.stderr(['Epoch 1/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.2913 - accuracy: 0.9147\n',
'...\n',
'Epoch 2/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1418 - accuracy: 0.9584\n',
'...\n',
'Epoch 3/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.1058 - accuracy: 0.9681\n',
'...\n',
'Epoch 4/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0879 - accuracy: 0.9730\n',
'...\n',
'Epoch 5/5\n',
'1875/1875 [==============================] - 3s 1ms/step - loss: 0.0744 - accuracy: 0.9765\n',
'...\n',
// This last empty line should not be saved in ipynb.
'\n'].join(''))], {
outputType: 'stream'