Fixes #20624: Unaligned surrogate pairs are corrupted at multiples of 50 columns

This commit is contained in:
Alex Dima 2017-02-23 10:25:39 +01:00
parent 64cccfc0cf
commit 72fe8f51b7
2 changed files with 34 additions and 2 deletions

View file

@ -298,7 +298,7 @@ function resolveRenderLineInput(input: RenderLineInput): ResolvedRenderLineInput
containsRTL = strings.containsRTL(lineContent);
}
if (!containsRTL) {
tokens = splitLargeTokens(tokens);
tokens = splitLargeTokens(lineContent, tokens);
}
return new ResolvedRenderLineInput(
@ -347,7 +347,7 @@ const enum Constants {
* It appears that having very large spans causes very slow reading of character positions.
* So here we try to avoid that.
*/
function splitLargeTokens(tokens: LinePart[]): LinePart[] {
function splitLargeTokens(lineContent: string, tokens: LinePart[]): LinePart[] {
let lastTokenEndIndex = 0;
let result: LinePart[] = [], resultLen = 0;
for (let i = 0, len = tokens.length; i < len; i++) {
@ -359,6 +359,11 @@ function splitLargeTokens(tokens: LinePart[]): LinePart[] {
const piecesCount = Math.ceil(diff / Constants.LongToken);
for (let j = 1; j < piecesCount; j++) {
let pieceEndIndex = lastTokenEndIndex + (j * Constants.LongToken);
let lastCharInPiece = lineContent.charCodeAt(pieceEndIndex - 1);
if (strings.isHighSurrogate(lastCharInPiece)) {
// Don't cut in the middle of a surrogate pair
pieceEndIndex--;
}
result[resultLen++] = new LinePart(pieceEndIndex, tokenType);
}
result[resultLen++] = new LinePart(tokenEndIndex, tokenType);

View file

@ -468,6 +468,33 @@ suite('viewLineRenderer.renderLine', () => {
}
});
test('issue #20624: Unaligned surrogate pairs are corrupted at multiples of 50 columns', () => {
let lineText = 'a𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷';
let lineParts = [createPart(lineText.length, 1)];
let actual = renderViewLine(new RenderLineInput(
false,
lineText,
false,
0,
lineParts,
[],
4,
10,
-1,
'none',
false
));
let expectedOutput = [
'<span class="mtk1">a𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷</span>',
'<span class="mtk1">𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷</span>',
'<span class="mtk1">𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷</span>',
'<span class="mtk1">𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷</span>',
'<span class="mtk1">𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷𠮷</span>',
];
assert.equal(actual.html, '<span>' + expectedOutput.join('') + '</span>');
});
test('issue #6885: Does not split large tokens in RTL text', () => {
let lineText = 'את גרמנית בהתייחסות שמו, שנתי המשפט אל חפש, אם כתב אחרים ולחבר. של התוכן אודות בויקיפדיה כלל, של עזרה כימיה היא. על עמוד יוצרים מיתולוגיה סדר, אם שכל שתפו לעברית שינויים, אם שאלות אנגלית עזה. שמות בקלות מה סדר.';
let lineParts = [createPart(lineText.length, 1)];