mirror of
https://github.com/Microsoft/vscode
synced 2024-08-28 05:19:39 +00:00
Improve markdown link regexp (#152533)
* Improve markdown link regexp. This makes the markdown link regexp more readable and also combines the two regular expressions we were running. * Fixed backtracking.
This commit is contained in:
parent
5ffcfde11d
commit
5a175207de
|
@ -131,11 +131,14 @@ export type MdLink = MdInlineLink | MdLinkDefinition;
|
|||
|
||||
function extractDocumentLink(
|
||||
document: SkinnyTextDocument,
|
||||
pre: number,
|
||||
link: string,
|
||||
pre: string,
|
||||
rawLink: string,
|
||||
matchIndex: number | undefined
|
||||
): MdLink | undefined {
|
||||
const offset = (matchIndex || 0) + pre;
|
||||
const isAngleBracketLink = rawLink.startsWith('<');
|
||||
const link = stripAngleBrackets(rawLink);
|
||||
|
||||
const offset = (matchIndex || 0) + pre.length + (isAngleBracketLink ? 1 : 0);
|
||||
const linkStart = document.positionAt(offset);
|
||||
const linkEnd = document.positionAt(offset + link.length);
|
||||
try {
|
||||
|
@ -185,20 +188,36 @@ function stripAngleBrackets(link: string) {
|
|||
return link.replace(angleBracketLinkRe, '$1');
|
||||
}
|
||||
|
||||
/**
|
||||
* Matches `[text](link)`
|
||||
*/
|
||||
const linkPattern = /(\[((!\[[^\]]*?\]\(\s*)([^\s\(\)]+?)\s*\)\]|(?:\\\]|[^\]]|\][^(])*\])\(\s*)(([^\s\(\)]|\([^\s\(\)]*?\))+)\s*("[^"]*"|'[^']*'|\([^\(\)]*\))?\s*\)/g;
|
||||
const r = String.raw;
|
||||
|
||||
/**
|
||||
* Matches `[text](<link>)`
|
||||
* Matches `[text](link)` or `[text](<link>)`
|
||||
*/
|
||||
const linkPatternAngle = /(\[((!\[[^\]]*?\]\(\s*)([^\s\(\)]+?)\s*\)\]|(?:\\\]|[^\]]|\][^(])*\])\(\s*<)(([^<>]|\([^\s\(\)]*?\))+)>\s*("[^"]*"|'[^']*'|\([^\(\)]*\))?\s*\)/g;
|
||||
// Assembled from String.raw fragments so each part of the pattern can carry its own comment.
// Capture group 1: the prefix, from the opening `[` of the link text through `(` and any whitespace after it.
// Capture group 2: the link destination, either a bare link or a `<...>` form (angle brackets included).
// The optional title is matched but not captured.
const linkPattern = new RegExp(
|
||||
// text
|
||||
r`(\[` + // open prefix match -->
|
||||
/**/r`(?:` +
|
||||
/*****/r`[^\[\]\\]|` + // Non-bracket chars, or...
|
||||
/*****/r`\\.|` + // Escaped char, or...
|
||||
/*****/r`\[[^\[\]]*\]` + // Matched bracket pair
|
||||
/**/r`)*` +
|
||||
r`\]` +
|
||||
|
||||
// Destination
|
||||
r`\(\s*)` + // <-- close prefix match
|
||||
/**/r`(` +
|
||||
/*****/r`[^\s\(\)\<](?:[^\s\(\)]|\([^\s\(\)]*?\))*|` + // Link without whitespace, or...
|
||||
/*****/r`<[^<>]*>` + // In angle brackets
|
||||
/**/r`)` +
|
||||
|
||||
// Title
|
||||
/**/r`\s*(?:"[^"]*"|'[^']*'|\([^\(\)]*\))?\s*` +
|
||||
r`\)`,
|
||||
'g');
|
||||
|
||||
/**
|
||||
* Matches `[text][ref]` or `[shorthand]`
|
||||
*/
|
||||
* Matches `[text][ref]` or `[shorthand]`
|
||||
*/
|
||||
const referenceLinkPattern = /(^|[^\]\\])(?:(?:(\[((?:\\\]|[^\]])+)\]\[\s*?)([^\s\]]*?)\]|\[\s*?([^\s\]]*?)\])(?![\:\(]))/gm;
|
||||
|
||||
/**
|
||||
|
@ -270,36 +289,23 @@ export class MdLinkComputer {
|
|||
|
||||
private *getInlineLinks(document: SkinnyTextDocument, noLinkRanges: NoLinkRanges): Iterable<MdLink> {
|
||||
const text = document.getText();
|
||||
|
||||
for (const match of text.matchAll(linkPatternAngle)) {
|
||||
const matchImageData = match[4] && extractDocumentLink(document, match[3].length + 1, match[4], match.index);
|
||||
if (matchImageData && !noLinkRanges.contains(matchImageData.source.hrefRange)) {
|
||||
yield matchImageData;
|
||||
}
|
||||
const matchLinkData = extractDocumentLink(document, match[1].length, match[5], match.index);
|
||||
if (matchLinkData && !noLinkRanges.contains(matchLinkData.source.hrefRange)) {
|
||||
yield matchLinkData;
|
||||
}
|
||||
}
|
||||
|
||||
for (const match of text.matchAll(linkPattern)) {
|
||||
const matchImageData = match[4] && extractDocumentLink(document, match[3].length + 1, match[4], match.index);
|
||||
if (matchImageData && !noLinkRanges.contains(matchImageData.source.hrefRange)) {
|
||||
yield matchImageData;
|
||||
}
|
||||
|
||||
if (match[5] !== undefined && match[5].startsWith('<')) {
|
||||
continue;
|
||||
}
|
||||
|
||||
const matchLinkData = extractDocumentLink(document, match[1].length, match[5], match.index);
|
||||
const matchLinkData = extractDocumentLink(document, match[1], match[2], match.index);
|
||||
if (matchLinkData && !noLinkRanges.contains(matchLinkData.source.hrefRange)) {
|
||||
yield matchLinkData;
|
||||
|
||||
// Also check the link text (match[1]) for nested links, e.g. an image inside a link
|
||||
for (const innerMatch of match[1].matchAll(linkPattern)) {
|
||||
const innerData = extractDocumentLink(document, innerMatch[1], innerMatch[2], (match.index ?? 0) + (innerMatch.index ?? 0));
|
||||
if (innerData) {
|
||||
yield innerData;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private *getAutoLinks(document: SkinnyTextDocument, noLinkRanges: NoLinkRanges): Iterable<MdLink> {
|
||||
private * getAutoLinks(document: SkinnyTextDocument, noLinkRanges: NoLinkRanges): Iterable<MdLink> {
|
||||
const text = document.getText();
|
||||
|
||||
for (const match of text.matchAll(autoLinkPattern)) {
|
||||
|
|
|
@ -32,7 +32,7 @@ function assertLinksEqual(actualLinks: readonly vscode.DocumentLink[], expectedR
|
|||
}
|
||||
}
|
||||
|
||||
suite('markdown.DocumentLinkProvider', () => {
|
||||
suite('Markdown: DocumentLinkProvider', () => {
|
||||
test('Should not return anything for empty document', async () => {
|
||||
const links = await getLinksForFile('');
|
||||
assert.strictEqual(links.length, 0);
|
||||
|
@ -131,24 +131,24 @@ suite('markdown.DocumentLinkProvider', () => {
|
|||
{
|
||||
const links = await getLinksForFile('[![alt text](image.jpg)](https://example.com)');
|
||||
assertLinksEqual(links, [
|
||||
new vscode.Range(0, 25, 0, 44),
|
||||
new vscode.Range(0, 13, 0, 22),
|
||||
new vscode.Range(0, 25, 0, 44)
|
||||
]);
|
||||
}
|
||||
{
|
||||
const links = await getLinksForFile('[![a]( whitespace.jpg )]( https://whitespace.com )');
|
||||
assertLinksEqual(links, [
|
||||
new vscode.Range(0, 26, 0, 48),
|
||||
new vscode.Range(0, 7, 0, 21),
|
||||
new vscode.Range(0, 26, 0, 48)
|
||||
]);
|
||||
}
|
||||
{
|
||||
const links = await getLinksForFile('[![a](img1.jpg)](file1.txt) text [![a](img2.jpg)](file2.txt)');
|
||||
assertLinksEqual(links, [
|
||||
new vscode.Range(0, 6, 0, 14),
|
||||
new vscode.Range(0, 17, 0, 26),
|
||||
new vscode.Range(0, 39, 0, 47),
|
||||
new vscode.Range(0, 6, 0, 14),
|
||||
new vscode.Range(0, 50, 0, 59),
|
||||
new vscode.Range(0, 39, 0, 47),
|
||||
]);
|
||||
}
|
||||
});
|
||||
|
|
Loading…
Reference in a new issue