mirror of
https://github.com/dart-lang/sdk
synced 2024-10-14 11:31:57 +00:00
[analyzer] Separate the HTML parser utility from the DOM
With the parser code in the same library as the DOM classes, the `pkg/analyzer/tool/messages/generate.dart` program, which generates all of the diagnostic classes and diagnostics, _depends_ on a library with a `parse` method which depends on these diagnostic classes (in order to report errors while parsing HTML). This means that if there is any existing error (like an unknown identifier) in the existing error codes, it is impossible to generate the error codes. Since we don't need the `parse` method to generate diagnostics, we split up the library and remove the indirect dependency. Separating the parser code out removes the loop. Change-Id: Ifb9e9fd979e341ae64a1ed49aaf3758d3dcf1dee Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/287220 Reviewed-by: Brian Wilkerson <brianwilkerson@google.com> Commit-Queue: Samuel Rawlins <srawlins@google.com>
This commit is contained in:
parent
25779cc933
commit
49a3fb4f96
|
@ -7,6 +7,7 @@ import 'dart:io';
|
||||||
|
|
||||||
import 'package:analyzer_utilities/html_dom.dart' as dom;
|
import 'package:analyzer_utilities/html_dom.dart' as dom;
|
||||||
import 'package:analyzer_utilities/html_generator.dart';
|
import 'package:analyzer_utilities/html_generator.dart';
|
||||||
|
import 'package:analyzer_utilities/html_parser.dart' as parser;
|
||||||
import 'package:path/path.dart';
|
import 'package:path/path.dart';
|
||||||
|
|
||||||
import 'api.dart';
|
import 'api.dart';
|
||||||
|
@ -313,7 +314,7 @@ class ApiReader {
|
||||||
Api readApi() {
|
Api readApi() {
|
||||||
var file = File(filePath);
|
var file = File(filePath);
|
||||||
var htmlContents = file.readAsStringSync();
|
var htmlContents = file.readAsStringSync();
|
||||||
var document = dom.parse(htmlContents, file.uri);
|
var document = parser.parse(htmlContents, file.uri);
|
||||||
var htmlElement = document.children
|
var htmlElement = document.children
|
||||||
.singleWhere((element) => element.name.toLowerCase() == 'html');
|
.singleWhere((element) => element.name.toLowerCase() == 'html');
|
||||||
return apiFromHtml(htmlElement);
|
return apiFromHtml(htmlElement);
|
||||||
|
|
|
@ -7,6 +7,7 @@ import 'dart:io';
|
||||||
|
|
||||||
import 'package:analyzer_utilities/html_dom.dart' as dom;
|
import 'package:analyzer_utilities/html_dom.dart' as dom;
|
||||||
import 'package:analyzer_utilities/html_generator.dart';
|
import 'package:analyzer_utilities/html_generator.dart';
|
||||||
|
import 'package:analyzer_utilities/html_parser.dart' as parser;
|
||||||
import 'package:path/path.dart';
|
import 'package:path/path.dart';
|
||||||
|
|
||||||
import 'api.dart';
|
import 'api.dart';
|
||||||
|
@ -292,7 +293,7 @@ class ApiReader {
|
||||||
Api readApi() {
|
Api readApi() {
|
||||||
var file = File(filePath);
|
var file = File(filePath);
|
||||||
var htmlContents = file.readAsStringSync();
|
var htmlContents = file.readAsStringSync();
|
||||||
var document = dom.parse(htmlContents, file.uri);
|
var document = parser.parse(htmlContents, file.uri);
|
||||||
var htmlElement = document.children
|
var htmlElement = document.children
|
||||||
.singleWhere((element) => element.name.toLowerCase() == 'html');
|
.singleWhere((element) => element.name.toLowerCase() == 'html');
|
||||||
return apiFromHtml(htmlElement);
|
return apiFromHtml(htmlElement);
|
||||||
|
|
|
@ -2,13 +2,10 @@
|
||||||
// for details. All rights reserved. Use of this source code is governed by a
|
// for details. All rights reserved. Use of this source code is governed by a
|
||||||
// BSD-style license that can be found in the LICENSE file.
|
// BSD-style license that can be found in the LICENSE file.
|
||||||
|
|
||||||
/// A lightweight html parser and DOM model.
|
/// A lightweight DOM model.
|
||||||
|
|
||||||
import 'dart:convert';
|
import 'dart:convert';
|
||||||
|
|
||||||
// ignore: implementation_imports
|
|
||||||
import 'package:analyzer/src/manifest/manifest_validator.dart';
|
|
||||||
|
|
||||||
const _htmlEscape = HtmlEscape(HtmlEscapeMode.element);
|
const _htmlEscape = HtmlEscape(HtmlEscapeMode.element);
|
||||||
|
|
||||||
abstract class Node {
|
abstract class Node {
|
||||||
|
@ -109,71 +106,3 @@ class Document extends Element {
|
||||||
return buf.toString();
|
return buf.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given HTML text, return a parsed HTML tree.
|
|
||||||
Document parse(String htmlContents, Uri uri) {
|
|
||||||
final RegExp commentRegex = RegExp(r'<!--[^>]+-->');
|
|
||||||
|
|
||||||
Element createElement(XmlElement xmlElement) {
|
|
||||||
// element
|
|
||||||
var element = Element.tag(xmlElement.name);
|
|
||||||
|
|
||||||
// attributes
|
|
||||||
for (var key in xmlElement.attributes.keys) {
|
|
||||||
element.attributes[key] = xmlElement.attributes[key]!.value;
|
|
||||||
}
|
|
||||||
|
|
||||||
// From the immediate children, determine where the text between the tags is
|
|
||||||
// report any such non empty text as Text nodes.
|
|
||||||
var text = xmlElement.sourceSpan?.text ?? '';
|
|
||||||
|
|
||||||
if (!text.endsWith('/>')) {
|
|
||||||
var indices = <int>[];
|
|
||||||
var offset = xmlElement.sourceSpan!.start.offset;
|
|
||||||
|
|
||||||
indices.add(text.indexOf('>') + 1);
|
|
||||||
for (var child in xmlElement.children) {
|
|
||||||
var childSpan = child.sourceSpan!;
|
|
||||||
indices.add(childSpan.start.offset - offset);
|
|
||||||
indices.add(childSpan.end.offset - offset);
|
|
||||||
}
|
|
||||||
indices.add(text.lastIndexOf('<'));
|
|
||||||
|
|
||||||
var textNodes = <Text>[];
|
|
||||||
for (var index = 0; index < indices.length; index += 2) {
|
|
||||||
var start = indices[index];
|
|
||||||
var end = indices[index + 1];
|
|
||||||
// Remove html comments (<!-- -->) from text.
|
|
||||||
textNodes.add(
|
|
||||||
Text(text.substring(start, end).replaceAll(commentRegex, '')),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
element.append(textNodes.removeAt(0));
|
|
||||||
|
|
||||||
for (var child in xmlElement.children) {
|
|
||||||
element.append(createElement(child));
|
|
||||||
element.append(textNodes.removeAt(0));
|
|
||||||
}
|
|
||||||
|
|
||||||
element.nodes.removeWhere((node) => node is Text && node.text.isEmpty);
|
|
||||||
}
|
|
||||||
|
|
||||||
return element;
|
|
||||||
}
|
|
||||||
|
|
||||||
var parser = ManifestParser.general(htmlContents, uri: uri);
|
|
||||||
var result = parser.parseXmlTag();
|
|
||||||
|
|
||||||
while (result.parseResult != ParseTagResult.eof.parseResult) {
|
|
||||||
if (result.element != null) {
|
|
||||||
var document = Document();
|
|
||||||
document.append(createElement(result.element!));
|
|
||||||
return document;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = parser.parseXmlTag();
|
|
||||||
}
|
|
||||||
|
|
||||||
throw 'parse error - element not found';
|
|
||||||
}
|
|
||||||
|
|
78
pkg/analyzer_utilities/lib/html_parser.dart
Normal file
78
pkg/analyzer_utilities/lib/html_parser.dart
Normal file
|
@ -0,0 +1,78 @@
|
||||||
|
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
|
||||||
|
// for details. All rights reserved. Use of this source code is governed by a
|
||||||
|
// BSD-style license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
/// A lightweight HTML parser.
|
||||||
|
|
||||||
|
// ignore: implementation_imports
|
||||||
|
import 'package:analyzer/src/manifest/manifest_validator.dart';
|
||||||
|
|
||||||
|
import 'html_dom.dart';
|
||||||
|
|
||||||
|
/// Parses [htmlContents] and returns a [Document] holding its first element.
///
/// The text is read with `ManifestParser.general`, to which [uri] is passed
/// through. Tag results are requested until one carries an element; that
/// element is converted, together with its descendants and the text found
/// between them, into DOM nodes and appended to a new [Document]. Nothing
/// after that first element is read.
///
/// Throws the string `'parse error - element not found'` if the end of the
/// input is reached before any element is found.
Document parse(String htmlContents, Uri uri) {
  // Matches one whole HTML comment. The lazy `.*?` combined with `dotAll`
  // lets a comment span several lines and contain `>` characters; the earlier
  // pattern, `<!--[^>]+-->`, left such comments (and the empty comment
  // `<!---->`) behind in the text nodes.
  final commentRegex = RegExp(r'<!--.*?-->', dotAll: true);

  // Converts [xmlElement] and everything below it into an [Element].
  Element createElement(XmlElement xmlElement) {
    final element = Element.tag(xmlElement.name);

    // Copy every attribute across, keeping only its string value.
    for (var key in xmlElement.attributes.keys) {
      element.attributes[key] = xmlElement.attributes[key]!.value;
    }

    final span = xmlElement.sourceSpan;
    if (span == null) {
      // Without the source text there is nothing to slice text nodes from, so
      // only the child elements are converted. (This case used to fail on a
      // null assertion even though the span was first read null-safely.)
      for (final child in xmlElement.children) {
        element.append(createElement(child));
      }
      return element;
    }

    // The source text of this element, from its opening tag to its end.
    final text = span.text;

    // Text ending in `/>` is treated as a self-closing tag with no content.
    if (text.endsWith('/>')) {
      return element;
    }

    // Child spans are absolute; subtracting this turns them into indices into
    // `text`.
    final offset = span.start.offset;

    // Appends the source text in [start, end), minus comments, as a Text node.
    void appendText(int start, int end) {
      element.append(
        Text(text.substring(start, end).replaceAll(commentRegex, '')),
      );
    }

    // From the immediate children, determine where the text between the tags
    // is, and emit it in document order: the text before each child, then the
    // child itself. `cursor` marks where the next run of text begins; it
    // starts just past the first `>` in the text, which is taken to be the end
    // of the opening tag.
    var cursor = text.indexOf('>') + 1;
    for (final child in xmlElement.children) {
      final childSpan = child.sourceSpan!;
      appendText(cursor, childSpan.start.offset - offset);
      element.append(createElement(child));
      cursor = childSpan.end.offset - offset;
    }

    // The final run of text stops at the last `<` in the text, which is taken
    // to be the start of the closing tag.
    appendText(cursor, text.lastIndexOf('<'));

    // Only non-empty text is reported as Text nodes.
    element.nodes.removeWhere((node) => node is Text && node.text.isEmpty);

    return element;
  }

  final parser = ManifestParser.general(htmlContents, uri: uri);

  // Keep requesting tag results until one carries an element or the parser
  // reports the end of the input.
  for (var result = parser.parseXmlTag();
      result.parseResult != ParseTagResult.eof.parseResult;
      result = parser.parseXmlTag()) {
    final xmlElement = result.element;
    if (xmlElement != null) {
      return Document()..append(createElement(xmlElement));
    }
  }

  // NOTE(review): a raw String is thrown here; it is left unchanged so that
  // the thrown value seen by existing callers stays the same.
  throw 'parse error - element not found';
}
|
Loading…
Reference in a new issue