diff --git a/pkg/analysis_server/tool/spec/from_html.dart b/pkg/analysis_server/tool/spec/from_html.dart
index 5dee0bf0d54..856d74b6058 100644
--- a/pkg/analysis_server/tool/spec/from_html.dart
+++ b/pkg/analysis_server/tool/spec/from_html.dart
@@ -7,6 +7,7 @@ import 'dart:io';
 
 import 'package:analyzer_utilities/html_dom.dart' as dom;
 import 'package:analyzer_utilities/html_generator.dart';
+import 'package:analyzer_utilities/html_parser.dart' as parser;
 import 'package:path/path.dart';
 
 import 'api.dart';
@@ -313,7 +314,7 @@ class ApiReader {
   Api readApi() {
     var file = File(filePath);
     var htmlContents = file.readAsStringSync();
-    var document = dom.parse(htmlContents, file.uri);
+    var document = parser.parse(htmlContents, file.uri);
     var htmlElement = document.children
         .singleWhere((element) => element.name.toLowerCase() == 'html');
     return apiFromHtml(htmlElement);
diff --git a/pkg/analyzer_plugin/tool/spec/from_html.dart b/pkg/analyzer_plugin/tool/spec/from_html.dart
index 11a80fe31c9..6e9bb32dd31 100644
--- a/pkg/analyzer_plugin/tool/spec/from_html.dart
+++ b/pkg/analyzer_plugin/tool/spec/from_html.dart
@@ -7,6 +7,7 @@ import 'dart:io';
 
 import 'package:analyzer_utilities/html_dom.dart' as dom;
 import 'package:analyzer_utilities/html_generator.dart';
+import 'package:analyzer_utilities/html_parser.dart' as parser;
 import 'package:path/path.dart';
 
 import 'api.dart';
@@ -292,7 +293,7 @@ class ApiReader {
   Api readApi() {
     var file = File(filePath);
     var htmlContents = file.readAsStringSync();
-    var document = dom.parse(htmlContents, file.uri);
+    var document = parser.parse(htmlContents, file.uri);
     var htmlElement = document.children
         .singleWhere((element) => element.name.toLowerCase() == 'html');
     return apiFromHtml(htmlElement);
diff --git a/pkg/analyzer_utilities/lib/html_dom.dart b/pkg/analyzer_utilities/lib/html_dom.dart
index f08d9864426..87cd410dcc4 100644
--- a/pkg/analyzer_utilities/lib/html_dom.dart
+++ b/pkg/analyzer_utilities/lib/html_dom.dart
@@ -2,13 +2,10 @@
 // for details. All rights reserved. Use of this source code is governed by a
 // BSD-style license that can be found in the LICENSE file.
 
-/// A lightweight html parser and DOM model.
+/// A lightweight DOM model.
 
 import 'dart:convert';
 
-// ignore: implementation_imports
-import 'package:analyzer/src/manifest/manifest_validator.dart';
-
 const _htmlEscape = HtmlEscape(HtmlEscapeMode.element);
 
 abstract class Node {
@@ -109,71 +106,3 @@ class Document extends Element {
     return buf.toString();
   }
 }
-
-/// Given HTML text, return a parsed HTML tree.
-Document parse(String htmlContents, Uri uri) {
-  final RegExp commentRegex = RegExp(r'<!--[^>]+-->');
-
-  Element createElement(XmlElement xmlElement) {
-    // element
-    var element = Element.tag(xmlElement.name);
-
-    // attributes
-    for (var key in xmlElement.attributes.keys) {
-      element.attributes[key] = xmlElement.attributes[key]!.value;
-    }
-
-    // From the immediate children, determine where the text between the tags is
-    // report any such non empty text as Text nodes.
-    var text = xmlElement.sourceSpan?.text ?? '';
-
-    if (!text.endsWith('/>')) {
-      var indices = <int>[];
-      var offset = xmlElement.sourceSpan!.start.offset;
-
-      indices.add(text.indexOf('>') + 1);
-      for (var child in xmlElement.children) {
-        var childSpan = child.sourceSpan!;
-        indices.add(childSpan.start.offset - offset);
-        indices.add(childSpan.end.offset - offset);
-      }
-      indices.add(text.lastIndexOf('<'));
-
-      var textNodes = <Text>[];
-      for (var index = 0; index < indices.length; index += 2) {
-        var start = indices[index];
-        var end = indices[index + 1];
-        // Remove html comments (<!-- -->) from text.
-        textNodes.add(
-          Text(text.substring(start, end).replaceAll(commentRegex, '')),
-        );
-      }
-
-      element.append(textNodes.removeAt(0));
-
-      for (var child in xmlElement.children) {
-        element.append(createElement(child));
-        element.append(textNodes.removeAt(0));
-      }
-
-      element.nodes.removeWhere((node) => node is Text && node.text.isEmpty);
-    }
-
-    return element;
-  }
-
-  var parser = ManifestParser.general(htmlContents, uri: uri);
-  var result = parser.parseXmlTag();
-
-  while (result.parseResult != ParseTagResult.eof.parseResult) {
-    if (result.element != null) {
-      var document = Document();
-      document.append(createElement(result.element!));
-      return document;
-    }
-
-    result = parser.parseXmlTag();
-  }
-
-  throw 'parse error - element not found';
-}
diff --git a/pkg/analyzer_utilities/lib/html_parser.dart b/pkg/analyzer_utilities/lib/html_parser.dart
new file mode 100644
index 00000000000..7847c3cfc88
--- /dev/null
+++ b/pkg/analyzer_utilities/lib/html_parser.dart
@@ -0,0 +1,78 @@
+// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
+// for details. All rights reserved. Use of this source code is governed by a
+// BSD-style license that can be found in the LICENSE file.
+
+/// A lightweight HTML parser.
+
+// ignore: implementation_imports
+import 'package:analyzer/src/manifest/manifest_validator.dart';
+
+import 'html_dom.dart';
+
+/// Given HTML text, return a parsed HTML tree.
+Document parse(String htmlContents, Uri uri) {
+  final RegExp commentRegex = RegExp(r'<!--[^>]+-->');
+
+  Element createElement(XmlElement xmlElement) {
+    // element
+    var element = Element.tag(xmlElement.name);
+
+    // attributes
+    for (var key in xmlElement.attributes.keys) {
+      element.attributes[key] = xmlElement.attributes[key]!.value;
+    }
+
+    // From the immediate children, determine where the text between the tags is
+    // report any such non empty text as Text nodes.
+    var text = xmlElement.sourceSpan?.text ?? '';
+
+    if (!text.endsWith('/>')) {
+      var indices = <int>[];
+      var offset = xmlElement.sourceSpan!.start.offset;
+
+      indices.add(text.indexOf('>') + 1);
+      for (var child in xmlElement.children) {
+        var childSpan = child.sourceSpan!;
+        indices.add(childSpan.start.offset - offset);
+        indices.add(childSpan.end.offset - offset);
+      }
+      indices.add(text.lastIndexOf('<'));
+
+      var textNodes = <Text>[];
+      for (var index = 0; index < indices.length; index += 2) {
+        var start = indices[index];
+        var end = indices[index + 1];
+        // Remove html comments (<!-- -->) from text.
+        textNodes.add(
+          Text(text.substring(start, end).replaceAll(commentRegex, '')),
+        );
+      }
+
+      element.append(textNodes.removeAt(0));
+
+      for (var child in xmlElement.children) {
+        element.append(createElement(child));
+        element.append(textNodes.removeAt(0));
+      }
+
+      element.nodes.removeWhere((node) => node is Text && node.text.isEmpty);
+    }
+
+    return element;
+  }
+
+  var parser = ManifestParser.general(htmlContents, uri: uri);
+  var result = parser.parseXmlTag();
+
+  while (result.parseResult != ParseTagResult.eof.parseResult) {
+    if (result.element != null) {
+      var document = Document();
+      document.append(createElement(result.element!));
+      return document;
+    }
+
+    result = parser.parseXmlTag();
+  }
+
+  throw 'parse error - element not found';
+}