html: process </td> tags; foster parent at most one node per token

Correctly close table cell when </td> is read.

Because of reconstructing the active formatting elements, more than one
node may be created when reading a single token.
If both nodes are foster parented, they will be siblings, but the first
node should be the parent of the second.

Pass tests1.dat, test 77:
<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe

| <html>
|   <head>
|   <body>
|     <a>
|       href="blah"
|       "aba"
|       <a>
|         href="foo"
|         "br"
|       <a>
|         href="foo"
|         "x"
|       <table>
|         <tbody>
|           <tr>
|             <td>
|     <a>
|       href="foo"
|       "aoe"

R=nigeltao
CC=golang-dev
https://golang.org/cl/5305074
This commit is contained in:
Andrew Balholm 2011-11-01 11:42:54 +11:00 committed by Nigel Tao
parent cae23f036a
commit 9db3f78c39
2 changed files with 10 additions and 4 deletions

View file

@ -126,6 +126,7 @@ func (p *parser) addChild(n *Node) {
// fosterParent adds a child node according to the foster parenting rules.
// Section 11.2.5.3, "foster parenting".
func (p *parser) fosterParent(n *Node) {
p.fosterParenting = false
var table, parent *Node
var i int
for i = len(p.oe) - 1; i >= 0; i-- {
@ -986,7 +987,12 @@ func inCellIM(p *parser) (insertionMode, bool) {
case EndTagToken:
switch p.tok.Data {
case "td", "th":
// TODO.
if !p.popUntil(tableScopeStopTags, p.tok.Data) {
// Ignore the token.
return inCellIM, true
}
p.clearActiveFormattingElements()
return inRowIM, true
case "body", "caption", "col", "colgroup", "html":
// TODO.
case "table", "tbody", "tfoot", "thead", "tr":

View file

@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
for i := 0; i < 77; i++ {
for i := 0; i < 78; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {
@ -161,8 +161,8 @@ func TestParser(t *testing.T) {
continue
}
// Check that rendering and re-parsing results in an identical tree.
if filename == "tests1.dat" && i == 30 {
// Test 30 in tests1.dat is such messed-up markup that a correct parse
if filename == "tests1.dat" && (i == 30 || i == 77) {
// Some tests in tests1.dat have such messed-up markup that a correct parse
// results in a non-conforming tree (one <a> element nested inside another).
// Therefore when it is rendered and re-parsed, it isn't the same.
// So we skip rendering on that test.