html: improve parsing of tables

When foster parenting, merge adjacent text nodes.
Properly close table row at </tr> tag.

Pass tests1.dat, test 32:
<!-----><font><div>hello<table>excite!<b>me!<th><i>please!</tr><!--X-->

| <!-- - -->
| <html>
|   <head>
|   <body>
|     <font>
|       <div>
|         "helloexcite!"
|         <b>
|           "me!"
|         <table>
|           <tbody>
|             <tr>
|               <th>
|                 <i>
|                   "please!"
|             <!-- X -->

R=nigeltao
CC=golang-dev
https://golang.org/cl/5323048
This commit is contained in:
Andrew Balholm 2011-10-26 11:36:46 +11:00 committed by Nigel Tao
parent 7959aeb0f9
commit 6e318bda6c
2 changed files with 27 additions and 8 deletions

View file

@ -52,6 +52,11 @@ var (
tableScopeStopTags = []string{"html", "table"}
)
// stopTags for use in clearStackToContext.
var (
tableRowContextStopTags = []string{"tr", "html"}
)
// popUntil pops the stack of open elements at the highest element whose tag
// is in matchTags, provided there is no higher element in stopTags. It returns
// whether or not there was such an element. If there was not, popUntil leaves
@ -146,6 +151,11 @@ func (p *parser) fosterParent(n *Node) {
}
}
if i > 0 && parent.Child[i-1].Type == TextNode && n.Type == TextNode {
parent.Child[i-1].Data += n.Data
return
}
if i == len(parent.Child) {
parent.Add(n)
} else {
@ -749,11 +759,11 @@ func inTableIM(p *parser) (insertionMode, bool) {
case StartTagToken:
switch p.tok.Data {
case "tbody", "tfoot", "thead":
p.clearStackToTableContext()
p.clearStackToContext(tableScopeStopTags)
p.addElement(p.tok.Data, p.tok.Attr)
return inTableBodyIM, true
case "td", "th", "tr":
p.clearStackToTableContext()
p.clearStackToContext(tableScopeStopTags)
p.addElement("tbody", nil)
return inTableBodyIM, false
case "table":
@ -794,11 +804,15 @@ func inTableIM(p *parser) (insertionMode, bool) {
return useTheRulesFor(p, inTableIM, inBodyIM)
}
func (p *parser) clearStackToTableContext() {
// clearStackToContext pops elements off the stack of open elements
// until an element listed in stopTags is found.
func (p *parser) clearStackToContext(stopTags []string) {
for i := len(p.oe) - 1; i >= 0; i-- {
if x := p.oe[i].Data; x == "table" || x == "html" {
p.oe = p.oe[:i+1]
return
for _, tag := range stopTags {
if p.oe[i].Data == tag {
p.oe = p.oe[:i+1]
return
}
}
}
}
@ -877,7 +891,12 @@ func inRowIM(p *parser) (insertionMode, bool) {
case EndTagToken:
switch p.tok.Data {
case "tr":
// TODO.
if !p.elementInScope(tableScopeStopTags, "tr") {
return inRowIM, true
}
p.clearStackToContext(tableRowContextStopTags)
p.oe.pop()
return inTableBodyIM, true
case "table":
if p.popUntil(tableScopeStopTags, "tr") {
return inTableBodyIM, false

View file

@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
for i := 0; i < 32; i++ {
for i := 0; i < 33; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {