html: allow whitespace text nodes in <head>

Pass tests1.dat, test 50:
<!DOCTYPE html><script> <!-- </script> --> </script> EOF

| <!DOCTYPE html>
| <html>
|   <head>
|     <script>
|       " <!-- "
|     " "
|   <body>
|     "-->  EOF"

Also pass tests through test 54:
<!DOCTYPE html><title>U-test</title><body><div><p>Test<u></p></div></body>

R=nigeltao
CC=golang-dev
https://golang.org/cl/5311066
This commit is contained in:
Andrew Balholm 2011-10-28 09:06:30 +11:00 committed by Nigel Tao
parent c92a499bc3
commit 053549ca1b
2 changed files with 17 additions and 3 deletions

View file

@ -7,6 +7,7 @@ package html
import (
"io"
"os"
"strings"
)
// A parser implements the HTML5 parsing algorithm:
@ -430,6 +431,8 @@ func beforeHeadIM(p *parser) (insertionMode, bool) {
return inHeadIM, !implied
}
const whitespace = " \t\r\n\f"
// Section 11.2.5.4.4.
func inHeadIM(p *parser) (insertionMode, bool) {
var (
@ -437,7 +440,18 @@ func inHeadIM(p *parser) (insertionMode, bool) {
implied bool
)
switch p.tok.Type {
case ErrorToken, TextToken:
case ErrorToken:
implied = true
case TextToken:
s := strings.TrimLeft(p.tok.Data, whitespace)
if len(s) < len(p.tok.Data) {
// Add the initial whitespace to the current node.
p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
if s == "" {
return inHeadIM, true
}
p.tok.Data = s
}
implied = true
case StartTagToken:
switch p.tok.Data {
@ -469,7 +483,7 @@ func inHeadIM(p *parser) (insertionMode, bool) {
}
return afterHeadIM, !implied
}
return inHeadIM, !implied
return inHeadIM, true
}
// Section 11.2.5.4.6.

View file

@ -132,7 +132,7 @@ func TestParser(t *testing.T) {
rc := make(chan io.Reader)
go readDat(filename, rc)
// TODO(nigeltao): Process all test cases, not just a subset.
for i := 0; i < 50; i++ {
for i := 0; i < 55; i++ {
// Parse the #data section.
b, err := ioutil.ReadAll(<-rc)
if err != nil {