gh-50002: xml.dom.minidom now preserves whitespaces in attributes (GH-107947)

Also, double quotes (") are now escaped only in attribute values.
This commit is contained in:
Serhiy Storchaka 2023-08-23 15:23:41 +03:00 committed by GitHub
parent 29bc6165ab
commit 154477be72
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 65 additions and 7 deletions

View file

@ -505,6 +505,46 @@ def testWriteXML(self):
dom.unlink()
self.confirm(str == domstr)
def test_toxml_quote_text(self):
dom = Document()
elem = dom.appendChild(dom.createElement('elem'))
elem.appendChild(dom.createTextNode('&<>"'))
cr = elem.appendChild(dom.createElement('cr'))
cr.appendChild(dom.createTextNode('\r'))
crlf = elem.appendChild(dom.createElement('crlf'))
crlf.appendChild(dom.createTextNode('\r\n'))
lflf = elem.appendChild(dom.createElement('lflf'))
lflf.appendChild(dom.createTextNode('\n\n'))
ws = elem.appendChild(dom.createElement('ws'))
ws.appendChild(dom.createTextNode('\t\n\r '))
domstr = dom.toxml()
dom.unlink()
self.assertEqual(domstr, '<?xml version="1.0" ?>'
'<elem>&amp;&lt;&gt;"'
'<cr>\r</cr>'
'<crlf>\r\n</crlf>'
'<lflf>\n\n</lflf>'
'<ws>\t\n\r </ws></elem>')
def test_toxml_quote_attrib(self):
dom = Document()
elem = dom.appendChild(dom.createElement('elem'))
elem.setAttribute("a", '&<>"')
elem.setAttribute("cr", "\r")
elem.setAttribute("lf", "\n")
elem.setAttribute("crlf", "\r\n")
elem.setAttribute("lflf", "\n\n")
elem.setAttribute("ws", "\t\n\r ")
domstr = dom.toxml()
dom.unlink()
self.assertEqual(domstr, '<?xml version="1.0" ?>'
'<elem a="&amp;&lt;&gt;&quot;" '
'cr="&#13;" '
'lf="&#10;" '
'crlf="&#13;&#10;" '
'lflf="&#10;&#10;" '
'ws="&#9;&#10;&#13; "/>')
def testAltNewline(self):
str = '<?xml version="1.0" ?>\n<a b="c"/>\n'
dom = parseString(str)

View file

@ -300,12 +300,28 @@ def _in_document(node):
node = node.parentNode
return False
def _write_data(writer, text, attr):
    """Write *text* to *writer* with XML special characters escaped.

    "&", "<" and ">" are always replaced by entity references.  When
    *attr* is true, *text* is treated as an attribute value: the double
    quote is escaped as well, and carriage return, line feed and tab are
    written as numeric character references.  When *attr* is false, the
    double quote and all whitespace are written unchanged.

    Nothing is written if *text* is empty or None.
    """
    if not text:
        return
    # See the comments in ElementTree.py for behavior and
    # implementation details.
    # "&" has to be handled first: every later replacement introduces
    # an ampersand that must not be escaped a second time.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    if attr:
        # The attribute value is written between double quotes, so a
        # literal quote would end it early.
        if '"' in text:
            text = text.replace('"', "&quot;")
        # Character references keep these characters intact in an
        # attribute value; written literally, an XML parser would
        # normalize them to spaces when reading the document back.
        if "\r" in text:
            text = text.replace("\r", "&#13;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        if "\t" in text:
            text = text.replace("\t", "&#9;")
    writer.write(text)
def _get_elements_by_tagName_helper(parent, name, rc):
for node in parent.childNodes:
@ -883,7 +899,7 @@ def writexml(self, writer, indent="", addindent="", newl=""):
for a_name in attrs.keys():
writer.write(" %s=\"" % a_name)
_write_data(writer, attrs[a_name].value)
_write_data(writer, attrs[a_name].value, True)
writer.write("\"")
if self.childNodes:
writer.write(">")
@ -1112,7 +1128,7 @@ def splitText(self, offset):
return newText
def writexml(self, writer, indent="", addindent="", newl=""):
    """Write this node's data to *writer* as escaped character data.

    The data is preceded by *indent* and followed by *newl*; the
    combined string is escaped as a whole.  *addindent* is accepted
    but not used here.
    """
    # attr=False: this is text content, not an attribute value, so
    # double quotes and whitespace are written unchanged.
    _write_data(writer, "%s%s%s" % (indent, self.data, newl), False)
# DOM Level 3 (WD 9 April 2002)

View file

@ -0,0 +1 @@
:mod:`xml.dom.minidom` now preserves whitespace in attribute values: tabs, line feeds and carriage returns are written as character references.

View file

@ -0,0 +1 @@
:mod:`xml.dom.minidom` now escapes ``"`` only in attribute values; in text nodes it is written unchanged.