SF #965425: fix so hyphenated words surrounded by punctuation are

wrapped correctly.
This commit is contained in:
Greg Ward 2004-06-03 01:59:41 +00:00
parent 29eb8c315d
commit 6186410db0
2 changed files with 21 additions and 4 deletions

View file

@ -1,5 +1,5 @@
#
# Test script for the textwrap module.
# Test suite for the textwrap module.
#
# Original tests written by Greg Ward <gward@python.net>.
# Converted to PyUnit by Peter Hansen <peter@engcorp.com>.
@ -271,6 +271,23 @@ def test_funky_hyphens (self):
self.check_split("foo --option-opt bar",
["foo", " ", "--option-", "opt", " ", "bar"])
def test_punct_hyphens(self):
    # Regression test for SF #965425 (originally reported against Optik
    # as bug #813077).  A hyphenated word wrapped in single quotes was
    # split incorrectly; the underlying problem was that *any*
    # punctuation around a hyphenated word broke the splitting, apart
    # from a leading "--", which had been special-cased for Optik and
    # Docutils.  Each case below surrounds the same hyphenated word
    # with a different style of punctuation and expects the opening
    # punctuation to stay with the first half and the closing
    # punctuation with the second half.
    cases = [
        ("the 'wibble-wobble' widget",
         ['the', ' ', "'wibble-", "wobble'", ' ', 'widget']),
        ('the "wibble-wobble" widget',
         ['the', ' ', '"wibble-', 'wobble"', ' ', 'widget']),
        ("the (wibble-wobble) widget",
         ['the', ' ', "(wibble-", "wobble)", ' ', 'widget']),
        ("the ['wibble-wobble'] widget",
         ['the', ' ', "['wibble-", "wobble']", ' ', 'widget']),
    ]
    for text, expected_chunks in cases:
        self.check_split(text, expected_chunks)
def test_funky_parens (self):
# Second part of SF bug #596434: long option strings inside
# parentheses.

View file

@ -79,11 +79,11 @@ class TextWrapper:
# Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
# (after stripping out empty strings).
# Pattern used to split text into wrappable chunks.  It has a single
# capturing group, so re.split() returns the separators as well as the
# text between them.  The three alternatives are:
#   * a run of whitespace;
#   * the front half of a hyphenated word, up to and including the
#     hyphen, together with any punctuation immediately before it
#     (quotes, brackets, a leading "--", ...);
#   * an em-dash ("--" or longer) directly between two words.
# The punctuation-aware hyphen alternative also covers the older
# leading-hyphens-only form (r'-*\w{2,}-(?=\w{2,})'), because a run of
# hyphens is itself a run of non-space, non-word characters; keeping
# both would be redundant.
wordsep_re = re.compile(r'(\s+|'                  # any whitespace
                        r'[^\s\w]*\w{2,}-(?=\w{2,})|'  # hyphenated words
                        r'(?<=[\w\!\"\'\&\.\,\?])-{2,}(?=\w))')   # em-dash
# XXX will there be a locale-or-charset-aware version of
# string.lowercase in 2.3?
# XXX this is not locale- or charset-aware -- string.lowercase
# is US-ASCII only (and therefore English-only)
sentence_end_re = re.compile(r'[%s]' # lowercase letter
r'[\.\!\?]' # sentence-ending punct.
r'[\"\']?' # optional end-of-quote