From 91bb70c5c07c2e73535638df2baadb302d9d71d2 Mon Sep 17 00:00:00 2001
From: Skip Montanaro
Date: Wed, 28 Dec 2005 15:37:25 +0000
Subject: [PATCH] Fix for problem with Sniffer class. If your delimiter is
 whitespace and the last field was empty it would strip the delimiter and
 incorrectly guess that "" was the delimiter. Reported in c.l.py by Laurent
 Laporte. Will backport.

---
 Lib/csv.py           | 2 +-
 Lib/test/test_csv.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Lib/csv.py b/Lib/csv.py
index 14b4d17c234..75163800834 100644
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -271,7 +271,7 @@ def _guess_delimiter(self, data, delimiters):
                 for char in ascii:
                     metaFrequency = charFrequency.get(char, {})
                     # must count even if frequency is 0
-                    freq = line.strip().count(char)
+                    freq = line.count(char)
                     # value is the mode
                     metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
                     charFrequency[char] = metaFrequency
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index f1f183f3fb1..0ad77ef09c6 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -836,7 +836,6 @@ class TestSniffer(unittest.TestCase):
 'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
 'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
 """
-
     header = '''\
 "venue","city","state","date","performers"
 '''
@@ -852,6 +851,8 @@ class TestSniffer(unittest.TestCase):
 47483648;43.0;170;abc;def
 '''
 
+    sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
+
     def test_has_header(self):
         sniffer = csv.Sniffer()
         self.assertEqual(sniffer.has_header(self.sample1), False)
@@ -879,6 +880,8 @@ def test_delimiters(self):
         self.assertEqual(dialect.delimiter, "/")
         dialect = sniffer.sniff(self.sample4)
         self.assertEqual(dialect.delimiter, ";")
+        dialect = sniffer.sniff(self.sample5)
+        self.assertEqual(dialect.delimiter, "\t")
 
 if not hasattr(sys, "gettotalrefcount"):
     if test_support.verbose: print "*** skipping leakage tests ***"