Issue #29405: Make total calculation in _guess_delimiter more accurate.

This commit is contained in:
Xiang Zhang 2017-02-06 10:50:09 +08:00
parent 97353845f8
commit 6aee6fbce8

View file

@@ -307,7 +307,7 @@ def _guess_delimiter(self, data, delimiters):
charFrequency = {}
modes = {}
delims = {}
-start, end = 0, min(chunkLength, len(data))
+start, end = 0, chunkLength
while start < len(data):
iteration += 1
for line in data[start:end]:
@@ -336,7 +336,7 @@ def _guess_delimiter(self, data, delimiters):
# build a list of possible delimiters
modeList = modes.items()
-total = float(chunkLength * iteration)
+total = float(min(chunkLength * iteration, len(data)))
# (rows of consistent data) / (number of rows) = 100%
consistency = 1.0
# minimum consistency threshold