commit of r41849 - in python/trunk/Lib: csv.py test/test_csv.py
![](https://secure.gravatar.com/avatar/8ac615df352a970211b0e3d94a307c6d.jpg?s=120&d=mm&r=g)
Author: skip.montanaro Date: Fri Dec 30 06:09:48 2005 New Revision: 41849 Modified: python/trunk/Lib/csv.py python/trunk/Lib/test/test_csv.py Log: Fix a delimiter detection problem in sniffer. Sniffing "a|b|c\r\n" was returning 'a' as the delimiter. It now returns '|', but not because I understood better what the code was supposed to do. Would someone who understands the idea behind _guess_delimiter() (see its docstring) look to see if my fallback choice is better than before or if it's just serendipity that I picked the proper delimiter? Modified: python/trunk/Lib/csv.py ============================================================================== --- python/trunk/Lib/csv.py (original) +++ python/trunk/Lib/csv.py Fri Dec 30 06:09:48 2005 @@ -152,10 +152,13 @@ quotechar, delimiter, skipinitialspace = \ self._guess_quote_and_delimiter(sample, delimiters) - if delimiter is None: + if not delimiter: delimiter, skipinitialspace = self._guess_delimiter(sample, delimiters) + if not delimiter: + raise Error, "Could not determine delimiter" + class dialect(Dialect): _name = "sniffed" lineterminator = '\r\n' @@ -329,8 +332,12 @@ data[0].count("%c " % d)) return (d, skipinitialspace) - # finally, just return the first damn character in the list - delim = delims.keys()[0] + # nothing else indicates a preference, pick the character that + # dominates(?) 
+ items = [(v,k) for (k,v) in delims.items()] + items.sort() + delim = items[-1][1] + skipinitialspace = (data[0].count(delim) == data[0].count("%c " % delim)) return (delim, skipinitialspace) Modified: python/trunk/Lib/test/test_csv.py ============================================================================== --- python/trunk/Lib/test/test_csv.py (original) +++ python/trunk/Lib/test/test_csv.py Fri Dec 30 06:09:48 2005 @@ -852,6 +852,8 @@ ''' sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n" + sample6 = "a|b|c\r\nd|e|f\r\n" + sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n" def test_has_header(self): sniffer = csv.Sniffer() @@ -882,6 +884,11 @@ self.assertEqual(dialect.delimiter, ";") dialect = sniffer.sniff(self.sample5) self.assertEqual(dialect.delimiter, "\t") + dialect = sniffer.sniff(self.sample6) + self.assertEqual(dialect.delimiter, "|") + dialect = sniffer.sniff(self.sample7) + self.assertEqual(dialect.delimiter, "|") + self.assertEqual(dialect.quotechar, "'") if not hasattr(sys, "gettotalrefcount"): if test_support.verbose: print "*** skipping leakage tests ***"
participants (1)
- skip.montanaro