# Source: github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/
#         grumpy-runtime-src/third_party/pypy/_csv.py

__doc__ = """CSV parsing and writing.

This module provides classes that assist in the reading and writing
of Comma Separated Value (CSV) files, and implements the interface
described by PEP 305.  Although many CSV files are simple to parse,
the format is not formally defined by a stable specification and
is subtle enough that parsing lines of a CSV file with something
like line.split(\",\") is bound to fail.  The module supports three
basic APIs: reading, writing, and registration of dialects.


DIALECT REGISTRATION:

Readers and writers support a dialect argument, which is a convenient
handle on a group of settings.  When the dialect argument is a string,
it identifies one of the dialects previously registered with the module.
If it is a class or instance, the attributes of the argument are used as
the settings for the reader or writer:

    class excel:
        delimiter = ','
        quotechar = '\"'
        escapechar = None
        doublequote = True
        skipinitialspace = False
        lineterminator = '\\r\\n'
        quoting = QUOTE_MINIMAL

SETTINGS:

    * quotechar - specifies a one-character string to use as the
        quoting character.  It defaults to '\"'.
    * delimiter - specifies a one-character string to use as the
        field separator.  It defaults to ','.
    * skipinitialspace - specifies how to interpret whitespace which
        immediately follows a delimiter.  It defaults to False, which
        means that whitespace immediately following a delimiter is part
        of the following field.
    * lineterminator -  specifies the character sequence which should
        terminate rows.
    * quoting - controls when quotes should be generated by the writer.
        It can take on any of the following module constants:

        csv.QUOTE_MINIMAL means only when required, for example, when a
            field contains either the quotechar or the delimiter
        csv.QUOTE_ALL means that quotes are always placed around fields.
        csv.QUOTE_NONNUMERIC means that quotes are always placed around
            fields which do not parse as integers or floating point
            numbers.
        csv.QUOTE_NONE means that quotes are never placed around fields.
    * escapechar - specifies a one-character string used to escape
        the delimiter when quoting is set to QUOTE_NONE.
    * doublequote - controls the handling of quotes inside fields.  When
        True, two consecutive quotes are interpreted as one during read,
        and when writing, each quote character embedded in the data is
        written as two quotes.
"""

__version__ = "1.0"

__all__ = [
    'Dialect', 'Error', 'QUOTE_ALL', 'QUOTE_MINIMAL', 'QUOTE_NONE',
    'QUOTE_NONNUMERIC', 'Reader', 'Writer', '__doc__', '__version__',
    '_call_dialect', '_dialects', '_field_limit', 'field_size_limit',
    'get_dialect', 'list_dialects', 'reader', 'register_dialect',
    'undefined', 'unregister_dialect', 'writer'
    ]

QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4)
_dialects = {}
_field_limit = 128 * 1024  # max parsed field size


class Error(Exception):
    """Raised for CSV-level failures (parse errors, unknown dialects, ...)."""
    pass


class Dialect(object):
    """CSV dialect

    The Dialect type records CSV parsing and generation options."""

    __slots__ = ["_delimiter", "_doublequote", "_escapechar",
                 "_lineterminator", "_quotechar", "_quoting",
                 "_skipinitialspace", "_strict"]

    def __new__(cls, dialect, **kwargs):
        """Build a dialect from a base dialect plus keyword overrides.

        `dialect` may be None, the name of a registered dialect, or any
        object whose attributes carry the settings.  Keyword arguments
        take precedence over attributes of `dialect`; anything still
        unspecified falls back to the built-in defaults.

        Raises TypeError for unknown keywords or invalid setting values,
        and Error (via get_dialect) for an unregistered dialect name.
        """
        for name in kwargs:
            if '_' + name not in Dialect.__slots__:
                raise TypeError("unexpected keyword argument '%s'" %
                                (name,))

        if dialect is not None:
            if isinstance(dialect, basestring):
                dialect = get_dialect(dialect)

            # Can we reuse this instance?
            if (isinstance(dialect, Dialect)
                    and all(value is None
                            for value in kwargs.itervalues())):
                return dialect

        self = object.__new__(cls)

        # The converters below read `name` from the enclosing scope; it is
        # the setting currently being processed by the copy loop further
        # down, which is what makes the error messages name the setting.
        def set_char(x):
            if x is None:
                return None
            if isinstance(x, str) and len(x) <= 1:
                return x
            raise TypeError("%r must be a 1-character string" % (name,))

        def set_str(x):
            if isinstance(x, str):
                return x
            raise TypeError("%r must be a string" % (name,))

        def set_quoting(x):
            if x in range(4):
                return x
            raise TypeError("bad 'quoting' value")

        # mapping by name: (default, converter)
        attributes = {"delimiter": (',', set_char),
                      "doublequote": (True, bool),
                      "escapechar": (None, set_char),
                      "lineterminator": ("\r\n", set_str),
                      "quotechar": ('"', set_char),
                      "quoting": (QUOTE_MINIMAL, set_quoting),
                      "skipinitialspace": (False, bool),
                      "strict": (False, bool),
                      }

        # Copy attributes.  __slots__ order matters: "_quotechar" comes
        # before "_quoting", so self.quotechar is already set when the
        # default for quoting is chosen.
        notset = object()
        for name in Dialect.__slots__:
            name = name[1:]
            value = notset
            if name in kwargs:
                value = kwargs[name]
            elif dialect is not None:
                value = getattr(dialect, name, notset)

            if value is notset:
                value = attributes[name][0]
                if name == 'quoting' and not self.quotechar:
                    # No quotechar available: default to no quoting.
                    value = QUOTE_NONE
            else:
                converter = attributes[name][1]
                if converter:
                    value = converter(value)

            # Stored through the instance __dict__ rather than setattr().
            # NOTE(review): presumably a workaround for the target
            # runtime; an interpreter that honours __slots__ gives the
            # instance no __dict__ - confirm before changing.
            self.__dict__['_' + name] = value

        if not self.delimiter:
            raise TypeError("delimiter must be set")

        if self.quoting != QUOTE_NONE and not self.quotechar:
            raise TypeError("quotechar must be set if quoting enabled")

        if not self.lineterminator:
            raise TypeError("lineterminator must be set")

        return self

    # Read-only views of the validated settings.
    delimiter = property(lambda self: self._delimiter)
    doublequote = property(lambda self: self._doublequote)
    escapechar = property(lambda self: self._escapechar)
    lineterminator = property(lambda self: self._lineterminator)
    quotechar = property(lambda self: self._quotechar)
    quoting = property(lambda self: self._quoting)
    skipinitialspace = property(lambda self: self._skipinitialspace)
    strict = property(lambda self: self._strict)


def _call_dialect(dialect_inst, kwargs):
    """Return a Dialect built from `dialect_inst` and a dict of overrides."""
    return Dialect(dialect_inst, **kwargs)


def register_dialect(name, dialect=None, **kwargs):
    """Create a mapping from a string name to a dialect class.
    dialect = csv.register_dialect(name, dialect)"""
    if not isinstance(name, basestring):
        raise TypeError("dialect name must be a string or unicode")

    dialect = _call_dialect(dialect, kwargs)
    _dialects[name] = dialect


def unregister_dialect(name):
    """Delete the name/dialect mapping associated with a string name.

    csv.unregister_dialect(name)

    Raises Error if no dialect is registered under `name`."""
    try:
        del _dialects[name]
    except KeyError:
        raise Error("unknown dialect")


def get_dialect(name):
    """Return the dialect instance associated with name.
    dialect = csv.get_dialect(name)

    Raises Error if no dialect is registered under `name`."""
    try:
        return _dialects[name]
    except KeyError:
        raise Error("unknown dialect")


def list_dialects():
    """Return a list of all known dialect names
    names = csv.list_dialects()"""
    return list(_dialects)


class Reader(object):
    """CSV reader

    Reader objects are responsible for reading and parsing tabular data
    in CSV format."""

    # Parser states.
    (START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
     IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
     EAT_CRNL) = range(8)

    def __init__(self, iterator, dialect=None, **kwargs):
        self.dialect = _call_dialect(dialect, kwargs)
        self.input_iter = iter(iterator)
        self.line_num = 0

        self._parse_reset()

    def _parse_reset(self):
        """Discard any partially parsed record and return to START_RECORD."""
        self.field = ''
        self.fields = []
        self.state = self.START_RECORD
        self.numeric_field = False

    def __iter__(self):
        return self

    def next(self):
        """Parse and return the next record as a list of fields.

        Input lines are consumed until the parser is back in
        START_RECORD, so a record may span several input lines.

        Raises StopIteration when the input is exhausted, and Error if
        the input ends while a field still holds data or a line contains
        a NUL character.
        """
        self._parse_reset()
        while True:
            try:
                line = next(self.input_iter)
            except StopIteration:
                # End of input OR exception
                if len(self.field) > 0:
                    raise Error("newline inside string")
                raise

            self.line_num += 1

            if '\0' in line:
                raise Error("line contains NULL byte")
            pos = 0
            while pos < len(line):
                pos = self._parse_process_char(line, pos)
            self._parse_eol()

            if self.state == self.START_RECORD:
                break

        fields = self.fields
        self.fields = []
        return fields

    def _parse_process_char(self, line, pos):
        """Advance the state machine over line[pos].

        Returns the position of the next character to process.  In
        IN_FIELD a run of ordinary characters is consumed in one call.
        """
        c = line[pos]
        if self.state == self.IN_FIELD:
            # in unquoted field
            pos2 = pos
            while True:
                if c in '\n\r':
                    # end of line - return [fields]
                    if pos2 > pos:
                        self._parse_add_char(line[pos:pos2])
                        pos = pos2
                    self._parse_save_field()
                    self.state = self.EAT_CRNL
                elif c == self.dialect.escapechar:
                    # possible escaped character
                    pos2 -= 1
                    self.state = self.ESCAPED_CHAR
                elif c == self.dialect.delimiter:
                    # save field - wait for new field
                    if pos2 > pos:
                        self._parse_add_char(line[pos:pos2])
                        pos = pos2
                    self._parse_save_field()
                    self.state = self.START_FIELD
                else:
                    # normal character - save in field
                    pos2 += 1
                    if pos2 < len(line):
                        c = line[pos2]
                        continue
                break
            if pos2 > pos:
                self._parse_add_char(line[pos:pos2])
                pos = pos2 - 1

        elif self.state == self.START_RECORD:
            if c in '\n\r':
                self.state = self.EAT_CRNL
            else:
                self.state = self.START_FIELD
                # restart process
                self._parse_process_char(line, pos)

        elif self.state == self.START_FIELD:
            if c in '\n\r':
                # save empty field - return [fields]
                self._parse_save_field()
                self.state = self.EAT_CRNL
            elif (c == self.dialect.quotechar
                  and self.dialect.quoting != QUOTE_NONE):
                # start quoted field
                self.state = self.IN_QUOTED_FIELD
            elif c == self.dialect.escapechar:
                # possible escaped character
                self.state = self.ESCAPED_CHAR
            elif c == ' ' and self.dialect.skipinitialspace:
                # ignore space at start of field
                pass
            elif c == self.dialect.delimiter:
                # save empty field
                self._parse_save_field()
            else:
                # begin new unquoted field
                if self.dialect.quoting == QUOTE_NONNUMERIC:
                    self.numeric_field = True
                self._parse_add_char(c)
                self.state = self.IN_FIELD

        elif self.state == self.ESCAPED_CHAR:
            self._parse_add_char(c)
            self.state = self.IN_FIELD

        elif self.state == self.IN_QUOTED_FIELD:
            if c == self.dialect.escapechar:
                # possible escape character
                self.state = self.ESCAPE_IN_QUOTED_FIELD
            elif (c == self.dialect.quotechar
                  and self.dialect.quoting != QUOTE_NONE):
                if self.dialect.doublequote:
                    # doublequote; " represented by ""
                    self.state = self.QUOTE_IN_QUOTED_FIELD
                else:
                    # end of quote part of field
                    self.state = self.IN_FIELD
            else:
                # normal character - save in field
                self._parse_add_char(c)

        elif self.state == self.ESCAPE_IN_QUOTED_FIELD:
            self._parse_add_char(c)
            self.state = self.IN_QUOTED_FIELD

        elif self.state == self.QUOTE_IN_QUOTED_FIELD:
            # doublequote - seen a quote in a quoted field
            if (c == self.dialect.quotechar
                    and self.dialect.quoting != QUOTE_NONE):
                # save "" as "
                self._parse_add_char(c)
                self.state = self.IN_QUOTED_FIELD
            elif c == self.dialect.delimiter:
                # save field - wait for new field
                self._parse_save_field()
                self.state = self.START_FIELD
            elif c in '\r\n':
                # end of line - return [fields]
                self._parse_save_field()
                self.state = self.EAT_CRNL
            elif not self.dialect.strict:
                self._parse_add_char(c)
                self.state = self.IN_FIELD
            else:
                raise Error("'%c' expected after '%c'" %
                            (self.dialect.delimiter, self.dialect.quotechar))

        elif self.state == self.EAT_CRNL:
            if c not in '\r\n':
                raise Error("new-line character seen in unquoted field - "
                            "do you need to open the file "
                            "in universal-newline mode?")

        else:
            raise RuntimeError("unknown state: %r" % (self.state,))

        return pos + 1

    def _parse_eol(self):
        """Update the state machine at the end of an input line."""
        if self.state == self.EAT_CRNL:
            self.state = self.START_RECORD
        elif self.state == self.START_RECORD:
            # empty line - return []
            pass
        elif self.state == self.IN_FIELD:
            # in unquoted field
            # end of line - return [fields]
            self._parse_save_field()
            self.state = self.START_RECORD
        elif self.state == self.START_FIELD:
            # save empty field - return [fields]
            self._parse_save_field()
            self.state = self.START_RECORD
        elif self.state == self.ESCAPED_CHAR:
            self._parse_add_char('\n')
            self.state = self.IN_FIELD
        elif self.state == self.IN_QUOTED_FIELD:
            # record continues on the next input line
            pass
        elif self.state == self.ESCAPE_IN_QUOTED_FIELD:
            self._parse_add_char('\n')
            self.state = self.IN_QUOTED_FIELD
        elif self.state == self.QUOTE_IN_QUOTED_FIELD:
            # end of line - return [fields]
            self._parse_save_field()
            self.state = self.START_RECORD
        else:
            raise RuntimeError("unknown state: %r" % (self.state,))

    def _parse_save_field(self):
        """Append the pending field to the record and clear the buffer."""
        field, self.field = self.field, ''
        if self.numeric_field:
            # Unquoted field read under QUOTE_NONNUMERIC: convert to float.
            self.numeric_field = False
            field = float(field)
        self.fields.append(field)

    def _parse_add_char(self, c):
        """Append `c` (one or more characters) to the pending field.

        Raises Error if the field would exceed the module field limit.
        """
        if len(self.field) + len(c) > _field_limit:
            raise Error("field larger than field limit (%d)" % (_field_limit))
        self.field += c


class Writer(object):
    """CSV writer

    Writer objects are responsible for generating tabular data
    in CSV format from sequence input."""

    def __init__(self, file, dialect=None, **kwargs):
        if not (hasattr(file, 'write') and callable(file.write)):
            raise TypeError("argument 1 must have a 'write' method")
        self.writeline = file.write
        self.dialect = _call_dialect(dialect, kwargs)

    def _join_reset(self):
        """Start a new output record."""
        self.rec = []
        self.num_fields = 0

    def _join_append(self, field, quoted, quote_empty):
        """Append one already-stringified field to the current record.

        `quoted` forces quoting of the field; `quote_empty` requests
        quoting when the field is the empty string.

        Raises Error when a character must be escaped but the dialect has
        no escapechar, or when an empty field must be quoted under
        QUOTE_NONE.
        """
        dialect = self.dialect
        # If this is not the first field we need a field separator
        if self.num_fields > 0:
            self.rec.append(dialect.delimiter)

        if dialect.quoting == QUOTE_NONE:
            need_escape = tuple(dialect.lineterminator) + (
                dialect.escapechar, dialect.delimiter, dialect.quotechar)
        else:
            for c in tuple(dialect.lineterminator) + (
                    dialect.delimiter, dialect.escapechar):
                if c and c in field:
                    quoted = True

            need_escape = ()
            if dialect.quotechar in field:
                if dialect.doublequote:
                    field = field.replace(dialect.quotechar,
                                          dialect.quotechar * 2)
                    quoted = True
                else:
                    need_escape = (dialect.quotechar,)

        # Escape in a single pass over the field so that every special
        # character receives exactly one escapechar prefix.  Replacing one
        # special character at a time would re-escape the escape
        # characters inserted by an earlier replacement.
        special = [c for c in need_escape if c]
        if any(ch in special for ch in field):
            if not dialect.escapechar:
                raise Error("need to escape, but no escapechar set")
            escapechar = dialect.escapechar
            field = ''.join(
                escapechar + ch if ch in special else ch for ch in field)

        # If field is empty check if it needs to be quoted
        if field == '' and quote_empty:
            if dialect.quoting == QUOTE_NONE:
                raise Error("single empty field record must be quoted")
            quoted = True

        if quoted:
            field = dialect.quotechar + field + dialect.quotechar

        self.rec.append(field)
        self.num_fields += 1

    def writerow(self, row):
        """Format `row` (a sized iterable of fields) and write it out.

        None is written as an empty field, floats via repr(), and all
        other values via str().  Raises Error if `row` has no len().
        """
        dialect = self.dialect
        try:
            rowlen = len(row)
        except TypeError:
            raise Error("sequence expected")

        # join all fields in internal buffer
        self._join_reset()

        for field in row:
            quoted = False
            if dialect.quoting == QUOTE_NONNUMERIC:
                # Anything float() rejects counts as non-numeric.
                try:
                    float(field)
                except Exception:
                    quoted = True
                # This changed since 2.5:
                # quoted = not isinstance(field, (int, long, float))
            elif dialect.quoting == QUOTE_ALL:
                quoted = True

            if field is None:
                value = ""
            elif isinstance(field, float):
                value = repr(field)
            else:
                value = str(field)
            # A lone empty field must be quoted to be distinguishable
            # from an empty record.
            self._join_append(value, quoted, rowlen == 1)

        # add line terminator
        self.rec.append(dialect.lineterminator)

        self.writeline(''.join(self.rec))

    def writerows(self, rows):
        """Write every row of `rows` with writerow()."""
        for row in rows:
            self.writerow(row)


def reader(*args, **kwargs):
    """
    csv_reader = reader(iterable [, dialect='excel']
                       [optional keyword args])
    for row in csv_reader:
        process(row)

    The "iterable" argument can be any object that returns a line
    of input for each iteration, such as a file object or a list.  The
    optional \"dialect\" parameter is discussed below.  The function
    also accepts optional keyword arguments which override settings
    provided by the dialect.

    The returned object is an iterator.  Each iteration returns a row
    of the CSV file (which can span multiple input lines)"""

    return Reader(*args, **kwargs)


def writer(*args, **kwargs):
    """
    csv_writer = csv.writer(fileobj [, dialect='excel']
                            [optional keyword args])
    for row in sequence:
        csv_writer.writerow(row)

    [or]

    csv_writer = csv.writer(fileobj [, dialect='excel']
                            [optional keyword args])
    csv_writer.writerows(rows)

    The \"fileobj\" argument can be any object that supports the file API."""
    return Writer(*args, **kwargs)


# Sentinel meaning "no limit argument was passed" to field_size_limit().
undefined = object()


def field_size_limit(limit=undefined):
    """Sets an upper limit on parsed fields.
    csv.field_size_limit([limit])

    Returns old limit. If limit is not given, no new limit is set and
    the old limit is returned"""

    global _field_limit
    old_limit = _field_limit

    if limit is not undefined:
        if not isinstance(limit, (int, long)):
            raise TypeError("int expected, got %s" %
                            (limit.__class__.__name__,))
        _field_limit = limit

    return old_limit