github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/pypy/_csv.py (about)

     1  __doc__ = """CSV parsing and writing.
     2  
     3  This module provides classes that assist in the reading and writing
     4  of Comma Separated Value (CSV) files, and implements the interface
     5  described by PEP 305.  Although many CSV files are simple to parse,
     6  the format is not formally defined by a stable specification and
     7  is subtle enough that parsing lines of a CSV file with something
     8  like line.split(\",\") is bound to fail.  The module supports three
     9  basic APIs: reading, writing, and registration of dialects.
    10  
    11  
    12  DIALECT REGISTRATION:
    13  
    14  Readers and writers support a dialect argument, which is a convenient
    15  handle on a group of settings.  When the dialect argument is a string,
    16  it identifies one of the dialects previously registered with the module.
    17  If it is a class or instance, the attributes of the argument are used as
    18  the settings for the reader or writer:
    19  
    20      class excel:
    21          delimiter = ','
    22          quotechar = '\"'
    23          escapechar = None
    24          doublequote = True
    25          skipinitialspace = False
    26          lineterminator = '\\r\\n'
    27          quoting = QUOTE_MINIMAL
    28  
    29  SETTINGS:
    30  
    31      * quotechar - specifies a one-character string to use as the
    32          quoting character.  It defaults to '\"'.
    33      * delimiter - specifies a one-character string to use as the
    34          field separator.  It defaults to ','.
    35      * skipinitialspace - specifies how to interpret whitespace which
    36          immediately follows a delimiter.  It defaults to False, which
    37          means that whitespace immediately following a delimiter is part
    38          of the following field.
    39      * lineterminator -  specifies the character sequence which should
    40          terminate rows.
    41      * quoting - controls when quotes should be generated by the writer.
    42          It can take on any of the following module constants:
    43  
    44          csv.QUOTE_MINIMAL means only when required, for example, when a
    45              field contains either the quotechar or the delimiter
    46          csv.QUOTE_ALL means that quotes are always placed around fields.
    47          csv.QUOTE_NONNUMERIC means that quotes are always placed around
    48              fields which do not parse as integers or floating point
    49              numbers.
    50          csv.QUOTE_NONE means that quotes are never placed around fields.
    51      * escapechar - specifies a one-character string used to escape
    52          the delimiter when quoting is set to QUOTE_NONE.
    53      * doublequote - controls the handling of quotes inside fields.  When
    54          True, two consecutive quotes are interpreted as one during read,
    55          and when writing, each quote character embedded in the data is
    56          written as two quotes.
    57  """
    58  
    59  __version__ = "1.0"
    60  
    61  __all__ = [
    62      'Dialect', 'Error', 'QUOTE_ALL', 'QUOTE_MINIMAL', 'QUOTE_NONE',
    63      'QUOTE_NONNUMERIC', 'Reader', 'Writer', '__doc__', '__version__',
    64      '_call_dialect', '_dialects', '_field_limit', 'field_size_limit',
    65      'get_dialect', 'list_dialects', 'reader', 'register_dialect',
    66      'undefined', 'unregister_dialect', 'writer'
    67  ]
    68  
    69  QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4)
    70  _dialects = {}
    71  _field_limit = 128 * 1024 # max parsed field size
    72  
    73  class Error(Exception):
    74      pass
    75  
    76  class Dialect(object):
    77      """CSV dialect
    78  
    79      The Dialect type records CSV parsing and generation options."""
    80  
    81      __slots__ = ["_delimiter", "_doublequote", "_escapechar",
    82                   "_lineterminator", "_quotechar", "_quoting",
    83                   "_skipinitialspace", "_strict"]
    84  
    85      def __new__(cls, dialect, **kwargs):
    86  
    87          for name in kwargs:
    88              if '_' + name not in Dialect.__slots__:
    89                  raise TypeError("unexpected keyword argument '%s'" %
    90                                  (name,))
    91  
    92          if dialect is not None:
    93              if isinstance(dialect, basestring):
    94                  dialect = get_dialect(dialect)
    95  
    96              # Can we reuse this instance?
    97              if (isinstance(dialect, Dialect)
    98                  and all(value is None for value in kwargs.itervalues())):
    99                  return dialect
   100  
   101          self = object.__new__(cls)
   102  
   103  
   104          def set_char(x):
   105              if x is None:
   106                  return None
   107              if isinstance(x, str) and len(x) <= 1:
   108                  return x
   109              raise TypeError("%r must be a 1-character string" % (name,))
   110          def set_str(x):
   111              if isinstance(x, str):
   112                  return x
   113              raise TypeError("%r must be a string" % (name,))
   114          def set_quoting(x):
   115              if x in range(4):
   116                  return x
   117              raise TypeError("bad 'quoting' value")
   118  
   119          attributes = {"delimiter": (',', set_char),
   120                        "doublequote": (True, bool),
   121                        "escapechar": (None, set_char),
   122                        "lineterminator": ("\r\n", set_str),
   123                        "quotechar": ('"', set_char),
   124                        "quoting": (QUOTE_MINIMAL, set_quoting),
   125                        "skipinitialspace": (False, bool),
   126                        "strict": (False, bool),
   127                        }
   128  
   129          # Copy attributes
   130          notset = object()
   131          for name in Dialect.__slots__:
   132              name = name[1:]
   133              value = notset
   134              if name in kwargs:
   135                  value = kwargs[name]
   136              elif dialect is not None:
   137                  value = getattr(dialect, name, notset)
   138  
   139              # mapping by name: (default, converter)
   140              if value is notset:
   141                  value = attributes[name][0]
   142                  if name == 'quoting' and not self.quotechar:
   143                      value = QUOTE_NONE
   144              else:
   145                  converter = attributes[name][1]
   146                  if converter:
   147                      value = converter(value)
   148  
   149              # setattr(self, '_' + name, value)
   150              self.__dict__['_' + name] = value
   151  
   152          if not self.delimiter:
   153              raise TypeError("delimiter must be set")
   154  
   155          if self.quoting != QUOTE_NONE and not self.quotechar:
   156              raise TypeError("quotechar must be set if quoting enabled")
   157  
   158          if not self.lineterminator:
   159              raise TypeError("lineterminator must be set")
   160  
   161          return self
   162  
   163      delimiter        = property(lambda self: self._delimiter)
   164      doublequote      = property(lambda self: self._doublequote)
   165      escapechar       = property(lambda self: self._escapechar)
   166      lineterminator   = property(lambda self: self._lineterminator)
   167      quotechar        = property(lambda self: self._quotechar)
   168      quoting          = property(lambda self: self._quoting)
   169      skipinitialspace = property(lambda self: self._skipinitialspace)
   170      strict           = property(lambda self: self._strict)
   171  
   172  
   173  def _call_dialect(dialect_inst, kwargs):
   174      return Dialect(dialect_inst, **kwargs)
   175  
   176  def register_dialect(name, dialect=None, **kwargs):
   177      """Create a mapping from a string name to a dialect class.
   178      dialect = csv.register_dialect(name, dialect)"""
   179      if not isinstance(name, basestring):
   180          raise TypeError("dialect name must be a string or unicode")
   181  
   182      dialect = _call_dialect(dialect, kwargs)
   183      _dialects[name] = dialect
   184  
   185  def unregister_dialect(name):
   186      """Delete the name/dialect mapping associated with a string name.\n
   187      csv.unregister_dialect(name)"""
   188      try:
   189          del _dialects[name]
   190      except KeyError:
   191          raise Error("unknown dialect")
   192  
   193  def get_dialect(name):
   194      """Return the dialect instance associated with name.
   195      dialect = csv.get_dialect(name)"""
   196      try:
   197          return _dialects[name]
   198      except KeyError:
   199          raise Error("unknown dialect")
   200  
   201  def list_dialects():
   202      """Return a list of all know dialect names
   203      names = csv.list_dialects()"""
   204      return list(_dialects)
   205  
   206  class Reader(object):
   207      """CSV reader
   208  
   209      Reader objects are responsible for reading and parsing tabular data
   210      in CSV format."""
   211  
   212  
   213      (START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
   214       IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
   215       EAT_CRNL) = range(8)
   216  
   217      def __init__(self, iterator, dialect=None, **kwargs):
   218          self.dialect = _call_dialect(dialect, kwargs)
   219          self.input_iter = iter(iterator)
   220          self.line_num = 0
   221  
   222          self._parse_reset()
   223  
   224      def _parse_reset(self):
   225          self.field = ''
   226          self.fields = []
   227          self.state = self.START_RECORD
   228          self.numeric_field = False
   229  
   230      def __iter__(self):
   231          return self
   232  
   233      def next(self):
   234          self._parse_reset()
   235          while True:
   236              try:
   237                  line = next(self.input_iter)
   238              except StopIteration:
   239                  # End of input OR exception
   240                  if len(self.field) > 0:
   241                      raise Error("newline inside string")
   242                  raise
   243  
   244              self.line_num += 1
   245  
   246              if '\0' in line:
   247                  raise Error("line contains NULL byte")
   248              pos = 0
   249              while pos < len(line):
   250                  pos = self._parse_process_char(line, pos)
   251              self._parse_eol()
   252  
   253              if self.state == self.START_RECORD:
   254                  break
   255  
   256          fields = self.fields
   257          self.fields = []
   258          return fields
   259  
   260      def _parse_process_char(self, line, pos):
   261          c = line[pos]
   262          if self.state == self.IN_FIELD:
   263              # in unquoted field
   264              pos2 = pos
   265              while True:
   266                  if c in '\n\r':
   267                      # end of line - return [fields]
   268                      if pos2 > pos:
   269                          self._parse_add_char(line[pos:pos2])
   270                          pos = pos2
   271                      self._parse_save_field()
   272                      self.state = self.EAT_CRNL
   273                  elif c == self.dialect.escapechar:
   274                      # possible escaped character
   275                      pos2 -= 1
   276                      self.state = self.ESCAPED_CHAR
   277                  elif c == self.dialect.delimiter:
   278                      # save field - wait for new field
   279                      if pos2 > pos:
   280                          self._parse_add_char(line[pos:pos2])
   281                          pos = pos2
   282                      self._parse_save_field()
   283                      self.state = self.START_FIELD
   284                  else:
   285                      # normal character - save in field
   286                      pos2 += 1
   287                      if pos2 < len(line):
   288                          c = line[pos2]
   289                          continue
   290                  break
   291              if pos2 > pos:
   292                  self._parse_add_char(line[pos:pos2])
   293                  pos = pos2 - 1
   294  
   295          elif self.state == self.START_RECORD:
   296              if c in '\n\r':
   297                  self.state = self.EAT_CRNL
   298              else:
   299                  self.state = self.START_FIELD
   300                  # restart process
   301                  self._parse_process_char(line, pos)
   302  
   303          elif self.state == self.START_FIELD:
   304              if c in '\n\r':
   305                  # save empty field - return [fields]
   306                  self._parse_save_field()
   307                  self.state = self.EAT_CRNL
   308              elif (c == self.dialect.quotechar
   309                    and self.dialect.quoting != QUOTE_NONE):
   310                  # start quoted field
   311                  self.state = self.IN_QUOTED_FIELD
   312              elif c == self.dialect.escapechar:
   313                  # possible escaped character
   314                  self.state = self.ESCAPED_CHAR
   315              elif c == ' ' and self.dialect.skipinitialspace:
   316                  # ignore space at start of field
   317                  pass
   318              elif c == self.dialect.delimiter:
   319                  # save empty field
   320                  self._parse_save_field()
   321              else:
   322                  # begin new unquoted field
   323                  if self.dialect.quoting == QUOTE_NONNUMERIC:
   324                      self.numeric_field = True
   325                  self._parse_add_char(c)
   326                  self.state = self.IN_FIELD
   327  
   328          elif self.state == self.ESCAPED_CHAR:
   329              self._parse_add_char(c)
   330              self.state = self.IN_FIELD
   331  
   332          elif self.state == self.IN_QUOTED_FIELD:
   333              if c == self.dialect.escapechar:
   334                  # possible escape character
   335                  self.state = self.ESCAPE_IN_QUOTED_FIELD
   336              elif (c == self.dialect.quotechar
   337                    and self.dialect.quoting != QUOTE_NONE):
   338                  if self.dialect.doublequote:
   339                      # doublequote; " represented by ""
   340                      self.state = self.QUOTE_IN_QUOTED_FIELD
   341                  else:
   342                      #end of quote part of field
   343                      self.state = self.IN_FIELD
   344              else:
   345                  # normal character - save in field
   346                  self._parse_add_char(c)
   347  
   348          elif self.state == self.ESCAPE_IN_QUOTED_FIELD:
   349              self._parse_add_char(c)
   350              self.state = self.IN_QUOTED_FIELD
   351  
   352          elif self.state == self.QUOTE_IN_QUOTED_FIELD:
   353              # doublequote - seen a quote in a quoted field
   354              if (c == self.dialect.quotechar
   355                  and self.dialect.quoting != QUOTE_NONE):
   356                  # save "" as "
   357                  self._parse_add_char(c)
   358                  self.state = self.IN_QUOTED_FIELD
   359              elif c == self.dialect.delimiter:
   360                  # save field - wait for new field
   361                  self._parse_save_field()
   362                  self.state = self.START_FIELD
   363              elif c in '\r\n':
   364                  # end of line - return [fields]
   365                  self._parse_save_field()
   366                  self.state = self.EAT_CRNL
   367              elif not self.dialect.strict:
   368                  self._parse_add_char(c)
   369                  self.state = self.IN_FIELD
   370              else:
   371                  raise Error("'%c' expected after '%c'" %
   372                              (self.dialect.delimiter, self.dialect.quotechar))
   373  
   374          elif self.state == self.EAT_CRNL:
   375              if c not in '\r\n':
   376                  raise Error("new-line character seen in unquoted field - "
   377                              "do you need to open the file "
   378                              "in universal-newline mode?")
   379  
   380          else:
   381              raise RuntimeError("unknown state: %r" % (self.state,))
   382  
   383          return pos + 1
   384  
   385      def _parse_eol(self):
   386          if self.state == self.EAT_CRNL:
   387              self.state = self.START_RECORD
   388          elif self.state == self.START_RECORD:
   389              # empty line - return []
   390              pass
   391          elif self.state == self.IN_FIELD:
   392              # in unquoted field
   393              # end of line - return [fields]
   394              self._parse_save_field()
   395              self.state = self.START_RECORD
   396          elif self.state == self.START_FIELD:
   397              # save empty field - return [fields]
   398              self._parse_save_field()
   399              self.state = self.START_RECORD
   400          elif self.state == self.ESCAPED_CHAR:
   401              self._parse_add_char('\n')
   402              self.state = self.IN_FIELD
   403          elif self.state == self.IN_QUOTED_FIELD:
   404              pass
   405          elif self.state == self.ESCAPE_IN_QUOTED_FIELD:
   406              self._parse_add_char('\n')
   407              self.state = self.IN_QUOTED_FIELD
   408          elif self.state == self.QUOTE_IN_QUOTED_FIELD:
   409              # end of line - return [fields]
   410              self._parse_save_field()
   411              self.state = self.START_RECORD
   412          else:
   413              raise RuntimeError("unknown state: %r" % (self.state,))
   414  
   415      def _parse_save_field(self):
   416          field, self.field = self.field, ''
   417          if self.numeric_field:
   418              self.numeric_field = False
   419              field = float(field)
   420          self.fields.append(field)
   421  
   422      def _parse_add_char(self, c):
   423          if len(self.field) + len(c) > _field_limit:
   424              raise Error("field larger than field limit (%d)" % (_field_limit))
   425          self.field += c
   426  
   427  
   428  class Writer(object):
   429      """CSV writer
   430  
   431      Writer objects are responsible for generating tabular data
   432      in CSV format from sequence input."""
   433  
   434      def __init__(self, file, dialect=None, **kwargs):
   435          if not (hasattr(file, 'write') and callable(file.write)):
   436              raise TypeError("argument 1 must have a 'write' method")
   437          self.writeline = file.write
   438          self.dialect = _call_dialect(dialect, kwargs)
   439  
   440      def _join_reset(self):
   441          self.rec = []
   442          self.num_fields = 0
   443  
   444      def _join_append(self, field, quoted, quote_empty):
   445          dialect = self.dialect
   446          # If this is not the first field we need a field separator
   447          if self.num_fields > 0:
   448              self.rec.append(dialect.delimiter)
   449  
   450          if dialect.quoting == QUOTE_NONE:
   451              need_escape = tuple(dialect.lineterminator) + (
   452                  dialect.escapechar,  # escapechar always first
   453                  dialect.delimiter, dialect.quotechar)
   454  
   455          else:
   456              for c in tuple(dialect.lineterminator) + (
   457                  dialect.delimiter, dialect.escapechar):
   458                  if c and c in field:
   459                      quoted = True
   460  
   461              need_escape = ()
   462              if dialect.quotechar in field:
   463                  if dialect.doublequote:
   464                      field = field.replace(dialect.quotechar,
   465                                            dialect.quotechar * 2)
   466                      quoted = True
   467                  else:
   468                      need_escape = (dialect.quotechar,)
   469  
   470  
   471          for c in need_escape:
   472              if c and c in field:
   473                  if not dialect.escapechar:
   474                      raise Error("need to escape, but no escapechar set")
   475                  field = field.replace(c, dialect.escapechar + c)
   476  
   477          # If field is empty check if it needs to be quoted
   478          if field == '' and quote_empty:
   479              if dialect.quoting == QUOTE_NONE:
   480                  raise Error("single empty field record must be quoted")
   481              quoted = 1
   482  
   483          if quoted:
   484              field = dialect.quotechar + field + dialect.quotechar
   485  
   486          self.rec.append(field)
   487          self.num_fields += 1
   488  
   489  
   490  
   491      def writerow(self, row):
   492          dialect = self.dialect
   493          try:
   494              rowlen = len(row)
   495          except TypeError:
   496              raise Error("sequence expected")
   497  
   498          # join all fields in internal buffer
   499          self._join_reset()
   500  
   501          for field in row:
   502              quoted = False
   503              if dialect.quoting == QUOTE_NONNUMERIC:
   504                  try:
   505                      float(field)
   506                  except:
   507                      quoted = True
   508                  # This changed since 2.5:
   509                  # quoted = not isinstance(field, (int, long, float))
   510              elif dialect.quoting == QUOTE_ALL:
   511                  quoted = True
   512  
   513              if field is None:
   514                  value = ""
   515              elif isinstance(field, float):
   516                  value = repr(field)
   517              else:
   518                  value = str(field)
   519              self._join_append(value, quoted, rowlen == 1)
   520  
   521          # add line terminator
   522          self.rec.append(dialect.lineterminator)
   523  
   524          self.writeline(''.join(self.rec))
   525  
   526      def writerows(self, rows):
   527          for row in rows:
   528              self.writerow(row)
   529  
   530  def reader(*args, **kwargs):
   531      """
   532      csv_reader = reader(iterable [, dialect='excel']
   533                         [optional keyword args])
   534      for row in csv_reader:
   535          process(row)
   536  
   537      The "iterable" argument can be any object that returns a line
   538      of input for each iteration, such as a file object or a list.  The
   539      optional \"dialect\" parameter is discussed below.  The function
   540      also accepts optional keyword arguments which override settings
   541      provided by the dialect.
   542  
   543      The returned object is an iterator.  Each iteration returns a row
   544      of the CSV file (which can span multiple input lines)"""
   545  
   546      return Reader(*args, **kwargs)
   547  
   548  def writer(*args, **kwargs):
   549      """
   550      csv_writer = csv.writer(fileobj [, dialect='excel']
   551                              [optional keyword args])
   552      for row in sequence:
   553          csv_writer.writerow(row)
   554  
   555      [or]
   556  
   557      csv_writer = csv.writer(fileobj [, dialect='excel']
   558                              [optional keyword args])
   559      csv_writer.writerows(rows)
   560  
   561      The \"fileobj\" argument can be any object that supports the file API."""
   562      return Writer(*args, **kwargs)
   563  
   564  
   565  undefined = object()
   566  def field_size_limit(limit=undefined):
   567      """Sets an upper limit on parsed fields.
   568      csv.field_size_limit([limit])
   569  
   570      Returns old limit. If limit is not given, no new limit is set and
   571      the old limit is returned"""
   572  
   573      global _field_limit
   574      old_limit = _field_limit
   575  
   576      if limit is not undefined:
   577          if not isinstance(limit, (int, long)):
   578              raise TypeError("int expected, got %s" %
   579                              (limit.__class__.__name__,))
   580          _field_limit = limit
   581  
   582      return old_limit