github.com/grumpyhome/grumpy@v0.3.1-0.20201208125205-7b775405bdf1/grumpy-runtime-src/third_party/stdlib/rfc822.py (about)

     1  """RFC 2822 message manipulation.
     2  
     3  Note: This is only a very rough sketch of a full RFC-822 parser; in particular
     4  the tokenizing of addresses does not adhere to all the quoting rules.
     5  
Note: RFC 2822 is a long-awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus mis-named (it's not worth renaming it).  Some
effort at RFC 2822 updates has been made, but a thorough audit has not been
performed.  Consider any RFC 2822 non-conformance to be a bug.
    10  
    11      RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
    12      RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
    13  
    14  Directions for use:
    15  
    16  To create a Message object: first open a file, e.g.:
    17  
    18    fp = open(file, 'r')
    19  
    20  You can use any other legal way of getting an open file object, e.g. use
    21  sys.stdin or call os.popen().  Then pass the open file object to the Message()
    22  constructor:
    23  
    24    m = Message(fp)
    25  
    26  This class can work with any input object that supports a readline method.  If
    27  the input object has seek and tell capability, the rewindbody method will
    28  work; also illegal lines will be pushed back onto the input stream.  If the
    29  input object lacks seek but has an `unread' method that can push back a line
    30  of input, Message will use that to push back illegal lines.  Thus this class
    31  can be used to parse messages coming from a buffered stream.
    32  
    33  The optional `seekable' argument is provided as a workaround for certain stdio
    34  libraries in which tell() discards buffered data before discovering that the
    35  lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
    37  an unseekable object such as a file object created from a socket object.  If
    38  it is 1 on entry -- which it is by default -- the tell() method of the open
    39  file object is called once; if this raises an exception, seekable is reset to
    40  0.  For other nonzero values of seekable, this test is not made.
    41  
    42  To get the text of a particular header there are several methods:
    43  
    44    str = m.getheader(name)
    45    str = m.getrawheader(name)
    46  
    47  where name is the name of the header, e.g. 'Subject'.  The difference is that
    48  getheader() strips the leading and trailing whitespace, while getrawheader()
    49  doesn't.  Both functions retain embedded whitespace (including newlines)
    50  exactly as they are specified in the header, and leave the case of the text
    51  unchanged.
    52  
    53  For addresses and address lists there are functions
    54  
    55    realname, mailaddress = m.getaddr(name)
    56    list = m.getaddrlist(name)
    57  
    58  where the latter returns a list of (realname, mailaddr) tuples.
    59  
    60  There is also a method
    61  
    62    time = m.getdate(name)
    63  
    64  which parses a Date-like field and returns a time-compatible tuple,
    65  i.e. a tuple such as returned by time.localtime() or accepted by
    66  time.mktime().
    67  
    68  See the class definition for lower level access methods.
    69  
    70  There are also some utility functions here.
    71  """
    72  # Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
    73  
    74  import time
    75  
    76  from warnings import warnpy3k
# Emit the py3k deprecation warning at import time; stacklevel=2 is passed so
# the warning is attributed to the caller's frame rather than to this module.
warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
         stacklevel=2)

# Names exported by "from rfc822 import *".
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() accepts as the blank line ending
# the headers (CRLF and bare LF); kept as a constant so islast() is a single
# membership test.
_blanklines = ('\r\n', '\n')            # Optimization for islast()
    83  
    84  
    85  class Message(object):
    86      """Represents a single RFC 2822-compliant message."""
    87  
    88      def __init__(self, fp, seekable = 1):
    89          """Initialize the class instance and read the headers."""
    90          if seekable == 1:
    91              # Exercise tell() to make sure it works
    92              # (and then assume seek() works, too)
    93              try:
    94                  fp.tell()
    95              except (AttributeError, IOError):
    96                  seekable = 0
    97          self.fp = fp
    98          self.seekable = seekable
    99          self.startofheaders = None
   100          self.startofbody = None
   101          #
   102          if self.seekable:
   103              try:
   104                  self.startofheaders = self.fp.tell()
   105              except IOError:
   106                  self.seekable = 0
   107          #
   108          self.readheaders()
   109          #
   110          if self.seekable:
   111              try:
   112                  self.startofbody = self.fp.tell()
   113              except IOError:
   114                  self.seekable = 0
   115  
   116      def rewindbody(self):
   117          """Rewind the file to the start of the body (if seekable)."""
   118          if not self.seekable:
   119              raise IOError, "unseekable file"
   120          self.fp.seek(self.startofbody)
   121  
   122      def readheaders(self):
   123          """Read header lines.
   124  
   125          Read header lines up to the entirely blank line that terminates them.
   126          The (normally blank) line that ends the headers is skipped, but not
   127          included in the returned list.  If a non-header line ends the headers,
   128          (which is an error), an attempt is made to backspace over it; it is
   129          never included in the returned list.
   130  
   131          The variable self.status is set to the empty string if all went well,
   132          otherwise it is an error message.  The variable self.headers is a
   133          completely uninterpreted list of lines contained in the header (so
   134          printing them will reproduce the header exactly as it appears in the
   135          file).
   136          """
   137          self.dict = {}
   138          self.unixfrom = ''
   139          self.headers = lst = []
   140          self.status = ''
   141          headerseen = ""
   142          firstline = 1
   143          startofline = unread = tell = None
   144          if hasattr(self.fp, 'unread'):
   145              unread = self.fp.unread
   146          elif self.seekable:
   147              tell = self.fp.tell
   148          while 1:
   149              if tell:
   150                  try:
   151                      startofline = tell()
   152                  except IOError:
   153                      startofline = tell = None
   154                      self.seekable = 0
   155              line = self.fp.readline()
   156              if not line:
   157                  self.status = 'EOF in headers'
   158                  break
   159              # Skip unix From name time lines
   160              if firstline and line.startswith('From '):
   161                  self.unixfrom = self.unixfrom + line
   162                  continue
   163              firstline = 0
   164              if headerseen and line[0] in ' \t':
   165                  # It's a continuation line.
   166                  lst.append(line)
   167                  x = (self.dict[headerseen] + "\n " + line.strip())
   168                  self.dict[headerseen] = x.strip()
   169                  continue
   170              elif self.iscomment(line):
   171                  # It's a comment.  Ignore it.
   172                  continue
   173              elif self.islast(line):
   174                  # Note! No pushback here!  The delimiter line gets eaten.
   175                  break
   176              headerseen = self.isheader(line)
   177              if headerseen:
   178                  # It's a legal header line, save it.
   179                  lst.append(line)
   180                  self.dict[headerseen] = line[len(headerseen)+1:].strip()
   181                  continue
   182              elif headerseen is not None:
   183                  # An empty header name. These aren't allowed in HTTP, but it's
   184                  # probably a benign mistake. Don't add the header, just keep
   185                  # going.
   186                  continue
   187              else:
   188                  # It's not a header line; throw it back and stop here.
   189                  if not self.dict:
   190                      self.status = 'No headers'
   191                  else:
   192                      self.status = 'Non-header line where header expected'
   193                  # Try to undo the read.
   194                  if unread:
   195                      unread(line)
   196                  elif tell:
   197                      self.fp.seek(startofline)
   198                  else:
   199                      self.status = self.status + '; bad seek'
   200                  break
   201  
   202      def isheader(self, line):
   203          """Determine whether a given line is a legal header.
   204  
   205          This method should return the header name, suitably canonicalized.
   206          You may override this method in order to use Message parsing on tagged
   207          data in RFC 2822-like formats with special header formats.
   208          """
   209          i = line.find(':')
   210          if i > -1:
   211              return line[:i].lower()
   212          return None
   213  
   214      def islast(self, line):
   215          """Determine whether a line is a legal end of RFC 2822 headers.
   216  
   217          You may override this method if your application wants to bend the
   218          rules, e.g. to strip trailing whitespace, or to recognize MH template
   219          separators ('--------').  For convenience (e.g. for code reading from
   220          sockets) a line consisting of \\r\\n also matches.
   221          """
   222          return line in _blanklines
   223  
   224      def iscomment(self, line):
   225          """Determine whether a line should be skipped entirely.
   226  
   227          You may override this method in order to use Message parsing on tagged
   228          data in RFC 2822-like formats that support embedded comments or
   229          free-text data.
   230          """
   231          return False
   232  
   233      def getallmatchingheaders(self, name):
   234          """Find all header lines matching a given header name.
   235  
   236          Look through the list of headers and find all lines matching a given
   237          header name (and their continuation lines).  A list of the lines is
   238          returned, without interpretation.  If the header does not occur, an
   239          empty list is returned.  If the header occurs multiple times, all
   240          occurrences are returned.  Case is not important in the header name.
   241          """
   242          name = name.lower() + ':'
   243          n = len(name)
   244          lst = []
   245          hit = 0
   246          for line in self.headers:
   247              if line[:n].lower() == name:
   248                  hit = 1
   249              elif not line[:1].isspace():
   250                  hit = 0
   251              if hit:
   252                  lst.append(line)
   253          return lst
   254  
   255      def getfirstmatchingheader(self, name):
   256          """Get the first header line matching name.
   257  
   258          This is similar to getallmatchingheaders, but it returns only the
   259          first matching header (and its continuation lines).
   260          """
   261          name = name.lower() + ':'
   262          n = len(name)
   263          lst = []
   264          hit = 0
   265          for line in self.headers:
   266              if hit:
   267                  if not line[:1].isspace():
   268                      break
   269              elif line[:n].lower() == name:
   270                  hit = 1
   271              if hit:
   272                  lst.append(line)
   273          return lst
   274  
   275      def getrawheader(self, name):
   276          """A higher-level interface to getfirstmatchingheader().
   277  
   278          Return a string containing the literal text of the header but with the
   279          keyword stripped.  All leading, trailing and embedded whitespace is
   280          kept in the string, however.  Return None if the header does not
   281          occur.
   282          """
   283  
   284          lst = self.getfirstmatchingheader(name)
   285          if not lst:
   286              return None
   287          lst[0] = lst[0][len(name) + 1:]
   288          return ''.join(lst)
   289  
   290      def getheader(self, name, default=None):
   291          """Get the header value for a name.
   292  
   293          This is the normal interface: it returns a stripped version of the
   294          header value for a given header name, or None if it doesn't exist.
   295          This uses the dictionary version which finds the *last* such header.
   296          """
   297          return self.dict.get(name.lower(), default)
   298      get = getheader
   299  
   300      def getheaders(self, name):
   301          """Get all values for a header.
   302  
   303          This returns a list of values for headers given more than once; each
   304          value in the result list is stripped in the same way as the result of
   305          getheader().  If the header is not given, return an empty list.
   306          """
   307          result = []
   308          current = ''
   309          have_header = 0
   310          for s in self.getallmatchingheaders(name):
   311              if s[0].isspace():
   312                  if current:
   313                      current = "%s\n %s" % (current, s.strip())
   314                  else:
   315                      current = s.strip()
   316              else:
   317                  if have_header:
   318                      result.append(current)
   319                  current = s[s.find(":") + 1:].strip()
   320                  have_header = 1
   321          if have_header:
   322              result.append(current)
   323          return result
   324  
   325      def getaddr(self, name):
   326          """Get a single address from a header, as a tuple.
   327  
   328          An example return value:
   329          ('Guido van Rossum', 'guido@cwi.nl')
   330          """
   331          # New, by Ben Escoto
   332          alist = self.getaddrlist(name)
   333          if alist:
   334              return alist[0]
   335          else:
   336              return (None, None)
   337  
   338      def getaddrlist(self, name):
   339          """Get a list of addresses from a header.
   340  
   341          Retrieves a list of addresses from a header, where each address is a
   342          tuple as returned by getaddr().  Scans all named headers, so it works
   343          properly with multiple To: or Cc: headers for example.
   344          """
   345          raw = []
   346          for h in self.getallmatchingheaders(name):
   347              if h[0] in ' \t':
   348                  raw.append(h)
   349              else:
   350                  if raw:
   351                      raw.append(', ')
   352                  i = h.find(':')
   353                  if i > 0:
   354                      addr = h[i+1:]
   355                  raw.append(addr)
   356          alladdrs = ''.join(raw)
   357          a = AddressList(alladdrs)
   358          return a.addresslist
   359  
   360      def getdate(self, name):
   361          """Retrieve a date field from a header.
   362  
   363          Retrieves a date field from the named header, returning a tuple
   364          compatible with time.mktime().
   365          """
   366          try:
   367              data = self[name]
   368          except KeyError:
   369              return None
   370          return parsedate(data)
   371  
   372      def getdate_tz(self, name):
   373          """Retrieve a date field from a header as a 10-tuple.
   374  
   375          The first 9 elements make up a tuple compatible with time.mktime(),
   376          and the 10th is the offset of the poster's time zone from GMT/UTC.
   377          """
   378          try:
   379              data = self[name]
   380          except KeyError:
   381              return None
   382          return parsedate_tz(data)
   383  
   384  
   385      # Access as a dictionary (only finds *last* header of each type):
   386  
   387      def __len__(self):
   388          """Get the number of headers in a message."""
   389          return len(self.dict)
   390  
   391      def __getitem__(self, name):
   392          """Get a specific header, as from a dictionary."""
   393          return self.dict[name.lower()]
   394  
   395      def __setitem__(self, name, value):
   396          """Set the value of a header.
   397  
   398          Note: This is not a perfect inversion of __getitem__, because any
   399          changed headers get stuck at the end of the raw-headers list rather
   400          than where the altered header was.
   401          """
   402          del self[name] # Won't fail if it doesn't exist
   403          self.dict[name.lower()] = value
   404          text = name + ": " + value
   405          for line in text.split("\n"):
   406              self.headers.append(line + "\n")
   407  
   408      def __delitem__(self, name):
   409          """Delete all occurrences of a specific header, if it is present."""
   410          name = name.lower()
   411          if not name in self.dict:
   412              return
   413          del self.dict[name]
   414          name = name + ':'
   415          n = len(name)
   416          lst = []
   417          hit = 0
   418          for i in range(len(self.headers)):
   419              line = self.headers[i]
   420              if line[:n].lower() == name:
   421                  hit = 1
   422              elif not line[:1].isspace():
   423                  hit = 0
   424              if hit:
   425                  lst.append(i)
   426          for i in reversed(lst):
   427              del self.headers[i]
   428  
   429      def setdefault(self, name, default=""):
   430          lowername = name.lower()
   431          if lowername in self.dict:
   432              return self.dict[lowername]
   433          else:
   434              text = name + ": " + default
   435              for line in text.split("\n"):
   436                  self.headers.append(line + "\n")
   437              self.dict[lowername] = default
   438              return default
   439  
   440      def has_key(self, name):
   441          """Determine whether a message contains the named header."""
   442          return name.lower() in self.dict
   443  
   444      def __contains__(self, name):
   445          """Determine whether a message contains the named header."""
   446          return name.lower() in self.dict
   447  
   448      def __iter__(self):
   449          return iter(self.dict)
   450  
   451      def keys(self):
   452          """Get all of a message's header field names."""
   453          return self.dict.keys()
   454  
   455      def values(self):
   456          """Get all of a message's header field values."""
   457          return self.dict.values()
   458  
   459      def items(self):
   460          """Get all of a message's headers.
   461  
   462          Returns a list of name, value tuples.
   463          """
   464          return self.dict.items()
   465  
   466      def __str__(self):
   467          return ''.join(self.headers)
   468  
   469  
   470  # Utility functions
   471  # -----------------
   472  
   473  # XXX Should fix unquote() and quote() to be really conformant.
   474  # XXX The inverses of the parse functions may also be useful.
   475  
   476  
   477  def unquote(s):
   478      """Remove quotes from a string."""
   479      if len(s) > 1:
   480          if s.startswith('"') and s.endswith('"'):
   481              return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
   482          if s.startswith('<') and s.endswith('>'):
   483              return s[1:-1]
   484      return s
   485  
   486  
   487  def quote(s):
   488      """Add quotes around a string."""
   489      return s.replace('\\', '\\\\').replace('"', '\\"')
   490  
   491  
   492  def parseaddr(address):
   493      """Parse an address into a (realname, mailaddr) tuple."""
   494      a = AddressList(address)
   495      lst = a.addresslist
   496      if not lst:
   497          return (None, None)
   498      return lst[0]
   499  
   500  
   501  class AddrlistClass(object):
   502      """Address parser class by Ben Escoto.
   503  
   504      To understand what this class does, it helps to have a copy of
   505      RFC 2822 in front of you.
   506  
   507      http://www.faqs.org/rfcs/rfc2822.html
   508  
   509      Note: this class interface is deprecated and may be removed in the future.
   510      Use rfc822.AddressList instead.
   511      """
   512  
   513      def __init__(self, field):
   514          """Initialize a new instance.
   515  
   516          `field' is an unparsed address header field, containing one or more
   517          addresses.
   518          """
   519          self.specials = '()<>@,:;.\"[]'
   520          self.pos = 0
   521          self.LWS = ' \t'
   522          self.CR = '\r\n'
   523          self.atomends = self.specials + self.LWS + self.CR
   524          # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
   525          # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
   526          # syntax, so allow dots in phrases.
   527          self.phraseends = self.atomends.replace('.', '')
   528          self.field = field
   529          self.commentlist = []
   530  
   531      def gotonext(self):
   532          """Parse up to the start of the next address."""
   533          while self.pos < len(self.field):
   534              if self.field[self.pos] in self.LWS + '\n\r':
   535                  self.pos = self.pos + 1
   536              elif self.field[self.pos] == '(':
   537                  self.commentlist.append(self.getcomment())
   538              else: break
   539  
   540      def getaddrlist(self):
   541          """Parse all addresses.
   542  
   543          Returns a list containing all of the addresses.
   544          """
   545          result = []
   546          ad = self.getaddress()
   547          while ad:
   548              result += ad
   549              ad = self.getaddress()
   550          return result
   551  
   552      def getaddress(self):
   553          """Parse the next address."""
   554          self.commentlist = []
   555          self.gotonext()
   556  
   557          oldpos = self.pos
   558          oldcl = self.commentlist
   559          plist = self.getphraselist()
   560  
   561          self.gotonext()
   562          returnlist = []
   563  
   564          if self.pos >= len(self.field):
   565              # Bad email address technically, no domain.
   566              if plist:
   567                  returnlist = [(' '.join(self.commentlist), plist[0])]
   568  
   569          elif self.field[self.pos] in '.@':
   570              # email address is just an addrspec
   571              # this isn't very efficient since we start over
   572              self.pos = oldpos
   573              self.commentlist = oldcl
   574              addrspec = self.getaddrspec()
   575              returnlist = [(' '.join(self.commentlist), addrspec)]
   576  
   577          elif self.field[self.pos] == ':':
   578              # address is a group
   579              returnlist = []
   580  
   581              fieldlen = len(self.field)
   582              self.pos += 1
   583              while self.pos < len(self.field):
   584                  self.gotonext()
   585                  if self.pos < fieldlen and self.field[self.pos] == ';':
   586                      self.pos += 1
   587                      break
   588                  returnlist = returnlist + self.getaddress()
   589  
   590          elif self.field[self.pos] == '<':
   591              # Address is a phrase then a route addr
   592              routeaddr = self.getrouteaddr()
   593  
   594              if self.commentlist:
   595                  returnlist = [(' '.join(plist) + ' (' + \
   596                           ' '.join(self.commentlist) + ')', routeaddr)]
   597              else: returnlist = [(' '.join(plist), routeaddr)]
   598  
   599          else:
   600              if plist:
   601                  returnlist = [(' '.join(self.commentlist), plist[0])]
   602              elif self.field[self.pos] in self.specials:
   603                  self.pos += 1
   604  
   605          self.gotonext()
   606          if self.pos < len(self.field) and self.field[self.pos] == ',':
   607              self.pos += 1
   608          return returnlist
   609  
   610      def getrouteaddr(self):
   611          """Parse a route address (Return-path value).
   612  
   613          This method just skips all the route stuff and returns the addrspec.
   614          """
   615          if self.field[self.pos] != '<':
   616              return
   617  
   618          expectroute = 0
   619          self.pos += 1
   620          self.gotonext()
   621          adlist = ""
   622          while self.pos < len(self.field):
   623              if expectroute:
   624                  self.getdomain()
   625                  expectroute = 0
   626              elif self.field[self.pos] == '>':
   627                  self.pos += 1
   628                  break
   629              elif self.field[self.pos] == '@':
   630                  self.pos += 1
   631                  expectroute = 1
   632              elif self.field[self.pos] == ':':
   633                  self.pos += 1
   634              else:
   635                  adlist = self.getaddrspec()
   636                  self.pos += 1
   637                  break
   638              self.gotonext()
   639  
   640          return adlist
   641  
   642      def getaddrspec(self):
   643          """Parse an RFC 2822 addr-spec."""
   644          aslist = []
   645  
   646          self.gotonext()
   647          while self.pos < len(self.field):
   648              if self.field[self.pos] == '.':
   649                  aslist.append('.')
   650                  self.pos += 1
   651              elif self.field[self.pos] == '"':
   652                  aslist.append('"%s"' % self.getquote())
   653              elif self.field[self.pos] in self.atomends:
   654                  break
   655              else: aslist.append(self.getatom())
   656              self.gotonext()
   657  
   658          if self.pos >= len(self.field) or self.field[self.pos] != '@':
   659              return ''.join(aslist)
   660  
   661          aslist.append('@')
   662          self.pos += 1
   663          self.gotonext()
   664          return ''.join(aslist) + self.getdomain()
   665  
   666      def getdomain(self):
   667          """Get the complete domain name from an address."""
   668          sdlist = []
   669          while self.pos < len(self.field):
   670              if self.field[self.pos] in self.LWS:
   671                  self.pos += 1
   672              elif self.field[self.pos] == '(':
   673                  self.commentlist.append(self.getcomment())
   674              elif self.field[self.pos] == '[':
   675                  sdlist.append(self.getdomainliteral())
   676              elif self.field[self.pos] == '.':
   677                  self.pos += 1
   678                  sdlist.append('.')
   679              elif self.field[self.pos] in self.atomends:
   680                  break
   681              else: sdlist.append(self.getatom())
   682          return ''.join(sdlist)
   683  
   684      def getdelimited(self, beginchar, endchars, allowcomments = 1):
   685          """Parse a header fragment delimited by special characters.
   686  
   687          `beginchar' is the start character for the fragment.  If self is not
   688          looking at an instance of `beginchar' then getdelimited returns the
   689          empty string.
   690  
   691          `endchars' is a sequence of allowable end-delimiting characters.
   692          Parsing stops when one of these is encountered.
   693  
   694          If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
   695          within the parsed fragment.
   696          """
   697          if self.field[self.pos] != beginchar:
   698              return ''
   699  
   700          slist = ['']
   701          quote = 0
   702          self.pos += 1
   703          while self.pos < len(self.field):
   704              if quote == 1:
   705                  slist.append(self.field[self.pos])
   706                  quote = 0
   707              elif self.field[self.pos] in endchars:
   708                  self.pos += 1
   709                  break
   710              elif allowcomments and self.field[self.pos] == '(':
   711                  slist.append(self.getcomment())
   712                  continue        # have already advanced pos from getcomment
   713              elif self.field[self.pos] == '\\':
   714                  quote = 1
   715              else:
   716                  slist.append(self.field[self.pos])
   717              self.pos += 1
   718  
   719          return ''.join(slist)
   720  
   721      def getquote(self):
   722          """Get a quote-delimited fragment from self's field."""
   723          return self.getdelimited('"', '"\r', 0)
   724  
   725      def getcomment(self):
   726          """Get a parenthesis-delimited fragment from self's field."""
   727          return self.getdelimited('(', ')\r', 1)
   728  
   729      def getdomainliteral(self):
   730          """Parse an RFC 2822 domain-literal."""
   731          return '[%s]' % self.getdelimited('[', ']\r', 0)
   732  
   733      def getatom(self, atomends=None):
   734          """Parse an RFC 2822 atom.
   735  
   736          Optional atomends specifies a different set of end token delimiters
   737          (the default is to use self.atomends).  This is used e.g. in
   738          getphraselist() since phrase endings must not include the `.' (which
   739          is legal in phrases)."""
   740          atomlist = ['']
   741          if atomends is None:
   742              atomends = self.atomends
   743  
   744          while self.pos < len(self.field):
   745              if self.field[self.pos] in atomends:
   746                  break
   747              else: atomlist.append(self.field[self.pos])
   748              self.pos += 1
   749  
   750          return ''.join(atomlist)
   751  
   752      def getphraselist(self):
   753          """Parse a sequence of RFC 2822 phrases.
   754  
   755          A phrase is a sequence of words, which are in turn either RFC 2822
   756          atoms or quoted-strings.  Phrases are canonicalized by squeezing all
   757          runs of continuous whitespace into one space.
   758          """
   759          plist = []
   760  
   761          while self.pos < len(self.field):
   762              if self.field[self.pos] in self.LWS:
   763                  self.pos += 1
   764              elif self.field[self.pos] == '"':
   765                  plist.append(self.getquote())
   766              elif self.field[self.pos] == '(':
   767                  self.commentlist.append(self.getcomment())
   768              elif self.field[self.pos] in self.phraseends:
   769                  break
   770              else:
   771                  plist.append(self.getatom(self.phraseends))
   772  
   773          return plist
   774  
   775  class AddressList(AddrlistClass):
   776      """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
   777      def __init__(self, field):
   778          AddrlistClass.__init__(self, field)
   779          if field:
   780              self.addresslist = self.getaddrlist()
   781          else:
   782              self.addresslist = []
   783  
   784      def __len__(self):
   785          return len(self.addresslist)
   786  
   787      def __str__(self):
   788          return ", ".join(map(dump_address_pair, self.addresslist))
   789  
   790      def __add__(self, other):
   791          # Set union
   792          newaddr = AddressList(None)
   793          newaddr.addresslist = self.addresslist[:]
   794          for x in other.addresslist:
   795              if not x in self.addresslist:
   796                  newaddr.addresslist.append(x)
   797          return newaddr
   798  
   799      def __iadd__(self, other):
   800          # Set union, in-place
   801          for x in other.addresslist:
   802              if not x in self.addresslist:
   803                  self.addresslist.append(x)
   804          return self
   805  
   806      def __sub__(self, other):
   807          # Set difference
   808          newaddr = AddressList(None)
   809          for x in self.addresslist:
   810              if not x in other.addresslist:
   811                  newaddr.addresslist.append(x)
   812          return newaddr
   813  
   814      def __isub__(self, other):
   815          # Set difference, in-place
   816          for x in other.addresslist:
   817              if x in self.addresslist:
   818                  self.addresslist.remove(x)
   819          return self
   820  
   821      def __getitem__(self, index):
   822          # Make indexing, slices, and 'in' work
   823          return self.addresslist[index]
   824  
   825  def dump_address_pair(pair):
   826      """Dump a (name, address) pair in a canonicalized form."""
   827      if pair[0]:
   828          return '"' + pair[0] + '" <' + pair[1] + '>'
   829      else:
   830          return pair[1]
   831  
   832  # Parse a date field
   833  
   834  _monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
   835                 'aug', 'sep', 'oct', 'nov', 'dec',
   836                 'january', 'february', 'march', 'april', 'may', 'june', 'july',
   837                 'august', 'september', 'october', 'november', 'december']
   838  _daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
   839  
   840  # The timezone table does not include the military time zones defined
   841  # in RFC822, other than Z.  According to RFC1123, the description in
   842  # RFC822 gets the signs wrong, so we can't rely on any such time
   843  # zones.  RFC1123 recommends that numeric timezone indicators be used
   844  # instead of timezone names.
   845  
   846  _timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
   847                'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
   848                'EST': -500, 'EDT': -400,  # Eastern
   849                'CST': -600, 'CDT': -500,  # Central
   850                'MST': -700, 'MDT': -600,  # Mountain
   851                'PST': -800, 'PDT': -700   # Pacific
   852                }
   853  
   854  
   855  def parsedate_tz(data):
   856      """Convert a date string to a time tuple.
   857  
   858      Accounts for military timezones.
   859      """
   860      if not data:
   861          return None
   862      data = data.split()
   863      if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
   864          # There's a dayname here. Skip it
   865          del data[0]
   866      else:
   867          # no space after the "weekday,"?
   868          i = data[0].rfind(',')
   869          if i >= 0:
   870              data[0] = data[0][i+1:]
   871      if len(data) == 3: # RFC 850 date, deprecated
   872          stuff = data[0].split('-')
   873          if len(stuff) == 3:
   874              data = stuff + data[1:]
   875      if len(data) == 4:
   876          s = data[3]
   877          i = s.find('+')
   878          if i > 0:
   879              data[3:] = [s[:i], s[i+1:]]
   880          else:
   881              data.append('') # Dummy tz
   882      if len(data) < 5:
   883          return None
   884      data = data[:5]
   885      [dd, mm, yy, tm, tz] = data
   886      mm = mm.lower()
   887      if not mm in _monthnames:
   888          dd, mm = mm, dd.lower()
   889          if not mm in _monthnames:
   890              return None
   891      mm = _monthnames.index(mm)+1
   892      if mm > 12: mm = mm - 12
   893      if dd[-1] == ',':
   894          dd = dd[:-1]
   895      i = yy.find(':')
   896      if i > 0:
   897          yy, tm = tm, yy
   898      if yy[-1] == ',':
   899          yy = yy[:-1]
   900      if not yy[0].isdigit():
   901          yy, tz = tz, yy
   902      if tm[-1] == ',':
   903          tm = tm[:-1]
   904      tm = tm.split(':')
   905      if len(tm) == 2:
   906          [thh, tmm] = tm
   907          tss = '0'
   908      elif len(tm) == 3:
   909          [thh, tmm, tss] = tm
   910      else:
   911          return None
   912      try:
   913          yy = int(yy)
   914          dd = int(dd)
   915          thh = int(thh)
   916          tmm = int(tmm)
   917          tss = int(tss)
   918      except ValueError:
   919          return None
   920      tzoffset = None
   921      tz = tz.upper()
   922      if tz in _timezones:
   923          tzoffset = _timezones[tz]
   924      else:
   925          try:
   926              tzoffset = int(tz)
   927          except ValueError:
   928              pass
   929      # Convert a timezone offset into seconds ; -0500 -> -18000
   930      if tzoffset:
   931          if tzoffset < 0:
   932              tzsign = -1
   933              tzoffset = -tzoffset
   934          else:
   935              tzsign = 1
   936          tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
   937      return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
   938  
   939  
   940  def parsedate(data):
   941      """Convert a time string to a time tuple."""
   942      t = parsedate_tz(data)
   943      if t is None:
   944          return t
   945      return t[:9]
   946  
   947  
   948  def mktime_tz(data):
   949      """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
   950      if data[9] is None:
   951          # No zone info, so localtime is better assumption than GMT
   952          return time.mktime(data[:8] + (-1,))
   953      else:
   954          t = time.mktime(data[:8] + (0,))
   955          return t - data[9] - time.timezone
   956  
   957  def formatdate(timeval=None):
   958      """Returns time format preferred for Internet standards.
   959  
   960      Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123
   961  
   962      According to RFC 1123, day and month names must always be in
   963      English.  If not for that, this code could use strftime().  It
   964      can't because strftime() honors the locale and could generate
   965      non-English names.
   966      """
   967      if timeval is None:
   968          timeval = time.time()
   969      timeval = time.gmtime(timeval)
   970      return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
   971              ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
   972              timeval[2],
   973              ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
   974               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
   975                                  timeval[0], timeval[3], timeval[4], timeval[5])
   976  
   977  
   978  # When used as script, run a small test program.
   979  # The first command line argument must be a filename containing one
   980  # message in RFC-822 format.
   981  
   982  if __name__ == '__main__':
   983      import sys, os
   984      file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
   985      if sys.argv[1:]: file = sys.argv[1]
   986      f = open(file, 'r')
   987      m = Message(f)
   988      print 'From:', m.getaddr('from')
   989      print 'To:', m.getaddrlist('to')
   990      print 'Subject:', m.getheader('subject')
   991      print 'Date:', m.getheader('date')
   992      date = m.getdate_tz('date')
   993      tz = date[-1]
   994      date = time.localtime(mktime_tz(date))
   995      if date:
   996          print 'ParsedDate:', time.asctime(date),
   997          hhmmss = tz
   998          hhmm, ss = divmod(hhmmss, 60)
   999          hh, mm = divmod(hhmm, 60)
  1000          print "%+03d%02d" % (hh, mm),
  1001          if ss: print ".%02d" % ss,
  1002          print
  1003      else:
  1004          print 'ParsedDate:', None
  1005      m.rewindbody()
  1006      n = 0
  1007      while f.readline():
  1008          n += 1
  1009      print 'Lines:', n
  1010      print '-'*70
  1011      print 'len =', len(m)
  1012      if 'Date' in m: print 'Date =', m['Date']
  1013      if 'X-Nonsense' in m: pass
  1014      print 'keys =', m.keys()
  1015      print 'values =', m.values()
  1016      print 'items =', m.items()