github.com/pachyderm/pachyderm@v1.13.4/examples/spouts/EmailSentimentAnalyzer/imap_spout.py (about)

     1  import imaplib
     2  import pprint
     3  import os
     4  import tarfile
     5  import errno
     6  import time
     7  import io
     8  import stat
     9  
    10  
    11  SPOUT = '/pfs/out'
    12  
    13  def open_pipe(path_to_file, attempts=0, timeout=2, sleep_int=5):
    14      if attempts < timeout :
    15          flags = os.O_WRONLY  # Refer to "man 2 open".
    16          mode = stat.S_IWUSR  # This is 0o400.
    17          umask = 0o777 ^ mode  # Prevents always downgrading umask to 0.
    18          umask_original = os.umask(umask)
    19          try:
    20              file = os.open(path_to_file, flags, mode)
    21              # you must open the pipe as binary to prevent line-buffering problems.
    22              return os.fdopen(file, "wb")
    23          except OSError as oe:
    24              print ('{0} attempt of {1}; error opening file: {2}'.format(attempts + 1, timeout, oe))
    25              os.umask(umask_original)
    26              time.sleep(sleep_int)
    27              return open_pipe(path_to_file, attempts + 1)
    28          finally:
    29              os.umask(umask_original)
    30      return None
    31  
    32  
    33  unspecified_value = 'not specified';
    34  imap_host = os.getenv('IMAP_SERVER', 'imap.gmail.com')
    35  imap_user = os.getenv('IMAP_LOGIN', unspecified_value)
    36  imap_pass = os.getenv('IMAP_PASSWORD', unspecified_value)
    37  imap_inbox = os.getenv('IMAP_INBOX', 'Inbox')
    38  imap_processed_box = os.getenv('IMAP_PROCESSED_BOX', 'Processed')
    39  
    40  if ((imap_pass == unspecified_value) or (imap_user == unspecified_value)):
    41      print("imap spout error: IMAP_LOGIN and IMAP_PASSWORD environment variables not set.")
    42      exit(-1)
    43  
    44  
    45  # connect to host using SSL
    46  imap = imaplib.IMAP4_SSL(imap_host)
    47  
    48  ## login to server
    49  imap.login(imap_user, imap_pass)
    50  
    51  try:
    52      imap.create(imap_processed_box)
    53  except imaplib.IMAP4.error as im4e:
    54      print("error creating processed box: {}".format(im4e))
    55      pass
    56  
    57  while (True):
    58      print("checking for emails...")
    59      ## select the mailbox for reading messages from
    60      imap.select(imap_inbox)
    61  
    62      typ, data = imap.uid("search", None, 'ALL')
    63      all_emails = data[0].split()
    64      number_of_emails = len(data[0].split())
    65  
    66      if number_of_emails > 0:
    67          print("{} new emails.".format(number_of_emails))
    68          mySpout = open_pipe(SPOUT)
    69          if mySpout is None:
    70              print ('error opening file: {}'.format(SPOUT))
    71              exit(-2)
    72  
    73          # To use a tarfile object with a named pipe, you must use the "w|" mode
    74          # which makes it not seekable
    75          print("Creating tarstream...")
    76          try:
    77              tarStream = tarfile.open(fileobj=mySpout,mode="w|", encoding='utf-8')
    78          except tarfile.TarError as te:
    79              print('error creating tarstream: {0}'.format(te))
    80              exit(-2)
    81  
    82          for current in range(number_of_emails):
    83              current_uid = all_emails[current]
    84              typ, email_data = imap.uid("fetch", current_uid, '(RFC822)')
    85              current_email_rfc822 = email_data[0][1].decode('utf-8')
    86              name = "{}.mbox".format(current_uid)
    87              print("Creating tar archive entry for message {}...".format(current_uid))
    88  
    89              tarHeader = tarfile.TarInfo()
    90              tarHeader.size = len(current_email_rfc822)
    91              tarHeader.mode = 0o600
    92              tarHeader.name = name
    93  
    94              print("Writing tarfile to spout for message {}...".format(current_uid))
    95              try:
    96                  with io.BytesIO(current_email_rfc822.encode('utf-8')) as email:
    97                      tarStream.addfile(tarinfo=tarHeader, fileobj=email)
    98              except tarfile.TarError as te:
    99                  print('error writing message {0} to tarstream: {1}'.format(current_uid, te))
   100                  exit(-2)
   101  
   102              print("copying message {} to {}".format(current_uid, imap_processed_box))
   103  
   104              copyResult = imap.uid("copy", current_uid, imap_processed_box)
   105              if copyResult[0] == "OK":
   106                  print("Deleting message {} from {}".format(current_uid, imap_inbox))
   107                  mov, data = imap.uid("store", current_uid, "+FLAGS", "(\Deleted)")
   108                  imap.expunge()
   109              else:
   110                  print("Error copying message {} to {}".format(current_uid, imap_processed_box))
   111                  exit(-2)
   112  
   113          tarStream.close()
   114      else:
   115          print("No new emails...")
   116  
   117      print("waiting for new emails...")
   118      time.sleep(5)
   119  
   120  
   121  
   122  mySpout.close()
   123  imap.close()
   124