# Source: github.com/pachyderm/pachyderm@v1.13.4/examples/spouts/EmailSentimentAnalyzer/imap_spout.py
"""IMAP spout: forward new emails from an IMAP inbox into a tar stream.

The script polls the mailbox named by IMAP_INBOX, writes every message it
finds as ``<uid>.mbox`` into a tar stream on the named pipe at ``SPOUT``,
copies the message to the IMAP_PROCESSED_BOX mailbox and then deletes it
from the inbox.

Configuration is read from the environment variables IMAP_SERVER,
IMAP_LOGIN, IMAP_PASSWORD, IMAP_INBOX and IMAP_PROCESSED_BOX.
"""
import errno
import imaplib
import io
import os
import pprint
import stat
import sys
import tarfile
import time


SPOUT = '/pfs/out'


def open_pipe(path_to_file, attempts=0, timeout=2, sleep_int=5):
    """Open ``path_to_file`` for unbuffered-by-line binary writing, with retries.

    Args:
        path_to_file: Path of the (already existing) pipe or file to open.
        attempts: Number of attempts already made; counting starts here.
        timeout: Total number of attempts allowed (not a duration).
        sleep_int: Seconds to sleep between two attempts.

    Returns:
        A binary file object opened for writing, or ``None`` when every
        attempt failed with ``OSError``.
    """
    flags = os.O_WRONLY  # Refer to "man 2 open".
    mode = stat.S_IWUSR  # Owner write permission, i.e. 0o200.
    umask = 0o777 ^ mode  # Prevents always downgrading umask to 0.

    # Iterative retry: every attempt sees the caller's timeout and sleep_int
    # (a recursive call that forwards only `attempts` silently resets them).
    while attempts < timeout:
        umask_original = os.umask(umask)
        try:
            fd = os.open(path_to_file, flags, mode)
            # you must open the pipe as binary to prevent line-buffering problems.
            return os.fdopen(fd, "wb")
        except OSError as oe:
            print('{0} attempt of {1}; error opening file: {2}'.format(attempts + 1, timeout, oe))
        finally:
            # Runs on success and on failure, so the process umask is
            # always restored exactly once.
            os.umask(umask_original)
        attempts += 1
        if attempts < timeout:
            time.sleep(sleep_int)
    return None


def add_email_to_tar(tar_stream, uid, raw_message):
    """Append one email to ``tar_stream`` as ``<uid>.mbox`` and return that name.

    Args:
        tar_stream: An open, writable ``tarfile.TarFile``.
        uid: Message UID as ``bytes`` (as returned by imaplib) or ``str``.
        raw_message: The RFC822 message as ``bytes`` (``str`` is encoded as
            UTF-8).

    Returns:
        The archive member name that was written.

    Raises:
        tarfile.TarError: If the entry cannot be written.
    """
    if isinstance(uid, bytes):
        # Formatting bytes directly would yield names like "b'12'.mbox".
        uid = uid.decode('ascii')
    if isinstance(raw_message, str):
        raw_message = raw_message.encode('utf-8')

    header = tarfile.TarInfo(name="{}.mbox".format(uid))
    # The size must be the number of BYTES written; using the length of the
    # decoded text truncates any message containing multi-byte characters.
    header.size = len(raw_message)
    header.mode = 0o600

    with io.BytesIO(raw_message) as email:
        tar_stream.addfile(tarinfo=header, fileobj=email)
    return header.name


def forward_emails(imap, uids):
    """Write the messages in ``uids`` to the spout and move them to the processed box.

    Args:
        imap: A logged-in imaplib connection with the inbox selected.
        uids: Iterable of message UIDs (``bytes``) to forward.

    Exits the process with status -2 when the spout cannot be opened, the
    tar stream cannot be created or written, or a message cannot be copied.
    """
    my_spout = open_pipe(SPOUT)
    if my_spout is None:
        print('error opening file: {}'.format(SPOUT))
        sys.exit(-2)

    try:
        # To use a tarfile object with a named pipe, you must use the "w|" mode
        # which makes it not seekable
        print("Creating tarstream...")
        try:
            tar_stream = tarfile.open(fileobj=my_spout, mode="w|", encoding='utf-8')
        except tarfile.TarError as te:
            print('error creating tarstream: {0}'.format(te))
            sys.exit(-2)

        for current_uid in uids:
            uid_text = current_uid.decode('ascii') if isinstance(current_uid, bytes) else str(current_uid)

            typ, email_data = imap.uid("fetch", current_uid, '(RFC822)')
            if typ != "OK" or not email_data or not isinstance(email_data[0], tuple):
                # e.g. the message vanished between the search and the fetch.
                print("error fetching message {}; skipping".format(uid_text))
                continue
            raw_message = email_data[0][1]

            print("Creating tar archive entry for message {}...".format(uid_text))
            print("Writing tarfile to spout for message {}...".format(uid_text))
            try:
                add_email_to_tar(tar_stream, uid_text, raw_message)
            except tarfile.TarError as te:
                print('error writing message {0} to tarstream: {1}'.format(uid_text, te))
                sys.exit(-2)

            print("copying message {} to {}".format(uid_text, imap_processed_box))
            copy_result = imap.uid("copy", current_uid, imap_processed_box)
            if copy_result[0] == "OK":
                print("Deleting message {} from {}".format(uid_text, imap_inbox))
                # Raw string: "\D" is not a valid escape sequence.
                imap.uid("store", current_uid, "+FLAGS", r"(\Deleted)")
                imap.expunge()
            else:
                print("Error copying message {} to {}".format(uid_text, imap_processed_box))
                sys.exit(-2)

        tar_stream.close()
    finally:
        # TarFile.close() does not close a caller-supplied fileobj, so close
        # the pipe explicitly once the batch is done instead of leaking it
        # until the next batch rebinds the name.
        my_spout.close()


unspecified_value = 'not specified'
imap_host = os.getenv('IMAP_SERVER', 'imap.gmail.com')
imap_user = os.getenv('IMAP_LOGIN', unspecified_value)
imap_pass = os.getenv('IMAP_PASSWORD', unspecified_value)
imap_inbox = os.getenv('IMAP_INBOX', 'Inbox')
imap_processed_box = os.getenv('IMAP_PROCESSED_BOX', 'Processed')


def main():
    """Log in to the IMAP server and forward new emails to the spout forever."""
    if imap_pass == unspecified_value or imap_user == unspecified_value:
        print("imap spout error: IMAP_LOGIN and IMAP_PASSWORD environment variables not set.")
        sys.exit(-1)

    # connect to host using SSL
    imap = imaplib.IMAP4_SSL(imap_host)

    ## login to server
    imap.login(imap_user, imap_pass)

    try:
        imap.create(imap_processed_box)
    except imaplib.IMAP4.error as im4e:
        # Deliberately non-fatal: the script carries on after reporting.
        print("error creating processed box: {}".format(im4e))

    try:
        while True:
            print("checking for emails...")
            ## select the mailbox for reading messages from
            imap.select(imap_inbox)

            typ, data = imap.uid("search", None, 'ALL')
            all_emails = data[0].split() if data and data[0] else []

            if all_emails:
                print("{} new emails.".format(len(all_emails)))
                forward_emails(imap, all_emails)
            else:
                print("No new emails...")

            print("waiting for new emails...")
            time.sleep(5)
    finally:
        # Best-effort teardown; reached on any exit from the loop
        # (exception, sys.exit, interrupt).
        for teardown in (imap.close, imap.logout):
            try:
                teardown()
            except (imaplib.IMAP4.error, OSError) as err:
                print("error closing imap connection: {}".format(err))


if __name__ == "__main__":
    main()