#!/usr/local/bin/python3
# Source: github.com/pachyderm/pachyderm@v1.13.4/examples/spouts/spout101/src/consumer/main.py
"""Spout consumer example.

Emulates the reception of messages from a third party messaging system or
queue (such as AWS SQS, Kafka, Google Pub/Sub, etc.) and pushes every message
received into the ``master`` branch of the ``spout`` Pachyderm repo.
"""
from python_pachyderm import Client
import os
import json
import hashlib
from random import randint
from time import sleep


def receive_message(min_delay=10, max_delay=30):
    """Emulate polling a message queue and return the messages received.

    Args:
        min_delay: Lower bound, in seconds, of the emulated network latency.
        max_delay: Upper bound (inclusive), in seconds, of the emulated
            network latency.

    Returns:
        A tuple of two random ``bytes`` payloads: the first is 1024 bytes
        long, the second 2048 bytes long.
    """
    # Emulates a network response time to poll new messages.
    sleep(randint(min_delay, max_delay))
    # Two random payloads of different sizes (1 KB and 2 KB).
    random1 = os.urandom(1024)
    random2 = os.urandom(2048)
    return (random1, random2)


def polling_consumer():
    """Poll the emulated queue forever and push each batch to Pachyderm.

    Every batch returned by ``receive_message`` is written inside a single
    ``client.commit('spout', 'master')`` context, one file per message. This
    function never returns.
    """
    # Build the client once, before the loop: creating a new one for every
    # batch would set up another client on each pass of an endless loop
    # without ever releasing the previous one.
    print("connecting to pachd")
    client = Client.new_from_config()
    print("connected")

    while True:
        # Polls queue
        msgs = receive_message()
        if not msgs:
            continue

        with client.commit('spout', 'master') as c:
            for msg in msgs:
                # Hash the content to give each file a unique name.
                filename = hashlib.sha256(msg).hexdigest() + ".txt"
                client.put_file_bytes(c, filename, msg)


if __name__ == '__main__':
    polling_consumer()