github.com/pachyderm/pachyderm@v1.13.4/examples/spouts/spout101/src/consumer/main.py (about)

     1  #!/usr/local/bin/python3
     2  from python_pachyderm import Client
     3  import os
     4  import json
     5  import hashlib
     6  from random import randint
     7  from time import sleep
     8  
# Emulates receiving messages from a third-party messaging system or queue (such as AWS SQS, Kafka, Google Pub/Sub, etc.).
    10  
    11  
    12  def receive_message():
    13      # Emulates a network response time to poll new messages
    14      sleep(randint(10, 30))
    15      # Creates a random string of 1KB
    16      random1 = os.urandom(1024)
    17      random2 = os.urandom(2048)
    18      return (random1, random2)
    19  
# Polls data from a third-party messaging system or queue and pushes it to a Pachyderm repo in a single commit.
    21  
    22  
    23  def polling_consumer():
    24  
    25      while True:
    26          # Polls queue
    27          msgs = receive_message()
    28          if msgs:
    29              print("connecting to pachd")
    30              client = Client.new_from_config()
    31              print("connected")
    32              
    33              with client.commit('spout', 'master') as c:
    34                  for msg in msgs:
    35                      # hash the file to assign unique name
    36                      filename = hashlib.sha256(msg).hexdigest() + ".txt"
    37                      client.put_file_bytes(c, filename, msg)
    38  
    39  
    40  if __name__ == '__main__':
    41      polling_consumer()