github.com/pachyderm/pachyderm@v1.13.4/examples/ml/gpt-2/tweets.py (about)

     1  #!/usr/bin/python3
     2  import os
     3  import twitterscraper as t
     4  
     5  for query in os.listdir("/pfs/queries/"):
     6      with open(os.path.join("/pfs/queries", query)) as f:
     7          for q in f:
     8              q = q.strip()  # clean whitespace
     9              with open(os.path.join("/pfs/out", query), "w+") as out:
    10                  for tweet in t.query_tweets(q):
    11                      out.write("<|startoftext|> ")
    12                      out.write(tweet.text.encode("ascii", "replace").decode("ascii"))
    13                      out.write(" <|endoftext|> ")