#!/usr/bin/python3
# Source: github.com/pachyderm/pachyderm@v1.13.4/examples/ml/gpt-2/tweets.py
"""Scrape tweets for every query file in /pfs/queries into /pfs/out.

Each file in the queries directory holds one search query per line.  The
tweets returned for all queries of a file are written to a file of the same
name in the output directory, every tweet wrapped in ``<|startoftext|>`` /
``<|endoftext|>`` markers.
"""
import os

QUERIES_DIR = "/pfs/queries"
OUT_DIR = "/pfs/out"


def format_tweet(text):
    """Return *text* reduced to ASCII and wrapped in start/end markers.

    Characters that cannot be encoded as ASCII are replaced with ``?``.
    """
    ascii_text = text.encode("ascii", "replace").decode("ascii")
    return "<|startoftext|> " + ascii_text + " <|endoftext|> "


def scrape_queries(queries_dir, out_dir, fetch_tweets):
    """Write the tweets for each query file in *queries_dir* to *out_dir*.

    Args:
        queries_dir: Directory whose files contain one query per line.
        out_dir: Directory receiving one output file per query file.
        fetch_tweets: Callable taking a query string and returning an
            iterable of objects that have a ``text`` attribute.
    """
    for name in os.listdir(queries_dir):
        with open(os.path.join(queries_dir, name)) as query_file:
            stripped = (line.strip() for line in query_file)
            # Blank lines are not queries; skipping them avoids sending an
            # empty search to the scraper.
            queries = [query for query in stripped if query]

        if not queries:
            # Nothing to search for, so no output file is created.
            continue

        # Open the output once per query file.  Opening it in write mode for
        # every query line truncates it each time, which would leave only the
        # tweets of the last query in the file.
        with open(os.path.join(out_dir, name), "w") as out:
            for query in queries:
                for tweet in fetch_tweets(query):
                    out.write(format_tweet(tweet.text))


def main():
    """Scrape every query found under /pfs/queries into /pfs/out."""
    # Imported here rather than at module level so the helpers above stay
    # importable where the scraper package is not installed.
    import twitterscraper as t

    scrape_queries(QUERIES_DIR, OUT_DIR, t.query_tweets)


if __name__ == "__main__":
    main()