# Source: github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/tools/nyc-taxi-benchmark/ingester.py
"""Bulk-ingest a line-oriented file into the ``trips`` index over HTTP.

Each input line is sent as one document through the Elasticsearch-style
``_bulk`` endpoint at ``BULK_URL``, using the ``curl`` executable.

Usage: ``python ingester.py <file>``
"""
import subprocess
import sys

# Endpoint that receives every batch.
BULK_URL = "http://localhost:8081/elastic/_bulk"

# Action line that precedes each document in the bulk request body.
ACTION_LINE = '{"index": {"_index": "trips", "_type": "_doc"}}'


def ingest(filename, batch_size=100):
    """Send the lines of *filename* to the bulk endpoint in batches.

    Progress is written to stdout as a percentage of lines read, updated
    after every flushed batch.

    Args:
        filename: Path of the file to ingest; each line is presumably one
            JSON document (the file content is not validated here).
        batch_size: Number of lines collected before a batch is sent.
    """
    # First pass: count lines so progress can be shown as a percentage.
    with open(filename, "r", encoding="utf-8") as f:
        total_lines = sum(1 for _ in f)

    batch = []
    with open(filename, "r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            batch.append(line)

            if len(batch) >= batch_size:
                percent = ((i + 1) / total_lines) * 100
                print(f"\rProcessing... {percent:.2f}%", end='')
                ingest_lines(batch)
                batch = []

    # Flush whatever is left over after the last full batch.
    if batch:
        ingest_lines(batch)
    print("\rProcessing... 100.00%")


def build_bulk_payload(lines):
    """Return the bulk request body for *lines*.

    Every non-blank line is preceded by ``ACTION_LINE``. Line endings are
    normalised so that each entry, including the last one, ends with a
    single newline, which the bulk format requires. Blank lines are
    skipped because they would otherwise be sent as empty documents.

    Args:
        lines: Iterable of document strings, with or without trailing
            newlines.

    Returns:
        The request body as a string, or ``""`` when there is nothing to
        send.
    """
    parts = []
    for line in lines:
        doc = line.rstrip("\r\n")
        if not doc.strip():
            continue
        parts.append(ACTION_LINE)
        parts.append(doc)

    if not parts:
        return ""
    return "\n".join(parts) + "\n"


def ingest_lines(lines):
    """POST one batch of documents to ``BULK_URL`` using ``curl``.

    Failures are reported on stderr and do not raise, so one bad batch
    does not abort the rest of the ingestion.

    Args:
        lines: Iterable of document strings forming the batch.
    """
    payload = build_bulk_payload(lines)
    if not payload:
        return

    # The body is streamed through stdin ("@-") rather than passed as an
    # argument, so large batches cannot exceed the OS argument-size limit.
    curl_command = [
        "curl",
        "-s",
        "-S",        # keep error messages even though -s hides progress
        "--fail",    # non-zero exit status on HTTP errors (>= 400)
        "-o", "/dev/null",
        BULK_URL,
        "-X", "POST",
        "-H", "Authorization: Bearer ",
        "-H", "Content-Type: application/json",
        "--data-binary", "@-",
    ]

    # stderr must be piped explicitly; otherwise process.stderr is None and
    # failures would go unnoticed.
    process = subprocess.run(
        curl_command,
        input=payload.encode("utf-8"),
        stderr=subprocess.PIPE,
    )
    if process.returncode != 0 or process.stderr:
        message = process.stderr.decode("utf-8", errors="replace").strip()
        print(
            "Error:",
            message or f"curl exited with status {process.returncode}",
            file=sys.stderr,
        )


if __name__ == "__main__":
    ingest(sys.argv[1])