# Source: github.com/siglens/siglens@v0.0.0-20240328180423-f7ce9ae441ed/tools/nyc-taxi-benchmark/parquet_to_tsv.py
"""Convert every Parquet file in a directory into a tab-separated file."""

import glob
import os
import sys

import pandas as pd


def convert_parquet_to_tsv(input_directory, output_directory):
    """Write a ``.tsv`` copy of each ``*.parquet`` file in *input_directory*.

    Only files directly inside *input_directory* are considered (the glob
    pattern is not recursive). Each file is loaded with
    ``pandas.read_parquet`` and written to *output_directory* with tab
    separators and without the DataFrame index. One progress line is printed
    per converted file.

    Args:
        input_directory: Directory that is searched for ``*.parquet`` files.
        output_directory: Directory that receives the ``.tsv`` files; it is
            created (including missing parents) if it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)

    # glob returns matches in arbitrary order; sort for reproducible runs.
    pattern = os.path.join(input_directory, '*.parquet')
    for parquet_file in sorted(glob.glob(pattern)):
        df = pd.read_parquet(parquet_file)
        # Swap only the final extension. str.replace would also rewrite any
        # earlier ".parquet" inside the name, and would leave the name
        # untouched if the extension's case differed.
        stem, _ = os.path.splitext(os.path.basename(parquet_file))
        tsv_file = os.path.join(output_directory, stem + '.tsv')
        df.to_csv(tsv_file, sep='\t', index=False)
        print(f"Converted {parquet_file} to {tsv_file}")


def main():
    """Run the converter using the two directory arguments from ``sys.argv``."""
    if len(sys.argv) != 3:
        print("Usage: python script.py <input_directory> <output_directory>")
        sys.exit(1)

    input_dir = sys.argv[1]
    output_dir = sys.argv[2]
    convert_parquet_to_tsv(input_dir, output_dir)


if __name__ == "__main__":
    main()