# Origin: github.com/tobgu/qframe@v0.4.0/arrow/arrow.py
#
# Utility script for cross language test of arrow format.
#
# Requires that pyarrow is installed:
# pip install pyarrow
#
# Run:
# python arrow.py

import pyarrow as pa


def write_data(data_dict, file_name):
    """Write the columns in ``data_dict`` to ``file_name`` as one record batch.

    Args:
        data_dict: Mapping of column name to a sequence of values. Each
            sequence is converted with ``pa.array``; columns are written in
            sorted key order so the output layout is deterministic.
        file_name: Sink handed to ``pa.RecordBatchStreamWriter`` (the calls
            in this script pass a file path string).
    """
    keys = sorted(data_dict)
    columns = [pa.array(data_dict[key]) for key in keys]
    batch = pa.RecordBatch.from_arrays(columns, keys)
    writer = pa.RecordBatchStreamWriter(file_name, batch.schema)
    try:
        writer.write(batch)
    finally:
        # Always release the writer, even if writing the batch fails, so the
        # output sink is not left open.
        writer.close()


def read_data(file_name):
    """Read every record batch from ``file_name`` and print it as a dict.

    Args:
        file_name: Source handed to ``pa.RecordBatchStreamReader`` (the call
            in this script passes a file path string).
    """
    reader = pa.RecordBatchStreamReader(file_name)
    table = reader.read_all()
    print(table.to_pydict())


# One file per column type, plus one file mixing all of them.
write_data({'f0': [True, False, True]}, 'bool.bin')
write_data({'f0': [1.5, 2.5, None]}, 'float.bin')
write_data({'f0': ['foo', 'bar', None]}, 'string.bin')
write_data({'f0': [1, 2, 3]}, 'int.bin')
write_data({'f0': [1, 2, 3],
            'f1': [1.5, 2.5, None],
            'f2': [True, False, True],
            'f3': ['foo', 'bar', None]}, 'mixed.bin')

# Read the mixed file back and print its contents.
read_data('mixed.bin')

# TODO: dictionary/enum
# TODO: corner cases, empty arrays for example
# TODO: Test with tables/columns as well