github.com/tobgu/qframe@v0.4.0/arrow/arrow.py (about)

     1  # Utility script for cross-language testing of the Arrow format.
     2  #
     3  # Requires that pyarrow is installed:
     4  # pip install pyarrow
     5  #
     6  # Run:
     7  # python arrow.py
     8  
     9  import pyarrow as pa
    10  
    11  def write_data(data_dict, file_name):
    12      keys = sorted(data_dict.keys())
    13      data = [pa.array(data_dict[k]) for k in keys]
    14      batch = pa.RecordBatch.from_arrays(data, keys)
    15      writer = pa.RecordBatchStreamWriter(file_name, batch.schema)
    16      writer.write(batch)
    17      writer.close()
    18  
    19  def read_data(file_name):
    20      reader = pa.RecordBatchStreamReader(file_name)
    21      table = reader.read_all()
    22      print(str(table.to_pydict()))
    23  
    24  
    25  write_data({'f0': [True, False, True]}, 'bool.bin')
    26  write_data({'f0': [1.5, 2.5, None]}, 'float.bin')
    27  write_data({'f0': ['foo', 'bar', None]}, 'string.bin')
    28  write_data({'f0': [1, 2, 3]}, 'int.bin')
    29  write_data({'f0': [1, 2, 3],
    30              'f1': [1.5, 2.5, None],
    31              'f2': [True, False, True],
    32              'f3': ['foo', 'bar', None]}, 'mixed.bin')
    33  
    34  read_data('mixed.bin')
    35  
    36  # TODO: dictionary/enum
    37  # TODO: corner cases, empty arrays for example
    38  # TODO: Test with tables/columns as well