github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/parquet_go18.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"io"
     7  	"os"
     8  )
     9  
    10  // Read reads and returns rows from the parquet file in the given reader.
    11  //
    12  // The type T defines the type of rows read from r. T must be compatible with
    13  // the file's schema or an error will be returned. The row type might represent
    14  // a subset of the full schema, in which case only a subset of the columns will
    15  // be loaded from r.
    16  //
    17  // This function is provided for convenience to facilitate reading of parquet
    18  // files from arbitrary locations in cases where the data set fit in memory.
    19  func Read[T any](r io.ReaderAt, size int64, options ...ReaderOption) (rows []T, err error) {
    20  	config, err := NewReaderConfig(options...)
    21  	if err != nil {
    22  		return nil, err
    23  	}
    24  	file, err := OpenFile(r, size)
    25  	if err != nil {
    26  		return nil, err
    27  	}
    28  	rows = make([]T, file.NumRows())
    29  	reader := NewGenericReader[T](file, config)
    30  	n, err := reader.Read(rows)
    31  	if n < len(rows) && err == io.EOF {
    32  		rows, err = rows[:n], nil
    33  	}
    34  	reader.Close()
    35  	return rows, err
    36  }
    37  
    38  // ReadFile reads rows of the parquet file at the given path.
    39  //
    40  // The type T defines the type of rows read from r. T must be compatible with
    41  // the file's schema or an error will be returned. The row type might represent
    42  // a subset of the full schema, in which case only a subset of the columns will
    43  // be loaded from the file.
    44  //
    45  // This function is provided for convenience to facilitate reading of parquet
    46  // files from the file system in cases where the data set fit in memory.
    47  func ReadFile[T any](path string, options ...ReaderOption) (rows []T, err error) {
    48  	f, err := os.Open(path)
    49  	if err != nil {
    50  		return nil, err
    51  	}
    52  	defer f.Close()
    53  	s, err := f.Stat()
    54  	if err != nil {
    55  		return nil, err
    56  	}
    57  	return Read[T](f, s.Size())
    58  }
    59  
    60  // Write writes the given list of rows to a parquet file written to w.
    61  //
    62  // This function is provided for convenience to facilitate the creation of
    63  // parquet files.
    64  func Write[T any](w io.Writer, rows []T, options ...WriterOption) error {
    65  	config, err := NewWriterConfig(options...)
    66  	if err != nil {
    67  		return err
    68  	}
    69  	writer := NewGenericWriter[T](w, config)
    70  	if _, err := writer.Write(rows); err != nil {
    71  		return err
    72  	}
    73  	return writer.Close()
    74  }
    75  
    76  // Write writes the given list of rows to a parquet file written to w.
    77  //
    78  // This function is provided for convenience to facilitate writing parquet
    79  // files to the file system.
    80  func WriteFile[T any](path string, rows []T, options ...WriterOption) error {
    81  	f, err := os.Create(path)
    82  	if err != nil {
    83  		return err
    84  	}
    85  	defer f.Close()
    86  	return Write(f, rows, options...)
    87  }