github.com/fraugster/parquet-go@v0.12.0/file_meta.go (about)

     1  package goparquet
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  
    11  	"github.com/fraugster/parquet-go/parquet"
    12  )
    13  
    14  var magic = []byte{'P', 'A', 'R', '1'}
    15  
    16  // ReadFileMetaData reads and returns the meta data of a parquet file. You can use this function
    17  // to read and inspect the meta data before starting to read the whole parquet file.
    18  func ReadFileMetaData(r io.ReadSeeker, extraValidation bool) (*parquet.FileMetaData, error) {
    19  	return ReadFileMetaDataWithContext(context.Background(), r, extraValidation)
    20  }
    21  
    22  // ReadFileMetaDataWithContext reads and returns the meta data of a parquet file. You can use this function
    23  // to read and inspect the meta data before starting to read the whole parquet file.
    24  func ReadFileMetaDataWithContext(ctx context.Context, r io.ReadSeeker, extraValidation bool) (*parquet.FileMetaData, error) {
    25  	if extraValidation {
    26  		if _, err := r.Seek(0, io.SeekStart); err != nil {
    27  			return nil, fmt.Errorf("seek for the file magic header failed: %w", err)
    28  		}
    29  
    30  		buf := make([]byte, 4)
    31  		// read and validate header
    32  		if _, err := io.ReadFull(r, buf); err != nil {
    33  			return nil, fmt.Errorf("read the file magic header failed: %w", err)
    34  		}
    35  		if !bytes.Equal(buf, magic) {
    36  			return nil, errors.New("invalid parquet file header")
    37  		}
    38  
    39  		// read and validate footer
    40  		if _, err := r.Seek(-4, io.SeekEnd); err != nil {
    41  			return nil, fmt.Errorf("seek for the file magic footer failed: %w", err)
    42  		}
    43  
    44  		if _, err := io.ReadFull(r, buf); err != nil {
    45  			return nil, fmt.Errorf("read the file magic header failed: %w", err)
    46  		}
    47  		if !bytes.Equal(buf, magic) {
    48  			return nil, errors.New("invalid parquet file footer")
    49  		}
    50  	}
    51  
    52  	// read footer length
    53  	if _, err := r.Seek(-8, io.SeekEnd); err != nil {
    54  		return nil, fmt.Errorf("seek for the footer len failed: %w", err)
    55  	}
    56  	var fl int32
    57  	if err := binary.Read(r, binary.LittleEndian, &fl); err != nil {
    58  		return nil, fmt.Errorf("read the footer len failed: %w", err)
    59  	}
    60  	if fl <= 0 {
    61  		return nil, fmt.Errorf("invalid footer len %d", fl)
    62  	}
    63  
    64  	// read file metadata
    65  	if _, err := r.Seek(-8-int64(fl), io.SeekEnd); err != nil {
    66  		return nil, fmt.Errorf("seek file meta data failed: %w", err)
    67  	}
    68  	meta := &parquet.FileMetaData{}
    69  	if err := readThrift(ctx, meta, io.LimitReader(r, int64(fl))); err != nil {
    70  		return nil, fmt.Errorf("read file meta failed: %w", err)
    71  	}
    72  
    73  	return meta, nil
    74  }