github.com/fraugster/parquet-go@v0.12.0/file_meta.go (about) 1 package goparquet 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "io" 10 11 "github.com/fraugster/parquet-go/parquet" 12 ) 13 14 var magic = []byte{'P', 'A', 'R', '1'} 15 16 // ReadFileMetaData reads and returns the meta data of a parquet file. You can use this function 17 // to read and inspect the meta data before starting to read the whole parquet file. 18 func ReadFileMetaData(r io.ReadSeeker, extraValidation bool) (*parquet.FileMetaData, error) { 19 return ReadFileMetaDataWithContext(context.Background(), r, extraValidation) 20 } 21 22 // ReadFileMetaDataWithContext reads and returns the meta data of a parquet file. You can use this function 23 // to read and inspect the meta data before starting to read the whole parquet file. 24 func ReadFileMetaDataWithContext(ctx context.Context, r io.ReadSeeker, extraValidation bool) (*parquet.FileMetaData, error) { 25 if extraValidation { 26 if _, err := r.Seek(0, io.SeekStart); err != nil { 27 return nil, fmt.Errorf("seek for the file magic header failed: %w", err) 28 } 29 30 buf := make([]byte, 4) 31 // read and validate header 32 if _, err := io.ReadFull(r, buf); err != nil { 33 return nil, fmt.Errorf("read the file magic header failed: %w", err) 34 } 35 if !bytes.Equal(buf, magic) { 36 return nil, errors.New("invalid parquet file header") 37 } 38 39 // read and validate footer 40 if _, err := r.Seek(-4, io.SeekEnd); err != nil { 41 return nil, fmt.Errorf("seek for the file magic footer failed: %w", err) 42 } 43 44 if _, err := io.ReadFull(r, buf); err != nil { 45 return nil, fmt.Errorf("read the file magic header failed: %w", err) 46 } 47 if !bytes.Equal(buf, magic) { 48 return nil, errors.New("invalid parquet file footer") 49 } 50 } 51 52 // read footer length 53 if _, err := r.Seek(-8, io.SeekEnd); err != nil { 54 return nil, fmt.Errorf("seek for the footer len failed: %w", err) 55 } 56 var fl int32 57 if err := binary.Read(r, binary.LittleEndian, &fl); err != nil { 58 return nil, fmt.Errorf("read the footer len failed: %w", err) 59 } 60 if fl <= 0 { 61 return nil, fmt.Errorf("invalid footer len %d", fl) 62 } 63 64 // read file metadata 65 if _, err := r.Seek(-8-int64(fl), io.SeekEnd); err != nil { 66 return nil, fmt.Errorf("seek file meta data failed: %w", err) 67 } 68 meta := &parquet.FileMetaData{} 69 if err := readThrift(ctx, meta, io.LimitReader(r, int64(fl))); err != nil { 70 return nil, fmt.Errorf("read file meta failed: %w", err) 71 } 72 73 return meta, nil 74 }