storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/parquet/reader.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2019 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package parquet 18 19 import ( 20 "fmt" 21 "io" 22 "time" 23 24 "github.com/bcicen/jstream" 25 26 parquetgo "storj.io/minio/pkg/s3select/internal/parquet-go" 27 parquetgen "storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" 28 jsonfmt "storj.io/minio/pkg/s3select/json" 29 "storj.io/minio/pkg/s3select/sql" 30 ) 31 32 // Reader - Parquet record reader for S3Select. 33 type Reader struct { 34 args *ReaderArgs 35 reader *parquetgo.Reader 36 } 37 38 // Read - reads single record. 39 func (r *Reader) Read(dst sql.Record) (rec sql.Record, rerr error) { 40 defer func() { 41 if rec := recover(); rec != nil { 42 rerr = fmt.Errorf("panic reading parquet record: %v", rec) 43 } 44 }() 45 46 parquetRecord, err := r.reader.Read() 47 if err != nil { 48 if err != io.EOF { 49 return nil, errParquetParsingError(err) 50 } 51 52 return nil, err 53 } 54 55 kvs := jstream.KVS{} 56 f := func(name string, v parquetgo.Value) bool { 57 if v.Value == nil { 58 kvs = append(kvs, jstream.KV{Key: name, Value: nil}) 59 return true 60 } 61 62 var value interface{} 63 switch v.Type { 64 case parquetgen.Type_BOOLEAN: 65 value = v.Value.(bool) 66 case parquetgen.Type_INT32: 67 value = int64(v.Value.(int32)) 68 if v.Schema != nil && v.Schema.ConvertedType != nil { 69 switch *v.Schema.ConvertedType { 70 case parquetgen.ConvertedType_DATE: 71 value = sql.FormatSQLTimestamp(time.Unix(60*60*24*int64(v.Value.(int32)), 0).UTC()) 72 } 73 } 74 case parquetgen.Type_INT64: 75 value = v.Value.(int64) 76 if v.Schema != nil && v.Schema.ConvertedType != nil { 77 switch *v.Schema.ConvertedType { 78 // Only UTC supported, add one NS to never be exactly midnight. 79 case parquetgen.ConvertedType_TIMESTAMP_MILLIS: 80 value = sql.FormatSQLTimestamp(time.Unix(0, 0).Add(time.Duration(v.Value.(int64)) * time.Millisecond).UTC()) 81 case parquetgen.ConvertedType_TIMESTAMP_MICROS: 82 value = sql.FormatSQLTimestamp(time.Unix(0, 0).Add(time.Duration(v.Value.(int64)) * time.Microsecond).UTC()) 83 } 84 } 85 case parquetgen.Type_FLOAT: 86 value = float64(v.Value.(float32)) 87 case parquetgen.Type_DOUBLE: 88 value = v.Value.(float64) 89 case parquetgen.Type_INT96, parquetgen.Type_BYTE_ARRAY, parquetgen.Type_FIXED_LEN_BYTE_ARRAY: 90 value = string(v.Value.([]byte)) 91 default: 92 rerr = errParquetParsingError(nil) 93 return false 94 } 95 96 kvs = append(kvs, jstream.KV{Key: name, Value: value}) 97 return true 98 } 99 100 // Apply our range 101 parquetRecord.Range(f) 102 103 // Reuse destination if we can. 104 dstRec, ok := dst.(*jsonfmt.Record) 105 if !ok { 106 dstRec = &jsonfmt.Record{} 107 } 108 dstRec.SelectFormat = sql.SelectFmtParquet 109 dstRec.KVS = kvs 110 return dstRec, nil 111 } 112 113 // Close - closes underlying readers. 114 func (r *Reader) Close() error { 115 return r.reader.Close() 116 } 117 118 // NewReader - creates new Parquet reader using readerFunc callback. 119 func NewReader(getReaderFunc func(offset, length int64) (io.ReadCloser, error), args *ReaderArgs) (r *Reader, err error) { 120 defer func() { 121 if rec := recover(); rec != nil { 122 err = fmt.Errorf("panic reading parquet header: %v", rec) 123 } 124 }() 125 reader, err := parquetgo.NewReader(getReaderFunc, nil) 126 if err != nil { 127 if err != io.EOF { 128 return nil, errParquetParsingError(err) 129 } 130 131 return nil, err 132 } 133 134 return &Reader{ 135 args: args, 136 reader: reader, 137 }, nil 138 }