go.charczuk.com@v0.0.0-20240327042549-bc490516bd1a/projects/nodes/pkg/funcs/read_parquet.go (about) 1 /* 2 3 Copyright (c) 2023 - Present. Will Charczuk. All rights reserved. 4 Use of this source code is governed by a MIT license that can be found in the LICENSE file at the root of the repository. 5 6 */ 7 8 package funcs 9 10 import ( 11 "context" 12 "os" 13 "time" 14 15 parquet "github.com/parquet-go/parquet-go" 16 17 "go.charczuk.com/projects/nodes/pkg/incrutil" 18 "go.charczuk.com/projects/nodes/pkg/types" 19 "go.charczuk.com/sdk/iter" 20 ) 21 22 const defaultMaxParquetFileSize = 32 << 20 // 32mb 23 24 func ReadParquet(ctx context.Context, filepath string, modtime time.Time) (output *types.Table, err error) { 25 var f *os.File 26 f, err = os.Open(filepath) 27 if err != nil { 28 return 29 } 30 defer f.Close() 31 32 var pf *parquet.File 33 pf, err = parquet.OpenFile(f, defaultMaxParquetFileSize) 34 if err != nil { 35 return 36 } 37 38 output = new(types.Table) 39 40 columns := pf.Root().Columns() 41 for _, column := range columns { 42 newCol := types.TableColumn{ 43 Name: column.Name(), 44 } 45 pages := column.Pages() 46 var page parquet.Page 47 for { 48 page, err = pages.ReadPage() 49 if err != nil { 50 break 51 } 52 switch typedPage := page.Values().(type) { 53 case parquet.BooleanReader: 54 values := make([]bool, page.NumValues()) 55 typedPage.ReadBooleans(values) 56 newCol.ValueType = incrutil.APIValueTypeForGoType(incrutil.ValueTypeBool) 57 newCol.Values = append(newCol.Values, iter.Apply(values, func(v bool) any { return v })...) 58 case parquet.Int64Reader: 59 values := make([]int64, page.NumValues()) 60 typedPage.ReadInt64s(values) 61 newCol.ValueType = incrutil.APIValueTypeForGoType(incrutil.ValueTypeInt64) 62 newCol.Values = append(newCol.Values, iter.Apply(values, func(v int64) any { return v })...) 63 case parquet.DoubleReader: 64 values := make([]float64, page.NumValues()) 65 typedPage.ReadDoubles(values) 66 newCol.ValueType = incrutil.APIValueTypeForGoType(incrutil.ValueTypeFloat64) 67 newCol.Values = append(newCol.Values, iter.Apply(values, func(v float64) any { return v })...) 68 default: 69 values := make([]parquet.Value, page.NumValues()) 70 typedPage.ReadValues(values) 71 newCol.ValueType = incrutil.APIValueTypeForGoType(incrutil.ValueTypeString) 72 newCol.Values = append(newCol.Values, iter.Apply(values, func(v parquet.Value) any { return v.GoString() })...) 73 } 74 } 75 output.Columns = append(output.Columns, newCol) 76 } 77 return 78 }