github.com/grafana/pyroscope@v1.18.0/cmd/profilecli/parquet.go (about) 1 package main 2 3 import ( 4 "context" 5 "fmt" 6 "math" 7 "os" 8 "strings" 9 10 "github.com/dustin/go-humanize" 11 "github.com/olekukonko/tablewriter" 12 "github.com/parquet-go/parquet-go" 13 ) 14 15 func parquetInspect(ctx context.Context, path string) error { 16 f, err := os.Open(path) 17 if err != nil { 18 return err 19 } 20 defer f.Close() 21 stats, err := f.Stat() 22 if err != nil { 23 return err 24 } 25 pf, err := parquet.OpenFile(f, stats.Size()) 26 if err != nil { 27 return err 28 } 29 out := output(ctx) 30 fmt.Fprintln(out, "schema:", pf.Schema()) 31 numColumns := len(pf.Schema().Columns()) 32 meta := pf.Metadata() 33 fmt.Println("Num Rows:", meta.NumRows) 34 for i, rg := range meta.RowGroups { 35 fmt.Fprintln(out, "\t Row group:", i) 36 fmt.Fprintln(out, "\t\t Row Count:", rg.NumRows) 37 fmt.Fprintln(out, "\t\t Row size:", humanize.Bytes(uint64(rg.TotalByteSize))) 38 fmt.Fprintln(out, "\t\t Columns:") 39 table := tablewriter.NewWriter(out) 40 table.SetHeader([]string{ 41 "Col", "Type", "NumVal", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%", "PageCount", "PageSize", 42 }) 43 44 for j, ds := range rg.Columns { 45 offsets := pf.OffsetIndexes()[(i*numColumns)+j] 46 var avgPageSize int64 47 maxPageSize := int64(0) 48 minPageSize := int64(math.MaxInt64) 49 for _, offset := range offsets.PageLocations { 50 avgPageSize += int64(offset.CompressedPageSize) 51 if int64(offset.CompressedPageSize) > maxPageSize { 52 maxPageSize = int64(offset.CompressedPageSize) 53 } 54 if int64(offset.CompressedPageSize) < minPageSize { 55 minPageSize = int64(offset.CompressedPageSize) 56 } 57 } 58 avgPageSize /= int64(len(offsets.PageLocations)) 59 60 table.Append( 61 []string{ 62 strings.Join(ds.MetaData.PathInSchema, "/"), 63 ds.MetaData.Type.String(), 64 fmt.Sprintf("%d", ds.MetaData.NumValues), 65 humanize.Bytes(uint64(ds.MetaData.TotalCompressedSize)), 66 humanize.Bytes(uint64(ds.MetaData.TotalUncompressedSize)), 67 fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize-ds.MetaData.TotalCompressedSize)/float64(ds.MetaData.TotalCompressedSize)*100), 68 fmt.Sprintf("%.2f", float64(ds.MetaData.TotalCompressedSize)/float64(rg.TotalByteSize)*100), 69 fmt.Sprintf("%d", len(offsets.PageLocations)), 70 "avg:" + humanize.Bytes(uint64(avgPageSize)) + ", max:" + humanize.Bytes(uint64(maxPageSize)) + ", min:" + humanize.Bytes(uint64(minPageSize)), 71 }) 72 } 73 table.Render() 74 } 75 76 return nil 77 }