github.com/grafana/pyroscope@v1.18.0/cmd/profilecli/parquet.go (about)

     1  package main
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"os"
     8  	"strings"
     9  
    10  	"github.com/dustin/go-humanize"
    11  	"github.com/olekukonko/tablewriter"
    12  	"github.com/parquet-go/parquet-go"
    13  )
    14  
    15  func parquetInspect(ctx context.Context, path string) error {
    16  	f, err := os.Open(path)
    17  	if err != nil {
    18  		return err
    19  	}
    20  	defer f.Close()
    21  	stats, err := f.Stat()
    22  	if err != nil {
    23  		return err
    24  	}
    25  	pf, err := parquet.OpenFile(f, stats.Size())
    26  	if err != nil {
    27  		return err
    28  	}
    29  	out := output(ctx)
    30  	fmt.Fprintln(out, "schema:", pf.Schema())
    31  	numColumns := len(pf.Schema().Columns())
    32  	meta := pf.Metadata()
    33  	fmt.Println("Num Rows:", meta.NumRows)
    34  	for i, rg := range meta.RowGroups {
    35  		fmt.Fprintln(out, "\t Row group:", i)
    36  		fmt.Fprintln(out, "\t\t Row Count:", rg.NumRows)
    37  		fmt.Fprintln(out, "\t\t Row size:", humanize.Bytes(uint64(rg.TotalByteSize)))
    38  		fmt.Fprintln(out, "\t\t Columns:")
    39  		table := tablewriter.NewWriter(out)
    40  		table.SetHeader([]string{
    41  			"Col", "Type", "NumVal", "TotalCompressedSize", "TotalUncompressedSize", "Compression", "%", "PageCount", "PageSize",
    42  		})
    43  
    44  		for j, ds := range rg.Columns {
    45  			offsets := pf.OffsetIndexes()[(i*numColumns)+j]
    46  			var avgPageSize int64
    47  			maxPageSize := int64(0)
    48  			minPageSize := int64(math.MaxInt64)
    49  			for _, offset := range offsets.PageLocations {
    50  				avgPageSize += int64(offset.CompressedPageSize)
    51  				if int64(offset.CompressedPageSize) > maxPageSize {
    52  					maxPageSize = int64(offset.CompressedPageSize)
    53  				}
    54  				if int64(offset.CompressedPageSize) < minPageSize {
    55  					minPageSize = int64(offset.CompressedPageSize)
    56  				}
    57  			}
    58  			avgPageSize /= int64(len(offsets.PageLocations))
    59  
    60  			table.Append(
    61  				[]string{
    62  					strings.Join(ds.MetaData.PathInSchema, "/"),
    63  					ds.MetaData.Type.String(),
    64  					fmt.Sprintf("%d", ds.MetaData.NumValues),
    65  					humanize.Bytes(uint64(ds.MetaData.TotalCompressedSize)),
    66  					humanize.Bytes(uint64(ds.MetaData.TotalUncompressedSize)),
    67  					fmt.Sprintf("%.2f", float64(ds.MetaData.TotalUncompressedSize-ds.MetaData.TotalCompressedSize)/float64(ds.MetaData.TotalCompressedSize)*100),
    68  					fmt.Sprintf("%.2f", float64(ds.MetaData.TotalCompressedSize)/float64(rg.TotalByteSize)*100),
    69  					fmt.Sprintf("%d", len(offsets.PageLocations)),
    70  					"avg:" + humanize.Bytes(uint64(avgPageSize)) + ", max:" + humanize.Bytes(uint64(maxPageSize)) + ", min:" + humanize.Bytes(uint64(minPageSize)),
    71  				})
    72  		}
    73  		table.Render()
    74  	}
    75  
    76  	return nil
    77  }