github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/block_querier_symbols.go (about) 1 package phlaredb 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "strings" 9 10 "github.com/grafana/dskit/multierror" 11 "github.com/grafana/dskit/runutil" 12 "github.com/parquet-go/parquet-go" 13 "golang.org/x/sync/errgroup" 14 15 "github.com/grafana/pyroscope/pkg/iter" 16 phlareobj "github.com/grafana/pyroscope/pkg/objstore" 17 parquetobj "github.com/grafana/pyroscope/pkg/objstore/parquet" 18 "github.com/grafana/pyroscope/pkg/phlaredb/block" 19 "github.com/grafana/pyroscope/pkg/phlaredb/query" 20 schemav1 "github.com/grafana/pyroscope/pkg/phlaredb/schemas/v1" 21 "github.com/grafana/pyroscope/pkg/phlaredb/symdb" 22 "github.com/grafana/pyroscope/pkg/util" 23 ) 24 25 // TODO(kolesnikovae) Decouple from phlaredb and refactor to symdb/compat. 26 27 type symbolsResolver interface { 28 symdb.SymbolsReader 29 io.Closer 30 } 31 32 type symbolsResolverV1 struct { 33 stacktraces parquetReader[*schemav1.StacktracePersister] 34 bucketReader phlareobj.Bucket 35 *inMemoryParquetTables 36 } 37 38 func newSymbolsResolverV1(ctx context.Context, bucketReader phlareobj.Bucket, meta *block.Meta) (*symbolsResolverV1, error) { 39 r := &symbolsResolverV1{bucketReader: bucketReader} 40 p := r.stacktraces.relPath() 41 for _, f := range meta.Files { 42 if f.RelPath == p { 43 r.stacktraces.meta = f 44 break 45 } 46 } 47 var err error 48 if err = r.stacktraces.open(ctx, r.bucketReader); err != nil { 49 return nil, err 50 } 51 r.inMemoryParquetTables, err = openInMemoryParquetTables(ctx, bucketReader, meta) 52 return r, err 53 } 54 55 func (r *symbolsResolverV1) Close() error { 56 return multierror.New( 57 r.stacktraces.Close(), 58 r.inMemoryParquetTables.Close()). 59 Err() 60 } 61 62 func (r *symbolsResolverV1) Partition(_ context.Context, _ uint64) (symdb.PartitionReader, error) { 63 p := symbolsPartition{ 64 stats: symdb.PartitionStats{ 65 StacktracesTotal: int(r.stacktraces.file.NumRows()), 66 MaxStacktraceID: int(r.stacktraces.file.NumRows()), 67 LocationsTotal: len(r.locations.cache), 68 MappingsTotal: len(r.mappings.cache), 69 FunctionsTotal: len(r.functions.cache), 70 StringsTotal: len(r.strings.cache), 71 }, 72 symbols: &symdb.Symbols{ 73 Stacktraces: stacktraceResolverV1{r: r}, 74 Locations: r.locations.cache, 75 Mappings: r.mappings.cache, 76 Functions: r.functions.cache, 77 Strings: r.strings.cache, 78 }, 79 } 80 return &p, nil 81 } 82 83 type stacktraceResolverV1 struct{ r *symbolsResolverV1 } 84 85 func (r stacktraceResolverV1) ResolveStacktraceLocations(ctx context.Context, dst symdb.StacktraceInserter, stacktraces []uint32) error { 86 column, err := schemav1.ResolveColumnByPath(r.r.stacktraces.file.Schema(), strings.Split("LocationIDs.list.element", ".")) 87 if err != nil { 88 return err 89 } 90 it := query.NewRepeatedRowIterator(ctx, iter.NewSliceIterator(stacktraces), r.r.stacktraces.file.RowGroups(), column.ColumnIndex) 91 defer runutil.CloseWithErrCapture(&err, it, "failed to close stack trace stream") 92 t := make([]int32, 0, 64) 93 for it.Next() { 94 s := it.At() 95 t = grow(t, len(s.Values)) 96 for i, v := range s.Values { 97 t[i] = v[0].Int32() 98 } 99 dst.InsertStacktrace(s.Row, t) 100 } 101 return it.Err() 102 } 103 104 func (r stacktraceResolverV1) LookupLocations(_ []uint64, _ uint32) []uint64 { 105 // NOTE(kolesnikovae): This API is not supported. 106 // Despite the fact that this could be implemented, 107 // practically this is not viable. 108 // 109 // The method is only implemented to satisfy the 110 // StacktraceResolver interface and must not be used. 111 return nil 112 } 113 114 func grow[T any](s []T, n int) []T { 115 if cap(s) < n { 116 return make([]T, n, 2*n) 117 } 118 return s[:n] 119 } 120 121 type symbolsResolverV2 struct { 122 symbols *symdb.Reader 123 bucket phlareobj.Bucket 124 *inMemoryParquetTables 125 } 126 127 func newSymbolsResolverV2(ctx context.Context, b phlareobj.Bucket, meta *block.Meta) (*symbolsResolverV2, error) { 128 r := symbolsResolverV2{bucket: b} 129 var err error 130 if r.symbols, err = symdb.Open(ctx, b, meta); err != nil { 131 return nil, err 132 } 133 r.inMemoryParquetTables, err = openInMemoryParquetTables(ctx, b, meta) 134 return &r, err 135 } 136 137 func (r *symbolsResolverV2) Close() error { 138 err := multierror.New() 139 if r.symbols != nil { 140 err.Add(r.symbols.Close()) 141 } 142 if r.inMemoryParquetTables != nil { 143 err.Add(r.inMemoryParquetTables.Close()) 144 } 145 return err.Err() 146 } 147 148 func (r *symbolsResolverV2) Partition(ctx context.Context, partition uint64) (symdb.PartitionReader, error) { 149 sr, err := r.symbols.Partition(ctx, partition) 150 if err != nil { 151 return nil, err 152 } 153 var t symdb.PartitionStats 154 sr.WriteStats(&t) 155 s := sr.Symbols() 156 p := symbolsPartition{ 157 stats: symdb.PartitionStats{ 158 StacktracesTotal: t.StacktracesTotal, 159 MaxStacktraceID: t.MaxStacktraceID, 160 LocationsTotal: len(s.Locations), 161 MappingsTotal: len(s.Mappings), 162 FunctionsTotal: len(s.Functions), 163 StringsTotal: len(s.Strings), 164 }, 165 symbols: &symdb.Symbols{ 166 Stacktraces: s.Stacktraces, 167 Locations: r.locations.cache, 168 Mappings: r.mappings.cache, 169 Functions: r.functions.cache, 170 Strings: r.strings.cache, 171 }, 172 release: sr.Release, 173 } 174 return &p, nil 175 } 176 177 type symbolsPartition struct { 178 stats symdb.PartitionStats 179 symbols *symdb.Symbols 180 release func() 181 } 182 183 func (p *symbolsPartition) Symbols() *symdb.Symbols { return p.symbols } 184 185 func (p *symbolsPartition) WriteStats(stats *symdb.PartitionStats) { *stats = p.stats } 186 187 func (p *symbolsPartition) Release() { 188 if p.release != nil { 189 p.release() 190 } 191 } 192 193 type inMemoryParquetTables struct { 194 strings inMemoryparquetReader[string, schemav1.StringPersister] 195 functions inMemoryparquetReader[schemav1.InMemoryFunction, schemav1.FunctionPersister] 196 locations inMemoryparquetReader[schemav1.InMemoryLocation, schemav1.LocationPersister] 197 mappings inMemoryparquetReader[schemav1.InMemoryMapping, schemav1.MappingPersister] 198 } 199 200 func openInMemoryParquetTables(ctx context.Context, r phlareobj.BucketReader, meta *block.Meta) (*inMemoryParquetTables, error) { 201 var t inMemoryParquetTables 202 for _, f := range meta.Files { 203 switch f.RelPath { 204 case t.locations.relPath(): 205 t.locations.meta = f 206 case t.functions.relPath(): 207 t.functions.meta = f 208 case t.mappings.relPath(): 209 t.mappings.meta = f 210 case t.strings.relPath(): 211 t.strings.meta = f 212 } 213 } 214 g, ctx := errgroup.WithContext(ctx) 215 g.Go(func() error { return t.locations.open(ctx, r) }) 216 g.Go(func() error { return t.mappings.open(ctx, r) }) 217 g.Go(func() error { return t.functions.open(ctx, r) }) 218 g.Go(func() error { return t.strings.open(ctx, r) }) 219 return &t, g.Wait() 220 } 221 222 func (t *inMemoryParquetTables) Close() error { 223 return multierror.New( 224 t.strings.Close(), 225 t.functions.Close(), 226 t.locations.Close(), 227 t.mappings.Close()). 228 Err() 229 } 230 231 type ResultWithRowNum[M any] struct { 232 Result M 233 RowNum int64 234 } 235 236 type inMemoryparquetReader[M schemav1.Models, P schemav1.Persister[M]] struct { 237 persister P 238 meta block.File 239 cache []M 240 } 241 242 func (r *inMemoryparquetReader[M, P]) open(ctx context.Context, bucketReader phlareobj.BucketReader) error { 243 var file parquetobj.File 244 if err := file.Open( 245 ctx, 246 bucketReader, 247 r.meta, 248 parquet.SkipBloomFilters(true), // we don't use bloom filters 249 parquet.FileReadMode(parquet.ReadModeAsync), 250 parquet.ReadBufferSize(parquetReadBufferSize), 251 ); err != nil { 252 return err 253 } 254 255 // read all rows into memory 256 r.cache = make([]M, file.NumRows()) 257 var offset int64 258 for _, rg := range file.RowGroups() { 259 rows := rg.NumRows() 260 dst := r.cache[offset : offset+rows] 261 offset += rows 262 if err := r.readRG(dst, rg); err != nil { 263 return fmt.Errorf("reading row group from parquet file '%s': %w", file.Path(), err) 264 } 265 } 266 return file.Close() 267 } 268 269 // parquet.CopyRows uses hardcoded buffer size: 270 // defaultRowBufferSize = 42 271 const inMemoryReaderRowsBufSize = 1 << 10 272 273 func (r *inMemoryparquetReader[M, P]) readRG(dst []M, rg parquet.RowGroup) (err error) { 274 rr := parquet.NewRowGroupReader(rg) 275 defer runutil.CloseWithLogOnErr(util.Logger, rr, "closing parquet row group reader") 276 buf := make([]parquet.Row, inMemoryReaderRowsBufSize) 277 for i := 0; i < len(dst); { 278 n, err := rr.ReadRows(buf) 279 if n > 0 { 280 for _, row := range buf[:n] { 281 v, err := r.persister.Reconstruct(row) 282 if err != nil { 283 return err 284 } 285 dst[i] = v 286 i++ 287 } 288 } 289 if err != nil { 290 if errors.Is(err, io.EOF) { 291 return nil 292 } 293 return err 294 } 295 } 296 return nil 297 } 298 299 func (r *inMemoryparquetReader[M, P]) Close() error { 300 r.cache = nil 301 return nil 302 } 303 304 func (r *inMemoryparquetReader[M, P]) relPath() string { 305 return r.persister.Name() + block.ParquetSuffix 306 }