github.com/grafana/pyroscope@v1.18.0/pkg/querier/store_gateway_querier.go (about) 1 package querier 2 3 import ( 4 "context" 5 6 "connectrpc.com/connect" 7 "github.com/go-kit/log" 8 "github.com/grafana/dskit/kv" 9 "github.com/grafana/dskit/ring" 10 ring_client "github.com/grafana/dskit/ring/client" 11 "github.com/grafana/dskit/services" 12 "github.com/opentracing/opentracing-go" 13 "github.com/pkg/errors" 14 "github.com/prometheus/client_golang/prometheus" 15 "github.com/prometheus/client_golang/prometheus/promauto" 16 "github.com/prometheus/prometheus/promql/parser" 17 "golang.org/x/sync/errgroup" 18 19 googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1" 20 ingesterv1 "github.com/grafana/pyroscope/api/gen/proto/go/ingester/v1" 21 querierv1 "github.com/grafana/pyroscope/api/gen/proto/go/querier/v1" 22 typesv1 "github.com/grafana/pyroscope/api/gen/proto/go/types/v1" 23 "github.com/grafana/pyroscope/pkg/clientpool" 24 model "github.com/grafana/pyroscope/pkg/model" 25 "github.com/grafana/pyroscope/pkg/storegateway" 26 "github.com/grafana/pyroscope/pkg/tenant" 27 "github.com/grafana/pyroscope/pkg/util" 28 ) 29 30 type StoreGatewayQueryClient interface { 31 MergeProfilesStacktraces(context.Context) clientpool.BidiClientMergeProfilesStacktraces 32 MergeProfilesLabels(ctx context.Context) clientpool.BidiClientMergeProfilesLabels 33 MergeProfilesPprof(ctx context.Context) clientpool.BidiClientMergeProfilesPprof 34 MergeSpanProfile(ctx context.Context) clientpool.BidiClientMergeSpanProfile 35 ProfileTypes(context.Context, *connect.Request[ingesterv1.ProfileTypesRequest]) (*connect.Response[ingesterv1.ProfileTypesResponse], error) 36 LabelValues(context.Context, *connect.Request[typesv1.LabelValuesRequest]) (*connect.Response[typesv1.LabelValuesResponse], error) 37 LabelNames(context.Context, *connect.Request[typesv1.LabelNamesRequest]) (*connect.Response[typesv1.LabelNamesResponse], error) 38 Series(context.Context, *connect.Request[ingesterv1.SeriesRequest]) (*connect.Response[ingesterv1.SeriesResponse], error) 39 BlockMetadata(ctx context.Context, req *connect.Request[ingesterv1.BlockMetadataRequest]) (*connect.Response[ingesterv1.BlockMetadataResponse], error) 40 GetBlockStats(ctx context.Context, req *connect.Request[ingesterv1.GetBlockStatsRequest]) (*connect.Response[ingesterv1.GetBlockStatsResponse], error) 41 } 42 43 type StoreGatewayLimits interface { 44 StoreGatewayTenantShardSize(userID string) int 45 } 46 47 type StoreGatewayQuerier struct { 48 ring ring.ReadRing 49 pool *ring_client.Pool 50 limits StoreGatewayLimits 51 52 services.Service 53 // Subservices manager. 54 subservices *services.Manager 55 subservicesWatcher *services.FailureWatcher 56 } 57 58 func newStoreGatewayQuerier( 59 gatewayCfg storegateway.Config, 60 factory ring_client.PoolFactory, 61 limits StoreGatewayLimits, 62 logger log.Logger, 63 reg prometheus.Registerer, 64 clientsOptions ...connect.ClientOption, 65 ) (*StoreGatewayQuerier, error) { 66 storesRingCfg := gatewayCfg.ShardingRing.ToRingConfig() 67 storesRingBackend, err := kv.NewClient( 68 storesRingCfg.KVStore, 69 ring.GetCodec(), 70 kv.RegistererWithKVName(prometheus.WrapRegistererWithPrefix("pyroscope_", reg), "querier-store-gateway"), 71 logger, 72 ) 73 if err != nil { 74 return nil, errors.Wrap(err, "failed to create store-gateway ring backend") 75 } 76 storesRing, err := ring.NewWithStoreClientAndStrategy(storesRingCfg, storegateway.RingNameForClient, storegateway.RingKey, storesRingBackend, ring.NewIgnoreUnhealthyInstancesReplicationStrategy(), prometheus.WrapRegistererWithPrefix("pyroscope_", reg), logger) 77 if err != nil { 78 return nil, errors.Wrap(err, "failed to create store-gateway ring client") 79 } 80 // Disable compression for querier -> store-gateway connections 81 clientsMetrics := promauto.With(reg).NewGauge(prometheus.GaugeOpts{ 82 Namespace: "pyroscope", 83 Name: "storegateway_clients", 84 Help: "The current number of store-gateway clients in the pool.", 85 ConstLabels: map[string]string{"client": "querier"}, 86 }) 87 pool := clientpool.NewStoreGatewayPool(storesRing, factory, clientsMetrics, logger, clientsOptions...) 88 89 s := &StoreGatewayQuerier{ 90 ring: storesRing, 91 pool: pool, 92 limits: limits, 93 subservicesWatcher: services.NewFailureWatcher(), 94 } 95 s.subservices, err = services.NewManager(storesRing, pool) 96 if err != nil { 97 return nil, err 98 } 99 100 s.Service = services.NewBasicService(s.starting, s.running, s.stopping) 101 102 return s, nil 103 } 104 105 func (s *StoreGatewayQuerier) starting(ctx context.Context) error { 106 s.subservicesWatcher.WatchManager(s.subservices) 107 108 if err := services.StartManagerAndAwaitHealthy(ctx, s.subservices); err != nil { 109 return errors.Wrap(err, "unable to start store gateway querier set subservices") 110 } 111 112 return nil 113 } 114 115 func (s *StoreGatewayQuerier) running(ctx context.Context) error { 116 for { 117 select { 118 case <-ctx.Done(): 119 return nil 120 case err := <-s.subservicesWatcher.Chan(): 121 return errors.Wrap(err, "store gateway querier set subservice failed") 122 } 123 } 124 } 125 126 func (s *StoreGatewayQuerier) stopping(_ error) error { 127 return services.StopManagerAndAwaitStopped(context.Background(), s.subservices) 128 } 129 130 // forAllStoreGateways runs f, in parallel, for all store-gateways that are part of the replication set for the given tenant. 131 func forAllStoreGateways[T any](ctx context.Context, tenantID string, storegatewayQuerier *StoreGatewayQuerier, f QueryReplicaFn[T, StoreGatewayQueryClient]) ([]ResponseFromReplica[T], error) { 132 replicationSet, err := GetShuffleShardingSubring(storegatewayQuerier.ring, tenantID, storegatewayQuerier.limits).GetReplicationSetForOperation(storegateway.BlocksRead) 133 if err != nil { 134 return nil, err 135 } 136 137 return forGivenReplicationSet(ctx, func(addr string) (StoreGatewayQueryClient, error) { 138 client, err := storegatewayQuerier.pool.GetClientFor(addr) 139 if err != nil { 140 return nil, err 141 } 142 return client.(StoreGatewayQueryClient), nil 143 }, replicationSet, f) 144 } 145 146 // forAllPlannedStoreGatway runs f, in parallel, for all store-gateways part of the plan 147 func forAllPlannedStoreGateways[T any](ctx context.Context, _ string, storegatewayQuerier *StoreGatewayQuerier, plan map[string]*blockPlanEntry, f QueryReplicaWithHintsFn[T, StoreGatewayQueryClient]) ([]ResponseFromReplica[T], error) { 148 replicationSet, err := storegatewayQuerier.ring.GetReplicationSetForOperation(readNoExtend) 149 if err != nil { 150 return nil, err 151 } 152 153 return forGivenPlan(ctx, plan, func(addr string) (StoreGatewayQueryClient, error) { 154 client, err := storegatewayQuerier.pool.GetClientFor(addr) 155 if err != nil { 156 return nil, err 157 } 158 return client.(StoreGatewayQueryClient), nil 159 }, replicationSet, f) 160 } 161 162 // GetShuffleShardingSubring returns the subring to be used for a given user. This function 163 // should be used both by store-gateway and querier in order to guarantee the same logic is used. 164 func GetShuffleShardingSubring(ring ring.ReadRing, userID string, limits StoreGatewayLimits) ring.ReadRing { 165 shardSize := limits.StoreGatewayTenantShardSize(userID) 166 167 // A shard size of 0 means shuffle sharding is disabled for this specific user, 168 // so we just return the full ring so that blocks will be sharded across all store-gateways. 169 if shardSize <= 0 { 170 return ring 171 } 172 173 return ring.ShuffleShard(userID, shardSize) 174 } 175 176 func (q *Querier) selectTreeFromStoreGateway(ctx context.Context, req *querierv1.SelectMergeStacktracesRequest, plan map[string]*blockPlanEntry) (*model.Tree, error) { 177 sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectTree StoreGateway") 178 defer sp.Finish() 179 profileType, err := model.ParseProfileTypeSelector(req.ProfileTypeID) 180 if err != nil { 181 return nil, connect.NewError(connect.CodeInvalidArgument, err) 182 } 183 _, err = parser.ParseMetricSelector(req.LabelSelector) 184 if err != nil { 185 return nil, connect.NewError(connect.CodeInvalidArgument, err) 186 } 187 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 188 if err != nil { 189 return nil, connect.NewError(connect.CodeInvalidArgument, err) 190 } 191 ctx, cancel := context.WithCancel(ctx) 192 defer cancel() 193 194 var responses []ResponseFromReplica[clientpool.BidiClientMergeProfilesStacktraces] 195 if plan != nil { 196 responses, err = forAllPlannedStoreGateways(ctx, tenantID, q.storeGatewayQuerier, plan, func(ctx context.Context, ic StoreGatewayQueryClient, hints *ingesterv1.Hints) (clientpool.BidiClientMergeProfilesStacktraces, error) { 197 return ic.MergeProfilesStacktraces(ctx), nil 198 }) 199 } else { 200 responses, err = forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) (clientpool.BidiClientMergeProfilesStacktraces, error) { 201 return ic.MergeProfilesStacktraces(ctx), nil 202 }) 203 } 204 if err != nil { 205 return nil, connect.NewError(connect.CodeInternal, err) 206 } 207 // send the first initial request to all ingesters. 208 g, _ := errgroup.WithContext(ctx) 209 for _, r := range responses { 210 r := r 211 blockHints, err := BlockHints(plan, r.addr) 212 if err != nil { 213 return nil, connect.NewError(connect.CodeInternal, err) 214 } 215 g.Go(util.RecoverPanic(func() error { 216 return r.response.Send(&ingesterv1.MergeProfilesStacktracesRequest{ 217 Request: &ingesterv1.SelectProfilesRequest{ 218 LabelSelector: req.LabelSelector, 219 Start: req.Start, 220 End: req.End, 221 Type: profileType, 222 Hints: &ingesterv1.Hints{Block: blockHints}, 223 }, 224 MaxNodes: req.MaxNodes, 225 }) 226 })) 227 } 228 if err = g.Wait(); err != nil { 229 return nil, connect.NewError(connect.CodeInternal, err) 230 } 231 232 // merge all profiles 233 return selectMergeTree(ctx, responses) 234 } 235 236 func (q *Querier) selectProfileFromStoreGateway(ctx context.Context, req *querierv1.SelectMergeProfileRequest, plan map[string]*blockPlanEntry) (*googlev1.Profile, error) { 237 sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectProfile StoreGateway") 238 defer sp.Finish() 239 profileType, err := model.ParseProfileTypeSelector(req.ProfileTypeID) 240 if err != nil { 241 return nil, connect.NewError(connect.CodeInvalidArgument, err) 242 } 243 _, err = parser.ParseMetricSelector(req.LabelSelector) 244 if err != nil { 245 return nil, connect.NewError(connect.CodeInvalidArgument, err) 246 } 247 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 248 if err != nil { 249 return nil, connect.NewError(connect.CodeInvalidArgument, err) 250 } 251 ctx, cancel := context.WithCancel(ctx) 252 defer cancel() 253 254 var responses []ResponseFromReplica[clientpool.BidiClientMergeProfilesPprof] 255 if plan != nil { 256 responses, err = forAllPlannedStoreGateways(ctx, tenantID, q.storeGatewayQuerier, plan, func(ctx context.Context, ic StoreGatewayQueryClient, hints *ingesterv1.Hints) (clientpool.BidiClientMergeProfilesPprof, error) { 257 return ic.MergeProfilesPprof(ctx), nil 258 }) 259 } else { 260 responses, err = forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) (clientpool.BidiClientMergeProfilesPprof, error) { 261 return ic.MergeProfilesPprof(ctx), nil 262 }) 263 } 264 if err != nil { 265 return nil, connect.NewError(connect.CodeInternal, err) 266 } 267 // send the first initial request to all ingesters. 268 g, _ := errgroup.WithContext(ctx) 269 for _, r := range responses { 270 r := r 271 blockHints, err := BlockHints(plan, r.addr) 272 if err != nil { 273 return nil, connect.NewError(connect.CodeInternal, err) 274 } 275 g.Go(util.RecoverPanic(func() error { 276 return r.response.Send(&ingesterv1.MergeProfilesPprofRequest{ 277 Request: &ingesterv1.SelectProfilesRequest{ 278 LabelSelector: req.LabelSelector, 279 Start: req.Start, 280 End: req.End, 281 Type: profileType, 282 Hints: &ingesterv1.Hints{Block: blockHints}, 283 }, 284 MaxNodes: req.MaxNodes, 285 StackTraceSelector: req.StackTraceSelector, 286 }) 287 })) 288 } 289 if err = g.Wait(); err != nil { 290 return nil, connect.NewError(connect.CodeInternal, err) 291 } 292 293 // merge all profiles 294 return selectMergePprofProfile(ctx, profileType, responses) 295 } 296 297 func (q *Querier) selectSeriesFromStoreGateway(ctx context.Context, req *ingesterv1.MergeProfilesLabelsRequest, plan map[string]*blockPlanEntry) ([]ResponseFromReplica[clientpool.BidiClientMergeProfilesLabels], error) { 298 sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectSeries StoreGateway") 299 defer sp.Finish() 300 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 301 if err != nil { 302 return nil, connect.NewError(connect.CodeInvalidArgument, err) 303 } 304 var responses []ResponseFromReplica[clientpool.BidiClientMergeProfilesLabels] 305 if plan != nil { 306 responses, err = forAllPlannedStoreGateways(ctx, tenantID, q.storeGatewayQuerier, plan, func(ctx context.Context, ic StoreGatewayQueryClient, hints *ingesterv1.Hints) (clientpool.BidiClientMergeProfilesLabels, error) { 307 return ic.MergeProfilesLabels(ctx), nil 308 }) 309 } else { 310 responses, err = forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) (clientpool.BidiClientMergeProfilesLabels, error) { 311 return ic.MergeProfilesLabels(ctx), nil 312 }) 313 } 314 315 if err != nil { 316 return nil, connect.NewError(connect.CodeInternal, err) 317 } 318 // send the first initial request to all ingesters. 319 g, _ := errgroup.WithContext(ctx) 320 for _, r := range responses { 321 r := r 322 blockHints, err := BlockHints(plan, r.addr) 323 if err != nil { 324 return nil, connect.NewError(connect.CodeInternal, err) 325 } 326 g.Go(util.RecoverPanic(func() error { 327 req := req.CloneVT() 328 req.Request.Hints = &ingesterv1.Hints{Block: blockHints} 329 return r.response.Send(req) 330 })) 331 } 332 if err := g.Wait(); err != nil { 333 return nil, connect.NewError(connect.CodeInternal, err) 334 } 335 return responses, nil 336 } 337 338 func (q *Querier) labelValuesFromStoreGateway(ctx context.Context, req *typesv1.LabelValuesRequest) ([]ResponseFromReplica[[]string], error) { 339 sp, ctx := opentracing.StartSpanFromContext(ctx, "LabelValues StoreGateway") 340 defer sp.Finish() 341 342 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 343 if err != nil { 344 return nil, connect.NewError(connect.CodeInvalidArgument, err) 345 } 346 347 responses, err := forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) ([]string, error) { 348 res, err := ic.LabelValues(ctx, connect.NewRequest(req)) 349 if err != nil { 350 return nil, err 351 } 352 return res.Msg.Names, nil 353 }) 354 if err != nil { 355 return nil, connect.NewError(connect.CodeInternal, err) 356 } 357 return responses, nil 358 } 359 360 func (q *Querier) labelNamesFromStoreGateway(ctx context.Context, req *typesv1.LabelNamesRequest) ([]ResponseFromReplica[[]string], error) { 361 sp, ctx := opentracing.StartSpanFromContext(ctx, "LabelNames StoreGateway") 362 defer sp.Finish() 363 364 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 365 if err != nil { 366 return nil, connect.NewError(connect.CodeInvalidArgument, err) 367 } 368 369 responses, err := forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) ([]string, error) { 370 res, err := ic.LabelNames(ctx, connect.NewRequest(req)) 371 if err != nil { 372 return nil, err 373 } 374 return res.Msg.Names, nil 375 }) 376 if err != nil { 377 return nil, connect.NewError(connect.CodeInternal, err) 378 } 379 return responses, nil 380 } 381 382 func (q *Querier) seriesFromStoreGateway(ctx context.Context, req *ingesterv1.SeriesRequest) ([]ResponseFromReplica[[]*typesv1.Labels], error) { 383 sp, ctx := opentracing.StartSpanFromContext(ctx, "Series StoreGateway") 384 defer sp.Finish() 385 386 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 387 if err != nil { 388 return nil, connect.NewError(connect.CodeInvalidArgument, err) 389 } 390 391 responses, err := forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) ([]*typesv1.Labels, error) { 392 res, err := ic.Series(ctx, connect.NewRequest(req)) 393 if err != nil { 394 return nil, err 395 } 396 return res.Msg.LabelsSet, nil 397 }) 398 if err != nil { 399 return nil, connect.NewError(connect.CodeInternal, err) 400 } 401 return responses, nil 402 } 403 404 func (q *Querier) selectSpanProfileFromStoreGateway(ctx context.Context, req *querierv1.SelectMergeSpanProfileRequest, plan map[string]*blockPlanEntry) (*model.Tree, error) { 405 sp, ctx := opentracing.StartSpanFromContext(ctx, "SelectSpanProfile StoreGateway") 406 defer sp.Finish() 407 profileType, err := model.ParseProfileTypeSelector(req.ProfileTypeID) 408 if err != nil { 409 return nil, connect.NewError(connect.CodeInvalidArgument, err) 410 } 411 _, err = parser.ParseMetricSelector(req.LabelSelector) 412 if err != nil { 413 return nil, connect.NewError(connect.CodeInvalidArgument, err) 414 } 415 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 416 if err != nil { 417 return nil, connect.NewError(connect.CodeInvalidArgument, err) 418 } 419 ctx, cancel := context.WithCancel(ctx) 420 defer cancel() 421 422 var responses []ResponseFromReplica[clientpool.BidiClientMergeSpanProfile] 423 if plan != nil { 424 responses, err = forAllPlannedStoreGateways(ctx, tenantID, q.storeGatewayQuerier, plan, func(ctx context.Context, ic StoreGatewayQueryClient, hints *ingesterv1.Hints) (clientpool.BidiClientMergeSpanProfile, error) { 425 return ic.MergeSpanProfile(ctx), nil 426 }) 427 } else { 428 responses, err = forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) (clientpool.BidiClientMergeSpanProfile, error) { 429 return ic.MergeSpanProfile(ctx), nil 430 }) 431 } 432 if err != nil { 433 return nil, connect.NewError(connect.CodeInternal, err) 434 } 435 // send the first initial request to all ingesters. 436 g, _ := errgroup.WithContext(ctx) 437 for _, r := range responses { 438 r := r 439 blockHints, err := BlockHints(plan, r.addr) 440 if err != nil { 441 return nil, connect.NewError(connect.CodeInternal, err) 442 } 443 g.Go(util.RecoverPanic(func() error { 444 return r.response.Send(&ingesterv1.MergeSpanProfileRequest{ 445 Request: &ingesterv1.SelectSpanProfileRequest{ 446 LabelSelector: req.LabelSelector, 447 Start: req.Start, 448 End: req.End, 449 Type: profileType, 450 SpanSelector: req.SpanSelector, 451 Hints: &ingesterv1.Hints{Block: blockHints}, 452 }, 453 MaxNodes: req.MaxNodes, 454 }) 455 })) 456 } 457 if err = g.Wait(); err != nil { 458 return nil, connect.NewError(connect.CodeInternal, err) 459 } 460 461 // merge all profiles 462 return selectMergeSpanProfile(ctx, responses) 463 } 464 465 func (q *Querier) blockSelectFromStoreGateway(ctx context.Context, req *ingesterv1.BlockMetadataRequest) ([]ResponseFromReplica[[]*typesv1.BlockInfo], error) { 466 sp, ctx := opentracing.StartSpanFromContext(ctx, "blockSelect StoreGateway") 467 defer sp.Finish() 468 469 tenantID, err := tenant.ExtractTenantIDFromContext(ctx) 470 if err != nil { 471 return nil, connect.NewError(connect.CodeInvalidArgument, err) 472 } 473 474 responses, err := forAllStoreGateways(ctx, tenantID, q.storeGatewayQuerier, func(ctx context.Context, ic StoreGatewayQueryClient) ([]*typesv1.BlockInfo, error) { 475 res, err := ic.BlockMetadata(ctx, connect.NewRequest(req)) 476 if err != nil { 477 return nil, err 478 } 479 return res.Msg.Blocks, nil 480 }) 481 if err != nil { 482 return nil, connect.NewError(connect.CodeInternal, err) 483 } 484 return responses, nil 485 }