github.com/grafana/pyroscope@v1.18.0/pkg/frontend/readpath/queryfrontend/query_frontend.go (about)

     1  package queryfrontend
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math/rand"
     7  	"slices"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/go-kit/log"
    12  	"github.com/grafana/dskit/tenant"
    13  	"github.com/opentracing/opentracing-go"
    14  	"github.com/opentracing/opentracing-go/ext"
    15  	otlog "github.com/opentracing/opentracing-go/log"
    16  	"github.com/prometheus/prometheus/model/labels"
    17  	"github.com/prometheus/prometheus/promql/parser"
    18  	"google.golang.org/grpc/codes"
    19  	"google.golang.org/grpc/status"
    20  
    21  	googlev1 "github.com/grafana/pyroscope/api/gen/proto/go/google/v1"
    22  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    23  	"github.com/grafana/pyroscope/api/gen/proto/go/querier/v1/querierv1connect"
    24  	queryv1 "github.com/grafana/pyroscope/api/gen/proto/go/query/v1"
    25  	"github.com/grafana/pyroscope/pkg/block/metadata"
    26  	"github.com/grafana/pyroscope/pkg/frontend"
    27  	"github.com/grafana/pyroscope/pkg/model"
    28  	"github.com/grafana/pyroscope/pkg/pprof"
    29  	"github.com/grafana/pyroscope/pkg/querybackend/queryplan"
    30  )
    31  
// Compile-time assertion that *QueryFrontend satisfies
// querierv1connect.QuerierServiceClient.
var _ querierv1connect.QuerierServiceClient = (*QueryFrontend)(nil)
    33  
// QueryBackend is the query execution dependency of QueryFrontend.
type QueryBackend interface {
	// Invoke executes the request and returns the backend response.
	// QueryFrontend.Query calls it with the tenants, time range, label
	// selector, query plan, and queries derived from the incoming request.
	Invoke(ctx context.Context, req *queryv1.InvokeRequest) (*queryv1.InvokeResponse, error)
}
    37  
// Symbolizer symbolizes pprof profiles returned by the query backend.
type Symbolizer interface {
	// SymbolizePprof symbolizes the given profile. The frontend keeps using
	// the same profile value after the call returns, so implementations are
	// expected to update it in place.
	SymbolizePprof(ctx context.Context, profile *googlev1.Profile) error
}
    41  
// QueryFrontend serves read-path queries: it looks up block metadata, builds
// a query plan, invokes the query backend, and, when enabled, symbolizes the
// returned profiles.
type QueryFrontend struct {
	logger log.Logger
	// limits provides per-tenant settings; this file reads SymbolizerEnabled
	// and QuerySanitizeOnMerge.
	limits frontend.Limits

	// metadataQueryClient is used by QueryMetadata to find matching blocks.
	metadataQueryClient metastorev1.MetadataQueryServiceClient
	tenantServiceClient metastorev1.TenantServiceClient
	// querybackend executes the query plan built by Query.
	querybackend        QueryBackend
	// symbolizer is optional: symbolization is skipped when it is nil.
	symbolizer          Symbolizer
	// now is the time source; NewQueryFrontend sets it to time.Now.
	now                 func() time.Time
}
    52  
    53  func NewQueryFrontend(
    54  	logger log.Logger,
    55  	limits frontend.Limits,
    56  	metadataQueryClient metastorev1.MetadataQueryServiceClient,
    57  	tenantServiceClient metastorev1.TenantServiceClient,
    58  	querybackendClient QueryBackend,
    59  	sym Symbolizer,
    60  ) *QueryFrontend {
    61  	return &QueryFrontend{
    62  		logger:              logger,
    63  		limits:              limits,
    64  		metadataQueryClient: metadataQueryClient,
    65  		tenantServiceClient: tenantServiceClient,
    66  		querybackend:        querybackendClient,
    67  		symbolizer:          sym,
    68  		now:                 time.Now,
    69  	}
    70  }
    71  
    72  var xrand = rand.New(rand.NewSource(4349676827832284783))
    73  var xrandMutex = sync.Mutex{} // todo fix the race properly
    74  
    75  func (q *QueryFrontend) Query(
    76  	ctx context.Context,
    77  	req *queryv1.QueryRequest,
    78  ) (*queryv1.QueryResponse, error) {
    79  	span, ctx := opentracing.StartSpanFromContext(ctx, "QueryFrontend.Query")
    80  	defer span.Finish()
    81  	span.SetTag("start_time", req.StartTime)
    82  	span.SetTag("end_time", req.EndTime)
    83  	span.SetTag("label_selector", req.LabelSelector)
    84  
    85  	// This method is supposed to be the entry point of the read path
    86  	// in the future versions. Therefore, validation, overrides, and
    87  	// rest of the request handling should be moved here.
    88  	tenants, err := tenant.TenantIDs(ctx)
    89  	if err != nil {
    90  		return nil, status.Error(codes.InvalidArgument, err.Error())
    91  	}
    92  	span.SetTag("tenant_ids", tenants)
    93  
    94  	blocks, err := q.QueryMetadata(ctx, req)
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	span.SetTag("block_count", len(blocks))
    99  	if len(blocks) == 0 {
   100  		return new(queryv1.QueryResponse), nil
   101  	}
   102  	// Randomize the order of blocks to avoid hotspots.
   103  	xrandMutex.Lock()
   104  	xrand.Shuffle(len(blocks), func(i, j int) {
   105  		blocks[i], blocks[j] = blocks[j], blocks[i]
   106  	})
   107  	xrandMutex.Unlock()
   108  	// TODO(kolesnikovae): Should be dynamic.
   109  	p := queryplan.Build(blocks, 4, 20)
   110  
   111  	// Only check for symbolization if all tenants have it enabled
   112  	shouldSymbolize := q.shouldSymbolize(ctx, tenants, blocks)
   113  	span.SetTag("should_symbolize", shouldSymbolize)
   114  
   115  	modifiedQueries := make([]*queryv1.Query, len(req.Query))
   116  	for i, originalQuery := range req.Query {
   117  		modifiedQueries[i] = originalQuery.CloneVT()
   118  
   119  		// If we need symbolization and this is a TREE query, convert it to PPROF
   120  		if shouldSymbolize && originalQuery.QueryType == queryv1.QueryType_QUERY_TREE {
   121  			modifiedQueries[i].QueryType = queryv1.QueryType_QUERY_PPROF
   122  			modifiedQueries[i].Pprof = &queryv1.PprofQuery{
   123  				MaxNodes: originalQuery.Tree.GetMaxNodes(),
   124  			}
   125  			modifiedQueries[i].Tree = nil
   126  		}
   127  	}
   128  
   129  	resp, err := q.querybackend.Invoke(ctx, &queryv1.InvokeRequest{
   130  		Tenant:        tenants,
   131  		StartTime:     req.StartTime,
   132  		EndTime:       req.EndTime,
   133  		LabelSelector: req.LabelSelector,
   134  		Options: &queryv1.InvokeOptions{
   135  			SanitizeOnMerge: q.limits.QuerySanitizeOnMerge(tenants[0]),
   136  		},
   137  		QueryPlan: p,
   138  		Query:     modifiedQueries,
   139  	})
   140  	if err != nil {
   141  		return nil, err
   142  	}
   143  
   144  	if shouldSymbolize {
   145  		err = q.processAndSymbolizeProfiles(ctx, resp, req.Query)
   146  		if err != nil {
   147  			return nil, status.Error(codes.Internal, fmt.Sprintf("symbolizing profiles: %v", err))
   148  		}
   149  	}
   150  
   151  	// TODO(kolesnikovae): Extend diagnostics
   152  	if resp.Diagnostics == nil {
   153  		resp.Diagnostics = new(queryv1.Diagnostics)
   154  	}
   155  
   156  	resp.Diagnostics.QueryPlan = p
   157  	return &queryv1.QueryResponse{Reports: resp.Reports}, nil
   158  }
   159  
   160  func (q *QueryFrontend) QueryMetadata(
   161  	ctx context.Context,
   162  	req *queryv1.QueryRequest,
   163  ) (blocks []*metastorev1.BlockMeta, err error) {
   164  	span, ctx := opentracing.StartSpanFromContext(ctx, "QueryFrontend.QueryMetadata")
   165  	defer func() {
   166  		if err != nil {
   167  			ext.LogError(span, err)
   168  		}
   169  		span.Finish()
   170  	}()
   171  	span.SetTag("start_time", req.StartTime)
   172  	span.SetTag("end_time", req.EndTime)
   173  	span.SetTag("label_selector", req.LabelSelector)
   174  
   175  	tenants, err := tenant.TenantIDs(ctx)
   176  	if err != nil {
   177  		return nil, status.Error(codes.InvalidArgument, err.Error())
   178  	}
   179  	span.SetTag("tenant_ids", tenants)
   180  
   181  	matchers, err := parser.ParseMetricSelector(req.LabelSelector)
   182  	if err != nil {
   183  		return nil, status.Error(codes.InvalidArgument, err.Error())
   184  	}
   185  
   186  	query := &metastorev1.QueryMetadataRequest{
   187  		TenantId:  tenants,
   188  		StartTime: req.StartTime,
   189  		EndTime:   req.EndTime,
   190  		Labels:    []string{metadata.LabelNameUnsymbolized},
   191  	}
   192  
   193  	// Delete all matchers but service_name with strict match. If no matchers
   194  	// left, request the dataset index for query backend to lookup block datasets
   195  	// locally.
   196  	matchers = slices.DeleteFunc(matchers, func(m *labels.Matcher) bool {
   197  		return m.Name != model.LabelNameServiceName || m.Type != labels.MatchEqual
   198  	})
   199  	if len(matchers) == 0 {
   200  		// We preserve the __tenant_dataset__= label: this is needed for the
   201  		// query backend to identify that the dataset is the tenant-wide index,
   202  		// and a dataset lookup is needed.
   203  		query.Labels = append(query.Labels, metadata.LabelNameTenantDataset)
   204  		matchers = []*labels.Matcher{{
   205  			Name:  metadata.LabelNameTenantDataset,
   206  			Value: metadata.LabelValueDatasetTSDBIndex,
   207  			Type:  labels.MatchEqual,
   208  		}}
   209  	}
   210  
   211  	query.Query = matchersToLabelSelector(matchers)
   212  	md, err := q.metadataQueryClient.QueryMetadata(ctx, query)
   213  	if err != nil {
   214  		return nil, err
   215  	}
   216  	span.SetTag("blocks_count", len(md.Blocks))
   217  
   218  	return md.Blocks, nil
   219  }
   220  
   221  // hasUnsymbolizedProfiles checks if a block has unsymbolized profiles
   222  func (q *QueryFrontend) hasUnsymbolizedProfiles(block *metastorev1.BlockMeta) bool {
   223  	matcher, err := labels.NewMatcher(labels.MatchEqual, metadata.LabelNameUnsymbolized, "true")
   224  	if err != nil {
   225  		return false
   226  	}
   227  
   228  	return len(slices.Collect(metadata.FindDatasets(block, matcher))) > 0
   229  }
   230  
   231  // shouldSymbolize determines if we should symbolize profiles based on tenant settings
   232  func (q *QueryFrontend) shouldSymbolize(ctx context.Context, tenants []string, blocks []*metastorev1.BlockMeta) bool {
   233  	span := opentracing.SpanFromContext(ctx)
   234  	if span != nil {
   235  		span.LogFields(otlog.String("event", "shouldSymbolize"))
   236  	}
   237  
   238  	if q.symbolizer == nil {
   239  		return false
   240  	}
   241  
   242  	for _, t := range tenants {
   243  		if !q.limits.SymbolizerEnabled(t) {
   244  			return false
   245  		}
   246  	}
   247  
   248  	blocksWithUnsymbolized := 0
   249  	for _, block := range blocks {
   250  		if q.hasUnsymbolizedProfiles(block) {
   251  			blocksWithUnsymbolized++
   252  		}
   253  	}
   254  
   255  	if span != nil {
   256  		span.LogFields(
   257  			otlog.Int("blocks_with_unsymbolized", blocksWithUnsymbolized),
   258  			otlog.Int("total_blocks", len(blocks)),
   259  		)
   260  	}
   261  
   262  	return blocksWithUnsymbolized > 0
   263  }
   264  
   265  // processAndSymbolizeProfiles handles the symbolization of profiles from the response
   266  func (q *QueryFrontend) processAndSymbolizeProfiles(
   267  	ctx context.Context,
   268  	resp *queryv1.InvokeResponse,
   269  	originalQueries []*queryv1.Query,
   270  ) (err error) {
   271  	span, ctx := opentracing.StartSpanFromContext(ctx, "QueryFrontend.processAndSymbolizeProfiles")
   272  	defer func() {
   273  		if err != nil {
   274  			ext.LogError(span, err)
   275  		}
   276  		span.Finish()
   277  	}()
   278  	span.SetTag("query_count", len(originalQueries))
   279  	span.SetTag("report_count", len(resp.Reports))
   280  
   281  	if len(originalQueries) != len(resp.Reports) {
   282  		return fmt.Errorf("query/report count mismatch: %d queries but %d reports",
   283  			len(originalQueries), len(resp.Reports))
   284  	}
   285  
   286  	for i, r := range resp.Reports {
   287  		if r.Pprof == nil || r.Pprof.Pprof == nil {
   288  			continue
   289  		}
   290  
   291  		var prof googlev1.Profile
   292  		if err := pprof.Unmarshal(r.Pprof.Pprof, &prof); err != nil {
   293  			return fmt.Errorf("failed to unmarshal profile: %w", err)
   294  		}
   295  
   296  		if err := q.symbolizer.SymbolizePprof(ctx, &prof); err != nil {
   297  			return fmt.Errorf("failed to symbolize profile: %w", err)
   298  		}
   299  
   300  		// Convert back to tree if originally a tree
   301  		if i < len(originalQueries) && originalQueries[i].QueryType == queryv1.QueryType_QUERY_TREE {
   302  			treeBytes, err := model.TreeFromBackendProfile(&prof, originalQueries[i].Tree.GetMaxNodes())
   303  			if err != nil {
   304  				return fmt.Errorf("failed to build tree: %w", err)
   305  			}
   306  			r.Tree = &queryv1.TreeReport{Tree: treeBytes}
   307  			r.ReportType = queryv1.ReportType_REPORT_TREE
   308  			r.Pprof = nil
   309  		} else {
   310  			symbolizedBytes, err := pprof.Marshal(&prof, true)
   311  			if err != nil {
   312  				return fmt.Errorf("failed to marshal symbolized profile: %w", err)
   313  			}
   314  			r.Pprof.Pprof = symbolizedBytes
   315  		}
   316  	}
   317  
   318  	return nil
   319  }