github.com/grafana/pyroscope@v1.18.0/pkg/metastore/client/methods.go (about)

     1  package metastoreclient
     2  
     3  import (
     4  	"context"
     5  	"math/rand"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/go-kit/log/level"
    10  	"github.com/grafana/dskit/backoff"
    11  	"github.com/hashicorp/raft"
    12  	"google.golang.org/grpc"
    13  	"google.golang.org/grpc/codes"
    14  	"google.golang.org/grpc/status"
    15  
    16  	metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1"
    17  	"github.com/grafana/pyroscope/pkg/metastore/raftnode"
    18  	"github.com/grafana/pyroscope/pkg/metastore/raftnode/raftnodepb"
    19  )
    20  
    21  func invoke[R any](ctx context.Context, cl *Client,
    22  	f func(ctx context.Context, instance instance) (*R, error),
    23  ) (*R, error) {
    24  	backoffConfig := backoff.Config{
    25  		MinBackoff: 10 * time.Millisecond,
    26  		MaxBackoff: 100 * time.Millisecond,
    27  		MaxRetries: 50,
    28  	}
    29  	const (
    30  		deadline = 20 * time.Second
    31  	)
    32  
    33  	ctx, cancel := context.WithDeadline(ctx, time.Now().Add(deadline))
    34  	defer cancel()
    35  
    36  	var res *R
    37  	var err error
    38  
    39  	attempt := func() (done bool) {
    40  		it := cl.selectInstance(false)
    41  		if it == nil {
    42  			cl.logger.Log("msg", "no instances available, backoff and retry")
    43  			return false
    44  		}
    45  		responseFromAttempt, errFromAttempt := f(ctx, it)
    46  		if errFromAttempt == nil {
    47  			res = responseFromAttempt
    48  			return true
    49  		}
    50  		cl.logger.Log(
    51  			"msg", "metastore client error",
    52  			"err", errFromAttempt,
    53  			"server_id", it.srv.Raft.ID,
    54  			"server_address", it.srv.Raft.Address,
    55  			"server_resolved_address", it.srv.ResolvedAddress,
    56  		)
    57  		node, ok := raftnode.RaftLeaderFromStatusDetails(errFromAttempt)
    58  		if ok {
    59  			cl.mu.Lock()
    60  			if strings.Contains(string(it.srv.Raft.ID), string(cl.leader)) {
    61  				cl.logger.Log("msg", "changing metastore client leader", "current", cl.leader, "new", node.Id)
    62  				cl.leader = stripPort(node.Id)
    63  			}
    64  			cl.mu.Unlock()
    65  		} else {
    66  			// Some errors will not contain the Raft leader. This is a valid scenario, e.g., when a node gets removed
    67  			// for maintenance. We try to move to a different client instance.
    68  			cl.selectInstance(true)
    69  		}
    70  		// A workaround to prevent retries for specific error codes. This needs a larger refactoring later on.
    71  		switch status.Code(errFromAttempt) {
    72  		case codes.InvalidArgument:
    73  			cl.logger.Log("msg", "skip metastore retries", "err", err, "leader", cl.leader)
    74  			err = errFromAttempt
    75  			return true
    76  		}
    77  		return false
    78  	}
    79  
    80  	b := backoff.New(ctx, backoffConfig)
    81  
    82  	for b.Ongoing() {
    83  		if !attempt() {
    84  			b.Wait()
    85  			cl.discovery.Rediscover()
    86  		} else {
    87  			return res, err
    88  		}
    89  	}
    90  
    91  	return nil, b.Err()
    92  }
    93  
    94  func (c *Client) selectInstance(override bool) *client {
    95  	c.mu.Lock()
    96  	defer c.mu.Unlock()
    97  
    98  	it := c.servers[c.leader]
    99  	if (it == nil || override) && len(c.servers) > 0 {
   100  		idx := rand.Intn(len(c.servers))
   101  		j := 0
   102  		for k, v := range c.servers {
   103  			if j == idx {
   104  				it = v
   105  				c.leader = k
   106  				level.Debug(c.logger).Log("msg", "selected a random metastore server", "new_leader", c.leader)
   107  				break
   108  			}
   109  			j++
   110  		}
   111  	}
   112  	return it
   113  }
   114  
   115  func stripPort(server string) raft.ServerID {
   116  	serverWithoutPort := server
   117  	if idx := strings.LastIndex(serverWithoutPort, ":"); idx != -1 {
   118  		serverWithoutPort = serverWithoutPort[:idx]
   119  	}
   120  	return raft.ServerID(serverWithoutPort)
   121  }
   122  
// TODO(kolesnikovae): Interceptor (presumably: replace the per-method invoke
// wrappers below with a gRPC client interceptor — confirm).
   124  
   125  func (c *Client) AddBlock(ctx context.Context, in *metastorev1.AddBlockRequest, opts ...grpc.CallOption) (*metastorev1.AddBlockResponse, error) {
   126  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.AddBlockResponse, error) {
   127  		return instance.AddBlock(ctx, in, opts...)
   128  	})
   129  }
   130  
   131  func (c *Client) GetBlockMetadata(ctx context.Context, in *metastorev1.GetBlockMetadataRequest, opts ...grpc.CallOption) (*metastorev1.GetBlockMetadataResponse, error) {
   132  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetBlockMetadataResponse, error) {
   133  		return instance.GetBlockMetadata(ctx, in, opts...)
   134  	})
   135  }
   136  
   137  func (c *Client) QueryMetadata(ctx context.Context, in *metastorev1.QueryMetadataRequest, opts ...grpc.CallOption) (*metastorev1.QueryMetadataResponse, error) {
   138  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.QueryMetadataResponse, error) {
   139  		return instance.QueryMetadata(ctx, in, opts...)
   140  	})
   141  }
   142  
   143  func (c *Client) QueryMetadataLabels(ctx context.Context, in *metastorev1.QueryMetadataLabelsRequest, opts ...grpc.CallOption) (*metastorev1.QueryMetadataLabelsResponse, error) {
   144  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.QueryMetadataLabelsResponse, error) {
   145  		return instance.QueryMetadataLabels(ctx, in, opts...)
   146  	})
   147  }
   148  
   149  func (c *Client) PollCompactionJobs(ctx context.Context, in *metastorev1.PollCompactionJobsRequest, opts ...grpc.CallOption) (*metastorev1.PollCompactionJobsResponse, error) {
   150  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.PollCompactionJobsResponse, error) {
   151  		return instance.PollCompactionJobs(ctx, in, opts...)
   152  	})
   153  }
   154  
   155  func (c *Client) GetTenants(ctx context.Context, in *metastorev1.GetTenantsRequest, opts ...grpc.CallOption) (*metastorev1.GetTenantsResponse, error) {
   156  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetTenantsResponse, error) {
   157  		return instance.GetTenants(ctx, in, opts...)
   158  	})
   159  }
   160  
   161  func (c *Client) GetTenant(ctx context.Context, in *metastorev1.GetTenantRequest, opts ...grpc.CallOption) (*metastorev1.GetTenantResponse, error) {
   162  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetTenantResponse, error) {
   163  		return instance.GetTenant(ctx, in, opts...)
   164  	})
   165  }
   166  
   167  func (c *Client) DeleteTenant(ctx context.Context, in *metastorev1.DeleteTenantRequest, opts ...grpc.CallOption) (*metastorev1.DeleteTenantResponse, error) {
   168  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.DeleteTenantResponse, error) {
   169  		return instance.DeleteTenant(ctx, in, opts...)
   170  	})
   171  }
   172  
   173  func (c *Client) ReadIndex(ctx context.Context, in *raftnodepb.ReadIndexRequest, opts ...grpc.CallOption) (*raftnodepb.ReadIndexResponse, error) {
   174  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.ReadIndexResponse, error) {
   175  		return instance.ReadIndex(ctx, in, opts...)
   176  	})
   177  }
   178  
   179  func (c *Client) NodeInfo(ctx context.Context, in *raftnodepb.NodeInfoRequest, opts ...grpc.CallOption) (*raftnodepb.NodeInfoResponse, error) {
   180  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.NodeInfoResponse, error) {
   181  		return instance.NodeInfo(ctx, in, opts...)
   182  	})
   183  }
   184  
   185  func (c *Client) RemoveNode(ctx context.Context, in *raftnodepb.RemoveNodeRequest, opts ...grpc.CallOption) (*raftnodepb.RemoveNodeResponse, error) {
   186  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.RemoveNodeResponse, error) {
   187  		return instance.RemoveNode(ctx, in, opts...)
   188  	})
   189  }
   190  
   191  func (c *Client) AddNode(ctx context.Context, in *raftnodepb.AddNodeRequest, opts ...grpc.CallOption) (*raftnodepb.AddNodeResponse, error) {
   192  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.AddNodeResponse, error) {
   193  		return instance.AddNode(ctx, in, opts...)
   194  	})
   195  }
   196  
   197  func (c *Client) DemoteLeader(ctx context.Context, in *raftnodepb.DemoteLeaderRequest, opts ...grpc.CallOption) (*raftnodepb.DemoteLeaderResponse, error) {
   198  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.DemoteLeaderResponse, error) {
   199  		return instance.DemoteLeader(ctx, in, opts...)
   200  	})
   201  }
   202  
   203  func (c *Client) PromoteToLeader(ctx context.Context, in *raftnodepb.PromoteToLeaderRequest, opts ...grpc.CallOption) (*raftnodepb.PromoteToLeaderResponse, error) {
   204  	return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.PromoteToLeaderResponse, error) {
   205  		return instance.PromoteToLeader(ctx, in, opts...)
   206  	})
   207  }