github.com/grafana/pyroscope@v1.18.0/pkg/metastore/client/methods.go (about) 1 package metastoreclient 2 3 import ( 4 "context" 5 "math/rand" 6 "strings" 7 "time" 8 9 "github.com/go-kit/log/level" 10 "github.com/grafana/dskit/backoff" 11 "github.com/hashicorp/raft" 12 "google.golang.org/grpc" 13 "google.golang.org/grpc/codes" 14 "google.golang.org/grpc/status" 15 16 metastorev1 "github.com/grafana/pyroscope/api/gen/proto/go/metastore/v1" 17 "github.com/grafana/pyroscope/pkg/metastore/raftnode" 18 "github.com/grafana/pyroscope/pkg/metastore/raftnode/raftnodepb" 19 ) 20 21 func invoke[R any](ctx context.Context, cl *Client, 22 f func(ctx context.Context, instance instance) (*R, error), 23 ) (*R, error) { 24 backoffConfig := backoff.Config{ 25 MinBackoff: 10 * time.Millisecond, 26 MaxBackoff: 100 * time.Millisecond, 27 MaxRetries: 50, 28 } 29 const ( 30 deadline = 20 * time.Second 31 ) 32 33 ctx, cancel := context.WithDeadline(ctx, time.Now().Add(deadline)) 34 defer cancel() 35 36 var res *R 37 var err error 38 39 attempt := func() (done bool) { 40 it := cl.selectInstance(false) 41 if it == nil { 42 cl.logger.Log("msg", "no instances available, backoff and retry") 43 return false 44 } 45 responseFromAttempt, errFromAttempt := f(ctx, it) 46 if errFromAttempt == nil { 47 res = responseFromAttempt 48 return true 49 } 50 cl.logger.Log( 51 "msg", "metastore client error", 52 "err", errFromAttempt, 53 "server_id", it.srv.Raft.ID, 54 "server_address", it.srv.Raft.Address, 55 "server_resolved_address", it.srv.ResolvedAddress, 56 ) 57 node, ok := raftnode.RaftLeaderFromStatusDetails(errFromAttempt) 58 if ok { 59 cl.mu.Lock() 60 if strings.Contains(string(it.srv.Raft.ID), string(cl.leader)) { 61 cl.logger.Log("msg", "changing metastore client leader", "current", cl.leader, "new", node.Id) 62 cl.leader = stripPort(node.Id) 63 } 64 cl.mu.Unlock() 65 } else { 66 // Some errors will not contain the Raft leader. This is a valid scenario, e.g., when a node gets removed 67 // for maintenance. We try to move to a different client instance. 68 cl.selectInstance(true) 69 } 70 // A workaround to prevent retries for specific error codes. This needs a larger refactoring later on. 71 switch status.Code(errFromAttempt) { 72 case codes.InvalidArgument: 73 cl.logger.Log("msg", "skip metastore retries", "err", err, "leader", cl.leader) 74 err = errFromAttempt 75 return true 76 } 77 return false 78 } 79 80 b := backoff.New(ctx, backoffConfig) 81 82 for b.Ongoing() { 83 if !attempt() { 84 b.Wait() 85 cl.discovery.Rediscover() 86 } else { 87 return res, err 88 } 89 } 90 91 return nil, b.Err() 92 } 93 94 func (c *Client) selectInstance(override bool) *client { 95 c.mu.Lock() 96 defer c.mu.Unlock() 97 98 it := c.servers[c.leader] 99 if (it == nil || override) && len(c.servers) > 0 { 100 idx := rand.Intn(len(c.servers)) 101 j := 0 102 for k, v := range c.servers { 103 if j == idx { 104 it = v 105 c.leader = k 106 level.Debug(c.logger).Log("msg", "selected a random metastore server", "new_leader", c.leader) 107 break 108 } 109 j++ 110 } 111 } 112 return it 113 } 114 115 func stripPort(server string) raft.ServerID { 116 serverWithoutPort := server 117 if idx := strings.LastIndex(serverWithoutPort, ":"); idx != -1 { 118 serverWithoutPort = serverWithoutPort[:idx] 119 } 120 return raft.ServerID(serverWithoutPort) 121 } 122 123 // TODO(kolesnikovae): Interceptor. 124 125 func (c *Client) AddBlock(ctx context.Context, in *metastorev1.AddBlockRequest, opts ...grpc.CallOption) (*metastorev1.AddBlockResponse, error) { 126 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.AddBlockResponse, error) { 127 return instance.AddBlock(ctx, in, opts...) 128 }) 129 } 130 131 func (c *Client) GetBlockMetadata(ctx context.Context, in *metastorev1.GetBlockMetadataRequest, opts ...grpc.CallOption) (*metastorev1.GetBlockMetadataResponse, error) { 132 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetBlockMetadataResponse, error) { 133 return instance.GetBlockMetadata(ctx, in, opts...) 134 }) 135 } 136 137 func (c *Client) QueryMetadata(ctx context.Context, in *metastorev1.QueryMetadataRequest, opts ...grpc.CallOption) (*metastorev1.QueryMetadataResponse, error) { 138 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.QueryMetadataResponse, error) { 139 return instance.QueryMetadata(ctx, in, opts...) 140 }) 141 } 142 143 func (c *Client) QueryMetadataLabels(ctx context.Context, in *metastorev1.QueryMetadataLabelsRequest, opts ...grpc.CallOption) (*metastorev1.QueryMetadataLabelsResponse, error) { 144 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.QueryMetadataLabelsResponse, error) { 145 return instance.QueryMetadataLabels(ctx, in, opts...) 146 }) 147 } 148 149 func (c *Client) PollCompactionJobs(ctx context.Context, in *metastorev1.PollCompactionJobsRequest, opts ...grpc.CallOption) (*metastorev1.PollCompactionJobsResponse, error) { 150 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.PollCompactionJobsResponse, error) { 151 return instance.PollCompactionJobs(ctx, in, opts...) 152 }) 153 } 154 155 func (c *Client) GetTenants(ctx context.Context, in *metastorev1.GetTenantsRequest, opts ...grpc.CallOption) (*metastorev1.GetTenantsResponse, error) { 156 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetTenantsResponse, error) { 157 return instance.GetTenants(ctx, in, opts...) 158 }) 159 } 160 161 func (c *Client) GetTenant(ctx context.Context, in *metastorev1.GetTenantRequest, opts ...grpc.CallOption) (*metastorev1.GetTenantResponse, error) { 162 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.GetTenantResponse, error) { 163 return instance.GetTenant(ctx, in, opts...) 164 }) 165 } 166 167 func (c *Client) DeleteTenant(ctx context.Context, in *metastorev1.DeleteTenantRequest, opts ...grpc.CallOption) (*metastorev1.DeleteTenantResponse, error) { 168 return invoke(ctx, c, func(ctx context.Context, instance instance) (*metastorev1.DeleteTenantResponse, error) { 169 return instance.DeleteTenant(ctx, in, opts...) 170 }) 171 } 172 173 func (c *Client) ReadIndex(ctx context.Context, in *raftnodepb.ReadIndexRequest, opts ...grpc.CallOption) (*raftnodepb.ReadIndexResponse, error) { 174 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.ReadIndexResponse, error) { 175 return instance.ReadIndex(ctx, in, opts...) 176 }) 177 } 178 179 func (c *Client) NodeInfo(ctx context.Context, in *raftnodepb.NodeInfoRequest, opts ...grpc.CallOption) (*raftnodepb.NodeInfoResponse, error) { 180 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.NodeInfoResponse, error) { 181 return instance.NodeInfo(ctx, in, opts...) 182 }) 183 } 184 185 func (c *Client) RemoveNode(ctx context.Context, in *raftnodepb.RemoveNodeRequest, opts ...grpc.CallOption) (*raftnodepb.RemoveNodeResponse, error) { 186 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.RemoveNodeResponse, error) { 187 return instance.RemoveNode(ctx, in, opts...) 188 }) 189 } 190 191 func (c *Client) AddNode(ctx context.Context, in *raftnodepb.AddNodeRequest, opts ...grpc.CallOption) (*raftnodepb.AddNodeResponse, error) { 192 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.AddNodeResponse, error) { 193 return instance.AddNode(ctx, in, opts...) 194 }) 195 } 196 197 func (c *Client) DemoteLeader(ctx context.Context, in *raftnodepb.DemoteLeaderRequest, opts ...grpc.CallOption) (*raftnodepb.DemoteLeaderResponse, error) { 198 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.DemoteLeaderResponse, error) { 199 return instance.DemoteLeader(ctx, in, opts...) 200 }) 201 } 202 203 func (c *Client) PromoteToLeader(ctx context.Context, in *raftnodepb.PromoteToLeaderRequest, opts ...grpc.CallOption) (*raftnodepb.PromoteToLeaderResponse, error) { 204 return invoke(ctx, c, func(ctx context.Context, instance instance) (*raftnodepb.PromoteToLeaderResponse, error) { 205 return instance.PromoteToLeader(ctx, in, opts...) 206 }) 207 }