github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/gatewayclient/gateway_client.go (about) 1 package gatewayclient 2 3 import ( 4 "context" 5 "flag" 6 "fmt" 7 "io" 8 "math/rand" 9 10 "github.com/go-kit/log" 11 "github.com/go-kit/log/level" 12 "github.com/grafana/dskit/concurrency" 13 "github.com/grafana/dskit/grpcclient" 14 "github.com/grafana/dskit/ring" 15 ring_client "github.com/grafana/dskit/ring/client" 16 "github.com/grafana/dskit/tenant" 17 "github.com/pkg/errors" 18 "github.com/prometheus/client_golang/prometheus" 19 "github.com/weaveworks/common/instrument" 20 "google.golang.org/grpc" 21 22 "github.com/grafana/loki/pkg/distributor/clientpool" 23 "github.com/grafana/loki/pkg/logproto" 24 "github.com/grafana/loki/pkg/storage/stores/series/index" 25 "github.com/grafana/loki/pkg/storage/stores/shipper/indexgateway" 26 shipper_util "github.com/grafana/loki/pkg/storage/stores/shipper/util" 27 "github.com/grafana/loki/pkg/util" 28 util_log "github.com/grafana/loki/pkg/util/log" 29 util_math "github.com/grafana/loki/pkg/util/math" 30 ) 31 32 const ( 33 maxQueriesPerGrpc = 100 34 maxConcurrentGrpcCalls = 10 35 ) 36 37 // IndexGatewayClientConfig configures the Index Gateway client used to 38 // communicate with the Index Gateway server. 39 type IndexGatewayClientConfig struct { 40 // Mode sets in which mode the client will operate. It is actually defined at the 41 // index_gateway YAML section and reused here. 42 Mode indexgateway.Mode `yaml:"-"` 43 44 // PoolConfig defines the behavior of the gRPC connection pool used to communicate 45 // with the Index Gateway. 46 // 47 // Only relevant for the ring mode. 48 // It is defined at the distributors YAML section and reused here. 49 PoolConfig clientpool.PoolConfig `yaml:"-"` 50 51 // Ring is the Index Gateway ring used to find the appropriate Index Gateway instance 52 // this client should talk to. 53 // 54 // Only relevant for the ring mode. 55 Ring ring.ReadRing `yaml:"-"` 56 57 // GRPCClientConfig configures the gRPC connection between the Index Gateway client and the server. 58 // 59 // Used by both, ring and simple mode. 60 GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config"` 61 62 // Address of the Index Gateway instance responsible for retaining the index for all tenants. 63 // 64 // Only relevant for the simple mode. 65 Address string `yaml:"server_address,omitempty"` 66 67 // Forcefully disable the use of the index gateway client for the storage. 68 // This is mainly useful for the index-gateway component which should always use the storage. 69 Disabled bool `yaml:"-"` 70 71 // LogGatewayRequests configures if requests sent to the gateway should be logged or not. 72 // The log messages are of type debug and contain the address of the gateway and the relevant tenant. 73 LogGatewayRequests bool `yaml:"log_gateway_requests"` 74 } 75 76 // RegisterFlagsWithPrefix register client-specific flags with the given prefix. 77 // 78 // Flags that are used by both, client and server, are defined in the indexgateway package. 79 func (i *IndexGatewayClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { 80 i.GRPCClientConfig.RegisterFlagsWithPrefix(prefix+".grpc", f) 81 f.StringVar(&i.Address, prefix+".server-address", "", "Hostname or IP of the Index Gateway gRPC server running in simple mode.") 82 f.BoolVar(&i.LogGatewayRequests, prefix+".log-gateway-requests", false, "Whether requests sent to the gateway should be logged or not.") 83 } 84 85 func (i *IndexGatewayClientConfig) RegisterFlags(f *flag.FlagSet) { 86 i.RegisterFlagsWithPrefix("index-gateway-client", f) 87 } 88 89 type GatewayClient struct { 90 cfg IndexGatewayClientConfig 91 92 storeGatewayClientRequestDuration *prometheus.HistogramVec 93 94 conn *grpc.ClientConn 95 grpcClient logproto.IndexGatewayClient 96 97 pool *ring_client.Pool 98 99 ring ring.ReadRing 100 } 101 102 // NewGatewayClient instantiates a new client used to communicate with an Index Gateway instance. 103 // 104 // If it is configured to be in ring mode, a pool of GRPC connections to all Index Gateway instances is created. 105 // Otherwise, it creates a single GRPC connection to an Index Gateway instance running in simple mode. 106 func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, logger log.Logger) (*GatewayClient, error) { 107 latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{ 108 Namespace: "loki_boltdb_shipper", 109 Name: "store_gateway_request_duration_seconds", 110 Help: "Time (in seconds) spent serving requests when using boltdb shipper store gateway", 111 Buckets: instrument.DefBuckets, 112 }, []string{"operation", "status_code"}) 113 if r != nil { 114 err := r.Register(latency) 115 if err != nil { 116 alreadyErr, ok := err.(prometheus.AlreadyRegisteredError) 117 if !ok { 118 return nil, err 119 } 120 latency = alreadyErr.ExistingCollector.(*prometheus.HistogramVec) 121 } 122 } 123 124 sgClient := &GatewayClient{ 125 cfg: cfg, 126 storeGatewayClientRequestDuration: latency, 127 ring: cfg.Ring, 128 } 129 130 dialOpts, err := cfg.GRPCClientConfig.DialOption(grpcclient.Instrument(sgClient.storeGatewayClientRequestDuration)) 131 if err != nil { 132 return nil, errors.Wrap(err, "index gateway grpc dial option") 133 } 134 135 if sgClient.cfg.Mode == indexgateway.RingMode { 136 factory := func(addr string) (ring_client.PoolClient, error) { 137 igPool, err := NewIndexGatewayGRPCPool(addr, dialOpts) 138 if err != nil { 139 return nil, errors.Wrap(err, "new index gateway grpc pool") 140 } 141 142 return igPool, nil 143 } 144 145 sgClient.pool = clientpool.NewPool(cfg.PoolConfig, sgClient.ring, factory, logger) 146 } else { 147 sgClient.conn, err = grpc.Dial(cfg.Address, dialOpts...) 148 if err != nil { 149 return nil, errors.Wrap(err, "index gateway grpc dial") 150 } 151 152 sgClient.grpcClient = logproto.NewIndexGatewayClient(sgClient.conn) 153 } 154 155 return sgClient, nil 156 } 157 158 // Stop stops the execution of this gateway client. 159 // 160 // If it is in simple mode, the single GRPC connection is closed. Otherwise, nothing happens. 161 func (s *GatewayClient) Stop() { 162 if s.cfg.Mode == indexgateway.SimpleMode { 163 s.conn.Close() 164 } 165 } 166 167 func (s *GatewayClient) QueryPages(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error { 168 if len(queries) <= maxQueriesPerGrpc { 169 return s.doQueries(ctx, queries, callback) 170 } 171 172 jobsCount := len(queries) / maxQueriesPerGrpc 173 if len(queries)%maxQueriesPerGrpc != 0 { 174 jobsCount++ 175 } 176 return concurrency.ForEachJob(ctx, jobsCount, maxConcurrentGrpcCalls, func(ctx context.Context, idx int) error { 177 return s.doQueries(ctx, queries[idx*maxQueriesPerGrpc:util_math.Min((idx+1)*maxQueriesPerGrpc, len(queries))], callback) 178 }) 179 } 180 181 func (s *GatewayClient) GetChunkRef(ctx context.Context, in *logproto.GetChunkRefRequest, opts ...grpc.CallOption) (*logproto.GetChunkRefResponse, error) { 182 if s.cfg.Mode == indexgateway.RingMode { 183 var ( 184 resp *logproto.GetChunkRefResponse 185 err error 186 ) 187 err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 188 resp, err = client.GetChunkRef(ctx, in, opts...) 189 return err 190 }) 191 return resp, err 192 } 193 return s.grpcClient.GetChunkRef(ctx, in, opts...) 194 } 195 196 func (s *GatewayClient) GetSeries(ctx context.Context, in *logproto.GetSeriesRequest, opts ...grpc.CallOption) (*logproto.GetSeriesResponse, error) { 197 if s.cfg.Mode == indexgateway.RingMode { 198 var ( 199 resp *logproto.GetSeriesResponse 200 err error 201 ) 202 err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 203 resp, err = client.GetSeries(ctx, in, opts...) 204 return err 205 }) 206 return resp, err 207 } 208 return s.grpcClient.GetSeries(ctx, in, opts...) 209 } 210 211 func (s *GatewayClient) LabelNamesForMetricName(ctx context.Context, in *logproto.LabelNamesForMetricNameRequest, opts ...grpc.CallOption) (*logproto.LabelResponse, error) { 212 if s.cfg.Mode == indexgateway.RingMode { 213 var ( 214 resp *logproto.LabelResponse 215 err error 216 ) 217 err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 218 resp, err = client.LabelNamesForMetricName(ctx, in, opts...) 219 return err 220 }) 221 return resp, err 222 } 223 return s.grpcClient.LabelNamesForMetricName(ctx, in, opts...) 224 } 225 226 func (s *GatewayClient) LabelValuesForMetricName(ctx context.Context, in *logproto.LabelValuesForMetricNameRequest, opts ...grpc.CallOption) (*logproto.LabelResponse, error) { 227 if s.cfg.Mode == indexgateway.RingMode { 228 var ( 229 resp *logproto.LabelResponse 230 err error 231 ) 232 err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 233 resp, err = client.LabelValuesForMetricName(ctx, in, opts...) 234 return err 235 }) 236 return resp, err 237 } 238 return s.grpcClient.LabelValuesForMetricName(ctx, in, opts...) 239 } 240 241 func (s *GatewayClient) GetStats(ctx context.Context, in *logproto.IndexStatsRequest, opts ...grpc.CallOption) (*logproto.IndexStatsResponse, error) { 242 if s.cfg.Mode == indexgateway.RingMode { 243 var ( 244 resp *logproto.IndexStatsResponse 245 err error 246 ) 247 err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 248 resp, err = client.GetStats(ctx, in, opts...) 249 return err 250 }) 251 return resp, err 252 } 253 return s.grpcClient.GetStats(ctx, in, opts...) 254 } 255 256 func (s *GatewayClient) doQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error { 257 queryKeyQueryMap := make(map[string]index.Query, len(queries)) 258 gatewayQueries := make([]*logproto.IndexQuery, 0, len(queries)) 259 260 for _, query := range queries { 261 queryKeyQueryMap[shipper_util.QueryKey(query)] = query 262 gatewayQueries = append(gatewayQueries, &logproto.IndexQuery{ 263 TableName: query.TableName, 264 HashValue: query.HashValue, 265 RangeValuePrefix: query.RangeValuePrefix, 266 RangeValueStart: query.RangeValueStart, 267 ValueEqual: query.ValueEqual, 268 }) 269 } 270 271 if s.cfg.Mode == indexgateway.RingMode { 272 return s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error { 273 return s.clientDoQueries(ctx, gatewayQueries, queryKeyQueryMap, callback, client) 274 }) 275 } 276 277 return s.clientDoQueries(ctx, gatewayQueries, queryKeyQueryMap, callback, s.grpcClient) 278 } 279 280 // clientDoQueries send a query request to an Index Gateway instance using the given gRPC client. 281 // 282 // It is used by both, simple and ring mode. 283 func (s *GatewayClient) clientDoQueries(ctx context.Context, gatewayQueries []*logproto.IndexQuery, 284 queryKeyQueryMap map[string]index.Query, callback index.QueryPagesCallback, client logproto.IndexGatewayClient, 285 ) error { 286 streamer, err := client.QueryIndex(ctx, &logproto.QueryIndexRequest{Queries: gatewayQueries}) 287 if err != nil { 288 return errors.Wrap(err, "query index") 289 } 290 291 for { 292 resp, err := streamer.Recv() 293 if err == io.EOF { 294 break 295 } 296 if err != nil { 297 return errors.WithStack(err) 298 } 299 query, ok := queryKeyQueryMap[resp.QueryKey] 300 if !ok { 301 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("unexpected %s QueryKey received, expected queries %s", resp.QueryKey, fmt.Sprint(queryKeyQueryMap))) 302 return fmt.Errorf("unexpected %s QueryKey received", resp.QueryKey) 303 } 304 if !callback(query, &readBatch{resp}) { 305 return nil 306 } 307 } 308 309 return nil 310 } 311 312 // ringModeDo executes the given function for each Index Gateway instance in the ring mapping to the correct tenant in the index. 313 // In case of callback failure, we'll try another member of the ring for that tenant ID. 314 func (s *GatewayClient) ringModeDo(ctx context.Context, callback func(client logproto.IndexGatewayClient) error) error { 315 userID, err := tenant.TenantID(ctx) 316 if err != nil { 317 return errors.Wrap(err, "index gateway client get tenant ID") 318 } 319 320 bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet() 321 322 key := util.TokenFor(userID, "" /* labels */) 323 rs, err := s.ring.Get(key, ring.WriteNoExtend, bufDescs, bufHosts, bufZones) 324 if err != nil { 325 return errors.Wrap(err, "index gateway get ring") 326 } 327 328 addrs := rs.GetAddresses() 329 // shuffle addresses to make sure we don't always access the same Index Gateway instances in sequence for same tenant. 330 rand.Shuffle(len(addrs), func(i, j int) { 331 addrs[i], addrs[j] = addrs[j], addrs[i] 332 }) 333 var lastErr error 334 for _, addr := range addrs { 335 if s.cfg.LogGatewayRequests { 336 level.Debug(util_log.Logger).Log("msg", "sending request to gateway", "gateway", addr, "tenant", userID) 337 } 338 339 genericClient, err := s.pool.GetClientFor(addr) 340 if err != nil { 341 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to get client for instance %s", addr), "err", err) 342 continue 343 } 344 345 client := (genericClient.(logproto.IndexGatewayClient)) 346 if err := callback(client); err != nil { 347 lastErr = err 348 level.Error(util_log.Logger).Log("msg", fmt.Sprintf("client do failed for instance %s", addr), "err", err) 349 continue 350 } 351 352 return nil 353 } 354 355 return lastErr 356 } 357 358 func (s *GatewayClient) NewWriteBatch() index.WriteBatch { 359 panic("unsupported") 360 } 361 362 func (s *GatewayClient) BatchWrite(ctx context.Context, batch index.WriteBatch) error { 363 panic("unsupported") 364 } 365 366 type readBatch struct { 367 *logproto.QueryIndexResponse 368 } 369 370 func (r *readBatch) Iterator() index.ReadBatchIterator { 371 return &grpcIter{ 372 i: -1, 373 QueryIndexResponse: r.QueryIndexResponse, 374 } 375 } 376 377 type grpcIter struct { 378 i int 379 *logproto.QueryIndexResponse 380 } 381 382 func (b *grpcIter) Next() bool { 383 b.i++ 384 return b.i < len(b.Rows) 385 } 386 387 func (b *grpcIter) RangeValue() []byte { 388 return b.Rows[b.i].RangeValue 389 } 390 391 func (b *grpcIter) Value() []byte { 392 return b.Rows[b.i].Value 393 }