github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/storage/stores/indexshipper/gatewayclient/gateway_client.go (about)

     1  package gatewayclient
     2  
     3  import (
     4  	"context"
     5  	"flag"
     6  	"fmt"
     7  	"io"
     8  	"math/rand"
     9  
    10  	"github.com/go-kit/log"
    11  	"github.com/go-kit/log/level"
    12  	"github.com/grafana/dskit/concurrency"
    13  	"github.com/grafana/dskit/grpcclient"
    14  	"github.com/grafana/dskit/ring"
    15  	ring_client "github.com/grafana/dskit/ring/client"
    16  	"github.com/grafana/dskit/tenant"
    17  	"github.com/pkg/errors"
    18  	"github.com/prometheus/client_golang/prometheus"
    19  	"github.com/weaveworks/common/instrument"
    20  	"google.golang.org/grpc"
    21  
    22  	"github.com/grafana/loki/pkg/distributor/clientpool"
    23  	"github.com/grafana/loki/pkg/logproto"
    24  	"github.com/grafana/loki/pkg/storage/stores/series/index"
    25  	"github.com/grafana/loki/pkg/storage/stores/shipper/indexgateway"
    26  	shipper_util "github.com/grafana/loki/pkg/storage/stores/shipper/util"
    27  	"github.com/grafana/loki/pkg/util"
    28  	util_log "github.com/grafana/loki/pkg/util/log"
    29  	util_math "github.com/grafana/loki/pkg/util/math"
    30  )
    31  
const (
	// maxQueriesPerGrpc is the largest number of index queries QueryPages
	// hands to a single doQueries call; longer query lists are split into
	// batches of at most this size.
	maxQueriesPerGrpc      = 100
	// maxConcurrentGrpcCalls is the concurrency passed to concurrency.ForEachJob
	// when QueryPages processes more than one batch.
	maxConcurrentGrpcCalls = 10
)
    36  
// IndexGatewayClientConfig configures the Index Gateway client used to
// communicate with the Index Gateway server.
type IndexGatewayClientConfig struct {
	// Mode sets the mode in which the client operates. It is defined in the
	// index_gateway YAML section and reused here, hence it is not read from
	// this section's YAML.
	Mode indexgateway.Mode `yaml:"-"`

	// PoolConfig defines the behavior of the gRPC connection pool used to communicate
	// with the Index Gateway.
	//
	// Only relevant for the ring mode.
	// It is defined in the distributors YAML section and reused here.
	PoolConfig clientpool.PoolConfig `yaml:"-"`

	// Ring is the Index Gateway ring used to find the appropriate Index Gateway instance
	// this client should talk to.
	//
	// Only relevant for the ring mode.
	Ring ring.ReadRing `yaml:"-"`

	// GRPCClientConfig configures the gRPC connection between the Index Gateway client and the server.
	//
	// Used by both the ring and the simple mode.
	GRPCClientConfig grpcclient.Config `yaml:"grpc_client_config"`

	// Address of the Index Gateway instance responsible for retaining the index for all tenants.
	//
	// Only relevant for the simple mode.
	Address string `yaml:"server_address,omitempty"`

	// Disabled forcefully disables the use of the index gateway client for the storage.
	// This is mainly useful for the index-gateway component, which should always use the storage.
	Disabled bool `yaml:"-"`

	// LogGatewayRequests configures whether requests sent to the gateway are logged.
	// The log messages are emitted at debug level and contain the address of the
	// gateway and the relevant tenant.
	LogGatewayRequests bool `yaml:"log_gateway_requests"`
}
    75  
    76  // RegisterFlagsWithPrefix register client-specific flags with the given prefix.
    77  //
    78  // Flags that are used by both, client and server, are defined in the indexgateway package.
    79  func (i *IndexGatewayClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
    80  	i.GRPCClientConfig.RegisterFlagsWithPrefix(prefix+".grpc", f)
    81  	f.StringVar(&i.Address, prefix+".server-address", "", "Hostname or IP of the Index Gateway gRPC server running in simple mode.")
    82  	f.BoolVar(&i.LogGatewayRequests, prefix+".log-gateway-requests", false, "Whether requests sent to the gateway should be logged or not.")
    83  }
    84  
    85  func (i *IndexGatewayClientConfig) RegisterFlags(f *flag.FlagSet) {
    86  	i.RegisterFlagsWithPrefix("index-gateway-client", f)
    87  }
    88  
// GatewayClient is a client for querying the index through the Index Gateway.
//
// Depending on cfg.Mode it talks either to the instances found through the
// ring (ring mode) or to a single instance over one gRPC connection.
type GatewayClient struct {
	// cfg is the configuration the client was created with.
	cfg IndexGatewayClientConfig

	// storeGatewayClientRequestDuration is the request latency histogram,
	// labeled by operation and status code.
	storeGatewayClientRequestDuration *prometheus.HistogramVec

	// conn and grpcClient are the single connection and the client built on
	// top of it. NewGatewayClient only sets them when not in ring mode.
	conn       *grpc.ClientConn
	grpcClient logproto.IndexGatewayClient

	// pool provides the per-instance clients. NewGatewayClient only sets it
	// in ring mode.
	pool *ring_client.Pool

	// ring is a copy of cfg.Ring, used in ring mode to look up the instances
	// for a tenant.
	ring ring.ReadRing
}
   101  
   102  // NewGatewayClient instantiates a new client used to communicate with an Index Gateway instance.
   103  //
   104  // If it is configured to be in ring mode, a pool of GRPC connections to all Index Gateway instances is created.
   105  // Otherwise, it creates a single GRPC connection to an Index Gateway instance running in simple mode.
   106  func NewGatewayClient(cfg IndexGatewayClientConfig, r prometheus.Registerer, logger log.Logger) (*GatewayClient, error) {
   107  	latency := prometheus.NewHistogramVec(prometheus.HistogramOpts{
   108  		Namespace: "loki_boltdb_shipper",
   109  		Name:      "store_gateway_request_duration_seconds",
   110  		Help:      "Time (in seconds) spent serving requests when using boltdb shipper store gateway",
   111  		Buckets:   instrument.DefBuckets,
   112  	}, []string{"operation", "status_code"})
   113  	if r != nil {
   114  		err := r.Register(latency)
   115  		if err != nil {
   116  			alreadyErr, ok := err.(prometheus.AlreadyRegisteredError)
   117  			if !ok {
   118  				return nil, err
   119  			}
   120  			latency = alreadyErr.ExistingCollector.(*prometheus.HistogramVec)
   121  		}
   122  	}
   123  
   124  	sgClient := &GatewayClient{
   125  		cfg:                               cfg,
   126  		storeGatewayClientRequestDuration: latency,
   127  		ring:                              cfg.Ring,
   128  	}
   129  
   130  	dialOpts, err := cfg.GRPCClientConfig.DialOption(grpcclient.Instrument(sgClient.storeGatewayClientRequestDuration))
   131  	if err != nil {
   132  		return nil, errors.Wrap(err, "index gateway grpc dial option")
   133  	}
   134  
   135  	if sgClient.cfg.Mode == indexgateway.RingMode {
   136  		factory := func(addr string) (ring_client.PoolClient, error) {
   137  			igPool, err := NewIndexGatewayGRPCPool(addr, dialOpts)
   138  			if err != nil {
   139  				return nil, errors.Wrap(err, "new index gateway grpc pool")
   140  			}
   141  
   142  			return igPool, nil
   143  		}
   144  
   145  		sgClient.pool = clientpool.NewPool(cfg.PoolConfig, sgClient.ring, factory, logger)
   146  	} else {
   147  		sgClient.conn, err = grpc.Dial(cfg.Address, dialOpts...)
   148  		if err != nil {
   149  			return nil, errors.Wrap(err, "index gateway grpc dial")
   150  		}
   151  
   152  		sgClient.grpcClient = logproto.NewIndexGatewayClient(sgClient.conn)
   153  	}
   154  
   155  	return sgClient, nil
   156  }
   157  
   158  // Stop stops the execution of this gateway client.
   159  //
   160  // If it is in simple mode, the single GRPC connection is closed. Otherwise, nothing happens.
   161  func (s *GatewayClient) Stop() {
   162  	if s.cfg.Mode == indexgateway.SimpleMode {
   163  		s.conn.Close()
   164  	}
   165  }
   166  
   167  func (s *GatewayClient) QueryPages(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
   168  	if len(queries) <= maxQueriesPerGrpc {
   169  		return s.doQueries(ctx, queries, callback)
   170  	}
   171  
   172  	jobsCount := len(queries) / maxQueriesPerGrpc
   173  	if len(queries)%maxQueriesPerGrpc != 0 {
   174  		jobsCount++
   175  	}
   176  	return concurrency.ForEachJob(ctx, jobsCount, maxConcurrentGrpcCalls, func(ctx context.Context, idx int) error {
   177  		return s.doQueries(ctx, queries[idx*maxQueriesPerGrpc:util_math.Min((idx+1)*maxQueriesPerGrpc, len(queries))], callback)
   178  	})
   179  }
   180  
   181  func (s *GatewayClient) GetChunkRef(ctx context.Context, in *logproto.GetChunkRefRequest, opts ...grpc.CallOption) (*logproto.GetChunkRefResponse, error) {
   182  	if s.cfg.Mode == indexgateway.RingMode {
   183  		var (
   184  			resp *logproto.GetChunkRefResponse
   185  			err  error
   186  		)
   187  		err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   188  			resp, err = client.GetChunkRef(ctx, in, opts...)
   189  			return err
   190  		})
   191  		return resp, err
   192  	}
   193  	return s.grpcClient.GetChunkRef(ctx, in, opts...)
   194  }
   195  
   196  func (s *GatewayClient) GetSeries(ctx context.Context, in *logproto.GetSeriesRequest, opts ...grpc.CallOption) (*logproto.GetSeriesResponse, error) {
   197  	if s.cfg.Mode == indexgateway.RingMode {
   198  		var (
   199  			resp *logproto.GetSeriesResponse
   200  			err  error
   201  		)
   202  		err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   203  			resp, err = client.GetSeries(ctx, in, opts...)
   204  			return err
   205  		})
   206  		return resp, err
   207  	}
   208  	return s.grpcClient.GetSeries(ctx, in, opts...)
   209  }
   210  
   211  func (s *GatewayClient) LabelNamesForMetricName(ctx context.Context, in *logproto.LabelNamesForMetricNameRequest, opts ...grpc.CallOption) (*logproto.LabelResponse, error) {
   212  	if s.cfg.Mode == indexgateway.RingMode {
   213  		var (
   214  			resp *logproto.LabelResponse
   215  			err  error
   216  		)
   217  		err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   218  			resp, err = client.LabelNamesForMetricName(ctx, in, opts...)
   219  			return err
   220  		})
   221  		return resp, err
   222  	}
   223  	return s.grpcClient.LabelNamesForMetricName(ctx, in, opts...)
   224  }
   225  
   226  func (s *GatewayClient) LabelValuesForMetricName(ctx context.Context, in *logproto.LabelValuesForMetricNameRequest, opts ...grpc.CallOption) (*logproto.LabelResponse, error) {
   227  	if s.cfg.Mode == indexgateway.RingMode {
   228  		var (
   229  			resp *logproto.LabelResponse
   230  			err  error
   231  		)
   232  		err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   233  			resp, err = client.LabelValuesForMetricName(ctx, in, opts...)
   234  			return err
   235  		})
   236  		return resp, err
   237  	}
   238  	return s.grpcClient.LabelValuesForMetricName(ctx, in, opts...)
   239  }
   240  
   241  func (s *GatewayClient) GetStats(ctx context.Context, in *logproto.IndexStatsRequest, opts ...grpc.CallOption) (*logproto.IndexStatsResponse, error) {
   242  	if s.cfg.Mode == indexgateway.RingMode {
   243  		var (
   244  			resp *logproto.IndexStatsResponse
   245  			err  error
   246  		)
   247  		err = s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   248  			resp, err = client.GetStats(ctx, in, opts...)
   249  			return err
   250  		})
   251  		return resp, err
   252  	}
   253  	return s.grpcClient.GetStats(ctx, in, opts...)
   254  }
   255  
   256  func (s *GatewayClient) doQueries(ctx context.Context, queries []index.Query, callback index.QueryPagesCallback) error {
   257  	queryKeyQueryMap := make(map[string]index.Query, len(queries))
   258  	gatewayQueries := make([]*logproto.IndexQuery, 0, len(queries))
   259  
   260  	for _, query := range queries {
   261  		queryKeyQueryMap[shipper_util.QueryKey(query)] = query
   262  		gatewayQueries = append(gatewayQueries, &logproto.IndexQuery{
   263  			TableName:        query.TableName,
   264  			HashValue:        query.HashValue,
   265  			RangeValuePrefix: query.RangeValuePrefix,
   266  			RangeValueStart:  query.RangeValueStart,
   267  			ValueEqual:       query.ValueEqual,
   268  		})
   269  	}
   270  
   271  	if s.cfg.Mode == indexgateway.RingMode {
   272  		return s.ringModeDo(ctx, func(client logproto.IndexGatewayClient) error {
   273  			return s.clientDoQueries(ctx, gatewayQueries, queryKeyQueryMap, callback, client)
   274  		})
   275  	}
   276  
   277  	return s.clientDoQueries(ctx, gatewayQueries, queryKeyQueryMap, callback, s.grpcClient)
   278  }
   279  
   280  // clientDoQueries send a query request to an Index Gateway instance using the given gRPC client.
   281  //
   282  // It is used by both, simple and ring mode.
   283  func (s *GatewayClient) clientDoQueries(ctx context.Context, gatewayQueries []*logproto.IndexQuery,
   284  	queryKeyQueryMap map[string]index.Query, callback index.QueryPagesCallback, client logproto.IndexGatewayClient,
   285  ) error {
   286  	streamer, err := client.QueryIndex(ctx, &logproto.QueryIndexRequest{Queries: gatewayQueries})
   287  	if err != nil {
   288  		return errors.Wrap(err, "query index")
   289  	}
   290  
   291  	for {
   292  		resp, err := streamer.Recv()
   293  		if err == io.EOF {
   294  			break
   295  		}
   296  		if err != nil {
   297  			return errors.WithStack(err)
   298  		}
   299  		query, ok := queryKeyQueryMap[resp.QueryKey]
   300  		if !ok {
   301  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("unexpected %s QueryKey received, expected queries %s", resp.QueryKey, fmt.Sprint(queryKeyQueryMap)))
   302  			return fmt.Errorf("unexpected %s QueryKey received", resp.QueryKey)
   303  		}
   304  		if !callback(query, &readBatch{resp}) {
   305  			return nil
   306  		}
   307  	}
   308  
   309  	return nil
   310  }
   311  
   312  // ringModeDo executes the given function for each Index Gateway instance in the ring mapping to the correct tenant in the index.
   313  // In case of callback failure, we'll try another member of the ring for that tenant ID.
   314  func (s *GatewayClient) ringModeDo(ctx context.Context, callback func(client logproto.IndexGatewayClient) error) error {
   315  	userID, err := tenant.TenantID(ctx)
   316  	if err != nil {
   317  		return errors.Wrap(err, "index gateway client get tenant ID")
   318  	}
   319  
   320  	bufDescs, bufHosts, bufZones := ring.MakeBuffersForGet()
   321  
   322  	key := util.TokenFor(userID, "" /* labels */)
   323  	rs, err := s.ring.Get(key, ring.WriteNoExtend, bufDescs, bufHosts, bufZones)
   324  	if err != nil {
   325  		return errors.Wrap(err, "index gateway get ring")
   326  	}
   327  
   328  	addrs := rs.GetAddresses()
   329  	// shuffle addresses to make sure we don't always access the same Index Gateway instances in sequence for same tenant.
   330  	rand.Shuffle(len(addrs), func(i, j int) {
   331  		addrs[i], addrs[j] = addrs[j], addrs[i]
   332  	})
   333  	var lastErr error
   334  	for _, addr := range addrs {
   335  		if s.cfg.LogGatewayRequests {
   336  			level.Debug(util_log.Logger).Log("msg", "sending request to gateway", "gateway", addr, "tenant", userID)
   337  		}
   338  
   339  		genericClient, err := s.pool.GetClientFor(addr)
   340  		if err != nil {
   341  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("failed to get client for instance %s", addr), "err", err)
   342  			continue
   343  		}
   344  
   345  		client := (genericClient.(logproto.IndexGatewayClient))
   346  		if err := callback(client); err != nil {
   347  			lastErr = err
   348  			level.Error(util_log.Logger).Log("msg", fmt.Sprintf("client do failed for instance %s", addr), "err", err)
   349  			continue
   350  		}
   351  
   352  		return nil
   353  	}
   354  
   355  	return lastErr
   356  }
   357  
// NewWriteBatch is not supported by the gateway client and always panics.
func (s *GatewayClient) NewWriteBatch() index.WriteBatch {
	panic("unsupported")
}
   361  
// BatchWrite is not supported by the gateway client and always panics.
func (s *GatewayClient) BatchWrite(ctx context.Context, batch index.WriteBatch) error {
	panic("unsupported")
}
   365  
   366  type readBatch struct {
   367  	*logproto.QueryIndexResponse
   368  }
   369  
   370  func (r *readBatch) Iterator() index.ReadBatchIterator {
   371  	return &grpcIter{
   372  		i:                  -1,
   373  		QueryIndexResponse: r.QueryIndexResponse,
   374  	}
   375  }
   376  
   377  type grpcIter struct {
   378  	i int
   379  	*logproto.QueryIndexResponse
   380  }
   381  
   382  func (b *grpcIter) Next() bool {
   383  	b.i++
   384  	return b.i < len(b.Rows)
   385  }
   386  
   387  func (b *grpcIter) RangeValue() []byte {
   388  	return b.Rows[b.i].RangeValue
   389  }
   390  
   391  func (b *grpcIter) Value() []byte {
   392  	return b.Rows[b.i].Value
   393  }