github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/query/remote/client.go (about)

     1  // Copyright (c) 2018 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package remote
    22  
    23  import (
    24  	"context"
    25  	goerrors "errors"
    26  	"io"
    27  	"strings"
    28  	"sync"
    29  	"time"
    30  
    31  	"github.com/m3db/m3/src/dbnode/encoding"
    32  	"github.com/m3db/m3/src/query/block"
    33  	"github.com/m3db/m3/src/query/errors"
    34  	rpc "github.com/m3db/m3/src/query/generated/proto/rpcpb"
    35  	"github.com/m3db/m3/src/query/models"
    36  	"github.com/m3db/m3/src/query/pools"
    37  	"github.com/m3db/m3/src/query/storage"
    38  	"github.com/m3db/m3/src/query/storage/m3"
    39  	"github.com/m3db/m3/src/query/storage/m3/consolidators"
    40  	"github.com/m3db/m3/src/query/storage/m3/storagemetadata"
    41  	"github.com/m3db/m3/src/query/util/logging"
    42  	xgrpc "github.com/m3db/m3/src/x/grpc"
    43  	"github.com/m3db/m3/src/x/instrument"
    44  
    45  	"github.com/uber-go/tally"
    46  	"go.uber.org/zap"
    47  	"google.golang.org/grpc"
    48  	"google.golang.org/grpc/keepalive"
    49  )
    50  
const (
	// initResultSize is the initial capacity for slices accumulated while
	// streaming search / complete-tags responses.
	initResultSize = 10
	// healthCheckInterval is how often the background loop pings the
	// remote endpoint once the first check has completed.
	healthCheckInterval = 60 * time.Second
	// healthCheckTimeout bounds each individual health check RPC.
	healthCheckTimeout = 5 * time.Second
	// healthCheckMetricName is the counter emitted per health check
	// attempt; healthCheckMetricResultTag tags it "success" or "error".
	healthCheckMetricName      = "health-check"
	healthCheckMetricResultTag = "result"
)
    58  
var (
	// errAlreadyClosed is returned by Close when called more than once.
	errAlreadyClosed = goerrors.New("already closed")

	// errQueryStorageMetadataAttributesNotImplemented is returned by
	// QueryStorageMetadataAttributes, which the remote client does not support.
	errQueryStorageMetadataAttributesNotImplemented = goerrors.New(
		"remote storage does not implement QueryStorageMetadataAttributes",
	)

	// NB(r): These options tries to ensure we don't let connections go stale
	// and cause failed RPCs as a result.
	defaultDialOptions = []grpc.DialOption{
		grpc.WithKeepaliveParams(keepalive.ClientParameters{
			// After a duration of this time if the client doesn't see any activity it
			// pings the server to see if the transport is still alive.
			// If set below 10s, a minimum value of 10s will be used instead.
			Time: 10 * time.Second,
			// After having pinged for keepalive check, the client waits for a duration
			// of Timeout and if no activity is seen even after that the connection is
			// closed.
			Timeout: 20 * time.Second,
			// If true, client sends keepalive pings even with no active RPCs. If false,
			// when there are no active RPCs, Time and Timeout will be ignored and no
			// keepalive pings will be sent.
			PermitWithoutStream: true,
		}),
	}
)
    85  
// Client is the remote GRPC client.
//
// It satisfies storage.Querier by forwarding queries over gRPC to a remote
// query endpoint. Close must be called to stop the background health check
// goroutine and release the underlying connection.
type Client interface {
	storage.Querier
	Close() error
}
    91  
// grpcClient is the gRPC-backed implementation of Client.
type grpcClient struct {
	state       grpcClientState    // closed flag + close signal, guarded by its RWMutex
	client      rpc.QueryClient    // generated RPC stub over connection
	connection  *grpc.ClientConn   // underlying connection, closed by Close
	poolWrapper *pools.PoolWrapper // source of iterator pools (see waitForPools)
	once        sync.Once          // memoizes the waitForPools result
	pools       encoding.IteratorPools
	poolErr     error
	opts        m3.Options
	logger      *zap.Logger
	metrics     grpcClientMetrics
}
   104  
// grpcClientState tracks whether the client has been closed. closeCh is
// closed exactly once by Close to wake the health check goroutine.
type grpcClientState struct {
	sync.RWMutex
	closed  bool
	closeCh chan struct{}
}
   110  
// grpcClientMetrics holds the success/error counters emitted by the
// background health check loop.
type grpcClientMetrics struct {
	healthCheckSuccess tally.Counter
	healthCheckError   tally.Counter
}
   115  
   116  func newGRPCClientMetrics(s tally.Scope) grpcClientMetrics {
   117  	s = s.SubScope("remote-client")
   118  	return grpcClientMetrics{
   119  		healthCheckSuccess: s.Tagged(map[string]string{
   120  			healthCheckMetricResultTag: "success",
   121  		}).Counter(healthCheckMetricName),
   122  		healthCheckError: s.Tagged(map[string]string{
   123  			healthCheckMetricResultTag: "error",
   124  		}).Counter(healthCheckMetricName),
   125  	}
   126  }
   127  
// NewGRPCClient creates a new remote GRPC client.
//
// name, when non-empty, tags all emitted metrics with "remote-name" so
// multiple named remotes can be told apart. addresses must be non-empty;
// routing across them is handled by the static resolver (round robin).
// additionalDialOpts are appended last and therefore take precedence over
// both the built-in options and defaultDialOptions.
//
// A background health check goroutine is started; it runs until Close is
// called on the returned Client.
func NewGRPCClient(
	name string,
	addresses []string,
	poolWrapper *pools.PoolWrapper,
	opts m3.Options,
	instrumentOpts instrument.Options,
	additionalDialOpts ...grpc.DialOption,
) (Client, error) {
	if len(addresses) == 0 {
		return nil, errors.ErrNoClientAddresses
	}

	// Set name if using a named client.
	if remote := strings.TrimSpace(name); remote != "" {
		instrumentOpts = instrumentOpts.
			SetMetricsScope(instrumentOpts.MetricsScope().Tagged(map[string]string{
				"remote-name": remote,
			}))
	}

	scope := instrumentOpts.MetricsScope()
	interceptorOpts := xgrpc.InterceptorInstrumentOptions{Scope: scope}

	dialOptions := append([]grpc.DialOption{
		// N.B.: the static resolver also specifies the load balancing policy, which is
		// round robin.
		grpc.WithResolvers(newStaticResolverBuilder(addresses)),
		grpc.WithInsecure(),
		grpc.WithUnaryInterceptor(xgrpc.UnaryClientInterceptor(interceptorOpts)),
		grpc.WithStreamInterceptor(xgrpc.StreamClientInterceptor(interceptorOpts)),
	}, defaultDialOptions...)
	dialOptions = append(dialOptions, additionalDialOpts...)

	// The resolver handles routing correctly for us, which is why the "endpoint" here is static.
	cc, err := grpc.Dial(_staticResolverURL, dialOptions...)
	if err != nil {
		return nil, err
	}

	client := rpc.NewQueryClient(cc)
	c := &grpcClient{
		state: grpcClientState{
			closeCh: make(chan struct{}),
		},
		client:      client,
		connection:  cc,
		poolWrapper: poolWrapper,
		opts:        opts,
		logger:      instrumentOpts.Logger(),
		metrics:     newGRPCClientMetrics(scope),
	}
	// Goroutine exits when closeCh is closed (see Close).
	go c.healthCheckUntilClosed()
	return c, nil
}
   183  
   184  func (c *grpcClient) QueryStorageMetadataAttributes(
   185  	ctx context.Context,
   186  	queryStart, queryEnd time.Time,
   187  	opts *storage.FetchOptions,
   188  ) ([]storagemetadata.Attributes, error) {
   189  	return nil, errQueryStorageMetadataAttributesNotImplemented
   190  }
   191  
// healthCheckUntilClosed runs health checks against the remote endpoint
// every healthCheckInterval until Close is called, emitting a success or
// error counter per attempt. The first check runs immediately rather than
// waiting a full interval.
func (c *grpcClient) healthCheckUntilClosed() {
	ticker := time.NewTicker(healthCheckInterval)
	defer ticker.Stop()

	for {
		if c.closed() {
			return // Abort early, closed already.
		}

		// Perform immediately so first check isn't delayed.
		err := c.healthCheck()

		// Re-check after the (up to healthCheckTimeout) RPC: the client may
		// have been closed while the check was in flight.
		if c.closed() {
			return // Don't report results, closed already.
		}

		if err != nil {
			c.metrics.healthCheckError.Inc(1)
			c.logger.Debug("remote storage client health check failed",
				zap.Error(err))
		} else {
			c.metrics.healthCheckSuccess.Inc(1)
		}

		// Wait for the next tick, or exit promptly when closeCh is closed.
		select {
		case <-c.state.closeCh:
			return
		case <-ticker.C:
			// Continue to next check.
			continue
		}
	}
}
   225  
   226  func (c *grpcClient) healthCheck() error {
   227  	ctx, cancel := context.WithTimeout(context.Background(),
   228  		healthCheckTimeout)
   229  	_, err := c.client.Health(ctx, &rpc.HealthRequest{})
   230  	cancel()
   231  	return err
   232  }
   233  
   234  func (c *grpcClient) closed() bool {
   235  	c.state.RLock()
   236  	closed := c.state.closed
   237  	c.state.RUnlock()
   238  	return closed
   239  }
   240  
// waitForPools blocks (up to poolTimeout) for the iterator pools to become
// available. The wait happens at most once: the result is memoized via
// sync.Once, so later calls return the cached pools/error immediately —
// including a cached error from the first attempt.
func (c *grpcClient) waitForPools() (encoding.IteratorPools, error) {
	c.once.Do(func() {
		c.pools, c.poolErr = c.poolWrapper.WaitForIteratorPools(poolTimeout)
	})

	return c.pools, c.poolErr
}
   248  
   249  func (c *grpcClient) FetchProm(
   250  	ctx context.Context,
   251  	query *storage.FetchQuery,
   252  	options *storage.FetchOptions,
   253  ) (storage.PromResult, error) {
   254  	result, err := c.fetchRaw(ctx, query, options)
   255  	if err != nil {
   256  		return storage.PromResult{}, err
   257  	}
   258  
   259  	return storage.SeriesIteratorsToPromResult(
   260  		ctx,
   261  		result,
   262  		c.opts.ReadWorkerPool(),
   263  		c.opts.TagOptions(),
   264  		c.opts.PromConvertOptions(),
   265  		options)
   266  }
   267  
   268  func (c *grpcClient) fetchRaw(
   269  	ctx context.Context,
   270  	query *storage.FetchQuery,
   271  	options *storage.FetchOptions,
   272  ) (consolidators.SeriesFetchResult, error) {
   273  	result, err := c.FetchCompressed(ctx, query, options)
   274  	if err != nil {
   275  		return consolidators.SeriesFetchResult{}, err
   276  	}
   277  
   278  	return result.FinalResult()
   279  }
   280  
// FetchCompressed streams compressed series from the remote endpoint for the
// given query, accumulating every received message into a MultiFetchResult.
// The stream is consumed to EOF before returning; per-message decode errors
// are carried into the result (via MultiFetchResults.Err) rather than
// aborting the stream, whereas transport errors abort immediately.
func (c *grpcClient) FetchCompressed(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (consolidators.MultiFetchResult, error) {
	if err := options.BlockType.Validate(); err != nil {
		// This is an invariant error; should not be able to get to here.
		return nil, instrument.InvariantErrorf("invalid block type on "+
			"fetch, got: %v with error %v", options.BlockType, err)
	}

	// Pools are required to decode the compressed responses.
	pools, err := c.waitForPools()
	if err != nil {
		return nil, err
	}

	request, err := encodeFetchRequest(query, options)
	if err != nil {
		return nil, err
	}

	// Send the id from the client to the remote server so that provides logging
	// TODO: replace id propagation with opentracing
	id := logging.ReadContextID(ctx)
	mdCtx := encodeMetadata(ctx, id)
	fetchClient, err := c.client.Fetch(mdCtx, request)
	if err != nil {
		return nil, err
	}

	defer fetchClient.CloseSend()

	var (
		// NB(review): fanout assumes each remote namespace covers the whole
		// query range — confirm against consolidators semantics if changing.
		fanout    = consolidators.NamespaceCoversAllQueryRange
		matchOpts = c.opts.SeriesConsolidationMatchOptions()
		tagOpts   = c.opts.TagOptions()
		limitOpts = consolidators.LimitOptions{
			Limit:             options.SeriesLimit,
			RequireExhaustive: options.RequireExhaustive,
		}

		result = consolidators.NewMultiFetchResult(fanout, matchOpts, tagOpts, limitOpts)
	)

	for {
		select {
		// If query is killed during gRPC streaming, close the channel
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		recvResult, err := fetchClient.Recv()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		// Decode errors are intentionally passed through to result.Add as
		// Err instead of returned here.
		receivedMeta := decodeResultMetadata(recvResult.GetMeta())
		iters, err := DecodeCompressedFetchResponse(recvResult, pools)
		result.Add(consolidators.MultiFetchResults{
			SeriesIterators: iters,
			Metadata:        receivedMeta,
			Attrs:           storagemetadata.Attributes{},
			Err:             err,
		})
	}

	return result, nil
}
   353  
   354  func (c *grpcClient) FetchBlocks(
   355  	ctx context.Context,
   356  	query *storage.FetchQuery,
   357  	options *storage.FetchOptions,
   358  ) (block.Result, error) {
   359  	// Override options with whatever is the current specified lookback duration.
   360  	opts := c.opts.SetLookbackDuration(
   361  		options.LookbackDurationOrDefault(c.opts.LookbackDuration()))
   362  
   363  	fetchResult, err := c.fetchRaw(ctx, query, options)
   364  	if err != nil {
   365  		return block.Result{
   366  			Metadata: block.NewResultMetadata(),
   367  		}, err
   368  	}
   369  
   370  	return m3.FetchResultToBlockResult(fetchResult, query, options, opts)
   371  }
   372  
// SearchSeries streams matching series metadata from the remote endpoint,
// combining per-message result metadata and appending decoded metrics until
// the stream reaches EOF. Unlike FetchCompressed, a decode error here aborts
// the whole search.
func (c *grpcClient) SearchSeries(
	ctx context.Context,
	query *storage.FetchQuery,
	options *storage.FetchOptions,
) (*storage.SearchResults, error) {
	pools, err := c.waitForPools()
	if err != nil {
		return nil, err
	}

	request, err := encodeSearchRequest(query, options)
	if err != nil {
		return nil, err
	}

	// Send the id from the client to the remote server so that provides logging
	// TODO: replace id propagation with opentracing
	id := logging.ReadContextID(ctx)
	// TODO: add relevant fields to the metadata
	mdCtx := encodeMetadata(ctx, id)
	searchClient, err := c.client.Search(mdCtx, request)
	if err != nil {
		return nil, err
	}

	metrics := make(models.Metrics, 0, initResultSize)
	meta := block.NewResultMetadata()
	defer searchClient.CloseSend()
	for {
		select {
		// If query is killed during gRPC streaming, close the channel
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		received, err := searchClient.Recv()
		if err == io.EOF {
			break
		}

		if err != nil {
			return nil, err
		}

		receivedMeta := decodeResultMetadata(received.GetMeta())
		meta = meta.CombineMetadata(receivedMeta)
		m, err := decodeSearchResponse(received, pools, c.opts.TagOptions())
		if err != nil {
			return nil, err
		}

		metrics = append(metrics, m...)
	}

	return &storage.SearchResults{
		Metrics:  metrics,
		Metadata: meta,
	}, nil
}
   433  
// CompleteTags streams tag-completion results from the remote endpoint,
// combining per-message result metadata and accumulating completed tags
// until the stream reaches EOF. A decode error aborts the whole request.
func (c *grpcClient) CompleteTags(
	ctx context.Context,
	query *storage.CompleteTagsQuery,
	options *storage.FetchOptions,
) (*consolidators.CompleteTagsResult, error) {
	request, err := encodeCompleteTagsRequest(query, options)
	if err != nil {
		return nil, err
	}

	// Send the id from the client to the remote server so that provides logging
	// TODO: replace id propagation with opentracing
	id := logging.ReadContextID(ctx)
	// TODO: add relevant fields to the metadata
	mdCtx := encodeMetadata(ctx, id)
	completeTagsClient, err := c.client.CompleteTags(mdCtx, request)
	if err != nil {
		return nil, err
	}

	tags := make([]consolidators.CompletedTag, 0, initResultSize)
	meta := block.NewResultMetadata()
	defer completeTagsClient.CloseSend()
	for {
		select {
		// If query is killed during gRPC streaming, close the channel
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
		}

		received, err := completeTagsClient.Recv()
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}

		receivedMeta := decodeResultMetadata(received.GetMeta())
		meta = meta.CombineMetadata(receivedMeta)
		result, err := decodeCompleteTagsResponse(received, query.CompleteNameOnly)
		if err != nil {
			return nil, err
		}

		tags = append(tags, result...)
	}

	return &consolidators.CompleteTagsResult{
		CompleteNameOnly: query.CompleteNameOnly,
		CompletedTags:    tags,
		Metadata:         meta,
	}, nil
}
   488  
   489  func (c *grpcClient) Close() error {
   490  	c.state.Lock()
   491  	defer c.state.Unlock()
   492  
   493  	if c.state.closed {
   494  		return errAlreadyClosed
   495  	}
   496  	c.state.closed = true
   497  
   498  	close(c.state.closeCh)
   499  	return c.connection.Close()
   500  }