// Source: github.com/m3db/m3@v1.5.0/src/query/remote/client.go
//
// Copyright (c) 2018 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

20 21 package remote 22 23 import ( 24 "context" 25 goerrors "errors" 26 "io" 27 "strings" 28 "sync" 29 "time" 30 31 "github.com/m3db/m3/src/dbnode/encoding" 32 "github.com/m3db/m3/src/query/block" 33 "github.com/m3db/m3/src/query/errors" 34 rpc "github.com/m3db/m3/src/query/generated/proto/rpcpb" 35 "github.com/m3db/m3/src/query/models" 36 "github.com/m3db/m3/src/query/pools" 37 "github.com/m3db/m3/src/query/storage" 38 "github.com/m3db/m3/src/query/storage/m3" 39 "github.com/m3db/m3/src/query/storage/m3/consolidators" 40 "github.com/m3db/m3/src/query/storage/m3/storagemetadata" 41 "github.com/m3db/m3/src/query/util/logging" 42 xgrpc "github.com/m3db/m3/src/x/grpc" 43 "github.com/m3db/m3/src/x/instrument" 44 45 "github.com/uber-go/tally" 46 "go.uber.org/zap" 47 "google.golang.org/grpc" 48 "google.golang.org/grpc/keepalive" 49 ) 50 51 const ( 52 initResultSize = 10 53 healthCheckInterval = 60 * time.Second 54 healthCheckTimeout = 5 * time.Second 55 healthCheckMetricName = "health-check" 56 healthCheckMetricResultTag = "result" 57 ) 58 59 var ( 60 errAlreadyClosed = goerrors.New("already closed") 61 62 errQueryStorageMetadataAttributesNotImplemented = goerrors.New( 63 "remote storage does not implement QueryStorageMetadataAttributes", 64 ) 65 66 // NB(r): These options tries to ensure we don't let connections go stale 67 // and cause failed RPCs as a result. 68 defaultDialOptions = []grpc.DialOption{ 69 grpc.WithKeepaliveParams(keepalive.ClientParameters{ 70 // After a duration of this time if the client doesn't see any activity it 71 // pings the server to see if the transport is still alive. 72 // If set below 10s, a minimum value of 10s will be used instead. 73 Time: 10 * time.Second, 74 // After having pinged for keepalive check, the client waits for a duration 75 // of Timeout and if no activity is seen even after that the connection is 76 // closed. 77 Timeout: 20 * time.Second, 78 // If true, client sends keepalive pings even with no active RPCs. 
If false, 79 // when there are no active RPCs, Time and Timeout will be ignored and no 80 // keepalive pings will be sent. 81 PermitWithoutStream: true, 82 }), 83 } 84 ) 85 86 // Client is the remote GRPC client. 87 type Client interface { 88 storage.Querier 89 Close() error 90 } 91 92 type grpcClient struct { 93 state grpcClientState 94 client rpc.QueryClient 95 connection *grpc.ClientConn 96 poolWrapper *pools.PoolWrapper 97 once sync.Once 98 pools encoding.IteratorPools 99 poolErr error 100 opts m3.Options 101 logger *zap.Logger 102 metrics grpcClientMetrics 103 } 104 105 type grpcClientState struct { 106 sync.RWMutex 107 closed bool 108 closeCh chan struct{} 109 } 110 111 type grpcClientMetrics struct { 112 healthCheckSuccess tally.Counter 113 healthCheckError tally.Counter 114 } 115 116 func newGRPCClientMetrics(s tally.Scope) grpcClientMetrics { 117 s = s.SubScope("remote-client") 118 return grpcClientMetrics{ 119 healthCheckSuccess: s.Tagged(map[string]string{ 120 healthCheckMetricResultTag: "success", 121 }).Counter(healthCheckMetricName), 122 healthCheckError: s.Tagged(map[string]string{ 123 healthCheckMetricResultTag: "error", 124 }).Counter(healthCheckMetricName), 125 } 126 } 127 128 // NewGRPCClient creates a new remote GRPC client. 129 func NewGRPCClient( 130 name string, 131 addresses []string, 132 poolWrapper *pools.PoolWrapper, 133 opts m3.Options, 134 instrumentOpts instrument.Options, 135 additionalDialOpts ...grpc.DialOption, 136 ) (Client, error) { 137 if len(addresses) == 0 { 138 return nil, errors.ErrNoClientAddresses 139 } 140 141 // Set name if using a named client. 142 if remote := strings.TrimSpace(name); remote != "" { 143 instrumentOpts = instrumentOpts. 
144 SetMetricsScope(instrumentOpts.MetricsScope().Tagged(map[string]string{ 145 "remote-name": remote, 146 })) 147 } 148 149 scope := instrumentOpts.MetricsScope() 150 interceptorOpts := xgrpc.InterceptorInstrumentOptions{Scope: scope} 151 152 dialOptions := append([]grpc.DialOption{ 153 // N.B.: the static resolver also specifies the load balancing policy, which is 154 // round robin. 155 grpc.WithResolvers(newStaticResolverBuilder(addresses)), 156 grpc.WithInsecure(), 157 grpc.WithUnaryInterceptor(xgrpc.UnaryClientInterceptor(interceptorOpts)), 158 grpc.WithStreamInterceptor(xgrpc.StreamClientInterceptor(interceptorOpts)), 159 }, defaultDialOptions...) 160 dialOptions = append(dialOptions, additionalDialOpts...) 161 162 // The resolver handles routing correctly for us, which is why the "endpoint" here is static. 163 cc, err := grpc.Dial(_staticResolverURL, dialOptions...) 164 if err != nil { 165 return nil, err 166 } 167 168 client := rpc.NewQueryClient(cc) 169 c := &grpcClient{ 170 state: grpcClientState{ 171 closeCh: make(chan struct{}), 172 }, 173 client: client, 174 connection: cc, 175 poolWrapper: poolWrapper, 176 opts: opts, 177 logger: instrumentOpts.Logger(), 178 metrics: newGRPCClientMetrics(scope), 179 } 180 go c.healthCheckUntilClosed() 181 return c, nil 182 } 183 184 func (c *grpcClient) QueryStorageMetadataAttributes( 185 ctx context.Context, 186 queryStart, queryEnd time.Time, 187 opts *storage.FetchOptions, 188 ) ([]storagemetadata.Attributes, error) { 189 return nil, errQueryStorageMetadataAttributesNotImplemented 190 } 191 192 func (c *grpcClient) healthCheckUntilClosed() { 193 ticker := time.NewTicker(healthCheckInterval) 194 defer ticker.Stop() 195 196 for { 197 if c.closed() { 198 return // Abort early, closed already. 199 } 200 201 // Perform immediately so first check isn't delayed. 202 err := c.healthCheck() 203 204 if c.closed() { 205 return // Don't report results, closed already. 
206 } 207 208 if err != nil { 209 c.metrics.healthCheckError.Inc(1) 210 c.logger.Debug("remote storage client health check failed", 211 zap.Error(err)) 212 } else { 213 c.metrics.healthCheckSuccess.Inc(1) 214 } 215 216 select { 217 case <-c.state.closeCh: 218 return 219 case <-ticker.C: 220 // Continue to next check. 221 continue 222 } 223 } 224 } 225 226 func (c *grpcClient) healthCheck() error { 227 ctx, cancel := context.WithTimeout(context.Background(), 228 healthCheckTimeout) 229 _, err := c.client.Health(ctx, &rpc.HealthRequest{}) 230 cancel() 231 return err 232 } 233 234 func (c *grpcClient) closed() bool { 235 c.state.RLock() 236 closed := c.state.closed 237 c.state.RUnlock() 238 return closed 239 } 240 241 func (c *grpcClient) waitForPools() (encoding.IteratorPools, error) { 242 c.once.Do(func() { 243 c.pools, c.poolErr = c.poolWrapper.WaitForIteratorPools(poolTimeout) 244 }) 245 246 return c.pools, c.poolErr 247 } 248 249 func (c *grpcClient) FetchProm( 250 ctx context.Context, 251 query *storage.FetchQuery, 252 options *storage.FetchOptions, 253 ) (storage.PromResult, error) { 254 result, err := c.fetchRaw(ctx, query, options) 255 if err != nil { 256 return storage.PromResult{}, err 257 } 258 259 return storage.SeriesIteratorsToPromResult( 260 ctx, 261 result, 262 c.opts.ReadWorkerPool(), 263 c.opts.TagOptions(), 264 c.opts.PromConvertOptions(), 265 options) 266 } 267 268 func (c *grpcClient) fetchRaw( 269 ctx context.Context, 270 query *storage.FetchQuery, 271 options *storage.FetchOptions, 272 ) (consolidators.SeriesFetchResult, error) { 273 result, err := c.FetchCompressed(ctx, query, options) 274 if err != nil { 275 return consolidators.SeriesFetchResult{}, err 276 } 277 278 return result.FinalResult() 279 } 280 281 func (c *grpcClient) FetchCompressed( 282 ctx context.Context, 283 query *storage.FetchQuery, 284 options *storage.FetchOptions, 285 ) (consolidators.MultiFetchResult, error) { 286 if err := options.BlockType.Validate(); err != nil { 287 
// This is an invariant error; should not be able to get to here. 288 return nil, instrument.InvariantErrorf("invalid block type on "+ 289 "fetch, got: %v with error %v", options.BlockType, err) 290 } 291 292 pools, err := c.waitForPools() 293 if err != nil { 294 return nil, err 295 } 296 297 request, err := encodeFetchRequest(query, options) 298 if err != nil { 299 return nil, err 300 } 301 302 // Send the id from the client to the remote server so that provides logging 303 // TODO: replace id propagation with opentracing 304 id := logging.ReadContextID(ctx) 305 mdCtx := encodeMetadata(ctx, id) 306 fetchClient, err := c.client.Fetch(mdCtx, request) 307 if err != nil { 308 return nil, err 309 } 310 311 defer fetchClient.CloseSend() 312 313 var ( 314 fanout = consolidators.NamespaceCoversAllQueryRange 315 matchOpts = c.opts.SeriesConsolidationMatchOptions() 316 tagOpts = c.opts.TagOptions() 317 limitOpts = consolidators.LimitOptions{ 318 Limit: options.SeriesLimit, 319 RequireExhaustive: options.RequireExhaustive, 320 } 321 322 result = consolidators.NewMultiFetchResult(fanout, matchOpts, tagOpts, limitOpts) 323 ) 324 325 for { 326 select { 327 // If query is killed during gRPC streaming, close the channel 328 case <-ctx.Done(): 329 return nil, ctx.Err() 330 default: 331 } 332 333 recvResult, err := fetchClient.Recv() 334 if err == io.EOF { 335 break 336 } 337 if err != nil { 338 return nil, err 339 } 340 341 receivedMeta := decodeResultMetadata(recvResult.GetMeta()) 342 iters, err := DecodeCompressedFetchResponse(recvResult, pools) 343 result.Add(consolidators.MultiFetchResults{ 344 SeriesIterators: iters, 345 Metadata: receivedMeta, 346 Attrs: storagemetadata.Attributes{}, 347 Err: err, 348 }) 349 } 350 351 return result, nil 352 } 353 354 func (c *grpcClient) FetchBlocks( 355 ctx context.Context, 356 query *storage.FetchQuery, 357 options *storage.FetchOptions, 358 ) (block.Result, error) { 359 // Override options with whatever is the current specified lookback 
duration. 360 opts := c.opts.SetLookbackDuration( 361 options.LookbackDurationOrDefault(c.opts.LookbackDuration())) 362 363 fetchResult, err := c.fetchRaw(ctx, query, options) 364 if err != nil { 365 return block.Result{ 366 Metadata: block.NewResultMetadata(), 367 }, err 368 } 369 370 return m3.FetchResultToBlockResult(fetchResult, query, options, opts) 371 } 372 373 func (c *grpcClient) SearchSeries( 374 ctx context.Context, 375 query *storage.FetchQuery, 376 options *storage.FetchOptions, 377 ) (*storage.SearchResults, error) { 378 pools, err := c.waitForPools() 379 if err != nil { 380 return nil, err 381 } 382 383 request, err := encodeSearchRequest(query, options) 384 if err != nil { 385 return nil, err 386 } 387 388 // Send the id from the client to the remote server so that provides logging 389 // TODO: replace id propagation with opentracing 390 id := logging.ReadContextID(ctx) 391 // TODO: add relevant fields to the metadata 392 mdCtx := encodeMetadata(ctx, id) 393 searchClient, err := c.client.Search(mdCtx, request) 394 if err != nil { 395 return nil, err 396 } 397 398 metrics := make(models.Metrics, 0, initResultSize) 399 meta := block.NewResultMetadata() 400 defer searchClient.CloseSend() 401 for { 402 select { 403 // If query is killed during gRPC streaming, close the channel 404 case <-ctx.Done(): 405 return nil, ctx.Err() 406 default: 407 } 408 409 received, err := searchClient.Recv() 410 if err == io.EOF { 411 break 412 } 413 414 if err != nil { 415 return nil, err 416 } 417 418 receivedMeta := decodeResultMetadata(received.GetMeta()) 419 meta = meta.CombineMetadata(receivedMeta) 420 m, err := decodeSearchResponse(received, pools, c.opts.TagOptions()) 421 if err != nil { 422 return nil, err 423 } 424 425 metrics = append(metrics, m...) 
426 } 427 428 return &storage.SearchResults{ 429 Metrics: metrics, 430 Metadata: meta, 431 }, nil 432 } 433 434 func (c *grpcClient) CompleteTags( 435 ctx context.Context, 436 query *storage.CompleteTagsQuery, 437 options *storage.FetchOptions, 438 ) (*consolidators.CompleteTagsResult, error) { 439 request, err := encodeCompleteTagsRequest(query, options) 440 if err != nil { 441 return nil, err 442 } 443 444 // Send the id from the client to the remote server so that provides logging 445 // TODO: replace id propagation with opentracing 446 id := logging.ReadContextID(ctx) 447 // TODO: add relevant fields to the metadata 448 mdCtx := encodeMetadata(ctx, id) 449 completeTagsClient, err := c.client.CompleteTags(mdCtx, request) 450 if err != nil { 451 return nil, err 452 } 453 454 tags := make([]consolidators.CompletedTag, 0, initResultSize) 455 meta := block.NewResultMetadata() 456 defer completeTagsClient.CloseSend() 457 for { 458 select { 459 // If query is killed during gRPC streaming, close the channel 460 case <-ctx.Done(): 461 return nil, ctx.Err() 462 default: 463 } 464 465 received, err := completeTagsClient.Recv() 466 if err == io.EOF { 467 break 468 } else if err != nil { 469 return nil, err 470 } 471 472 receivedMeta := decodeResultMetadata(received.GetMeta()) 473 meta = meta.CombineMetadata(receivedMeta) 474 result, err := decodeCompleteTagsResponse(received, query.CompleteNameOnly) 475 if err != nil { 476 return nil, err 477 } 478 479 tags = append(tags, result...) 480 } 481 482 return &consolidators.CompleteTagsResult{ 483 CompleteNameOnly: query.CompleteNameOnly, 484 CompletedTags: tags, 485 Metadata: meta, 486 }, nil 487 } 488 489 func (c *grpcClient) Close() error { 490 c.state.Lock() 491 defer c.state.Unlock() 492 493 if c.state.closed { 494 return errAlreadyClosed 495 } 496 c.state.closed = true 497 498 close(c.state.closeCh) 499 return c.connection.Close() 500 }