github.com/go-graphite/carbonapi@v0.17.0/zipper/broadcast/broadcast_group.go (about) 1 package broadcast 2 3 import ( 4 "context" 5 "fmt" 6 "net" 7 "sort" 8 "strings" 9 10 "github.com/ansel1/merry" 11 protov3 "github.com/go-graphite/protocol/carbonapi_v3_pb" 12 "github.com/lomik/zapwriter" 13 14 "github.com/go-graphite/carbonapi/limiter" 15 "github.com/go-graphite/carbonapi/pathcache" 16 utilctx "github.com/go-graphite/carbonapi/util/ctx" 17 "github.com/go-graphite/carbonapi/zipper/helper" 18 "github.com/go-graphite/carbonapi/zipper/types" 19 20 "go.uber.org/zap" 21 ) 22 23 type BroadcastGroup struct { 24 limiter limiter.ServerLimiter 25 groupName string 26 timeout types.Timeouts 27 backends []types.BackendServer 28 servers []string 29 maxMetricsPerRequest int 30 doMultipleRequestsIfSplit bool 31 tldCacheDisabled bool 32 concurrencyLimit int 33 requireSuccessAll bool 34 35 fetcher types.Fetcher 36 pathCache pathcache.PathCache 37 logger *zap.Logger 38 dialer *net.Dialer 39 } 40 41 type Option func(group *BroadcastGroup) 42 43 func WithLogger(logger *zap.Logger) Option { 44 return func(bg *BroadcastGroup) { 45 bg.logger = logger 46 } 47 } 48 49 func WithGroupName(name string) Option { 50 return func(bg *BroadcastGroup) { 51 bg.groupName = name 52 } 53 } 54 55 func WithSplitMultipleRequests(multiRequests bool) Option { 56 if multiRequests { 57 return func(bg *BroadcastGroup) { 58 bg.doMultipleRequestsIfSplit = true 59 bg.fetcher = bg.doMultiFetch 60 } 61 } 62 63 return func(bg *BroadcastGroup) { 64 bg.doMultipleRequestsIfSplit = false 65 bg.fetcher = bg.doSingleFetch 66 } 67 } 68 69 func WithBackends(backends []types.BackendServer) Option { 70 return func(bg *BroadcastGroup) { 71 serverNames := make([]string, 0, len(backends)) 72 for _, b := range backends { 73 serverNames = append(serverNames, b.Name()) 74 } 75 bg.backends = backends 76 bg.servers = serverNames 77 } 78 } 79 80 func WithPathCache(expireDelaySec int32) Option { 81 return func(bg *BroadcastGroup) { 82 bg.pathCache = pathcache.NewPathCache(expireDelaySec) 83 } 84 } 85 86 func WithLimiter(concurrencyLimit int) Option { 87 return func(bg *BroadcastGroup) { 88 bg.concurrencyLimit = concurrencyLimit 89 } 90 } 91 92 func WithMaxMetricsPerRequest(maxMetricsPerRequest int) Option { 93 return func(bg *BroadcastGroup) { 94 bg.maxMetricsPerRequest = maxMetricsPerRequest 95 } 96 } 97 98 func WithTLDCache(enableTLDCache bool) Option { 99 return func(bg *BroadcastGroup) { 100 bg.tldCacheDisabled = !enableTLDCache 101 } 102 } 103 104 func WithTimeouts(timeouts types.Timeouts) Option { 105 return func(bg *BroadcastGroup) { 106 bg.timeout = timeouts 107 } 108 } 109 110 func WithDialer(dialer *net.Dialer) Option { 111 return func(bg *BroadcastGroup) { 112 bg.dialer = dialer 113 } 114 } 115 116 func WithSuccess(requireSuccessAll bool) Option { 117 return func(bg *BroadcastGroup) { 118 bg.requireSuccessAll = requireSuccessAll 119 } 120 } 121 122 func New(opts ...Option) (*BroadcastGroup, merry.Error) { 123 bg := &BroadcastGroup{ 124 limiter: limiter.NoopLimiter{}, 125 } 126 127 for _, opt := range opts { 128 opt(bg) 129 } 130 131 if bg.logger == nil { 132 logger := zapwriter.Logger("init") 133 logger.Fatal("failed to initialize backend") 134 } 135 136 bg.logger = bg.logger.With(zap.String("type", "broadcastGroup"), zap.String("groupName", bg.groupName)) 137 138 if len(bg.backends) == 0 { 139 return nil, types.ErrNoServersSpecified 140 } 141 142 if bg.concurrencyLimit != 0 { 143 bg.limiter = limiter.NewServerLimiter(bg.servers, bg.concurrencyLimit) 144 } 145 146 return bg, nil 147 } 148 149 func (bg *BroadcastGroup) Children() []types.BackendServer { 150 return bg.backends 151 } 152 153 func (bg *BroadcastGroup) SetDoMultipleRequestIfSplit(v bool) { 154 bg.doMultipleRequestsIfSplit = v 155 if v { 156 bg.fetcher = bg.doMultiFetch 157 } else { 158 bg.fetcher = bg.doSingleFetch 159 } 160 } 161 162 func NewBroadcastGroup(logger *zap.Logger, groupName string, doMultipleRequestsIfSplit bool, servers []types.BackendServer, expireDelaySec int32, concurrencyLimit, maxBatchSize int, timeouts types.Timeouts, tldCacheDisabled bool, requireSuccessAll bool) (*BroadcastGroup, merry.Error) { 163 return New( 164 WithLogger(logger), 165 WithGroupName(groupName), 166 WithSplitMultipleRequests(doMultipleRequestsIfSplit), 167 WithBackends(servers), 168 WithPathCache(expireDelaySec), 169 WithLimiter(concurrencyLimit), 170 WithMaxMetricsPerRequest(maxBatchSize), 171 WithTimeouts(timeouts), 172 WithTLDCache(!tldCacheDisabled), 173 WithSuccess(requireSuccessAll), 174 ) 175 } 176 177 func (bg BroadcastGroup) Name() string { 178 return bg.groupName 179 } 180 181 func (bg BroadcastGroup) Backends() []string { 182 return bg.servers 183 } 184 185 func (bg *BroadcastGroup) filterServersByTLD(requests []string, backends []types.BackendServer) []types.BackendServer { 186 // do not check TLDs if internal routing cache is disabled 187 if bg.tldCacheDisabled { 188 return backends 189 } 190 191 tldBackends := make(map[types.BackendServer]bool) 192 for _, request := range requests { 193 // TODO(Civil): Tags: improve logic 194 if strings.HasPrefix(request, "seriesByTag") { 195 return backends 196 } 197 idx := strings.Index(request, ".") 198 if idx > 0 { 199 request = request[:idx] 200 } 201 if cachedBackends, ok := bg.pathCache.Get(request); ok && len(backends) > 0 { 202 for _, cachedBackend := range cachedBackends { 203 tldBackends[cachedBackend] = true 204 } 205 } 206 } 207 208 var filteredBackends []types.BackendServer 209 for _, k := range backends { 210 if tldBackends[k] { 211 filteredBackends = append(filteredBackends, k) 212 } 213 } 214 215 if len(filteredBackends) == 0 { 216 return backends 217 } 218 219 return filteredBackends 220 } 221 222 func (bg BroadcastGroup) MaxMetricsPerRequest() int { 223 return bg.maxMetricsPerRequest 224 } 225 226 func (bg *BroadcastGroup) doMultiFetch(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) { 227 logger = logger.With(zap.Bool("multi_fetch", true)) 228 request, ok := reqs.(*protov3.MultiFetchRequest) 229 if !ok { 230 logger.Fatal("unhandled error in doMultiFetch", 231 zap.Stack("stack"), 232 zap.String("got_type", fmt.Sprintf("%T", reqs)), 233 zap.String("expected_type", fmt.Sprintf("%T", request)), 234 ) 235 } 236 237 requests, err := bg.splitRequest(ctx, request, backend) 238 if len(requests) == 0 && err != nil { 239 response := types.NewServerFetchResponse() 240 response.Server = backend.Name() 241 response.AddError(err) 242 resCh <- response 243 return 244 } 245 246 for _, req := range requests { 247 go func(req *protov3.MultiFetchRequest) { 248 logger = logger.With(zap.String("backend_name", backend.Name())) 249 logger.Debug("waiting for slot", 250 zap.Int("max_connections", bg.limiter.Capacity()), 251 ) 252 253 response := types.NewServerFetchResponse() 254 response.Server = backend.Name() 255 256 if err := bg.limiter.Enter(ctx, backend.Name()); err != nil { 257 logger.Debug("timeout waiting for a slot") 258 resCh <- response.NonFatalError(merry.Prepend(err, "timeout waiting for slot")) 259 return 260 } 261 262 logger.Debug("got slot") 263 defer bg.limiter.Leave(ctx, backend.Name()) 264 265 // uuid := util.GetUUID(ctx) 266 var err merry.Error 267 logger.Debug("sending request") 268 response.Response, response.Stats, err = backend.Fetch(ctx, req) 269 response.AddError(err) 270 if response.Response != nil && response.Stats != nil { 271 logger.Debug("got response", 272 zap.Int("metrics_in_response", len(response.Response.Metrics)), 273 zap.Int("errors_count", len(response.Err)), 274 zap.Uint64("timeouts_count", response.Stats.Timeouts), 275 zap.Uint64("render_requests_count", response.Stats.RenderRequests), 276 zap.Uint64("render_errors_count", response.Stats.RenderErrors), 277 zap.Uint64("render_timeouts_count", response.Stats.RenderTimeouts), 278 zap.Uint64("zipper_requests_count", response.Stats.ZipperRequests), 279 zap.Uint64("total_metric_count", response.Stats.TotalMetricsCount), 280 zap.Int("servers_count", len(response.Stats.Servers)), 281 zap.Int("failed_servers_count", len(response.Stats.FailedServers)), 282 ) 283 } else { 284 logger.Debug("got response", 285 zap.Bool("response_is_nil", response.Response == nil), 286 zap.Bool("stats_is_nil", response.Stats == nil), 287 zap.Any("err", err), 288 ) 289 } 290 291 resCh <- response 292 }(req) 293 } 294 295 } 296 297 func (bg *BroadcastGroup) doSingleFetch(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) { 298 logger = logger.With(zap.Bool("multi_fetch", false)) 299 request, ok := reqs.(*protov3.MultiFetchRequest) 300 if !ok { 301 logger.Fatal("unhandled error in doSingleFetch", 302 zap.Stack("stack"), 303 zap.String("got_type", fmt.Sprintf("%T", reqs)), 304 zap.String("expected_type", fmt.Sprintf("%T", request)), 305 ) 306 } 307 308 // TODO(Civil): migrate limiter to merry 309 requests, splitErr := bg.splitRequest(ctx, request, backend) 310 if len(requests) == 0 { 311 if splitErr != nil { 312 response := types.NewServerFetchResponse() 313 response.Server = backend.Name() 314 response.AddError(splitErr) 315 resCh <- response 316 return 317 } 318 } 319 320 logger = logger.With(zap.String("backend_name", backend.Name())) 321 logger.Debug("waiting for slot", 322 zap.Int("max_connections", bg.limiter.Capacity()), 323 ) 324 325 response := types.NewServerFetchResponse() 326 response.Server = backend.Name() 327 328 if err := bg.limiter.Enter(ctx, backend.Name()); err != nil { 329 logger.Debug("timeout waiting for a slot") 330 resCh <- response.NonFatalError(merry.Prepend(err, "timeout waiting for slot")) 331 return 332 } 333 334 logger.Debug("got slot") 335 defer bg.limiter.Leave(ctx, backend.Name()) 336 337 // uuid := util.GetUUID(ctx) 338 var err merry.Error 339 for _, req := range requests { 340 logger.Debug("sending request") 341 r := types.NewServerFetchResponse() 342 r.Response, r.Stats, err = backend.Fetch(ctx, req) 343 r.AddError(err) 344 if r.Stats != nil && r.Response != nil { 345 logger.Debug("got response", 346 zap.Int("metrics_in_response", len(r.Response.Metrics)), 347 zap.Int("errors_count", len(r.Err)), 348 zap.Uint64("timeouts_count", r.Stats.Timeouts), 349 zap.Uint64("render_requests_count", r.Stats.RenderRequests), 350 zap.Uint64("render_errors_count", r.Stats.RenderErrors), 351 zap.Uint64("render_timeouts_count", r.Stats.RenderTimeouts), 352 zap.Uint64("zipper_requests_count", r.Stats.ZipperRequests), 353 zap.Uint64("total_metric_count", r.Stats.TotalMetricsCount), 354 zap.Int("servers_count", len(r.Stats.Servers)), 355 zap.Int("failed_servers_count", len(r.Stats.FailedServers)), 356 ) 357 } else { 358 logger.Debug("got response", 359 zap.Bool("response_is_nil", r.Response == nil), 360 zap.Bool("stats_is_nil", r.Stats == nil), 361 zap.Any("err", err), 362 ) 363 } 364 _ = response.Merge(r) 365 } 366 logger.Debug("got response (after merge)", 367 zap.Int("metrics_in_response", len(response.Response.Metrics)), 368 zap.Int("errors_count", len(response.Err)), 369 zap.Uint64("timeouts_count", response.Stats.Timeouts), 370 zap.Uint64("render_requests_count", response.Stats.RenderRequests), 371 zap.Uint64("render_errors_count", response.Stats.RenderErrors), 372 zap.Uint64("render_timeouts_count", response.Stats.RenderTimeouts), 373 zap.Uint64("zipper_requests_count", response.Stats.ZipperRequests), 374 zap.Uint64("total_metric_count", response.Stats.TotalMetricsCount), 375 zap.Int("servers_count", len(response.Stats.Servers)), 376 zap.Int("failed_servers_count", len(response.Stats.FailedServers)), 377 ) 378 379 resCh <- response 380 } 381 382 func (bg *BroadcastGroup) splitRequest(ctx context.Context, request *protov3.MultiFetchRequest, backend types.BackendServer) ([]*protov3.MultiFetchRequest, merry.Error) { 383 if backend.MaxMetricsPerRequest() == 0 { 384 return []*protov3.MultiFetchRequest{request}, nil 385 } 386 387 var requests []*protov3.MultiFetchRequest 388 newRequest := &protov3.MultiFetchRequest{} 389 390 var err merry.Error 391 for _, metric := range request.Metrics { 392 if len(newRequest.Metrics) >= backend.MaxMetricsPerRequest() { 393 requests = append(requests, newRequest) 394 newRequest = &protov3.MultiFetchRequest{} 395 } 396 397 // TODO(Civil): Tags: improve logic 398 if strings.HasPrefix(metric.Name, "seriesByTag") { 399 newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{ 400 Name: metric.PathExpression, 401 StartTime: metric.StartTime, 402 StopTime: metric.StopTime, 403 PathExpression: metric.PathExpression, 404 FilterFunctions: metric.FilterFunctions, 405 }) 406 407 continue 408 } 409 410 // Do not send Find requests if we have neither globs in the request nor metric expansions 411 if !strings.ContainsAny(metric.Name, "*{") { 412 newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{ 413 Name: metric.Name, 414 StartTime: metric.StartTime, 415 StopTime: metric.StopTime, 416 PathExpression: metric.PathExpression, 417 FilterFunctions: metric.FilterFunctions, 418 }) 419 420 continue 421 } 422 423 f, _, e := backend.Find(ctx, &protov3.MultiGlobRequest{Metrics: []string{metric.Name}}) 424 if e != nil || f == nil || len(f.Metrics) == 0 { 425 if e == nil { 426 e = merry.Errorf("no result fetched") 427 if f == nil { 428 e = e.WithCause(types.ErrUnmarshalFailed) 429 } else { 430 e = e.WithCause(types.ErrNoMetricsFetched) 431 } 432 } 433 err = e 434 435 errStr := "" 436 if e.Cause() != nil { 437 errStr = e.Cause().Error() 438 } else { 439 // e != nil, but len(f.Metrics) == 0 or f == nil, then Cause could be nil 440 errStr = e.Error() 441 } 442 443 if ce := bg.logger.Check(zap.DebugLevel, "find request failed when resolving globs (verbose)"); ce != nil { 444 ce.Write( 445 zap.String("metric_name", metric.Name), 446 zap.String("error", errStr), 447 zap.Any("stack", e), 448 ) 449 } else { 450 bg.logger.Warn("find request failed when resolving globs", 451 zap.String("metric_name", metric.Name), 452 zap.String("error", errStr), 453 ) 454 } 455 456 if f == nil { 457 continue 458 } 459 } 460 461 for _, m := range f.Metrics { 462 for _, match := range m.Matches { 463 if !match.IsLeaf { 464 continue 465 } 466 newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{ 467 Name: match.Path, 468 StartTime: metric.StartTime, 469 StopTime: metric.StopTime, 470 PathExpression: metric.PathExpression, 471 FilterFunctions: metric.FilterFunctions, 472 }) 473 474 if len(newRequest.Metrics) >= backend.MaxMetricsPerRequest() { 475 requests = append(requests, newRequest) 476 newRequest = &protov3.MultiFetchRequest{} 477 } 478 } 479 } 480 } 481 482 if len(newRequest.Metrics) > 0 { 483 requests = append(requests, newRequest) 484 } 485 486 return requests, err 487 } 488 489 func (bg *BroadcastGroup) Fetch(ctx context.Context, request *protov3.MultiFetchRequest) (*protov3.MultiFetchResponse, *types.Stats, merry.Error) { 490 requestNames := make([]string, 0, len(request.Metrics)) 491 for i := range request.Metrics { 492 requestNames = append(requestNames, request.Metrics[i].Name) 493 } 494 logger := bg.logger.With(zap.String("type", "fetch"), zap.Strings("request", requestNames), zap.String("carbonapi_uuid", utilctx.GetUUID(ctx))) 495 logger.Debug("will try to fetch data") 496 497 backends := bg.filterServersByTLD(requestNames, bg.Children()) 498 499 result := types.NewServerFetchResponse() 500 501 ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Render) 502 defer cancel() 503 504 resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.fetcher) 505 506 result, ok := resultNew.Self().(*types.ServerFetchResponse) 507 if !ok { 508 logger.Fatal("unhandled error in Fetch", 509 zap.Stack("stack"), 510 zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())), 511 zap.String("expected_type", fmt.Sprintf("%T", result)), 512 ) 513 } 514 515 if len(result.Response.Metrics) == 0 || (bg.requireSuccessAll && len(result.Err) > 0) { 516 code, errors := helper.MergeHttpErrors(result.Err) 517 if len(errors) > 0 { 518 err := types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n")) 519 logger.Debug("errors while fetching data from backends", 520 zap.Int("httpCode", code), 521 zap.Strings("errors", errors), 522 ) 523 return nil, result.Stats, err 524 } 525 return nil, result.Stats, types.ErrNotFound.WithHTTPCode(404) 526 } 527 528 // Recalculate metrics start/step/stop parameters to avoid upstream misbehavior 529 for i, metric := range result.Response.Metrics { 530 result.Response.Metrics[i].StopTime = metric.StartTime + int64(len(metric.Values))*metric.StepTime 531 } 532 533 logger.Debug("got some fetch responses", 534 zap.Int("backends_count", len(backends)), 535 zap.Int("response_count", responseCount), 536 zap.Bool("have_errors", len(result.Err) != 0), 537 zap.Any("errors", result.Err), 538 zap.Int("metrics_in_response", len(result.Response.Metrics)), 539 ) 540 541 var err merry.Error 542 if len(result.Err) > 0 { 543 if bg.requireSuccessAll { 544 code, errors := helper.MergeHttpErrors(result.Err) 545 if len(errors) > 0 { 546 err := types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n")) 547 logger.Debug("errors while fetching data from backends", 548 zap.Int("httpCode", code), 549 zap.Strings("errors", errors), 550 ) 551 return nil, result.Stats, err 552 } 553 } else { 554 err = types.ErrNonFatalErrors 555 for _, e := range result.Err { 556 err = err.WithCause(e) 557 } 558 } 559 } 560 561 return result.Response, result.Stats, err 562 } 563 564 // Find request handling 565 func (bg *BroadcastGroup) doFind(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) { 566 request, ok := reqs.(*protov3.MultiGlobRequest) 567 if !ok { 568 logger.Fatal("unhandled error", 569 zap.Stack("stack"), 570 zap.String("got_type", fmt.Sprintf("%T", reqs)), 571 zap.String("expected_type", fmt.Sprintf("%T", request)), 572 ) 573 } 574 logger = logger.With( 575 zap.String("group_name", bg.groupName), 576 zap.String("backend_name", backend.Name()), 577 ) 578 logger.Debug("waiting for a slot") 579 580 r := types.NewServerFindResponse() 581 r.Server = backend.Name() 582 583 if err := bg.limiter.Enter(ctx, backend.Name()); err != nil { 584 logger.Debug("timeout waiting for a slot") 585 r.AddError(merry.Prepend(err, "timeout waiting for slot")) 586 resCh <- r 587 return 588 } 589 590 logger.Debug("got slot") 591 defer bg.limiter.Leave(ctx, backend.Name()) 592 593 var err merry.Error 594 r.Response, r.Stats, err = backend.Find(ctx, request) 595 r.AddError(err) 596 // TODO: Add a separate logger that would log full response 597 logger.Debug("fetched response", 598 zap.Int("response_size", r.Response.Size()), 599 ) 600 resCh <- r 601 } 602 603 func (bg *BroadcastGroup) Find(ctx context.Context, request *protov3.MultiGlobRequest) (*protov3.MultiGlobResponse, *types.Stats, merry.Error) { 604 logger := bg.logger.With(zap.String("type", "find"), zap.Strings("request", request.Metrics)) 605 606 backends := bg.Children() 607 608 logger.Debug("will do query with timeout", 609 zap.Any("backends", backends), 610 zap.Float64("timeout", bg.timeout.Find.Seconds()), 611 ) 612 613 ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Find) 614 defer cancel() 615 616 result := types.NewServerFindResponse() 617 result.Server = bg.Name() 618 result.Stats.ZipperRequests = uint64(len(backends)) 619 resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doFind) 620 621 result, ok := resultNew.Self().(*types.ServerFindResponse) 622 if !ok { 623 logger.Fatal("unhandled error in Find", 624 zap.Stack("stack"), 625 zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())), 626 zap.String("expected_type", fmt.Sprintf("%T", result)), 627 ) 628 } 629 630 var err merry.Error 631 if len(result.Response.Metrics) == 0 || (bg.requireSuccessAll && len(result.Err) > 0) { 632 code, errors := helper.MergeHttpErrors(result.Err) 633 if len(errors) > 0 { 634 err = types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n")) 635 logger.Debug("errors while fetching data from backends", 636 zap.Int("httpCode", code), 637 zap.Strings("errors", errors), 638 ) 639 return nil, result.Stats, err 640 } 641 } 642 643 logger.Debug("got some find responses", 644 zap.Int("backends_count", len(backends)), 645 zap.Int("response_count", responseCount), 646 zap.Bool("have_errors", len(result.Err) != 0), 647 zap.Any("errors", result.Err), 648 zap.Any("response", result.Response), 649 ) 650 651 if len(result.Response.Metrics) == 0 { 652 return &protov3.MultiGlobResponse{}, result.Stats, types.ErrNotFound.WithHTTPCode(404) 653 } 654 result.Stats.TotalMetricsCount = 0 655 for _, x := range result.Response.Metrics { 656 result.Stats.TotalMetricsCount += uint64(len(x.Matches)) 657 } 658 659 if result.Err != nil { 660 err = types.ErrNonFatalErrors 661 for _, e := range result.Err { 662 err = err.WithCause(e) 663 } 664 } 665 666 return result.Response, result.Stats, err 667 } 668 669 // Info request handling 670 func (bg *BroadcastGroup) doInfoRequest(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) { 671 logger = logger.With( 672 zap.String("group_name", bg.groupName), 673 zap.String("backend_name", backend.Name()), 674 ) 675 request, ok := reqs.(*protov3.MultiMetricsInfoRequest) 676 if !ok { 677 logger.Fatal("unhandled error", 678 zap.Stack("stack"), 679 zap.String("got_type", fmt.Sprintf("%T", reqs)), 680 zap.String("expected_type", fmt.Sprintf("%T", request)), 681 ) 682 } 683 r := &types.ServerInfoResponse{ 684 Server: backend.Name(), 685 } 686 687 if err := bg.limiter.Enter(ctx, backend.Name()); err != nil { 688 logger.Debug("timeout waiting for a slot") 689 r.AddError(merry.Prepend(err, "timeout waiting for slot")) 690 resCh <- r 691 return 692 } 693 defer bg.limiter.Leave(ctx, backend.Name()) 694 695 logger.Debug("got a slot") 696 var err merry.Error 697 r.Response, r.Stats, err = backend.Info(ctx, request) 698 r.AddError(err) 699 resCh <- r 700 } 701 702 func (bg *BroadcastGroup) Info(ctx context.Context, request *protov3.MultiMetricsInfoRequest) (*protov3.ZipperInfoResponse, *types.Stats, merry.Error) { 703 logger := bg.logger.With(zap.String("type", "info"), zap.Strings("request", request.Names)) 704 705 ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Render) 706 defer cancel() 707 backends := bg.Children() 708 result := types.NewServerInfoResponse() 709 result.Server = bg.Name() 710 result.Stats.ZipperRequests = uint64(len(backends)) 711 712 resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doInfoRequest) 713 714 result, ok := resultNew.Self().(*types.ServerInfoResponse) 715 if !ok { 716 logger.Fatal("unhandled error in Find", 717 zap.Stack("stack"), 718 zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())), 719 zap.String("expected_type", fmt.Sprintf("%T", result)), 720 ) 721 } 722 723 logger.Debug("got some responses", 724 zap.Int("backends_count", len(backends)), 725 zap.Int("response_count", responseCount), 726 zap.Bool("have_errors", len(result.Err) != 0), 727 ) 728 729 var err merry.Error 730 if result.Err != nil { 731 if bg.requireSuccessAll { 732 err = types.ErrFailedToFetch 733 } else { 734 err = types.ErrNonFatalErrors 735 } 736 for _, e := range result.Err { 737 err = err.WithCause(e) 738 } 739 } 740 741 return result.Response, result.Stats, err 742 } 743 744 func (bg *BroadcastGroup) List(ctx context.Context) (*protov3.ListMetricsResponse, *types.Stats, merry.Error) { 745 return nil, nil, types.ErrNotImplementedYet 746 } 747 func (bg *BroadcastGroup) Stats(ctx context.Context) (*protov3.MetricDetailsResponse, *types.Stats, merry.Error) { 748 return nil, nil, types.ErrNotImplementedYet 749 } 750 751 type tagQuery struct { 752 Query string 753 Limit int64 754 IsName bool 755 } 756 757 // Info request handling 758 func (bg *BroadcastGroup) doTagRequest(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) { 759 request, ok := reqs.(tagQuery) 760 logger = logger.With( 761 zap.String("group_name", bg.groupName), 762 zap.String("backend_name", backend.Name()), 763 ) 764 if !ok { 765 logger.Fatal("unhandled error", 766 zap.Stack("stack"), 767 zap.String("got_type", fmt.Sprintf("%T", reqs)), 768 zap.String("expected_type", fmt.Sprintf("%T", request)), 769 ) 770 } 771 r := &types.ServerTagResponse{ 772 Server: backend.Name(), 773 Response: []string{}, 774 } 775 776 logger.Debug("waiting for a slot") 777 778 if err := bg.limiter.Enter(ctx, backend.Name()); err != nil { 779 logger.Debug("timeout waiting for a slot") 780 r.AddError(merry.Prepend(err, "timeout waiting for slot")) 781 resCh <- r 782 return 783 } 784 defer bg.limiter.Leave(ctx, backend.Name()) 785 786 logger.Debug("got a slot") 787 var err merry.Error 788 if request.IsName { 789 r.Response, err = backend.TagNames(ctx, request.Query, request.Limit) 790 } else { 791 r.Response, err = backend.TagValues(ctx, request.Query, request.Limit) 792 } 793 794 if err != nil { 795 r.AddError(err) 796 } 797 798 if r.Response == nil { 799 r.Response = []string{} 800 } 801 resCh <- r 802 } 803 804 func (bg *BroadcastGroup) tagEverything(ctx context.Context, isTagName bool, query string, limit int64) ([]string, merry.Error) { 805 logger := bg.logger.With(zap.String("query", query)) 806 if isTagName { 807 logger = logger.With(zap.String("type", "tagName")) 808 } else { 809 logger = logger.With(zap.String("type", "tagValues")) 810 } 811 812 request := tagQuery{ 813 Query: query, 814 Limit: limit, 815 IsName: isTagName, 816 } 817 818 ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Find) 819 defer cancel() 820 821 backends := bg.Children() 822 result := types.NewServerTagResponse() 823 result.Server = bg.Name() 824 825 resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doTagRequest) 826 827 result, ok := resultNew.Self().(*types.ServerTagResponse) 828 if !ok { 829 logger.Fatal("unhandled error in Find", 830 zap.Stack("stack"), 831 zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())), 832 zap.String("expected_type", fmt.Sprintf("%T", result)), 833 ) 834 } 835 836 if limit != -1 && int64(len(result.Response)) > limit { 837 sort.Strings(result.Response) 838 result.Response = result.Response[:limit-1] 839 } 840 841 logger.Debug("got some responses", 842 zap.Int("backends_count", len(backends)), 843 zap.Int("response_count", responseCount), 844 zap.Bool("have_errors", len(result.Err) != 0), 845 ) 846 847 var err merry.Error 848 if result.Err != nil { 849 code, errors := helper.MergeHttpErrors(result.Err) 850 if len(errors) > 0 { 851 err = types.ErrNonFatalErrors.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n")) 852 } 853 } 854 855 return result.Response, err 856 } 857 858 func (bg *BroadcastGroup) TagNames(ctx context.Context, query string, limit int64) ([]string, merry.Error) { 859 return bg.tagEverything(ctx, true, query, limit) 860 } 861 862 func (bg *BroadcastGroup) TagValues(ctx context.Context, query string, limit int64) ([]string, merry.Error) { 863 return bg.tagEverything(ctx, false, query, limit) 864 } 865 866 type tldResponse struct { 867 server types.BackendServer 868 tlds []string 869 err merry.Error 870 } 871 872 func doProbe(ctx context.Context, backend types.BackendServer, resCh chan<- tldResponse) { 873 res, err := backend.ProbeTLDs(ctx) 874 875 resCh <- tldResponse{ 876 server: backend, 877 tlds: res, 878 err: err, 879 } 880 } 881 882 func (bg *BroadcastGroup) ProbeTLDs(ctx context.Context) ([]string, merry.Error) { 883 logger := bg.logger.With(zap.String("function", "prober")) 884 885 ctx, cancel := context.WithTimeout(ctx, bg.timeout.Find) 886 defer cancel() 887 888 backends := bg.Children() 889 resCh := make(chan tldResponse, len(backends)) 890 for _, backend := range backends { 891 go doProbe(ctx, backend, resCh) 892 } 893 894 responses := 0 895 var errs []merry.Error 896 answeredServers := make(map[string]struct{}) 897 cache := make(map[string][]types.BackendServer) 898 tldSet := make(map[string]struct{}) 899 900 GATHER: 901 for { 902 if responses == len(backends) { 903 break GATHER 904 } 905 906 select { 907 case r := <-resCh: 908 answeredServers[r.server.Name()] = struct{}{} 909 responses++ 910 if r.err != nil { 911 errs = append(errs, r.err) 912 continue 913 } 914 for _, tld := range r.tlds { 915 tldSet[tld] = struct{}{} 916 cache[tld] = append(cache[tld], r.server) 917 } 918 919 case <-ctx.Done(): 920 logger.Warn("timeout waiting for more responses", 921 zap.Strings("no_answers_from", types.NoAnswerBackends(backends, answeredServers)), 922 ) 923 errs = append(errs, types.ErrTimeoutExceeded) 924 break GATHER 925 } 926 } 927 928 var tlds []string 929 for tld := range tldSet { 930 tlds = append(tlds, tld) 931 } 932 933 for k, v := range cache { 934 bg.pathCache.Set(k, v) 935 } 936 937 var err merry.Error 938 if errs != nil { 939 err = types.ErrNonFatalErrors 940 for _, e := range errs { 941 err = err.WithCause(e) 942 } 943 } 944 945 return tlds, err 946 }