github.com/go-graphite/carbonapi@v0.17.0/zipper/broadcast/broadcast_group.go (about)

     1  package broadcast
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"net"
     7  	"sort"
     8  	"strings"
     9  
    10  	"github.com/ansel1/merry"
    11  	protov3 "github.com/go-graphite/protocol/carbonapi_v3_pb"
    12  	"github.com/lomik/zapwriter"
    13  
    14  	"github.com/go-graphite/carbonapi/limiter"
    15  	"github.com/go-graphite/carbonapi/pathcache"
    16  	utilctx "github.com/go-graphite/carbonapi/util/ctx"
    17  	"github.com/go-graphite/carbonapi/zipper/helper"
    18  	"github.com/go-graphite/carbonapi/zipper/types"
    19  
    20  	"go.uber.org/zap"
    21  )
    22  
    23  type BroadcastGroup struct {
    24  	limiter                   limiter.ServerLimiter
    25  	groupName                 string
    26  	timeout                   types.Timeouts
    27  	backends                  []types.BackendServer
    28  	servers                   []string
    29  	maxMetricsPerRequest      int
    30  	doMultipleRequestsIfSplit bool
    31  	tldCacheDisabled          bool
    32  	concurrencyLimit          int
    33  	requireSuccessAll         bool
    34  
    35  	fetcher   types.Fetcher
    36  	pathCache pathcache.PathCache
    37  	logger    *zap.Logger
    38  	dialer    *net.Dialer
    39  }
    40  
    41  type Option func(group *BroadcastGroup)
    42  
    43  func WithLogger(logger *zap.Logger) Option {
    44  	return func(bg *BroadcastGroup) {
    45  		bg.logger = logger
    46  	}
    47  }
    48  
    49  func WithGroupName(name string) Option {
    50  	return func(bg *BroadcastGroup) {
    51  		bg.groupName = name
    52  	}
    53  }
    54  
    55  func WithSplitMultipleRequests(multiRequests bool) Option {
    56  	if multiRequests {
    57  		return func(bg *BroadcastGroup) {
    58  			bg.doMultipleRequestsIfSplit = true
    59  			bg.fetcher = bg.doMultiFetch
    60  		}
    61  	}
    62  
    63  	return func(bg *BroadcastGroup) {
    64  		bg.doMultipleRequestsIfSplit = false
    65  		bg.fetcher = bg.doSingleFetch
    66  	}
    67  }
    68  
    69  func WithBackends(backends []types.BackendServer) Option {
    70  	return func(bg *BroadcastGroup) {
    71  		serverNames := make([]string, 0, len(backends))
    72  		for _, b := range backends {
    73  			serverNames = append(serverNames, b.Name())
    74  		}
    75  		bg.backends = backends
    76  		bg.servers = serverNames
    77  	}
    78  }
    79  
    80  func WithPathCache(expireDelaySec int32) Option {
    81  	return func(bg *BroadcastGroup) {
    82  		bg.pathCache = pathcache.NewPathCache(expireDelaySec)
    83  	}
    84  }
    85  
    86  func WithLimiter(concurrencyLimit int) Option {
    87  	return func(bg *BroadcastGroup) {
    88  		bg.concurrencyLimit = concurrencyLimit
    89  	}
    90  }
    91  
    92  func WithMaxMetricsPerRequest(maxMetricsPerRequest int) Option {
    93  	return func(bg *BroadcastGroup) {
    94  		bg.maxMetricsPerRequest = maxMetricsPerRequest
    95  	}
    96  }
    97  
    98  func WithTLDCache(enableTLDCache bool) Option {
    99  	return func(bg *BroadcastGroup) {
   100  		bg.tldCacheDisabled = !enableTLDCache
   101  	}
   102  }
   103  
   104  func WithTimeouts(timeouts types.Timeouts) Option {
   105  	return func(bg *BroadcastGroup) {
   106  		bg.timeout = timeouts
   107  	}
   108  }
   109  
   110  func WithDialer(dialer *net.Dialer) Option {
   111  	return func(bg *BroadcastGroup) {
   112  		bg.dialer = dialer
   113  	}
   114  }
   115  
   116  func WithSuccess(requireSuccessAll bool) Option {
   117  	return func(bg *BroadcastGroup) {
   118  		bg.requireSuccessAll = requireSuccessAll
   119  	}
   120  }
   121  
   122  func New(opts ...Option) (*BroadcastGroup, merry.Error) {
   123  	bg := &BroadcastGroup{
   124  		limiter: limiter.NoopLimiter{},
   125  	}
   126  
   127  	for _, opt := range opts {
   128  		opt(bg)
   129  	}
   130  
   131  	if bg.logger == nil {
   132  		logger := zapwriter.Logger("init")
   133  		logger.Fatal("failed to initialize backend")
   134  	}
   135  
   136  	bg.logger = bg.logger.With(zap.String("type", "broadcastGroup"), zap.String("groupName", bg.groupName))
   137  
   138  	if len(bg.backends) == 0 {
   139  		return nil, types.ErrNoServersSpecified
   140  	}
   141  
   142  	if bg.concurrencyLimit != 0 {
   143  		bg.limiter = limiter.NewServerLimiter(bg.servers, bg.concurrencyLimit)
   144  	}
   145  
   146  	return bg, nil
   147  }
   148  
   149  func (bg *BroadcastGroup) Children() []types.BackendServer {
   150  	return bg.backends
   151  }
   152  
   153  func (bg *BroadcastGroup) SetDoMultipleRequestIfSplit(v bool) {
   154  	bg.doMultipleRequestsIfSplit = v
   155  	if v {
   156  		bg.fetcher = bg.doMultiFetch
   157  	} else {
   158  		bg.fetcher = bg.doSingleFetch
   159  	}
   160  }
   161  
   162  func NewBroadcastGroup(logger *zap.Logger, groupName string, doMultipleRequestsIfSplit bool, servers []types.BackendServer, expireDelaySec int32, concurrencyLimit, maxBatchSize int, timeouts types.Timeouts, tldCacheDisabled bool, requireSuccessAll bool) (*BroadcastGroup, merry.Error) {
   163  	return New(
   164  		WithLogger(logger),
   165  		WithGroupName(groupName),
   166  		WithSplitMultipleRequests(doMultipleRequestsIfSplit),
   167  		WithBackends(servers),
   168  		WithPathCache(expireDelaySec),
   169  		WithLimiter(concurrencyLimit),
   170  		WithMaxMetricsPerRequest(maxBatchSize),
   171  		WithTimeouts(timeouts),
   172  		WithTLDCache(!tldCacheDisabled),
   173  		WithSuccess(requireSuccessAll),
   174  	)
   175  }
   176  
   177  func (bg BroadcastGroup) Name() string {
   178  	return bg.groupName
   179  }
   180  
   181  func (bg BroadcastGroup) Backends() []string {
   182  	return bg.servers
   183  }
   184  
   185  func (bg *BroadcastGroup) filterServersByTLD(requests []string, backends []types.BackendServer) []types.BackendServer {
   186  	// do not check TLDs if internal routing cache is disabled
   187  	if bg.tldCacheDisabled {
   188  		return backends
   189  	}
   190  
   191  	tldBackends := make(map[types.BackendServer]bool)
   192  	for _, request := range requests {
   193  		// TODO(Civil): Tags: improve logic
   194  		if strings.HasPrefix(request, "seriesByTag") {
   195  			return backends
   196  		}
   197  		idx := strings.Index(request, ".")
   198  		if idx > 0 {
   199  			request = request[:idx]
   200  		}
   201  		if cachedBackends, ok := bg.pathCache.Get(request); ok && len(backends) > 0 {
   202  			for _, cachedBackend := range cachedBackends {
   203  				tldBackends[cachedBackend] = true
   204  			}
   205  		}
   206  	}
   207  
   208  	var filteredBackends []types.BackendServer
   209  	for _, k := range backends {
   210  		if tldBackends[k] {
   211  			filteredBackends = append(filteredBackends, k)
   212  		}
   213  	}
   214  
   215  	if len(filteredBackends) == 0 {
   216  		return backends
   217  	}
   218  
   219  	return filteredBackends
   220  }
   221  
   222  func (bg BroadcastGroup) MaxMetricsPerRequest() int {
   223  	return bg.maxMetricsPerRequest
   224  }
   225  
   226  func (bg *BroadcastGroup) doMultiFetch(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) {
   227  	logger = logger.With(zap.Bool("multi_fetch", true))
   228  	request, ok := reqs.(*protov3.MultiFetchRequest)
   229  	if !ok {
   230  		logger.Fatal("unhandled error in doMultiFetch",
   231  			zap.Stack("stack"),
   232  			zap.String("got_type", fmt.Sprintf("%T", reqs)),
   233  			zap.String("expected_type", fmt.Sprintf("%T", request)),
   234  		)
   235  	}
   236  
   237  	requests, err := bg.splitRequest(ctx, request, backend)
   238  	if len(requests) == 0 && err != nil {
   239  		response := types.NewServerFetchResponse()
   240  		response.Server = backend.Name()
   241  		response.AddError(err)
   242  		resCh <- response
   243  		return
   244  	}
   245  
   246  	for _, req := range requests {
   247  		go func(req *protov3.MultiFetchRequest) {
   248  			logger = logger.With(zap.String("backend_name", backend.Name()))
   249  			logger.Debug("waiting for slot",
   250  				zap.Int("max_connections", bg.limiter.Capacity()),
   251  			)
   252  
   253  			response := types.NewServerFetchResponse()
   254  			response.Server = backend.Name()
   255  
   256  			if err := bg.limiter.Enter(ctx, backend.Name()); err != nil {
   257  				logger.Debug("timeout waiting for a slot")
   258  				resCh <- response.NonFatalError(merry.Prepend(err, "timeout waiting for slot"))
   259  				return
   260  			}
   261  
   262  			logger.Debug("got slot")
   263  			defer bg.limiter.Leave(ctx, backend.Name())
   264  
   265  			// uuid := util.GetUUID(ctx)
   266  			var err merry.Error
   267  			logger.Debug("sending request")
   268  			response.Response, response.Stats, err = backend.Fetch(ctx, req)
   269  			response.AddError(err)
   270  			if response.Response != nil && response.Stats != nil {
   271  				logger.Debug("got response",
   272  					zap.Int("metrics_in_response", len(response.Response.Metrics)),
   273  					zap.Int("errors_count", len(response.Err)),
   274  					zap.Uint64("timeouts_count", response.Stats.Timeouts),
   275  					zap.Uint64("render_requests_count", response.Stats.RenderRequests),
   276  					zap.Uint64("render_errors_count", response.Stats.RenderErrors),
   277  					zap.Uint64("render_timeouts_count", response.Stats.RenderTimeouts),
   278  					zap.Uint64("zipper_requests_count", response.Stats.ZipperRequests),
   279  					zap.Uint64("total_metric_count", response.Stats.TotalMetricsCount),
   280  					zap.Int("servers_count", len(response.Stats.Servers)),
   281  					zap.Int("failed_servers_count", len(response.Stats.FailedServers)),
   282  				)
   283  			} else {
   284  				logger.Debug("got response",
   285  					zap.Bool("response_is_nil", response.Response == nil),
   286  					zap.Bool("stats_is_nil", response.Stats == nil),
   287  					zap.Any("err", err),
   288  				)
   289  			}
   290  
   291  			resCh <- response
   292  		}(req)
   293  	}
   294  
   295  }
   296  
   297  func (bg *BroadcastGroup) doSingleFetch(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) {
   298  	logger = logger.With(zap.Bool("multi_fetch", false))
   299  	request, ok := reqs.(*protov3.MultiFetchRequest)
   300  	if !ok {
   301  		logger.Fatal("unhandled error in doSingleFetch",
   302  			zap.Stack("stack"),
   303  			zap.String("got_type", fmt.Sprintf("%T", reqs)),
   304  			zap.String("expected_type", fmt.Sprintf("%T", request)),
   305  		)
   306  	}
   307  
   308  	// TODO(Civil): migrate limiter to merry
   309  	requests, splitErr := bg.splitRequest(ctx, request, backend)
   310  	if len(requests) == 0 {
   311  		if splitErr != nil {
   312  			response := types.NewServerFetchResponse()
   313  			response.Server = backend.Name()
   314  			response.AddError(splitErr)
   315  			resCh <- response
   316  			return
   317  		}
   318  	}
   319  
   320  	logger = logger.With(zap.String("backend_name", backend.Name()))
   321  	logger.Debug("waiting for slot",
   322  		zap.Int("max_connections", bg.limiter.Capacity()),
   323  	)
   324  
   325  	response := types.NewServerFetchResponse()
   326  	response.Server = backend.Name()
   327  
   328  	if err := bg.limiter.Enter(ctx, backend.Name()); err != nil {
   329  		logger.Debug("timeout waiting for a slot")
   330  		resCh <- response.NonFatalError(merry.Prepend(err, "timeout waiting for slot"))
   331  		return
   332  	}
   333  
   334  	logger.Debug("got slot")
   335  	defer bg.limiter.Leave(ctx, backend.Name())
   336  
   337  	// uuid := util.GetUUID(ctx)
   338  	var err merry.Error
   339  	for _, req := range requests {
   340  		logger.Debug("sending request")
   341  		r := types.NewServerFetchResponse()
   342  		r.Response, r.Stats, err = backend.Fetch(ctx, req)
   343  		r.AddError(err)
   344  		if r.Stats != nil && r.Response != nil {
   345  			logger.Debug("got response",
   346  				zap.Int("metrics_in_response", len(r.Response.Metrics)),
   347  				zap.Int("errors_count", len(r.Err)),
   348  				zap.Uint64("timeouts_count", r.Stats.Timeouts),
   349  				zap.Uint64("render_requests_count", r.Stats.RenderRequests),
   350  				zap.Uint64("render_errors_count", r.Stats.RenderErrors),
   351  				zap.Uint64("render_timeouts_count", r.Stats.RenderTimeouts),
   352  				zap.Uint64("zipper_requests_count", r.Stats.ZipperRequests),
   353  				zap.Uint64("total_metric_count", r.Stats.TotalMetricsCount),
   354  				zap.Int("servers_count", len(r.Stats.Servers)),
   355  				zap.Int("failed_servers_count", len(r.Stats.FailedServers)),
   356  			)
   357  		} else {
   358  			logger.Debug("got response",
   359  				zap.Bool("response_is_nil", r.Response == nil),
   360  				zap.Bool("stats_is_nil", r.Stats == nil),
   361  				zap.Any("err", err),
   362  			)
   363  		}
   364  		_ = response.Merge(r)
   365  	}
   366  	logger.Debug("got response (after merge)",
   367  		zap.Int("metrics_in_response", len(response.Response.Metrics)),
   368  		zap.Int("errors_count", len(response.Err)),
   369  		zap.Uint64("timeouts_count", response.Stats.Timeouts),
   370  		zap.Uint64("render_requests_count", response.Stats.RenderRequests),
   371  		zap.Uint64("render_errors_count", response.Stats.RenderErrors),
   372  		zap.Uint64("render_timeouts_count", response.Stats.RenderTimeouts),
   373  		zap.Uint64("zipper_requests_count", response.Stats.ZipperRequests),
   374  		zap.Uint64("total_metric_count", response.Stats.TotalMetricsCount),
   375  		zap.Int("servers_count", len(response.Stats.Servers)),
   376  		zap.Int("failed_servers_count", len(response.Stats.FailedServers)),
   377  	)
   378  
   379  	resCh <- response
   380  }
   381  
   382  func (bg *BroadcastGroup) splitRequest(ctx context.Context, request *protov3.MultiFetchRequest, backend types.BackendServer) ([]*protov3.MultiFetchRequest, merry.Error) {
   383  	if backend.MaxMetricsPerRequest() == 0 {
   384  		return []*protov3.MultiFetchRequest{request}, nil
   385  	}
   386  
   387  	var requests []*protov3.MultiFetchRequest
   388  	newRequest := &protov3.MultiFetchRequest{}
   389  
   390  	var err merry.Error
   391  	for _, metric := range request.Metrics {
   392  		if len(newRequest.Metrics) >= backend.MaxMetricsPerRequest() {
   393  			requests = append(requests, newRequest)
   394  			newRequest = &protov3.MultiFetchRequest{}
   395  		}
   396  
   397  		// TODO(Civil): Tags: improve logic
   398  		if strings.HasPrefix(metric.Name, "seriesByTag") {
   399  			newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{
   400  				Name:            metric.PathExpression,
   401  				StartTime:       metric.StartTime,
   402  				StopTime:        metric.StopTime,
   403  				PathExpression:  metric.PathExpression,
   404  				FilterFunctions: metric.FilterFunctions,
   405  			})
   406  
   407  			continue
   408  		}
   409  
   410  		// Do not send Find requests if we have neither globs in the request nor metric expansions
   411  		if !strings.ContainsAny(metric.Name, "*{") {
   412  			newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{
   413  				Name:            metric.Name,
   414  				StartTime:       metric.StartTime,
   415  				StopTime:        metric.StopTime,
   416  				PathExpression:  metric.PathExpression,
   417  				FilterFunctions: metric.FilterFunctions,
   418  			})
   419  
   420  			continue
   421  		}
   422  
   423  		f, _, e := backend.Find(ctx, &protov3.MultiGlobRequest{Metrics: []string{metric.Name}})
   424  		if e != nil || f == nil || len(f.Metrics) == 0 {
   425  			if e == nil {
   426  				e = merry.Errorf("no result fetched")
   427  				if f == nil {
   428  					e = e.WithCause(types.ErrUnmarshalFailed)
   429  				} else {
   430  					e = e.WithCause(types.ErrNoMetricsFetched)
   431  				}
   432  			}
   433  			err = e
   434  
   435  			errStr := ""
   436  			if e.Cause() != nil {
   437  				errStr = e.Cause().Error()
   438  			} else {
   439  				// e != nil, but len(f.Metrics) == 0 or f == nil, then Cause could be nil
   440  				errStr = e.Error()
   441  			}
   442  
   443  			if ce := bg.logger.Check(zap.DebugLevel, "find request failed when resolving globs (verbose)"); ce != nil {
   444  				ce.Write(
   445  					zap.String("metric_name", metric.Name),
   446  					zap.String("error", errStr),
   447  					zap.Any("stack", e),
   448  				)
   449  			} else {
   450  				bg.logger.Warn("find request failed when resolving globs",
   451  					zap.String("metric_name", metric.Name),
   452  					zap.String("error", errStr),
   453  				)
   454  			}
   455  
   456  			if f == nil {
   457  				continue
   458  			}
   459  		}
   460  
   461  		for _, m := range f.Metrics {
   462  			for _, match := range m.Matches {
   463  				if !match.IsLeaf {
   464  					continue
   465  				}
   466  				newRequest.Metrics = append(newRequest.Metrics, protov3.FetchRequest{
   467  					Name:            match.Path,
   468  					StartTime:       metric.StartTime,
   469  					StopTime:        metric.StopTime,
   470  					PathExpression:  metric.PathExpression,
   471  					FilterFunctions: metric.FilterFunctions,
   472  				})
   473  
   474  				if len(newRequest.Metrics) >= backend.MaxMetricsPerRequest() {
   475  					requests = append(requests, newRequest)
   476  					newRequest = &protov3.MultiFetchRequest{}
   477  				}
   478  			}
   479  		}
   480  	}
   481  
   482  	if len(newRequest.Metrics) > 0 {
   483  		requests = append(requests, newRequest)
   484  	}
   485  
   486  	return requests, err
   487  }
   488  
   489  func (bg *BroadcastGroup) Fetch(ctx context.Context, request *protov3.MultiFetchRequest) (*protov3.MultiFetchResponse, *types.Stats, merry.Error) {
   490  	requestNames := make([]string, 0, len(request.Metrics))
   491  	for i := range request.Metrics {
   492  		requestNames = append(requestNames, request.Metrics[i].Name)
   493  	}
   494  	logger := bg.logger.With(zap.String("type", "fetch"), zap.Strings("request", requestNames), zap.String("carbonapi_uuid", utilctx.GetUUID(ctx)))
   495  	logger.Debug("will try to fetch data")
   496  
   497  	backends := bg.filterServersByTLD(requestNames, bg.Children())
   498  
   499  	result := types.NewServerFetchResponse()
   500  
   501  	ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Render)
   502  	defer cancel()
   503  
   504  	resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.fetcher)
   505  
   506  	result, ok := resultNew.Self().(*types.ServerFetchResponse)
   507  	if !ok {
   508  		logger.Fatal("unhandled error in Fetch",
   509  			zap.Stack("stack"),
   510  			zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())),
   511  			zap.String("expected_type", fmt.Sprintf("%T", result)),
   512  		)
   513  	}
   514  
   515  	if len(result.Response.Metrics) == 0 || (bg.requireSuccessAll && len(result.Err) > 0) {
   516  		code, errors := helper.MergeHttpErrors(result.Err)
   517  		if len(errors) > 0 {
   518  			err := types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n"))
   519  			logger.Debug("errors while fetching data from backends",
   520  				zap.Int("httpCode", code),
   521  				zap.Strings("errors", errors),
   522  			)
   523  			return nil, result.Stats, err
   524  		}
   525  		return nil, result.Stats, types.ErrNotFound.WithHTTPCode(404)
   526  	}
   527  
   528  	// Recalculate metrics start/step/stop parameters to avoid upstream misbehavior
   529  	for i, metric := range result.Response.Metrics {
   530  		result.Response.Metrics[i].StopTime = metric.StartTime + int64(len(metric.Values))*metric.StepTime
   531  	}
   532  
   533  	logger.Debug("got some fetch responses",
   534  		zap.Int("backends_count", len(backends)),
   535  		zap.Int("response_count", responseCount),
   536  		zap.Bool("have_errors", len(result.Err) != 0),
   537  		zap.Any("errors", result.Err),
   538  		zap.Int("metrics_in_response", len(result.Response.Metrics)),
   539  	)
   540  
   541  	var err merry.Error
   542  	if len(result.Err) > 0 {
   543  		if bg.requireSuccessAll {
   544  			code, errors := helper.MergeHttpErrors(result.Err)
   545  			if len(errors) > 0 {
   546  				err := types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n"))
   547  				logger.Debug("errors while fetching data from backends",
   548  					zap.Int("httpCode", code),
   549  					zap.Strings("errors", errors),
   550  				)
   551  				return nil, result.Stats, err
   552  			}
   553  		} else {
   554  			err = types.ErrNonFatalErrors
   555  			for _, e := range result.Err {
   556  				err = err.WithCause(e)
   557  			}
   558  		}
   559  	}
   560  
   561  	return result.Response, result.Stats, err
   562  }
   563  
   564  // Find request handling
   565  func (bg *BroadcastGroup) doFind(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) {
   566  	request, ok := reqs.(*protov3.MultiGlobRequest)
   567  	if !ok {
   568  		logger.Fatal("unhandled error",
   569  			zap.Stack("stack"),
   570  			zap.String("got_type", fmt.Sprintf("%T", reqs)),
   571  			zap.String("expected_type", fmt.Sprintf("%T", request)),
   572  		)
   573  	}
   574  	logger = logger.With(
   575  		zap.String("group_name", bg.groupName),
   576  		zap.String("backend_name", backend.Name()),
   577  	)
   578  	logger.Debug("waiting for a slot")
   579  
   580  	r := types.NewServerFindResponse()
   581  	r.Server = backend.Name()
   582  
   583  	if err := bg.limiter.Enter(ctx, backend.Name()); err != nil {
   584  		logger.Debug("timeout waiting for a slot")
   585  		r.AddError(merry.Prepend(err, "timeout waiting for slot"))
   586  		resCh <- r
   587  		return
   588  	}
   589  
   590  	logger.Debug("got slot")
   591  	defer bg.limiter.Leave(ctx, backend.Name())
   592  
   593  	var err merry.Error
   594  	r.Response, r.Stats, err = backend.Find(ctx, request)
   595  	r.AddError(err)
   596  	// TODO: Add a separate logger that would log full response
   597  	logger.Debug("fetched response",
   598  		zap.Int("response_size", r.Response.Size()),
   599  	)
   600  	resCh <- r
   601  }
   602  
   603  func (bg *BroadcastGroup) Find(ctx context.Context, request *protov3.MultiGlobRequest) (*protov3.MultiGlobResponse, *types.Stats, merry.Error) {
   604  	logger := bg.logger.With(zap.String("type", "find"), zap.Strings("request", request.Metrics))
   605  
   606  	backends := bg.Children()
   607  
   608  	logger.Debug("will do query with timeout",
   609  		zap.Any("backends", backends),
   610  		zap.Float64("timeout", bg.timeout.Find.Seconds()),
   611  	)
   612  
   613  	ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Find)
   614  	defer cancel()
   615  
   616  	result := types.NewServerFindResponse()
   617  	result.Server = bg.Name()
   618  	result.Stats.ZipperRequests = uint64(len(backends))
   619  	resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doFind)
   620  
   621  	result, ok := resultNew.Self().(*types.ServerFindResponse)
   622  	if !ok {
   623  		logger.Fatal("unhandled error in Find",
   624  			zap.Stack("stack"),
   625  			zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())),
   626  			zap.String("expected_type", fmt.Sprintf("%T", result)),
   627  		)
   628  	}
   629  
   630  	var err merry.Error
   631  	if len(result.Response.Metrics) == 0 || (bg.requireSuccessAll && len(result.Err) > 0) {
   632  		code, errors := helper.MergeHttpErrors(result.Err)
   633  		if len(errors) > 0 {
   634  			err = types.ErrFailedToFetch.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n"))
   635  			logger.Debug("errors while fetching data from backends",
   636  				zap.Int("httpCode", code),
   637  				zap.Strings("errors", errors),
   638  			)
   639  			return nil, result.Stats, err
   640  		}
   641  	}
   642  
   643  	logger.Debug("got some find responses",
   644  		zap.Int("backends_count", len(backends)),
   645  		zap.Int("response_count", responseCount),
   646  		zap.Bool("have_errors", len(result.Err) != 0),
   647  		zap.Any("errors", result.Err),
   648  		zap.Any("response", result.Response),
   649  	)
   650  
   651  	if len(result.Response.Metrics) == 0 {
   652  		return &protov3.MultiGlobResponse{}, result.Stats, types.ErrNotFound.WithHTTPCode(404)
   653  	}
   654  	result.Stats.TotalMetricsCount = 0
   655  	for _, x := range result.Response.Metrics {
   656  		result.Stats.TotalMetricsCount += uint64(len(x.Matches))
   657  	}
   658  
   659  	if result.Err != nil {
   660  		err = types.ErrNonFatalErrors
   661  		for _, e := range result.Err {
   662  			err = err.WithCause(e)
   663  		}
   664  	}
   665  
   666  	return result.Response, result.Stats, err
   667  }
   668  
   669  // Info request handling
   670  func (bg *BroadcastGroup) doInfoRequest(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) {
   671  	logger = logger.With(
   672  		zap.String("group_name", bg.groupName),
   673  		zap.String("backend_name", backend.Name()),
   674  	)
   675  	request, ok := reqs.(*protov3.MultiMetricsInfoRequest)
   676  	if !ok {
   677  		logger.Fatal("unhandled error",
   678  			zap.Stack("stack"),
   679  			zap.String("got_type", fmt.Sprintf("%T", reqs)),
   680  			zap.String("expected_type", fmt.Sprintf("%T", request)),
   681  		)
   682  	}
   683  	r := &types.ServerInfoResponse{
   684  		Server: backend.Name(),
   685  	}
   686  
   687  	if err := bg.limiter.Enter(ctx, backend.Name()); err != nil {
   688  		logger.Debug("timeout waiting for a slot")
   689  		r.AddError(merry.Prepend(err, "timeout waiting for slot"))
   690  		resCh <- r
   691  		return
   692  	}
   693  	defer bg.limiter.Leave(ctx, backend.Name())
   694  
   695  	logger.Debug("got a slot")
   696  	var err merry.Error
   697  	r.Response, r.Stats, err = backend.Info(ctx, request)
   698  	r.AddError(err)
   699  	resCh <- r
   700  }
   701  
   702  func (bg *BroadcastGroup) Info(ctx context.Context, request *protov3.MultiMetricsInfoRequest) (*protov3.ZipperInfoResponse, *types.Stats, merry.Error) {
   703  	logger := bg.logger.With(zap.String("type", "info"), zap.Strings("request", request.Names))
   704  
   705  	ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Render)
   706  	defer cancel()
   707  	backends := bg.Children()
   708  	result := types.NewServerInfoResponse()
   709  	result.Server = bg.Name()
   710  	result.Stats.ZipperRequests = uint64(len(backends))
   711  
   712  	resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doInfoRequest)
   713  
   714  	result, ok := resultNew.Self().(*types.ServerInfoResponse)
   715  	if !ok {
   716  		logger.Fatal("unhandled error in Find",
   717  			zap.Stack("stack"),
   718  			zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())),
   719  			zap.String("expected_type", fmt.Sprintf("%T", result)),
   720  		)
   721  	}
   722  
   723  	logger.Debug("got some responses",
   724  		zap.Int("backends_count", len(backends)),
   725  		zap.Int("response_count", responseCount),
   726  		zap.Bool("have_errors", len(result.Err) != 0),
   727  	)
   728  
   729  	var err merry.Error
   730  	if result.Err != nil {
   731  		if bg.requireSuccessAll {
   732  			err = types.ErrFailedToFetch
   733  		} else {
   734  			err = types.ErrNonFatalErrors
   735  		}
   736  		for _, e := range result.Err {
   737  			err = err.WithCause(e)
   738  		}
   739  	}
   740  
   741  	return result.Response, result.Stats, err
   742  }
   743  
   744  func (bg *BroadcastGroup) List(ctx context.Context) (*protov3.ListMetricsResponse, *types.Stats, merry.Error) {
   745  	return nil, nil, types.ErrNotImplementedYet
   746  }
   747  func (bg *BroadcastGroup) Stats(ctx context.Context) (*protov3.MetricDetailsResponse, *types.Stats, merry.Error) {
   748  	return nil, nil, types.ErrNotImplementedYet
   749  }
   750  
   751  type tagQuery struct {
   752  	Query  string
   753  	Limit  int64
   754  	IsName bool
   755  }
   756  
   757  // Info request handling
   758  func (bg *BroadcastGroup) doTagRequest(ctx context.Context, logger *zap.Logger, backend types.BackendServer, reqs interface{}, resCh chan types.ServerFetcherResponse) {
   759  	request, ok := reqs.(tagQuery)
   760  	logger = logger.With(
   761  		zap.String("group_name", bg.groupName),
   762  		zap.String("backend_name", backend.Name()),
   763  	)
   764  	if !ok {
   765  		logger.Fatal("unhandled error",
   766  			zap.Stack("stack"),
   767  			zap.String("got_type", fmt.Sprintf("%T", reqs)),
   768  			zap.String("expected_type", fmt.Sprintf("%T", request)),
   769  		)
   770  	}
   771  	r := &types.ServerTagResponse{
   772  		Server:   backend.Name(),
   773  		Response: []string{},
   774  	}
   775  
   776  	logger.Debug("waiting for a slot")
   777  
   778  	if err := bg.limiter.Enter(ctx, backend.Name()); err != nil {
   779  		logger.Debug("timeout waiting for a slot")
   780  		r.AddError(merry.Prepend(err, "timeout waiting for slot"))
   781  		resCh <- r
   782  		return
   783  	}
   784  	defer bg.limiter.Leave(ctx, backend.Name())
   785  
   786  	logger.Debug("got a slot")
   787  	var err merry.Error
   788  	if request.IsName {
   789  		r.Response, err = backend.TagNames(ctx, request.Query, request.Limit)
   790  	} else {
   791  		r.Response, err = backend.TagValues(ctx, request.Query, request.Limit)
   792  	}
   793  
   794  	if err != nil {
   795  		r.AddError(err)
   796  	}
   797  
   798  	if r.Response == nil {
   799  		r.Response = []string{}
   800  	}
   801  	resCh <- r
   802  }
   803  
   804  func (bg *BroadcastGroup) tagEverything(ctx context.Context, isTagName bool, query string, limit int64) ([]string, merry.Error) {
   805  	logger := bg.logger.With(zap.String("query", query))
   806  	if isTagName {
   807  		logger = logger.With(zap.String("type", "tagName"))
   808  	} else {
   809  		logger = logger.With(zap.String("type", "tagValues"))
   810  	}
   811  
   812  	request := tagQuery{
   813  		Query:  query,
   814  		Limit:  limit,
   815  		IsName: isTagName,
   816  	}
   817  
   818  	ctxNew, cancel := context.WithTimeout(ctx, bg.timeout.Find)
   819  	defer cancel()
   820  
   821  	backends := bg.Children()
   822  	result := types.NewServerTagResponse()
   823  	result.Server = bg.Name()
   824  
   825  	resultNew, responseCount := types.DoRequest(ctxNew, logger, backends, result, request, bg.doTagRequest)
   826  
   827  	result, ok := resultNew.Self().(*types.ServerTagResponse)
   828  	if !ok {
   829  		logger.Fatal("unhandled error in Find",
   830  			zap.Stack("stack"),
   831  			zap.String("got_type", fmt.Sprintf("%T", resultNew.Self())),
   832  			zap.String("expected_type", fmt.Sprintf("%T", result)),
   833  		)
   834  	}
   835  
   836  	if limit != -1 && int64(len(result.Response)) > limit {
   837  		sort.Strings(result.Response)
   838  		result.Response = result.Response[:limit-1]
   839  	}
   840  
   841  	logger.Debug("got some responses",
   842  		zap.Int("backends_count", len(backends)),
   843  		zap.Int("response_count", responseCount),
   844  		zap.Bool("have_errors", len(result.Err) != 0),
   845  	)
   846  
   847  	var err merry.Error
   848  	if result.Err != nil {
   849  		code, errors := helper.MergeHttpErrors(result.Err)
   850  		if len(errors) > 0 {
   851  			err = types.ErrNonFatalErrors.WithHTTPCode(code).WithMessage(strings.Join(errors, "\n"))
   852  		}
   853  	}
   854  
   855  	return result.Response, err
   856  }
   857  
   858  func (bg *BroadcastGroup) TagNames(ctx context.Context, query string, limit int64) ([]string, merry.Error) {
   859  	return bg.tagEverything(ctx, true, query, limit)
   860  }
   861  
   862  func (bg *BroadcastGroup) TagValues(ctx context.Context, query string, limit int64) ([]string, merry.Error) {
   863  	return bg.tagEverything(ctx, false, query, limit)
   864  }
   865  
   866  type tldResponse struct {
   867  	server types.BackendServer
   868  	tlds   []string
   869  	err    merry.Error
   870  }
   871  
   872  func doProbe(ctx context.Context, backend types.BackendServer, resCh chan<- tldResponse) {
   873  	res, err := backend.ProbeTLDs(ctx)
   874  
   875  	resCh <- tldResponse{
   876  		server: backend,
   877  		tlds:   res,
   878  		err:    err,
   879  	}
   880  }
   881  
   882  func (bg *BroadcastGroup) ProbeTLDs(ctx context.Context) ([]string, merry.Error) {
   883  	logger := bg.logger.With(zap.String("function", "prober"))
   884  
   885  	ctx, cancel := context.WithTimeout(ctx, bg.timeout.Find)
   886  	defer cancel()
   887  
   888  	backends := bg.Children()
   889  	resCh := make(chan tldResponse, len(backends))
   890  	for _, backend := range backends {
   891  		go doProbe(ctx, backend, resCh)
   892  	}
   893  
   894  	responses := 0
   895  	var errs []merry.Error
   896  	answeredServers := make(map[string]struct{})
   897  	cache := make(map[string][]types.BackendServer)
   898  	tldSet := make(map[string]struct{})
   899  
   900  GATHER:
   901  	for {
   902  		if responses == len(backends) {
   903  			break GATHER
   904  		}
   905  
   906  		select {
   907  		case r := <-resCh:
   908  			answeredServers[r.server.Name()] = struct{}{}
   909  			responses++
   910  			if r.err != nil {
   911  				errs = append(errs, r.err)
   912  				continue
   913  			}
   914  			for _, tld := range r.tlds {
   915  				tldSet[tld] = struct{}{}
   916  				cache[tld] = append(cache[tld], r.server)
   917  			}
   918  
   919  		case <-ctx.Done():
   920  			logger.Warn("timeout waiting for more responses",
   921  				zap.Strings("no_answers_from", types.NoAnswerBackends(backends, answeredServers)),
   922  			)
   923  			errs = append(errs, types.ErrTimeoutExceeded)
   924  			break GATHER
   925  		}
   926  	}
   927  
   928  	var tlds []string
   929  	for tld := range tldSet {
   930  		tlds = append(tlds, tld)
   931  	}
   932  
   933  	for k, v := range cache {
   934  		bg.pathCache.Set(k, v)
   935  	}
   936  
   937  	var err merry.Error
   938  	if errs != nil {
   939  		err = types.ErrNonFatalErrors
   940  		for _, e := range errs {
   941  			err = err.WithCause(e)
   942  		}
   943  	}
   944  
   945  	return tlds, err
   946  }