github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/ingester/limiter.go (about)

     1  package ingester
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  
     7  	"github.com/pkg/errors"
     8  
     9  	"github.com/cortexproject/cortex/pkg/util"
    10  	util_math "github.com/cortexproject/cortex/pkg/util/math"
    11  	"github.com/cortexproject/cortex/pkg/util/validation"
    12  )
    13  
// Sentinel errors returned by the Assert* methods below. They intentionally
// carry no tenant-specific numbers; FormatError enriches them with the actual
// configured limits before they are surfaced to clients.
var (
	errMaxSeriesPerMetricLimitExceeded   = errors.New("per-metric series limit exceeded")
	errMaxMetadataPerMetricLimitExceeded = errors.New("per-metric metadata limit exceeded")
	errMaxSeriesPerUserLimitExceeded     = errors.New("per-user series limit exceeded")
	errMaxMetadataPerUserLimitExceeded   = errors.New("per-user metric metadata limit exceeded")
)
    20  
// RingCount is the interface exposed by a ring implementation which allows
// to count members
type RingCount interface {
	// HealthyInstancesCount returns the number of currently healthy instances
	// in the ring.
	HealthyInstancesCount() int
	// ZonesCount returns the number of distinct availability zones covered by
	// the ring members.
	ZonesCount() int
}
    27  
// Limiter implements primitives to get the maximum number of series
// an ingester can handle for a specific tenant
type Limiter struct {
	limits                 *validation.Overrides // per-tenant limit configuration
	ring                   RingCount             // used to count healthy ingesters / zones when converting global limits to local ones
	replicationFactor      int                   // number of ingesters each series is replicated to
	shuffleShardingEnabled bool                  // true when the configured sharding strategy is shuffle sharding
	shardByAllLabels       bool                  // true when series are sharded across ingesters by all labels
	zoneAwarenessEnabled   bool                  // true when replicas are spread across availability zones
}
    38  
    39  // NewLimiter makes a new in-memory series limiter
    40  func NewLimiter(
    41  	limits *validation.Overrides,
    42  	ring RingCount,
    43  	shardingStrategy string,
    44  	shardByAllLabels bool,
    45  	replicationFactor int,
    46  	zoneAwarenessEnabled bool,
    47  ) *Limiter {
    48  	return &Limiter{
    49  		limits:                 limits,
    50  		ring:                   ring,
    51  		replicationFactor:      replicationFactor,
    52  		shuffleShardingEnabled: shardingStrategy == util.ShardingStrategyShuffle,
    53  		shardByAllLabels:       shardByAllLabels,
    54  		zoneAwarenessEnabled:   zoneAwarenessEnabled,
    55  	}
    56  }
    57  
    58  // AssertMaxSeriesPerMetric limit has not been reached compared to the current
    59  // number of series in input and returns an error if so.
    60  func (l *Limiter) AssertMaxSeriesPerMetric(userID string, series int) error {
    61  	if actualLimit := l.maxSeriesPerMetric(userID); series < actualLimit {
    62  		return nil
    63  	}
    64  
    65  	return errMaxSeriesPerMetricLimitExceeded
    66  }
    67  
    68  // AssertMaxMetadataPerMetric limit has not been reached compared to the current
    69  // number of metadata per metric in input and returns an error if so.
    70  func (l *Limiter) AssertMaxMetadataPerMetric(userID string, metadata int) error {
    71  	if actualLimit := l.maxMetadataPerMetric(userID); metadata < actualLimit {
    72  		return nil
    73  	}
    74  
    75  	return errMaxMetadataPerMetricLimitExceeded
    76  }
    77  
    78  // AssertMaxSeriesPerUser limit has not been reached compared to the current
    79  // number of series in input and returns an error if so.
    80  func (l *Limiter) AssertMaxSeriesPerUser(userID string, series int) error {
    81  	if actualLimit := l.maxSeriesPerUser(userID); series < actualLimit {
    82  		return nil
    83  	}
    84  
    85  	return errMaxSeriesPerUserLimitExceeded
    86  }
    87  
    88  // AssertMaxMetricsWithMetadataPerUser limit has not been reached compared to the current
    89  // number of metrics with metadata in input and returns an error if so.
    90  func (l *Limiter) AssertMaxMetricsWithMetadataPerUser(userID string, metrics int) error {
    91  	if actualLimit := l.maxMetadataPerUser(userID); metrics < actualLimit {
    92  		return nil
    93  	}
    94  
    95  	return errMaxMetadataPerUserLimitExceeded
    96  }
    97  
    98  // MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit.
    99  func (l *Limiter) MaxSeriesPerQuery(userID string) int {
   100  	return l.limits.MaxSeriesPerQuery(userID)
   101  }
   102  
   103  // FormatError returns the input error enriched with the actual limits for the given user.
   104  // It acts as pass-through if the input error is unknown.
   105  func (l *Limiter) FormatError(userID string, err error) error {
   106  	switch err {
   107  	case errMaxSeriesPerUserLimitExceeded:
   108  		return l.formatMaxSeriesPerUserError(userID)
   109  	case errMaxSeriesPerMetricLimitExceeded:
   110  		return l.formatMaxSeriesPerMetricError(userID)
   111  	case errMaxMetadataPerUserLimitExceeded:
   112  		return l.formatMaxMetadataPerUserError(userID)
   113  	case errMaxMetadataPerMetricLimitExceeded:
   114  		return l.formatMaxMetadataPerMetricError(userID)
   115  	default:
   116  		return err
   117  	}
   118  }
   119  
   120  func (l *Limiter) formatMaxSeriesPerUserError(userID string) error {
   121  	actualLimit := l.maxSeriesPerUser(userID)
   122  	localLimit := l.limits.MaxLocalSeriesPerUser(userID)
   123  	globalLimit := l.limits.MaxGlobalSeriesPerUser(userID)
   124  
   125  	return fmt.Errorf("per-user series limit of %d exceeded, please contact administrator to raise it (local limit: %d global limit: %d actual local limit: %d)",
   126  		minNonZero(localLimit, globalLimit), localLimit, globalLimit, actualLimit)
   127  }
   128  
   129  func (l *Limiter) formatMaxSeriesPerMetricError(userID string) error {
   130  	actualLimit := l.maxSeriesPerMetric(userID)
   131  	localLimit := l.limits.MaxLocalSeriesPerMetric(userID)
   132  	globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID)
   133  
   134  	return fmt.Errorf("per-metric series limit of %d exceeded, please contact administrator to raise it (local limit: %d global limit: %d actual local limit: %d)",
   135  		minNonZero(localLimit, globalLimit), localLimit, globalLimit, actualLimit)
   136  }
   137  
   138  func (l *Limiter) formatMaxMetadataPerUserError(userID string) error {
   139  	actualLimit := l.maxMetadataPerUser(userID)
   140  	localLimit := l.limits.MaxLocalMetricsWithMetadataPerUser(userID)
   141  	globalLimit := l.limits.MaxGlobalMetricsWithMetadataPerUser(userID)
   142  
   143  	return fmt.Errorf("per-user metric metadata limit of %d exceeded, please contact administrator to raise it (local limit: %d global limit: %d actual local limit: %d)",
   144  		minNonZero(localLimit, globalLimit), localLimit, globalLimit, actualLimit)
   145  }
   146  
   147  func (l *Limiter) formatMaxMetadataPerMetricError(userID string) error {
   148  	actualLimit := l.maxMetadataPerMetric(userID)
   149  	localLimit := l.limits.MaxLocalMetadataPerMetric(userID)
   150  	globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID)
   151  
   152  	return fmt.Errorf("per-metric metadata limit of %d exceeded, please contact administrator to raise it (local limit: %d global limit: %d actual local limit: %d)",
   153  		minNonZero(localLimit, globalLimit), localLimit, globalLimit, actualLimit)
   154  }
   155  
   156  func (l *Limiter) maxSeriesPerMetric(userID string) int {
   157  	localLimit := l.limits.MaxLocalSeriesPerMetric(userID)
   158  	globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID)
   159  
   160  	if globalLimit > 0 {
   161  		if l.shardByAllLabels {
   162  			// We can assume that series are evenly distributed across ingesters
   163  			// so we do convert the global limit into a local limit
   164  			localLimit = minNonZero(localLimit, l.convertGlobalToLocalLimit(userID, globalLimit))
   165  		} else {
   166  			// Given a metric is always pushed to the same set of ingesters (based on
   167  			// the replication factor), we can configure the per-ingester local limit
   168  			// equal to the global limit.
   169  			localLimit = minNonZero(localLimit, globalLimit)
   170  		}
   171  	}
   172  
   173  	// If both the local and global limits are disabled, we just
   174  	// use the largest int value
   175  	if localLimit == 0 {
   176  		localLimit = math.MaxInt32
   177  	}
   178  
   179  	return localLimit
   180  }
   181  
   182  func (l *Limiter) maxMetadataPerMetric(userID string) int {
   183  	localLimit := l.limits.MaxLocalMetadataPerMetric(userID)
   184  	globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID)
   185  
   186  	if globalLimit > 0 {
   187  		if l.shardByAllLabels {
   188  			localLimit = minNonZero(localLimit, l.convertGlobalToLocalLimit(userID, globalLimit))
   189  		} else {
   190  			localLimit = minNonZero(localLimit, globalLimit)
   191  		}
   192  	}
   193  
   194  	if localLimit == 0 {
   195  		localLimit = math.MaxInt32
   196  	}
   197  
   198  	return localLimit
   199  }
   200  
   201  func (l *Limiter) maxSeriesPerUser(userID string) int {
   202  	return l.maxByLocalAndGlobal(
   203  		userID,
   204  		l.limits.MaxLocalSeriesPerUser,
   205  		l.limits.MaxGlobalSeriesPerUser,
   206  	)
   207  }
   208  
   209  func (l *Limiter) maxMetadataPerUser(userID string) int {
   210  	return l.maxByLocalAndGlobal(
   211  		userID,
   212  		l.limits.MaxLocalMetricsWithMetadataPerUser,
   213  		l.limits.MaxGlobalMetricsWithMetadataPerUser,
   214  	)
   215  }
   216  
   217  func (l *Limiter) maxByLocalAndGlobal(userID string, localLimitFn, globalLimitFn func(string) int) int {
   218  	localLimit := localLimitFn(userID)
   219  
   220  	// The global limit is supported only when shard-by-all-labels is enabled,
   221  	// otherwise we wouldn't get an even split of series/metadata across ingesters and
   222  	// can't take a "local decision" without any centralized coordination.
   223  	if l.shardByAllLabels {
   224  		// We can assume that series/metadata are evenly distributed across ingesters
   225  		// so we do convert the global limit into a local limit
   226  		globalLimit := globalLimitFn(userID)
   227  		localLimit = minNonZero(localLimit, l.convertGlobalToLocalLimit(userID, globalLimit))
   228  	}
   229  
   230  	// If both the local and global limits are disabled, we just
   231  	// use the largest int value
   232  	if localLimit == 0 {
   233  		localLimit = math.MaxInt32
   234  	}
   235  
   236  	return localLimit
   237  }
   238  
   239  func (l *Limiter) convertGlobalToLocalLimit(userID string, globalLimit int) int {
   240  	if globalLimit == 0 {
   241  		return 0
   242  	}
   243  
   244  	// Given we don't need a super accurate count (ie. when the ingesters
   245  	// topology changes) and we prefer to always be in favor of the tenant,
   246  	// we can use a per-ingester limit equal to:
   247  	// (global limit / number of ingesters) * replication factor
   248  	numIngesters := l.ring.HealthyInstancesCount()
   249  
   250  	// May happen because the number of ingesters is asynchronously updated.
   251  	// If happens, we just temporarily ignore the global limit.
   252  	if numIngesters == 0 {
   253  		return 0
   254  	}
   255  
   256  	// If the number of available ingesters is greater than the tenant's shard
   257  	// size, then we should honor the shard size because series/metadata won't
   258  	// be written to more ingesters than it.
   259  	if shardSize := l.getShardSize(userID); shardSize > 0 {
   260  		// We use Min() to protect from the case the expected shard size is > available ingesters.
   261  		numIngesters = util_math.Min(numIngesters, util.ShuffleShardExpectedInstances(shardSize, l.getNumZones()))
   262  	}
   263  
   264  	return int((float64(globalLimit) / float64(numIngesters)) * float64(l.replicationFactor))
   265  }
   266  
   267  func (l *Limiter) getShardSize(userID string) int {
   268  	if !l.shuffleShardingEnabled {
   269  		return 0
   270  	}
   271  
   272  	return l.limits.IngestionTenantShardSize(userID)
   273  }
   274  
   275  func (l *Limiter) getNumZones() int {
   276  	if l.zoneAwarenessEnabled {
   277  		return util_math.Max(l.ring.ZonesCount(), 1)
   278  	}
   279  	return 1
   280  }
   281  
   282  func minNonZero(first, second int) int {
   283  	if first == 0 || (second != 0 && first > second) {
   284  		return second
   285  	}
   286  
   287  	return first
   288  }