github.com/thanos-io/thanos@v0.32.5/internal/cortex/util/validation/limits.go (about)

     1  // Copyright (c) The Cortex Authors.
     2  // Licensed under the Apache License 2.0.
     3  
     4  package validation
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/json"
     9  	"errors"
    10  	"flag"
    11  	"math"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/prometheus/common/model"
    16  	"github.com/prometheus/prometheus/model/relabel"
    17  	"golang.org/x/time/rate"
    18  
    19  	"github.com/thanos-io/thanos/internal/cortex/util/flagext"
    20  )
    21  
// errMaxGlobalSeriesPerUserValidation is the error returned by Limits.Validate
// when MaxGlobalSeriesPerUser is greater than zero while shard-by-all-labels
// is disabled.
var errMaxGlobalSeriesPerUserValidation = errors.New("The ingester.max-global-series-per-user limit is unsupported if distributor.shard-by-all-labels is disabled")
    23  
// Supported values for enum limits.
const (
	// LocalIngestionRateStrategy applies the ingestion rate limit individually
	// to each distributor instance.
	LocalIngestionRateStrategy  = "local"
	// GlobalIngestionRateStrategy shares the ingestion rate limit evenly
	// across the cluster.
	GlobalIngestionRateStrategy = "global"
)
    29  
    30  // LimitError are errors that do not comply with the limits specified.
    31  type LimitError string
    32  
    33  func (e LimitError) Error() string {
    34  	return string(e)
    35  }
    36  
// Limits describe all the limits for users; can be used to describe global default
// limits via flags, or per-user limits via yaml config.
//
// Most fields are bound to a command-line flag in RegisterFlags; the fields
// whose tag carries doc:"nocli" have no flag registered in this file. When the
// package-level defaultLimits is set, UnmarshalYAML and UnmarshalJSON start
// from a copy of it before applying the decoded input.
type Limits struct {
	// Distributor enforced limits.
	IngestionRate             float64             `yaml:"ingestion_rate" json:"ingestion_rate"`
	IngestionRateStrategy     string              `yaml:"ingestion_rate_strategy" json:"ingestion_rate_strategy"`
	IngestionBurstSize        int                 `yaml:"ingestion_burst_size" json:"ingestion_burst_size"`
	AcceptHASamples           bool                `yaml:"accept_ha_samples" json:"accept_ha_samples"`
	HAClusterLabel            string              `yaml:"ha_cluster_label" json:"ha_cluster_label"`
	HAReplicaLabel            string              `yaml:"ha_replica_label" json:"ha_replica_label"`
	HAMaxClusters             int                 `yaml:"ha_max_clusters" json:"ha_max_clusters"`
	DropLabels                flagext.StringSlice `yaml:"drop_labels" json:"drop_labels"`
	MaxLabelNameLength        int                 `yaml:"max_label_name_length" json:"max_label_name_length"`
	MaxLabelValueLength       int                 `yaml:"max_label_value_length" json:"max_label_value_length"`
	MaxLabelNamesPerSeries    int                 `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`
	MaxMetadataLength         int                 `yaml:"max_metadata_length" json:"max_metadata_length"`
	RejectOldSamples          bool                `yaml:"reject_old_samples" json:"reject_old_samples"`
	RejectOldSamplesMaxAge    model.Duration      `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"`
	CreationGracePeriod       model.Duration      `yaml:"creation_grace_period" json:"creation_grace_period"`
	EnforceMetadataMetricName bool                `yaml:"enforce_metadata_metric_name" json:"enforce_metadata_metric_name"`
	EnforceMetricName         bool                `yaml:"enforce_metric_name" json:"enforce_metric_name"`
	IngestionTenantShardSize  int                 `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"`
	MetricRelabelConfigs      []*relabel.Config   `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs."`

	// Ingester enforced limits.
	// Series
	MaxSeriesPerQuery        int `yaml:"max_series_per_query" json:"max_series_per_query"`
	MaxSamplesPerQuery       int `yaml:"max_samples_per_query" json:"max_samples_per_query"`
	MaxLocalSeriesPerUser    int `yaml:"max_series_per_user" json:"max_series_per_user"`
	MaxLocalSeriesPerMetric  int `yaml:"max_series_per_metric" json:"max_series_per_metric"`
	MaxGlobalSeriesPerUser   int `yaml:"max_global_series_per_user" json:"max_global_series_per_user"`
	MaxGlobalSeriesPerMetric int `yaml:"max_global_series_per_metric" json:"max_global_series_per_metric"`
	MinChunkLength           int `yaml:"min_chunk_length" json:"min_chunk_length"`
	// Metadata
	MaxLocalMetricsWithMetadataPerUser  int `yaml:"max_metadata_per_user" json:"max_metadata_per_user"`
	MaxLocalMetadataPerMetric           int `yaml:"max_metadata_per_metric" json:"max_metadata_per_metric"`
	MaxGlobalMetricsWithMetadataPerUser int `yaml:"max_global_metadata_per_user" json:"max_global_metadata_per_user"`
	MaxGlobalMetadataPerMetric          int `yaml:"max_global_metadata_per_metric" json:"max_global_metadata_per_metric"`

	// Querier enforced limits.
	// MaxChunksPerQueryFromStore backs the deprecated -store.query-chunk-limit
	// flag; MaxChunksPerQuery is its replacement.
	MaxChunksPerQueryFromStore   int            `yaml:"max_chunks_per_query" json:"max_chunks_per_query"` // TODO Remove in Cortex 1.12.
	MaxChunksPerQuery            int            `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"`
	MaxFetchedSeriesPerQuery     int            `yaml:"max_fetched_series_per_query" json:"max_fetched_series_per_query"`
	MaxFetchedChunkBytesPerQuery int            `yaml:"max_fetched_chunk_bytes_per_query" json:"max_fetched_chunk_bytes_per_query"`
	MaxQueryLookback             model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"`
	MaxQueryLength               model.Duration `yaml:"max_query_length" json:"max_query_length"`
	MaxQueryParallelism          int            `yaml:"max_query_parallelism" json:"max_query_parallelism"`
	CardinalityLimit             int            `yaml:"cardinality_limit" json:"cardinality_limit"`
	MaxCacheFreshness            model.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"`
	MaxQueriersPerTenant         int            `yaml:"max_queriers_per_tenant" json:"max_queriers_per_tenant"`

	// Ruler defaults and limits.
	RulerEvaluationDelay        model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
	RulerTenantShardSize        int            `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
	RulerMaxRulesPerRuleGroup   int            `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
	RulerMaxRuleGroupsPerTenant int            `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`

	// Store-gateway.
	StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`

	// Compactor.
	CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"`
	CompactorTenantShardSize       int            `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"`

	// This config doesn't have a CLI flag registered here because they're registered in
	// their own original config struct.
	S3SSEType                 string `yaml:"s3_sse_type" json:"s3_sse_type" doc:"nocli|description=S3 server-side encryption type. Required to enable server-side encryption overrides for a specific tenant. If not set, the default S3 client settings are used."`
	S3SSEKMSKeyID             string `yaml:"s3_sse_kms_key_id" json:"s3_sse_kms_key_id" doc:"nocli|description=S3 server-side encryption KMS Key ID. Ignored if the SSE type override is not set."`
	S3SSEKMSEncryptionContext string `yaml:"s3_sse_kms_encryption_context" json:"s3_sse_kms_encryption_context" doc:"nocli|description=S3 server-side encryption KMS encryption context. If unset and the key ID override is set, the encryption context will not be provided to S3. Ignored if the SSE type override is not set."`

	// Alertmanager.
	AlertmanagerReceiversBlockCIDRNetworks     flagext.CIDRSliceCSV `yaml:"alertmanager_receivers_firewall_block_cidr_networks" json:"alertmanager_receivers_firewall_block_cidr_networks"`
	AlertmanagerReceiversBlockPrivateAddresses bool                 `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`

	// Notification rate limits: a per-user limit plus a per-integration map.
	// The map is deep-copied from the defaults by the unmarshallers so that
	// decoding a tenant's limits does not modify the default map.
	NotificationRateLimit               float64                  `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
	NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

	// Alertmanager size and count limits; per the flag help text, 0 means no limit.
	AlertmanagerMaxConfigSizeBytes             int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
	AlertmanagerMaxTemplatesCount              int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
	AlertmanagerMaxTemplateSizeBytes           int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
	AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
	AlertmanagerMaxAlertsCount                 int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
	AlertmanagerMaxAlertsSizeBytes             int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
}
   121  
// RegisterFlags adds the flags required to config this to the given FlagSet.
//
// Each flag is bound directly to a field of l, so parsing f writes into l.
// Flags registered through f.Var take no default argument; where a non-zero
// default is wanted, the field is seeded with Set just before registration.
func (l *Limits) RegisterFlags(f *flag.FlagSet) {
	// Distributor limits.
	f.IntVar(&l.IngestionTenantShardSize, "distributor.ingestion-tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set both on ingesters and distributors. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.Float64Var(&l.IngestionRate, "distributor.ingestion-rate-limit", 25000, "Per-user ingestion rate limit in samples per second.")
	f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "local", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).")
	f.IntVar(&l.IngestionBurstSize, "distributor.ingestion-burst-size", 50000, "Per-user allowed ingestion burst size (in number of samples).")
	f.BoolVar(&l.AcceptHASamples, "distributor.ha-tracker.enable-for-all-users", false, "Flag to enable, for all users, handling of samples with external labels identifying replicas in an HA Prometheus setup.")
	f.StringVar(&l.HAClusterLabel, "distributor.ha-tracker.cluster", "cluster", "Prometheus label to look for in samples to identify a Prometheus HA cluster.")
	f.StringVar(&l.HAReplicaLabel, "distributor.ha-tracker.replica", "__replica__", "Prometheus label to look for in samples to identify a Prometheus HA replica.")
	f.IntVar(&l.HAMaxClusters, "distributor.ha-tracker.max-clusters", 0, "Maximum number of clusters that HA tracker will keep track of for single user. 0 to disable the limit.")
	f.Var(&l.DropLabels, "distributor.drop-label", "This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.")
	f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names")
	f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name")
	f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.")
	f.IntVar(&l.MaxMetadataLength, "validation.max-metadata-length", 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT.")
	f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", false, "Reject old samples.")
	// Seed the default before registering; the Set error is deliberately
	// discarded because the argument is a fixed literal.
	_ = l.RejectOldSamplesMaxAge.Set("14d")
	f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.")
	// Same seeding pattern as above.
	_ = l.CreationGracePeriod.Set("10m")
	f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
	f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.")
	f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.")

	// Ingester series limits.
	f.IntVar(&l.MaxSeriesPerQuery, "ingester.max-series-per-query", 100000, "The maximum number of series for which a query can fetch samples from each ingester. This limit is enforced only in the ingesters (when querying samples not flushed to the storage yet) and it's a per-instance limit. This limit is ignored when running the Cortex blocks storage. When running Cortex with blocks storage use -querier.max-fetched-series-per-query limit instead.")
	f.IntVar(&l.MaxSamplesPerQuery, "ingester.max-samples-per-query", 1000000, "The maximum number of samples that a query can return. This limit only applies when running the Cortex chunks storage with -querier.ingester-streaming=false.")
	f.IntVar(&l.MaxLocalSeriesPerUser, "ingester.max-series-per-user", 5000000, "The maximum number of active series per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalSeriesPerMetric, "ingester.max-series-per-metric", 50000, "The maximum number of active series per metric name, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalSeriesPerUser, "ingester.max-global-series-per-user", 0, "The maximum number of active series per user, across the cluster before replication. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalSeriesPerMetric, "ingester.max-global-series-per-metric", 0, "The maximum number of active series per metric name, across the cluster before replication. 0 to disable.")
	f.IntVar(&l.MinChunkLength, "ingester.min-chunk-length", 0, "Minimum number of samples in an idle chunk to flush it to the store. Use with care, if chunks are less than this size they will be discarded. This option is ignored when running the Cortex blocks storage. 0 to disable.")

	// Ingester metadata limits, followed by querier and query-frontend limits.
	f.IntVar(&l.MaxLocalMetricsWithMetadataPerUser, "ingester.max-metadata-per-user", 8000, "The maximum number of active metrics with metadata per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalMetadataPerMetric, "ingester.max-metadata-per-metric", 10, "The maximum number of metadata per metric, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalMetricsWithMetadataPerUser, "ingester.max-global-metadata-per-user", 0, "The maximum number of active metrics with metadata per user, across the cluster. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalMetadataPerMetric, "ingester.max-global-metadata-per-metric", 0, "The maximum number of metadata per metric, across the cluster. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQueryFromStore, "store.query-chunk-limit", 2e6, "Deprecated. Use -querier.max-fetched-chunks-per-query CLI flag and its respective YAML config option instead. Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage only. When running the Cortex chunks storage, this limit is enforced in the querier and ruler, while when running the Cortex blocks storage this limit is enforced in the querier, ruler and store-gateway. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQuery, "querier.max-fetched-chunks-per-query", 0, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage. This limit is enforced in the querier, ruler and store-gateway. Takes precedence over the deprecated -store.query-chunk-limit. 0 to disable.")
	f.IntVar(&l.MaxFetchedSeriesPerQuery, "querier.max-fetched-series-per-query", 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable")
	f.IntVar(&l.MaxFetchedChunkBytesPerQuery, "querier.max-fetched-chunk-bytes-per-query", 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and storage. This limit is enforced in the querier and ruler only when running Cortex with blocks storage. 0 to disable.")
	f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
	f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
	f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.")
	f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.")
	// Seed the default before registering; Set error discarded as above.
	_ = l.MaxCacheFreshness.Set("1m")
	f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")
	f.IntVar(&l.MaxQueriersPerTenant, "frontend.max-queriers-per-tenant", 0, "Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.")

	// Ruler.
	f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
	f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.")
	f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.")

	// Compactor.
	f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.")
	f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")

	// Store-gateway.
	f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")

	// Alertmanager.
	f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
	f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by  RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.")

	f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")

	// Make sure the map is non-nil before its address is handed to the flag set.
	if l.NotificationRateLimitPerIntegration == nil {
		l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
	}
	f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
	f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplatesCount, "alertmanager.max-templates-count", 0, "Maximum number of templates in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplateSizeBytes, "alertmanager.max-template-size-bytes", 0, "Maximum size of single template in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
}
   197  
   198  // Validate the limits config and returns an error if the validation
   199  // doesn't pass
   200  func (l *Limits) Validate(shardByAllLabels bool) error {
   201  	// The ingester.max-global-series-per-user metric is not supported
   202  	// if shard-by-all-labels is disabled
   203  	if l.MaxGlobalSeriesPerUser > 0 && !shardByAllLabels {
   204  		return errMaxGlobalSeriesPerUserValidation
   205  	}
   206  
   207  	return nil
   208  }
   209  
   210  // UnmarshalYAML implements the yaml.Unmarshaler interface.
   211  func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
   212  	// We want to set l to the defaults and then overwrite it with the input.
   213  	// To make unmarshal fill the plain data struct rather than calling UnmarshalYAML
   214  	// again, we have to hide it using a type indirection.  See prometheus/config.
   215  
   216  	// During startup we wont have a default value so we don't want to overwrite them
   217  	if defaultLimits != nil {
   218  		*l = *defaultLimits
   219  		// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
   220  		l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
   221  	}
   222  	type plain Limits
   223  	return unmarshal((*plain)(l))
   224  }
   225  
   226  // UnmarshalJSON implements the json.Unmarshaler interface.
   227  func (l *Limits) UnmarshalJSON(data []byte) error {
   228  	// Like the YAML method above, we want to set l to the defaults and then overwrite
   229  	// it with the input. We prevent an infinite loop of calling UnmarshalJSON by hiding
   230  	// behind type indirection.
   231  	if defaultLimits != nil {
   232  		*l = *defaultLimits
   233  		// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
   234  		l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
   235  	}
   236  
   237  	type plain Limits
   238  	dec := json.NewDecoder(bytes.NewReader(data))
   239  	dec.DisallowUnknownFields()
   240  
   241  	return dec.Decode((*plain)(l))
   242  }
   243  
   244  func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) {
   245  	l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults))
   246  	for k, v := range defaults {
   247  		l.NotificationRateLimitPerIntegration[k] = v
   248  	}
   249  }
   250  
// defaultLimits holds the limits that UnmarshalYAML and UnmarshalJSON copy into
// a Limits value before decoding the input on top of it.
//
// When we load YAML from disk, we want the various per-customer limits
// to default to any values specified on the command line, not default
// command line values. This global contains those values. The original author
// noted that no nicer way of achieving this was found.
//
// It stays nil until SetDefaultLimitsForYAMLUnmarshalling is called; while it is
// nil the unmarshallers decode into the receiver without resetting it first.
var defaultLimits *Limits
   256  
   257  // SetDefaultLimitsForYAMLUnmarshalling sets global default limits, used when loading
   258  // Limits from YAML files. This is used to ensure per-tenant limits are defaulted to
   259  // those values.
   260  func SetDefaultLimitsForYAMLUnmarshalling(defaults Limits) {
   261  	defaultLimits = &defaults
   262  }
   263  
// TenantLimits exposes per-tenant limit overrides to various resource usage limits.
// An implementation is supplied to NewOverrides and stored on Overrides.
type TenantLimits interface {
	// ByUserID gets limits specific to a particular tenant or nil if there are none.
	ByUserID(userID string) *Limits

	// AllByUserID gets a mapping of all tenant IDs and limits for that user.
	AllByUserID() map[string]*Limits
}
   272  
// Overrides periodically fetch a set of per-user overrides, and provides convenience
// functions for fetching the correct value.
//
// Values are created with NewOverrides. Most accessors resolve a tenant's limits
// through getOverridesForUser and return a single field from the result.
type Overrides struct {
	// defaultLimits are the limits passed to NewOverrides; IngestionRateStrategy
	// reads from them directly because that setting is not per-tenant.
	defaultLimits *Limits
	// tenantLimits is the source of per-tenant overrides passed to NewOverrides.
	// NOTE(review): presumably may be nil when no per-tenant overrides are
	// configured — confirm against getOverridesForUser.
	tenantLimits  TenantLimits
}
   279  
   280  // NewOverrides makes a new Overrides.
   281  func NewOverrides(defaults Limits, tenantLimits TenantLimits) (*Overrides, error) {
   282  	return &Overrides{
   283  		tenantLimits:  tenantLimits,
   284  		defaultLimits: &defaults,
   285  	}, nil
   286  }
   287  
   288  // IngestionRate returns the limit on ingester rate (samples per second).
   289  func (o *Overrides) IngestionRate(userID string) float64 {
   290  	return o.getOverridesForUser(userID).IngestionRate
   291  }
   292  
   293  // IngestionRateStrategy returns whether the ingestion rate limit should be individually applied
   294  // to each distributor instance (local) or evenly shared across the cluster (global).
   295  func (o *Overrides) IngestionRateStrategy() string {
   296  	// The ingestion rate strategy can't be overridden on a per-tenant basis
   297  	return o.defaultLimits.IngestionRateStrategy
   298  }
   299  
   300  // IngestionBurstSize returns the burst size for ingestion rate.
   301  func (o *Overrides) IngestionBurstSize(userID string) int {
   302  	return o.getOverridesForUser(userID).IngestionBurstSize
   303  }
   304  
   305  // AcceptHASamples returns whether the distributor should track and accept samples from HA replicas for this user.
   306  func (o *Overrides) AcceptHASamples(userID string) bool {
   307  	return o.getOverridesForUser(userID).AcceptHASamples
   308  }
   309  
   310  // HAClusterLabel returns the cluster label to look for when deciding whether to accept a sample from a Prometheus HA replica.
   311  func (o *Overrides) HAClusterLabel(userID string) string {
   312  	return o.getOverridesForUser(userID).HAClusterLabel
   313  }
   314  
   315  // HAReplicaLabel returns the replica label to look for when deciding whether to accept a sample from a Prometheus HA replica.
   316  func (o *Overrides) HAReplicaLabel(userID string) string {
   317  	return o.getOverridesForUser(userID).HAReplicaLabel
   318  }
   319  
   320  // DropLabels returns the list of labels to be dropped when ingesting HA samples for the user.
   321  func (o *Overrides) DropLabels(userID string) flagext.StringSlice {
   322  	return o.getOverridesForUser(userID).DropLabels
   323  }
   324  
   325  // MaxLabelNameLength returns maximum length a label name can be.
   326  func (o *Overrides) MaxLabelNameLength(userID string) int {
   327  	return o.getOverridesForUser(userID).MaxLabelNameLength
   328  }
   329  
   330  // MaxLabelValueLength returns maximum length a label value can be. This also is
   331  // the maximum length of a metric name.
   332  func (o *Overrides) MaxLabelValueLength(userID string) int {
   333  	return o.getOverridesForUser(userID).MaxLabelValueLength
   334  }
   335  
   336  // MaxLabelNamesPerSeries returns maximum number of label/value pairs timeseries.
   337  func (o *Overrides) MaxLabelNamesPerSeries(userID string) int {
   338  	return o.getOverridesForUser(userID).MaxLabelNamesPerSeries
   339  }
   340  
   341  // MaxMetadataLength returns maximum length metadata can be. Metadata refers
   342  // to the Metric Name, HELP and UNIT.
   343  func (o *Overrides) MaxMetadataLength(userID string) int {
   344  	return o.getOverridesForUser(userID).MaxMetadataLength
   345  }
   346  
   347  // RejectOldSamples returns true when we should reject samples older than certain
   348  // age.
   349  func (o *Overrides) RejectOldSamples(userID string) bool {
   350  	return o.getOverridesForUser(userID).RejectOldSamples
   351  }
   352  
   353  // RejectOldSamplesMaxAge returns the age at which samples should be rejected.
   354  func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration {
   355  	return time.Duration(o.getOverridesForUser(userID).RejectOldSamplesMaxAge)
   356  }
   357  
   358  // CreationGracePeriod is misnamed, and actually returns how far into the future
   359  // we should accept samples.
   360  func (o *Overrides) CreationGracePeriod(userID string) time.Duration {
   361  	return time.Duration(o.getOverridesForUser(userID).CreationGracePeriod)
   362  }
   363  
   364  // MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit.
   365  func (o *Overrides) MaxSeriesPerQuery(userID string) int {
   366  	return o.getOverridesForUser(userID).MaxSeriesPerQuery
   367  }
   368  
   369  // MaxSamplesPerQuery returns the maximum number of samples in a query (from the ingester).
   370  func (o *Overrides) MaxSamplesPerQuery(userID string) int {
   371  	return o.getOverridesForUser(userID).MaxSamplesPerQuery
   372  }
   373  
   374  // MaxLocalSeriesPerUser returns the maximum number of series a user is allowed to store in a single ingester.
   375  func (o *Overrides) MaxLocalSeriesPerUser(userID string) int {
   376  	return o.getOverridesForUser(userID).MaxLocalSeriesPerUser
   377  }
   378  
   379  // MaxLocalSeriesPerMetric returns the maximum number of series allowed per metric in a single ingester.
   380  func (o *Overrides) MaxLocalSeriesPerMetric(userID string) int {
   381  	return o.getOverridesForUser(userID).MaxLocalSeriesPerMetric
   382  }
   383  
   384  // MaxGlobalSeriesPerUser returns the maximum number of series a user is allowed to store across the cluster.
   385  func (o *Overrides) MaxGlobalSeriesPerUser(userID string) int {
   386  	return o.getOverridesForUser(userID).MaxGlobalSeriesPerUser
   387  }
   388  
   389  // MaxGlobalSeriesPerMetric returns the maximum number of series allowed per metric across the cluster.
   390  func (o *Overrides) MaxGlobalSeriesPerMetric(userID string) int {
   391  	return o.getOverridesForUser(userID).MaxGlobalSeriesPerMetric
   392  }
   393  
   394  // MaxChunksPerQueryFromStore returns the maximum number of chunks allowed per query when fetching
   395  // chunks from the long-term storage.
   396  func (o *Overrides) MaxChunksPerQueryFromStore(userID string) int {
   397  	// If the new config option is set, then it should take precedence.
   398  	if value := o.getOverridesForUser(userID).MaxChunksPerQuery; value > 0 {
   399  		return value
   400  	}
   401  
   402  	// Fallback to the deprecated config option.
   403  	return o.getOverridesForUser(userID).MaxChunksPerQueryFromStore
   404  }
   405  
   406  func (o *Overrides) MaxChunksPerQuery(userID string) int {
   407  	return o.getOverridesForUser(userID).MaxChunksPerQuery
   408  }
   409  
   410  // MaxFetchedSeriesPerQuery returns the maximum number of series allowed per query when fetching
   411  // chunks from ingesters and blocks storage.
   412  func (o *Overrides) MaxFetchedSeriesPerQuery(userID string) int {
   413  	return o.getOverridesForUser(userID).MaxFetchedSeriesPerQuery
   414  }
   415  
   416  // MaxFetchedChunkBytesPerQuery returns the maximum number of bytes for chunks allowed per query when fetching
   417  // chunks from ingesters and blocks storage.
   418  func (o *Overrides) MaxFetchedChunkBytesPerQuery(userID string) int {
   419  	return o.getOverridesForUser(userID).MaxFetchedChunkBytesPerQuery
   420  }
   421  
   422  // MaxQueryLookback returns the max lookback period of queries.
   423  func (o *Overrides) MaxQueryLookback(userID string) time.Duration {
   424  	return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback)
   425  }
   426  
   427  // MaxQueryLength returns the limit of the length (in time) of a query.
   428  func (o *Overrides) MaxQueryLength(userID string) time.Duration {
   429  	return time.Duration(o.getOverridesForUser(userID).MaxQueryLength)
   430  }
   431  
   432  // MaxCacheFreshness returns the period after which results are cacheable,
   433  // to prevent caching of very recent results.
   434  func (o *Overrides) MaxCacheFreshness(userID string) time.Duration {
   435  	return time.Duration(o.getOverridesForUser(userID).MaxCacheFreshness)
   436  }
   437  
   438  // MaxQueriersPerUser returns the maximum number of queriers that can handle requests for this user.
   439  func (o *Overrides) MaxQueriersPerUser(userID string) int {
   440  	return o.getOverridesForUser(userID).MaxQueriersPerTenant
   441  }
   442  
   443  // MaxQueryParallelism returns the limit to the number of split queries the
   444  // frontend will process in parallel.
   445  func (o *Overrides) MaxQueryParallelism(userID string) int {
   446  	return o.getOverridesForUser(userID).MaxQueryParallelism
   447  }
   448  
   449  // EnforceMetricName whether to enforce the presence of a metric name.
   450  func (o *Overrides) EnforceMetricName(userID string) bool {
   451  	return o.getOverridesForUser(userID).EnforceMetricName
   452  }
   453  
   454  // EnforceMetadataMetricName whether to enforce the presence of a metric name on metadata.
   455  func (o *Overrides) EnforceMetadataMetricName(userID string) bool {
   456  	return o.getOverridesForUser(userID).EnforceMetadataMetricName
   457  }
   458  
   459  // CardinalityLimit returns the maximum number of timeseries allowed in a query.
   460  func (o *Overrides) CardinalityLimit(userID string) int {
   461  	return o.getOverridesForUser(userID).CardinalityLimit
   462  }
   463  
   464  // MinChunkLength returns the minimum size of chunk that will be saved by ingesters
   465  func (o *Overrides) MinChunkLength(userID string) int {
   466  	return o.getOverridesForUser(userID).MinChunkLength
   467  }
   468  
   469  // MaxLocalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store in a single ingester.
   470  func (o *Overrides) MaxLocalMetricsWithMetadataPerUser(userID string) int {
   471  	return o.getOverridesForUser(userID).MaxLocalMetricsWithMetadataPerUser
   472  }
   473  
   474  // MaxLocalMetadataPerMetric returns the maximum number of metadata allowed per metric in a single ingester.
   475  func (o *Overrides) MaxLocalMetadataPerMetric(userID string) int {
   476  	return o.getOverridesForUser(userID).MaxLocalMetadataPerMetric
   477  }
   478  
   479  // MaxGlobalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store across the cluster.
   480  func (o *Overrides) MaxGlobalMetricsWithMetadataPerUser(userID string) int {
   481  	return o.getOverridesForUser(userID).MaxGlobalMetricsWithMetadataPerUser
   482  }
   483  
   484  // MaxGlobalMetadataPerMetric returns the maximum number of metadata allowed per metric across the cluster.
   485  func (o *Overrides) MaxGlobalMetadataPerMetric(userID string) int {
   486  	return o.getOverridesForUser(userID).MaxGlobalMetadataPerMetric
   487  }
   488  
   489  // IngestionTenantShardSize returns the ingesters shard size for a given user.
   490  func (o *Overrides) IngestionTenantShardSize(userID string) int {
   491  	return o.getOverridesForUser(userID).IngestionTenantShardSize
   492  }
   493  
   494  // EvaluationDelay returns the rules evaluation delay for a given user.
   495  func (o *Overrides) EvaluationDelay(userID string) time.Duration {
   496  	return time.Duration(o.getOverridesForUser(userID).RulerEvaluationDelay)
   497  }
   498  
   499  // CompactorBlocksRetentionPeriod returns the retention period for a given user.
   500  func (o *Overrides) CompactorBlocksRetentionPeriod(userID string) time.Duration {
   501  	return time.Duration(o.getOverridesForUser(userID).CompactorBlocksRetentionPeriod)
   502  }
   503  
   504  // CompactorTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy.
   505  func (o *Overrides) CompactorTenantShardSize(userID string) int {
   506  	return o.getOverridesForUser(userID).CompactorTenantShardSize
   507  }
   508  
   509  // MetricRelabelConfigs returns the metric relabel configs for a given user.
   510  func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config {
   511  	return o.getOverridesForUser(userID).MetricRelabelConfigs
   512  }
   513  
   514  // RulerTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy.
   515  func (o *Overrides) RulerTenantShardSize(userID string) int {
   516  	return o.getOverridesForUser(userID).RulerTenantShardSize
   517  }
   518  
   519  // RulerMaxRulesPerRuleGroup returns the maximum number of rules per rule group for a given user.
   520  func (o *Overrides) RulerMaxRulesPerRuleGroup(userID string) int {
   521  	return o.getOverridesForUser(userID).RulerMaxRulesPerRuleGroup
   522  }
   523  
   524  // RulerMaxRuleGroupsPerTenant returns the maximum number of rule groups for a given user.
   525  func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int {
   526  	return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant
   527  }
   528  
   529  // StoreGatewayTenantShardSize returns the store-gateway shard size for a given user.
   530  func (o *Overrides) StoreGatewayTenantShardSize(userID string) int {
   531  	return o.getOverridesForUser(userID).StoreGatewayTenantShardSize
   532  }
   533  
   534  // MaxHAClusters returns maximum number of clusters that HA tracker will track for a user.
   535  func (o *Overrides) MaxHAClusters(user string) int {
   536  	return o.getOverridesForUser(user).HAMaxClusters
   537  }
   538  
   539  // S3SSEType returns the per-tenant S3 SSE type.
   540  func (o *Overrides) S3SSEType(user string) string {
   541  	return o.getOverridesForUser(user).S3SSEType
   542  }
   543  
   544  // S3SSEKMSKeyID returns the per-tenant S3 KMS-SSE key id.
   545  func (o *Overrides) S3SSEKMSKeyID(user string) string {
   546  	return o.getOverridesForUser(user).S3SSEKMSKeyID
   547  }
   548  
   549  // S3SSEKMSEncryptionContext returns the per-tenant S3 KMS-SSE encryption context.
   550  func (o *Overrides) S3SSEKMSEncryptionContext(user string) string {
   551  	return o.getOverridesForUser(user).S3SSEKMSEncryptionContext
   552  }
   553  
   554  // AlertmanagerReceiversBlockCIDRNetworks returns the list of network CIDRs that should be blocked
   555  // in the Alertmanager receivers for the given user.
   556  func (o *Overrides) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
   557  	return o.getOverridesForUser(user).AlertmanagerReceiversBlockCIDRNetworks
   558  }
   559  
   560  // AlertmanagerReceiversBlockPrivateAddresses returns true if private addresses should be blocked
   561  // in the Alertmanager receivers for the given user.
   562  func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool {
   563  	return o.getOverridesForUser(user).AlertmanagerReceiversBlockPrivateAddresses
   564  }
   565  
   566  // Notification limits are special. Limits are returned in following order:
   567  // 1. per-tenant limits for given integration
   568  // 2. default limits for given integration
   569  // 3. per-tenant limits
   570  // 4. default limits
   571  func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 {
   572  	u := o.getOverridesForUser(user)
   573  	if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok {
   574  		return n
   575  	}
   576  
   577  	return u.NotificationRateLimit
   578  }
   579  
   580  func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit {
   581  	l := o.getNotificationLimitForUser(user, integration)
   582  	if l == 0 || math.IsInf(l, 1) {
   583  		return rate.Inf // No rate limit.
   584  	}
   585  
   586  	if l < 0 {
   587  		l = 0 // No notifications will be sent.
   588  	}
   589  	return rate.Limit(l)
   590  }
   591  
   592  const maxInt = int(^uint(0) >> 1)
   593  
   594  func (o *Overrides) NotificationBurstSize(user string, integration string) int {
   595  	// Burst size is computed from rate limit. Rate limit is already normalized to [0, +inf), where 0 means disabled.
   596  	l := o.NotificationRateLimit(user, integration)
   597  	if l == 0 {
   598  		return 0
   599  	}
   600  
   601  	// floats can be larger than max int. This also handles case where l == rate.Inf.
   602  	if float64(l) >= float64(maxInt) {
   603  		return maxInt
   604  	}
   605  
   606  	// For values between (0, 1), allow single notification per second (every 1/limit seconds).
   607  	if l < 1 {
   608  		return 1
   609  	}
   610  
   611  	return int(l)
   612  }
   613  
   614  func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int {
   615  	return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes
   616  }
   617  
   618  func (o *Overrides) AlertmanagerMaxTemplatesCount(userID string) int {
   619  	return o.getOverridesForUser(userID).AlertmanagerMaxTemplatesCount
   620  }
   621  
   622  func (o *Overrides) AlertmanagerMaxTemplateSize(userID string) int {
   623  	return o.getOverridesForUser(userID).AlertmanagerMaxTemplateSizeBytes
   624  }
   625  
   626  func (o *Overrides) AlertmanagerMaxDispatcherAggregationGroups(userID string) int {
   627  	return o.getOverridesForUser(userID).AlertmanagerMaxDispatcherAggregationGroups
   628  }
   629  
   630  func (o *Overrides) AlertmanagerMaxAlertsCount(userID string) int {
   631  	return o.getOverridesForUser(userID).AlertmanagerMaxAlertsCount
   632  }
   633  
   634  func (o *Overrides) AlertmanagerMaxAlertsSizeBytes(userID string) int {
   635  	return o.getOverridesForUser(userID).AlertmanagerMaxAlertsSizeBytes
   636  }
   637  
   638  func (o *Overrides) getOverridesForUser(userID string) *Limits {
   639  	if o.tenantLimits != nil {
   640  		l := o.tenantLimits.ByUserID(userID)
   641  		if l != nil {
   642  			return l
   643  		}
   644  	}
   645  	return o.defaultLimits
   646  }
   647  
   648  // SmallestPositiveIntPerTenant is returning the minimal positive value of the
   649  // supplied limit function for all given tenants.
   650  func SmallestPositiveIntPerTenant(tenantIDs []string, f func(string) int) int {
   651  	var result *int
   652  	for _, tenantID := range tenantIDs {
   653  		v := f(tenantID)
   654  		if result == nil || v < *result {
   655  			result = &v
   656  		}
   657  	}
   658  	if result == nil {
   659  		return 0
   660  	}
   661  	return *result
   662  }
   663  
   664  // SmallestPositiveNonZeroIntPerTenant is returning the minimal positive and
   665  // non-zero value of the supplied limit function for all given tenants. In many
   666  // limits a value of 0 means unlimted so the method will return 0 only if all
   667  // inputs have a limit of 0 or an empty tenant list is given.
   668  func SmallestPositiveNonZeroIntPerTenant(tenantIDs []string, f func(string) int) int {
   669  	var result *int
   670  	for _, tenantID := range tenantIDs {
   671  		v := f(tenantID)
   672  		if v > 0 && (result == nil || v < *result) {
   673  			result = &v
   674  		}
   675  	}
   676  	if result == nil {
   677  		return 0
   678  	}
   679  	return *result
   680  }
   681  
   682  // SmallestPositiveNonZeroDurationPerTenant is returning the minimal positive
   683  // and non-zero value of the supplied limit function for all given tenants. In
   684  // many limits a value of 0 means unlimted so the method will return 0 only if
   685  // all inputs have a limit of 0 or an empty tenant list is given.
   686  func SmallestPositiveNonZeroDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
   687  	var result *time.Duration
   688  	for _, tenantID := range tenantIDs {
   689  		v := f(tenantID)
   690  		if v > 0 && (result == nil || v < *result) {
   691  			result = &v
   692  		}
   693  	}
   694  	if result == nil {
   695  		return 0
   696  	}
   697  	return *result
   698  }
   699  
   700  // MaxDurationPerTenant is returning the maximum duration per tenant. Without
   701  // tenants given it will return a time.Duration(0).
   702  func MaxDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
   703  	result := time.Duration(0)
   704  	for _, tenantID := range tenantIDs {
   705  		v := f(tenantID)
   706  		if v > result {
   707  			result = v
   708  		}
   709  	}
   710  	return result
   711  }