github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/util/validation/limits.go (about)

     1  package validation
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"errors"
     7  	"flag"
     8  	"math"
     9  	"strings"
    10  	"time"
    11  
    12  	"github.com/grafana/dskit/flagext"
    13  	"github.com/prometheus/common/model"
    14  	"github.com/prometheus/prometheus/pkg/relabel"
    15  	"golang.org/x/time/rate"
    16  )
    17  
// errMaxGlobalSeriesPerUserValidation is returned by Limits.Validate when a
// positive MaxGlobalSeriesPerUser is configured while shard-by-all-labels is
// disabled.
var errMaxGlobalSeriesPerUserValidation = errors.New("The ingester.max-global-series-per-user limit is unsupported if distributor.shard-by-all-labels is disabled")
    19  
// Supported values for enum limits.
//
// These are the two values described for the
// -distributor.ingestion-rate-limit-strategy flag: "local" applies the
// ingestion rate limit to each distributor instance individually, "global"
// shares it evenly across the cluster.
const (
	LocalIngestionRateStrategy  = "local"
	GlobalIngestionRateStrategy = "global"
)
    25  
    26  // LimitError are errors that do not comply with the limits specified.
    27  type LimitError string
    28  
    29  func (e LimitError) Error() string {
    30  	return string(e)
    31  }
    32  
// Limits describes all the limits for users. The same struct is used both for
// the global defaults (populated from CLI flags by RegisterFlags) and for
// per-user limits loaded from YAML or JSON, in which case unset fields fall
// back to the global defaults (see UnmarshalYAML and UnmarshalJSON).
type Limits struct {
	// Distributor enforced limits.
	IngestionRate             float64             `yaml:"ingestion_rate" json:"ingestion_rate"`
	IngestionRateStrategy     string              `yaml:"ingestion_rate_strategy" json:"ingestion_rate_strategy"`
	IngestionBurstSize        int                 `yaml:"ingestion_burst_size" json:"ingestion_burst_size"`
	AcceptHASamples           bool                `yaml:"accept_ha_samples" json:"accept_ha_samples"`
	HAClusterLabel            string              `yaml:"ha_cluster_label" json:"ha_cluster_label"`
	HAReplicaLabel            string              `yaml:"ha_replica_label" json:"ha_replica_label"`
	HAMaxClusters             int                 `yaml:"ha_max_clusters" json:"ha_max_clusters"`
	DropLabels                flagext.StringSlice `yaml:"drop_labels" json:"drop_labels"`
	MaxLabelNameLength        int                 `yaml:"max_label_name_length" json:"max_label_name_length"`
	MaxLabelValueLength       int                 `yaml:"max_label_value_length" json:"max_label_value_length"`
	MaxLabelNamesPerSeries    int                 `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`
	MaxMetadataLength         int                 `yaml:"max_metadata_length" json:"max_metadata_length"`
	RejectOldSamples          bool                `yaml:"reject_old_samples" json:"reject_old_samples"`
	RejectOldSamplesMaxAge    model.Duration      `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"`
	CreationGracePeriod       model.Duration      `yaml:"creation_grace_period" json:"creation_grace_period"`
	EnforceMetadataMetricName bool                `yaml:"enforce_metadata_metric_name" json:"enforce_metadata_metric_name"`
	EnforceMetricName         bool                `yaml:"enforce_metric_name" json:"enforce_metric_name"`
	IngestionTenantShardSize  int                 `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"`
	MetricRelabelConfigs      []*relabel.Config   `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs."`

	// Ingester enforced limits.
	// Series limits. "Local" limits are per ingester, "Global" limits are across the cluster.
	MaxSeriesPerQuery        int `yaml:"max_series_per_query" json:"max_series_per_query"`
	MaxSamplesPerQuery       int `yaml:"max_samples_per_query" json:"max_samples_per_query"`
	MaxLocalSeriesPerUser    int `yaml:"max_series_per_user" json:"max_series_per_user"`
	MaxLocalSeriesPerMetric  int `yaml:"max_series_per_metric" json:"max_series_per_metric"`
	MaxGlobalSeriesPerUser   int `yaml:"max_global_series_per_user" json:"max_global_series_per_user"`
	MaxGlobalSeriesPerMetric int `yaml:"max_global_series_per_metric" json:"max_global_series_per_metric"`
	MinChunkLength           int `yaml:"min_chunk_length" json:"min_chunk_length"`
	// Metadata limits, following the same local/global split as the series limits.
	MaxLocalMetricsWithMetadataPerUser  int `yaml:"max_metadata_per_user" json:"max_metadata_per_user"`
	MaxLocalMetadataPerMetric           int `yaml:"max_metadata_per_metric" json:"max_metadata_per_metric"`
	MaxGlobalMetricsWithMetadataPerUser int `yaml:"max_global_metadata_per_user" json:"max_global_metadata_per_user"`
	MaxGlobalMetadataPerMetric          int `yaml:"max_global_metadata_per_metric" json:"max_global_metadata_per_metric"`

	// Querier enforced limits.
	// MaxChunksPerQueryFromStore is the deprecated predecessor of MaxChunksPerQuery.
	MaxChunksPerQueryFromStore   int            `yaml:"max_chunks_per_query" json:"max_chunks_per_query"` // TODO Remove in Cortex 1.12.
	MaxChunksPerQuery            int            `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"`
	MaxFetchedSeriesPerQuery     int            `yaml:"max_fetched_series_per_query" json:"max_fetched_series_per_query"`
	MaxFetchedChunkBytesPerQuery int            `yaml:"max_fetched_chunk_bytes_per_query" json:"max_fetched_chunk_bytes_per_query"`
	MaxQueryLookback             model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"`
	MaxQueryLength               model.Duration `yaml:"max_query_length" json:"max_query_length"`
	MaxQueryParallelism          int            `yaml:"max_query_parallelism" json:"max_query_parallelism"`
	CardinalityLimit             int            `yaml:"cardinality_limit" json:"cardinality_limit"`
	MaxCacheFreshness            model.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"`
	MaxQueriersPerTenant         int            `yaml:"max_queriers_per_tenant" json:"max_queriers_per_tenant"`

	// Ruler defaults and limits.
	RulerEvaluationDelay        model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
	RulerTenantShardSize        int            `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
	RulerMaxRulesPerRuleGroup   int            `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
	RulerMaxRuleGroupsPerTenant int            `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`

	// Store-gateway.
	StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`

	// Compactor.
	CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"`

	// These options don't have CLI flags registered here because the flags are
	// registered in their own original config struct.
	S3SSEType                 string `yaml:"s3_sse_type" json:"s3_sse_type" doc:"nocli|description=S3 server-side encryption type. Required to enable server-side encryption overrides for a specific tenant. If not set, the default S3 client settings are used."`
	S3SSEKMSKeyID             string `yaml:"s3_sse_kms_key_id" json:"s3_sse_kms_key_id" doc:"nocli|description=S3 server-side encryption KMS Key ID. Ignored if the SSE type override is not set."`
	S3SSEKMSEncryptionContext string `yaml:"s3_sse_kms_encryption_context" json:"s3_sse_kms_encryption_context" doc:"nocli|description=S3 server-side encryption KMS encryption context. If unset and the key ID override is set, the encryption context will not be provided to S3. Ignored if the SSE type override is not set."`

	// Alertmanager.
	AlertmanagerReceiversBlockCIDRNetworks     flagext.CIDRSliceCSV `yaml:"alertmanager_receivers_firewall_block_cidr_networks" json:"alertmanager_receivers_firewall_block_cidr_networks"`
	AlertmanagerReceiversBlockPrivateAddresses bool                 `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`

	NotificationRateLimit               float64                  `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
	NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

	AlertmanagerMaxConfigSizeBytes             int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
	AlertmanagerMaxTemplatesCount              int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
	AlertmanagerMaxTemplateSizeBytes           int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
	AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
	AlertmanagerMaxAlertsCount                 int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
	AlertmanagerMaxAlertsSizeBytes             int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
}
   116  
// RegisterFlags registers the CLI flags for the limits on the given FlagSet,
// binding each flag to the corresponding field of l. Registering a flag with a
// default value also assigns that default to the bound field. Fields tagged
// "nocli" in the Limits struct (e.g. the S3 SSE settings and the metric
// relabel configs) are not registered here.
func (l *Limits) RegisterFlags(f *flag.FlagSet) {
	// Distributor limits.
	f.IntVar(&l.IngestionTenantShardSize, "distributor.ingestion-tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set both on ingesters and distributors. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.Float64Var(&l.IngestionRate, "distributor.ingestion-rate-limit", 25000, "Per-user ingestion rate limit in samples per second.")
	f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "local", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).")
	f.IntVar(&l.IngestionBurstSize, "distributor.ingestion-burst-size", 50000, "Per-user allowed ingestion burst size (in number of samples).")
	f.BoolVar(&l.AcceptHASamples, "distributor.ha-tracker.enable-for-all-users", false, "Flag to enable, for all users, handling of samples with external labels identifying replicas in an HA Prometheus setup.")
	f.StringVar(&l.HAClusterLabel, "distributor.ha-tracker.cluster", "cluster", "Prometheus label to look for in samples to identify a Prometheus HA cluster.")
	f.StringVar(&l.HAReplicaLabel, "distributor.ha-tracker.replica", "__replica__", "Prometheus label to look for in samples to identify a Prometheus HA replica.")
	f.IntVar(&l.HAMaxClusters, "distributor.ha-tracker.max-clusters", 0, "Maximum number of clusters that HA tracker will keep track of for single user. 0 to disable the limit.")
	f.Var(&l.DropLabels, "distributor.drop-label", "This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.")
	f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names")
	f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name")
	f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.")
	f.IntVar(&l.MaxMetadataLength, "validation.max-metadata-length", 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT.")
	f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", false, "Reject old samples.")
	// f.Var takes no default-value argument, so the default for duration flags
	// is written into the field before the flag is registered. The error
	// returned by Set is deliberately discarded for these constant literals.
	_ = l.RejectOldSamplesMaxAge.Set("14d")
	f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.")
	_ = l.CreationGracePeriod.Set("10m")
	f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
	f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.")
	f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.")

	// Ingester limits: series.
	f.IntVar(&l.MaxSeriesPerQuery, "ingester.max-series-per-query", 100000, "The maximum number of series for which a query can fetch samples from each ingester. This limit is enforced only in the ingesters (when querying samples not flushed to the storage yet) and it's a per-instance limit. This limit is ignored when running the Cortex blocks storage. When running Cortex with blocks storage use -querier.max-fetched-series-per-query limit instead.")
	f.IntVar(&l.MaxSamplesPerQuery, "ingester.max-samples-per-query", 1000000, "The maximum number of samples that a query can return. This limit only applies when running the Cortex chunks storage with -querier.ingester-streaming=false.")
	f.IntVar(&l.MaxLocalSeriesPerUser, "ingester.max-series-per-user", 5000000, "The maximum number of active series per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalSeriesPerMetric, "ingester.max-series-per-metric", 50000, "The maximum number of active series per metric name, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalSeriesPerUser, "ingester.max-global-series-per-user", 0, "The maximum number of active series per user, across the cluster before replication. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalSeriesPerMetric, "ingester.max-global-series-per-metric", 0, "The maximum number of active series per metric name, across the cluster before replication. 0 to disable.")
	f.IntVar(&l.MinChunkLength, "ingester.min-chunk-length", 0, "Minimum number of samples in an idle chunk to flush it to the store. Use with care, if chunks are less than this size they will be discarded. This option is ignored when running the Cortex blocks storage. 0 to disable.")

	// Ingester limits: metadata, followed by the querier / query-frontend limits.
	f.IntVar(&l.MaxLocalMetricsWithMetadataPerUser, "ingester.max-metadata-per-user", 8000, "The maximum number of active metrics with metadata per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalMetadataPerMetric, "ingester.max-metadata-per-metric", 10, "The maximum number of metadata per metric, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalMetricsWithMetadataPerUser, "ingester.max-global-metadata-per-user", 0, "The maximum number of active metrics with metadata per user, across the cluster. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalMetadataPerMetric, "ingester.max-global-metadata-per-metric", 0, "The maximum number of metadata per metric, across the cluster. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQueryFromStore, "store.query-chunk-limit", 2e6, "Deprecated. Use -querier.max-fetched-chunks-per-query CLI flag and its respective YAML config option instead. Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage only. When running the Cortex chunks storage, this limit is enforced in the querier and ruler, while when running the Cortex blocks storage this limit is enforced in the querier, ruler and store-gateway. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQuery, "querier.max-fetched-chunks-per-query", 0, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage. This limit is enforced in the querier, ruler and store-gateway. Takes precedence over the deprecated -store.query-chunk-limit. 0 to disable.")
	f.IntVar(&l.MaxFetchedSeriesPerQuery, "querier.max-fetched-series-per-query", 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable")
	f.IntVar(&l.MaxFetchedChunkBytesPerQuery, "querier.max-fetched-chunk-bytes-per-query", 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and storage. This limit is enforced in the querier and ruler only when running Cortex with blocks storage. 0 to disable.")
	// No default is assigned to the next two duration fields before
	// registration, so they keep whatever value l already holds.
	f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
	f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
	f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.")
	f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.")
	_ = l.MaxCacheFreshness.Set("1m")
	f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")
	f.IntVar(&l.MaxQueriersPerTenant, "frontend.max-queriers-per-tenant", 0, "Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.")

	// Ruler.
	f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
	f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.")
	f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.")

	// Compactor.
	f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.")

	// Store-gateway.
	f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")

	// Alertmanager.
	f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
	f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by  RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.")

	f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")

	// Make sure the per-integration map is non-nil before it is registered as
	// a flag value (presumably so that setting the flag can populate it —
	// NotificationRateLimitMap is defined elsewhere; confirm there).
	if l.NotificationRateLimitPerIntegration == nil {
		l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
	}
	f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
	f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplatesCount, "alertmanager.max-templates-count", 0, "Maximum number of templates in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplateSizeBytes, "alertmanager.max-template-size-bytes", 0, "Maximum size of single template in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
}
   191  
   192  // Validate the limits config and returns an error if the validation
   193  // doesn't pass
   194  func (l *Limits) Validate(shardByAllLabels bool) error {
   195  	// The ingester.max-global-series-per-user metric is not supported
   196  	// if shard-by-all-labels is disabled
   197  	if l.MaxGlobalSeriesPerUser > 0 && !shardByAllLabels {
   198  		return errMaxGlobalSeriesPerUserValidation
   199  	}
   200  
   201  	return nil
   202  }
   203  
   204  // UnmarshalYAML implements the yaml.Unmarshaler interface.
   205  func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
   206  	// We want to set l to the defaults and then overwrite it with the input.
   207  	// To make unmarshal fill the plain data struct rather than calling UnmarshalYAML
   208  	// again, we have to hide it using a type indirection.  See prometheus/config.
   209  
   210  	// During startup we wont have a default value so we don't want to overwrite them
   211  	if defaultLimits != nil {
   212  		*l = *defaultLimits
   213  		// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
   214  		l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
   215  	}
   216  	type plain Limits
   217  	return unmarshal((*plain)(l))
   218  }
   219  
   220  // UnmarshalJSON implements the json.Unmarshaler interface.
   221  func (l *Limits) UnmarshalJSON(data []byte) error {
   222  	// Like the YAML method above, we want to set l to the defaults and then overwrite
   223  	// it with the input. We prevent an infinite loop of calling UnmarshalJSON by hiding
   224  	// behind type indirection.
   225  	if defaultLimits != nil {
   226  		*l = *defaultLimits
   227  		// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
   228  		l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
   229  	}
   230  
   231  	type plain Limits
   232  	dec := json.NewDecoder(bytes.NewReader(data))
   233  	dec.DisallowUnknownFields()
   234  
   235  	return dec.Decode((*plain)(l))
   236  }
   237  
   238  func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) {
   239  	l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults))
   240  	for k, v := range defaults {
   241  		l.NotificationRateLimitPerIntegration[k] = v
   242  	}
   243  }
   244  
// defaultLimits holds the limits used as the starting point when unmarshalling
// Limits from YAML or JSON. When we load per-customer limits from disk, we
// want them to default to any values specified on the command line, not to the
// default command line values. This global contains those values; it is nil
// until SetDefaultLimitsForYAMLUnmarshalling has been called. I (Tom) cannot
// find a nicer way I'm afraid.
var defaultLimits *Limits
   250  
   251  // SetDefaultLimitsForYAMLUnmarshalling sets global default limits, used when loading
   252  // Limits from YAML files. This is used to ensure per-tenant limits are defaulted to
   253  // those values.
   254  func SetDefaultLimitsForYAMLUnmarshalling(defaults Limits) {
   255  	defaultLimits = &defaults
   256  }
   257  
// TenantLimits exposes per-tenant limit overrides to various resource usage limits.
type TenantLimits interface {
	// ByUserID gets the limits specific to a particular tenant, or nil if
	// there are none.
	ByUserID(userID string) *Limits

	// AllByUserID gets a mapping from every tenant ID to the limits for
	// that tenant.
	AllByUserID() map[string]*Limits
}
   266  
// Overrides holds the default limits together with a source of per-tenant
// limit overrides, and provides convenience functions for fetching the
// correct value of each limit for a given user.
type Overrides struct {
	// defaultLimits are the limits used for settings that cannot be
	// overridden per tenant (e.g. the ingestion rate strategy).
	defaultLimits *Limits
	// tenantLimits provides the per-tenant overrides.
	tenantLimits TenantLimits
}
   273  
   274  // NewOverrides makes a new Overrides.
   275  func NewOverrides(defaults Limits, tenantLimits TenantLimits) (*Overrides, error) {
   276  	return &Overrides{
   277  		tenantLimits:  tenantLimits,
   278  		defaultLimits: &defaults,
   279  	}, nil
   280  }
   281  
   282  // IngestionRate returns the limit on ingester rate (samples per second).
   283  func (o *Overrides) IngestionRate(userID string) float64 {
   284  	return o.getOverridesForUser(userID).IngestionRate
   285  }
   286  
   287  // IngestionRateStrategy returns whether the ingestion rate limit should be individually applied
   288  // to each distributor instance (local) or evenly shared across the cluster (global).
   289  func (o *Overrides) IngestionRateStrategy() string {
   290  	// The ingestion rate strategy can't be overridden on a per-tenant basis
   291  	return o.defaultLimits.IngestionRateStrategy
   292  }
   293  
   294  // IngestionBurstSize returns the burst size for ingestion rate.
   295  func (o *Overrides) IngestionBurstSize(userID string) int {
   296  	return o.getOverridesForUser(userID).IngestionBurstSize
   297  }
   298  
   299  // AcceptHASamples returns whether the distributor should track and accept samples from HA replicas for this user.
   300  func (o *Overrides) AcceptHASamples(userID string) bool {
   301  	return o.getOverridesForUser(userID).AcceptHASamples
   302  }
   303  
   304  // HAClusterLabel returns the cluster label to look for when deciding whether to accept a sample from a Prometheus HA replica.
   305  func (o *Overrides) HAClusterLabel(userID string) string {
   306  	return o.getOverridesForUser(userID).HAClusterLabel
   307  }
   308  
   309  // HAReplicaLabel returns the replica label to look for when deciding whether to accept a sample from a Prometheus HA replica.
   310  func (o *Overrides) HAReplicaLabel(userID string) string {
   311  	return o.getOverridesForUser(userID).HAReplicaLabel
   312  }
   313  
   314  // DropLabels returns the list of labels to be dropped when ingesting HA samples for the user.
   315  func (o *Overrides) DropLabels(userID string) flagext.StringSlice {
   316  	return o.getOverridesForUser(userID).DropLabels
   317  }
   318  
   319  // MaxLabelNameLength returns maximum length a label name can be.
   320  func (o *Overrides) MaxLabelNameLength(userID string) int {
   321  	return o.getOverridesForUser(userID).MaxLabelNameLength
   322  }
   323  
   324  // MaxLabelValueLength returns maximum length a label value can be. This also is
   325  // the maximum length of a metric name.
   326  func (o *Overrides) MaxLabelValueLength(userID string) int {
   327  	return o.getOverridesForUser(userID).MaxLabelValueLength
   328  }
   329  
   330  // MaxLabelNamesPerSeries returns maximum number of label/value pairs timeseries.
   331  func (o *Overrides) MaxLabelNamesPerSeries(userID string) int {
   332  	return o.getOverridesForUser(userID).MaxLabelNamesPerSeries
   333  }
   334  
   335  // MaxMetadataLength returns maximum length metadata can be. Metadata refers
   336  // to the Metric Name, HELP and UNIT.
   337  func (o *Overrides) MaxMetadataLength(userID string) int {
   338  	return o.getOverridesForUser(userID).MaxMetadataLength
   339  }
   340  
   341  // RejectOldSamples returns true when we should reject samples older than certain
   342  // age.
   343  func (o *Overrides) RejectOldSamples(userID string) bool {
   344  	return o.getOverridesForUser(userID).RejectOldSamples
   345  }
   346  
   347  // RejectOldSamplesMaxAge returns the age at which samples should be rejected.
   348  func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration {
   349  	return time.Duration(o.getOverridesForUser(userID).RejectOldSamplesMaxAge)
   350  }
   351  
   352  // CreationGracePeriod is misnamed, and actually returns how far into the future
   353  // we should accept samples.
   354  func (o *Overrides) CreationGracePeriod(userID string) time.Duration {
   355  	return time.Duration(o.getOverridesForUser(userID).CreationGracePeriod)
   356  }
   357  
   358  // MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit.
   359  func (o *Overrides) MaxSeriesPerQuery(userID string) int {
   360  	return o.getOverridesForUser(userID).MaxSeriesPerQuery
   361  }
   362  
   363  // MaxSamplesPerQuery returns the maximum number of samples in a query (from the ingester).
   364  func (o *Overrides) MaxSamplesPerQuery(userID string) int {
   365  	return o.getOverridesForUser(userID).MaxSamplesPerQuery
   366  }
   367  
   368  // MaxLocalSeriesPerUser returns the maximum number of series a user is allowed to store in a single ingester.
   369  func (o *Overrides) MaxLocalSeriesPerUser(userID string) int {
   370  	return o.getOverridesForUser(userID).MaxLocalSeriesPerUser
   371  }
   372  
   373  // MaxLocalSeriesPerMetric returns the maximum number of series allowed per metric in a single ingester.
   374  func (o *Overrides) MaxLocalSeriesPerMetric(userID string) int {
   375  	return o.getOverridesForUser(userID).MaxLocalSeriesPerMetric
   376  }
   377  
   378  // MaxGlobalSeriesPerUser returns the maximum number of series a user is allowed to store across the cluster.
   379  func (o *Overrides) MaxGlobalSeriesPerUser(userID string) int {
   380  	return o.getOverridesForUser(userID).MaxGlobalSeriesPerUser
   381  }
   382  
   383  // MaxGlobalSeriesPerMetric returns the maximum number of series allowed per metric across the cluster.
   384  func (o *Overrides) MaxGlobalSeriesPerMetric(userID string) int {
   385  	return o.getOverridesForUser(userID).MaxGlobalSeriesPerMetric
   386  }
   387  
   388  // MaxChunksPerQueryFromStore returns the maximum number of chunks allowed per query when fetching
   389  // chunks from the long-term storage.
   390  func (o *Overrides) MaxChunksPerQueryFromStore(userID string) int {
   391  	// If the new config option is set, then it should take precedence.
   392  	if value := o.getOverridesForUser(userID).MaxChunksPerQuery; value > 0 {
   393  		return value
   394  	}
   395  
   396  	// Fallback to the deprecated config option.
   397  	return o.getOverridesForUser(userID).MaxChunksPerQueryFromStore
   398  }
   399  
   400  func (o *Overrides) MaxChunksPerQuery(userID string) int {
   401  	return o.getOverridesForUser(userID).MaxChunksPerQuery
   402  }
   403  
   404  // MaxFetchedSeriesPerQuery returns the maximum number of series allowed per query when fetching
   405  // chunks from ingesters and blocks storage.
   406  func (o *Overrides) MaxFetchedSeriesPerQuery(userID string) int {
   407  	return o.getOverridesForUser(userID).MaxFetchedSeriesPerQuery
   408  }
   409  
   410  // MaxFetchedChunkBytesPerQuery returns the maximum number of bytes for chunks allowed per query when fetching
   411  // chunks from ingesters and blocks storage.
   412  func (o *Overrides) MaxFetchedChunkBytesPerQuery(userID string) int {
   413  	return o.getOverridesForUser(userID).MaxFetchedChunkBytesPerQuery
   414  }
   415  
   416  // MaxQueryLookback returns the max lookback period of queries.
   417  func (o *Overrides) MaxQueryLookback(userID string) time.Duration {
   418  	return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback)
   419  }
   420  
   421  // MaxQueryLength returns the limit of the length (in time) of a query.
   422  func (o *Overrides) MaxQueryLength(userID string) time.Duration {
   423  	return time.Duration(o.getOverridesForUser(userID).MaxQueryLength)
   424  }
   425  
   426  // MaxCacheFreshness returns the period after which results are cacheable,
   427  // to prevent caching of very recent results.
   428  func (o *Overrides) MaxCacheFreshness(userID string) time.Duration {
   429  	return time.Duration(o.getOverridesForUser(userID).MaxCacheFreshness)
   430  }
   431  
   432  // MaxQueriersPerUser returns the maximum number of queriers that can handle requests for this user.
   433  func (o *Overrides) MaxQueriersPerUser(userID string) int {
   434  	return o.getOverridesForUser(userID).MaxQueriersPerTenant
   435  }
   436  
   437  // MaxQueryParallelism returns the limit to the number of split queries the
   438  // frontend will process in parallel.
   439  func (o *Overrides) MaxQueryParallelism(userID string) int {
   440  	return o.getOverridesForUser(userID).MaxQueryParallelism
   441  }
   442  
   443  // EnforceMetricName whether to enforce the presence of a metric name.
   444  func (o *Overrides) EnforceMetricName(userID string) bool {
   445  	return o.getOverridesForUser(userID).EnforceMetricName
   446  }
   447  
   448  // EnforceMetadataMetricName whether to enforce the presence of a metric name on metadata.
   449  func (o *Overrides) EnforceMetadataMetricName(userID string) bool {
   450  	return o.getOverridesForUser(userID).EnforceMetadataMetricName
   451  }
   452  
   453  // CardinalityLimit returns the maximum number of timeseries allowed in a query.
   454  func (o *Overrides) CardinalityLimit(userID string) int {
   455  	return o.getOverridesForUser(userID).CardinalityLimit
   456  }
   457  
   458  // MinChunkLength returns the minimum size of chunk that will be saved by ingesters
   459  func (o *Overrides) MinChunkLength(userID string) int {
   460  	return o.getOverridesForUser(userID).MinChunkLength
   461  }
   462  
   463  // MaxLocalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store in a single ingester.
   464  func (o *Overrides) MaxLocalMetricsWithMetadataPerUser(userID string) int {
   465  	return o.getOverridesForUser(userID).MaxLocalMetricsWithMetadataPerUser
   466  }
   467  
   468  // MaxLocalMetadataPerMetric returns the maximum number of metadata allowed per metric in a single ingester.
   469  func (o *Overrides) MaxLocalMetadataPerMetric(userID string) int {
   470  	return o.getOverridesForUser(userID).MaxLocalMetadataPerMetric
   471  }
   472  
   473  // MaxGlobalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store across the cluster.
   474  func (o *Overrides) MaxGlobalMetricsWithMetadataPerUser(userID string) int {
   475  	return o.getOverridesForUser(userID).MaxGlobalMetricsWithMetadataPerUser
   476  }
   477  
   478  // MaxGlobalMetadataPerMetric returns the maximum number of metadata allowed per metric across the cluster.
   479  func (o *Overrides) MaxGlobalMetadataPerMetric(userID string) int {
   480  	return o.getOverridesForUser(userID).MaxGlobalMetadataPerMetric
   481  }
   482  
   483  // IngestionTenantShardSize returns the ingesters shard size for a given user.
   484  func (o *Overrides) IngestionTenantShardSize(userID string) int {
   485  	return o.getOverridesForUser(userID).IngestionTenantShardSize
   486  }
   487  
   488  // EvaluationDelay returns the rules evaluation delay for a given user.
   489  func (o *Overrides) EvaluationDelay(userID string) time.Duration {
   490  	return time.Duration(o.getOverridesForUser(userID).RulerEvaluationDelay)
   491  }
   492  
   493  // CompactorBlocksRetentionPeriod returns the retention period for a given user.
   494  func (o *Overrides) CompactorBlocksRetentionPeriod(userID string) time.Duration {
   495  	return time.Duration(o.getOverridesForUser(userID).CompactorBlocksRetentionPeriod)
   496  }
   497  
   498  // MetricRelabelConfigs returns the metric relabel configs for a given user.
   499  func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config {
   500  	return o.getOverridesForUser(userID).MetricRelabelConfigs
   501  }
   502  
   503  // RulerTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy.
   504  func (o *Overrides) RulerTenantShardSize(userID string) int {
   505  	return o.getOverridesForUser(userID).RulerTenantShardSize
   506  }
   507  
   508  // RulerMaxRulesPerRuleGroup returns the maximum number of rules per rule group for a given user.
   509  func (o *Overrides) RulerMaxRulesPerRuleGroup(userID string) int {
   510  	return o.getOverridesForUser(userID).RulerMaxRulesPerRuleGroup
   511  }
   512  
   513  // RulerMaxRuleGroupsPerTenant returns the maximum number of rule groups for a given user.
   514  func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int {
   515  	return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant
   516  }
   517  
   518  // StoreGatewayTenantShardSize returns the store-gateway shard size for a given user.
   519  func (o *Overrides) StoreGatewayTenantShardSize(userID string) int {
   520  	return o.getOverridesForUser(userID).StoreGatewayTenantShardSize
   521  }
   522  
   523  // MaxHAClusters returns maximum number of clusters that HA tracker will track for a user.
   524  func (o *Overrides) MaxHAClusters(user string) int {
   525  	return o.getOverridesForUser(user).HAMaxClusters
   526  }
   527  
   528  // S3SSEType returns the per-tenant S3 SSE type.
   529  func (o *Overrides) S3SSEType(user string) string {
   530  	return o.getOverridesForUser(user).S3SSEType
   531  }
   532  
   533  // S3SSEKMSKeyID returns the per-tenant S3 KMS-SSE key id.
   534  func (o *Overrides) S3SSEKMSKeyID(user string) string {
   535  	return o.getOverridesForUser(user).S3SSEKMSKeyID
   536  }
   537  
   538  // S3SSEKMSEncryptionContext returns the per-tenant S3 KMS-SSE encryption context.
   539  func (o *Overrides) S3SSEKMSEncryptionContext(user string) string {
   540  	return o.getOverridesForUser(user).S3SSEKMSEncryptionContext
   541  }
   542  
   543  // AlertmanagerReceiversBlockCIDRNetworks returns the list of network CIDRs that should be blocked
   544  // in the Alertmanager receivers for the given user.
   545  func (o *Overrides) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR {
   546  	return o.getOverridesForUser(user).AlertmanagerReceiversBlockCIDRNetworks
   547  }
   548  
   549  // AlertmanagerReceiversBlockPrivateAddresses returns true if private addresses should be blocked
   550  // in the Alertmanager receivers for the given user.
   551  func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool {
   552  	return o.getOverridesForUser(user).AlertmanagerReceiversBlockPrivateAddresses
   553  }
   554  
   555  // Notification limits are special. Limits are returned in following order:
   556  // 1. per-tenant limits for given integration
   557  // 2. default limits for given integration
   558  // 3. per-tenant limits
   559  // 4. default limits
   560  func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 {
   561  	u := o.getOverridesForUser(user)
   562  	if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok {
   563  		return n
   564  	}
   565  
   566  	return u.NotificationRateLimit
   567  }
   568  
   569  func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit {
   570  	l := o.getNotificationLimitForUser(user, integration)
   571  	if l == 0 || math.IsInf(l, 1) {
   572  		return rate.Inf // No rate limit.
   573  	}
   574  
   575  	if l < 0 {
   576  		l = 0 // No notifications will be sent.
   577  	}
   578  	return rate.Limit(l)
   579  }
   580  
   581  const maxInt = int(^uint(0) >> 1)
   582  
   583  func (o *Overrides) NotificationBurstSize(user string, integration string) int {
   584  	// Burst size is computed from rate limit. Rate limit is already normalized to [0, +inf), where 0 means disabled.
   585  	l := o.NotificationRateLimit(user, integration)
   586  	if l == 0 {
   587  		return 0
   588  	}
   589  
   590  	// floats can be larger than max int. This also handles case where l == rate.Inf.
   591  	if float64(l) >= float64(maxInt) {
   592  		return maxInt
   593  	}
   594  
   595  	// For values between (0, 1), allow single notification per second (every 1/limit seconds).
   596  	if l < 1 {
   597  		return 1
   598  	}
   599  
   600  	return int(l)
   601  }
   602  
   603  func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int {
   604  	return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes
   605  }
   606  
   607  func (o *Overrides) AlertmanagerMaxTemplatesCount(userID string) int {
   608  	return o.getOverridesForUser(userID).AlertmanagerMaxTemplatesCount
   609  }
   610  
   611  func (o *Overrides) AlertmanagerMaxTemplateSize(userID string) int {
   612  	return o.getOverridesForUser(userID).AlertmanagerMaxTemplateSizeBytes
   613  }
   614  
   615  func (o *Overrides) AlertmanagerMaxDispatcherAggregationGroups(userID string) int {
   616  	return o.getOverridesForUser(userID).AlertmanagerMaxDispatcherAggregationGroups
   617  }
   618  
   619  func (o *Overrides) AlertmanagerMaxAlertsCount(userID string) int {
   620  	return o.getOverridesForUser(userID).AlertmanagerMaxAlertsCount
   621  }
   622  
   623  func (o *Overrides) AlertmanagerMaxAlertsSizeBytes(userID string) int {
   624  	return o.getOverridesForUser(userID).AlertmanagerMaxAlertsSizeBytes
   625  }
   626  
   627  func (o *Overrides) getOverridesForUser(userID string) *Limits {
   628  	if o.tenantLimits != nil {
   629  		l := o.tenantLimits.ByUserID(userID)
   630  		if l != nil {
   631  			return l
   632  		}
   633  	}
   634  	return o.defaultLimits
   635  }
   636  
   637  // SmallestPositiveIntPerTenant is returning the minimal positive value of the
   638  // supplied limit function for all given tenants.
   639  func SmallestPositiveIntPerTenant(tenantIDs []string, f func(string) int) int {
   640  	var result *int
   641  	for _, tenantID := range tenantIDs {
   642  		v := f(tenantID)
   643  		if result == nil || v < *result {
   644  			result = &v
   645  		}
   646  	}
   647  	if result == nil {
   648  		return 0
   649  	}
   650  	return *result
   651  }
   652  
   653  // SmallestPositiveNonZeroIntPerTenant is returning the minimal positive and
   654  // non-zero value of the supplied limit function for all given tenants. In many
   655  // limits a value of 0 means unlimted so the method will return 0 only if all
   656  // inputs have a limit of 0 or an empty tenant list is given.
   657  func SmallestPositiveNonZeroIntPerTenant(tenantIDs []string, f func(string) int) int {
   658  	var result *int
   659  	for _, tenantID := range tenantIDs {
   660  		v := f(tenantID)
   661  		if v > 0 && (result == nil || v < *result) {
   662  			result = &v
   663  		}
   664  	}
   665  	if result == nil {
   666  		return 0
   667  	}
   668  	return *result
   669  }
   670  
   671  // SmallestPositiveNonZeroDurationPerTenant is returning the minimal positive
   672  // and non-zero value of the supplied limit function for all given tenants. In
   673  // many limits a value of 0 means unlimted so the method will return 0 only if
   674  // all inputs have a limit of 0 or an empty tenant list is given.
   675  func SmallestPositiveNonZeroDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
   676  	var result *time.Duration
   677  	for _, tenantID := range tenantIDs {
   678  		v := f(tenantID)
   679  		if v > 0 && (result == nil || v < *result) {
   680  			result = &v
   681  		}
   682  	}
   683  	if result == nil {
   684  		return 0
   685  	}
   686  	return *result
   687  }
   688  
   689  // MaxDurationPerTenant is returning the maximum duration per tenant. Without
   690  // tenants given it will return a time.Duration(0).
   691  func MaxDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
   692  	result := time.Duration(0)
   693  	for _, tenantID := range tenantIDs {
   694  		v := f(tenantID)
   695  		if v > result {
   696  			result = v
   697  		}
   698  	}
   699  	return result
   700  }