// Copyright (c) The Cortex Authors.
// Licensed under the Apache License 2.0.

package validation

import (
	"bytes"
	"encoding/json"
	"errors"
	"flag"
	"math"
	"strings"
	"time"

	"github.com/prometheus/common/model"
	"github.com/prometheus/prometheus/model/relabel"
	"golang.org/x/time/rate"

	"github.com/thanos-io/thanos/internal/cortex/util/flagext"
)

// errMaxGlobalSeriesPerUserValidation is returned by Limits.Validate when a
// global per-user series limit is configured while shard-by-all-labels is off.
var errMaxGlobalSeriesPerUserValidation = errors.New("The ingester.max-global-series-per-user limit is unsupported if distributor.shard-by-all-labels is disabled")

// Supported values for enum limits (the ingestion rate limit strategy).
const (
	LocalIngestionRateStrategy  = "local"
	GlobalIngestionRateStrategy = "global"
)

// LimitError are errors that do not comply with the limits specified.
type LimitError string

// Error implements the error interface by returning the underlying string.
func (e LimitError) Error() string {
	return string(e)
}

// Limits describe all the limits for users; can be used to describe global default
// limits via flags, or per-user limits via yaml config.
//
// Every field carries matching yaml and json keys. Fields tagged with
// doc:"nocli|..." have no flag registered in RegisterFlags.
type Limits struct {
	// Distributor enforced limits.
	IngestionRate             float64             `yaml:"ingestion_rate" json:"ingestion_rate"`
	IngestionRateStrategy     string              `yaml:"ingestion_rate_strategy" json:"ingestion_rate_strategy"`
	IngestionBurstSize        int                 `yaml:"ingestion_burst_size" json:"ingestion_burst_size"`
	AcceptHASamples           bool                `yaml:"accept_ha_samples" json:"accept_ha_samples"`
	HAClusterLabel            string              `yaml:"ha_cluster_label" json:"ha_cluster_label"`
	HAReplicaLabel            string              `yaml:"ha_replica_label" json:"ha_replica_label"`
	HAMaxClusters             int                 `yaml:"ha_max_clusters" json:"ha_max_clusters"`
	DropLabels                flagext.StringSlice `yaml:"drop_labels" json:"drop_labels"`
	MaxLabelNameLength        int                 `yaml:"max_label_name_length" json:"max_label_name_length"`
	MaxLabelValueLength       int                 `yaml:"max_label_value_length" json:"max_label_value_length"`
	MaxLabelNamesPerSeries    int                 `yaml:"max_label_names_per_series" json:"max_label_names_per_series"`
	MaxMetadataLength         int                 `yaml:"max_metadata_length" json:"max_metadata_length"`
	RejectOldSamples          bool                `yaml:"reject_old_samples" json:"reject_old_samples"`
	RejectOldSamplesMaxAge    model.Duration      `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"`
	CreationGracePeriod       model.Duration      `yaml:"creation_grace_period" json:"creation_grace_period"`
	EnforceMetadataMetricName bool                `yaml:"enforce_metadata_metric_name" json:"enforce_metadata_metric_name"`
	EnforceMetricName         bool                `yaml:"enforce_metric_name" json:"enforce_metric_name"`
	IngestionTenantShardSize  int                 `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"`
	MetricRelabelConfigs      []*relabel.Config   `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs."`

	// Ingester enforced limits.
	// Series
	MaxSeriesPerQuery        int `yaml:"max_series_per_query" json:"max_series_per_query"`
	MaxSamplesPerQuery       int `yaml:"max_samples_per_query" json:"max_samples_per_query"`
	MaxLocalSeriesPerUser    int `yaml:"max_series_per_user" json:"max_series_per_user"`
	MaxLocalSeriesPerMetric  int `yaml:"max_series_per_metric" json:"max_series_per_metric"`
	MaxGlobalSeriesPerUser   int `yaml:"max_global_series_per_user" json:"max_global_series_per_user"`
	MaxGlobalSeriesPerMetric int `yaml:"max_global_series_per_metric" json:"max_global_series_per_metric"`
	MinChunkLength           int `yaml:"min_chunk_length" json:"min_chunk_length"`
	// Metadata
	MaxLocalMetricsWithMetadataPerUser  int `yaml:"max_metadata_per_user" json:"max_metadata_per_user"`
	MaxLocalMetadataPerMetric           int `yaml:"max_metadata_per_metric" json:"max_metadata_per_metric"`
	MaxGlobalMetricsWithMetadataPerUser int `yaml:"max_global_metadata_per_user" json:"max_global_metadata_per_user"`
	MaxGlobalMetadataPerMetric          int `yaml:"max_global_metadata_per_metric" json:"max_global_metadata_per_metric"`

	// Querier enforced limits.
	MaxChunksPerQueryFromStore   int            `yaml:"max_chunks_per_query" json:"max_chunks_per_query"` // TODO Remove in Cortex 1.12.
	MaxChunksPerQuery            int            `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"`
	MaxFetchedSeriesPerQuery     int            `yaml:"max_fetched_series_per_query" json:"max_fetched_series_per_query"`
	MaxFetchedChunkBytesPerQuery int            `yaml:"max_fetched_chunk_bytes_per_query" json:"max_fetched_chunk_bytes_per_query"`
	MaxQueryLookback             model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"`
	MaxQueryLength               model.Duration `yaml:"max_query_length" json:"max_query_length"`
	MaxQueryParallelism          int            `yaml:"max_query_parallelism" json:"max_query_parallelism"`
	CardinalityLimit             int            `yaml:"cardinality_limit" json:"cardinality_limit"`
	MaxCacheFreshness            model.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"`
	MaxQueriersPerTenant         int            `yaml:"max_queriers_per_tenant" json:"max_queriers_per_tenant"`

	// Ruler defaults and limits.
	RulerEvaluationDelay        model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"`
	RulerTenantShardSize        int            `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"`
	RulerMaxRulesPerRuleGroup   int            `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"`
	RulerMaxRuleGroupsPerTenant int            `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"`

	// Store-gateway.
	StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"`

	// Compactor.
	CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"`
	CompactorTenantShardSize       int            `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"`

	// This config doesn't have a CLI flag registered here because they're registered in
	// their own original config struct.
	S3SSEType                 string `yaml:"s3_sse_type" json:"s3_sse_type" doc:"nocli|description=S3 server-side encryption type. Required to enable server-side encryption overrides for a specific tenant. If not set, the default S3 client settings are used."`
	S3SSEKMSKeyID             string `yaml:"s3_sse_kms_key_id" json:"s3_sse_kms_key_id" doc:"nocli|description=S3 server-side encryption KMS Key ID. Ignored if the SSE type override is not set."`
	S3SSEKMSEncryptionContext string `yaml:"s3_sse_kms_encryption_context" json:"s3_sse_kms_encryption_context" doc:"nocli|description=S3 server-side encryption KMS encryption context. If unset and the key ID override is set, the encryption context will not be provided to S3. Ignored if the SSE type override is not set."`

	// Alertmanager.
	AlertmanagerReceiversBlockCIDRNetworks     flagext.CIDRSliceCSV `yaml:"alertmanager_receivers_firewall_block_cidr_networks" json:"alertmanager_receivers_firewall_block_cidr_networks"`
	AlertmanagerReceiversBlockPrivateAddresses bool                 `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"`

	NotificationRateLimit               float64                  `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"`
	NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"`

	AlertmanagerMaxConfigSizeBytes             int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"`
	AlertmanagerMaxTemplatesCount              int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"`
	AlertmanagerMaxTemplateSizeBytes           int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"`
	AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"`
	AlertmanagerMaxAlertsCount                 int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"`
	AlertmanagerMaxAlertsSizeBytes             int `yaml:"alertmanager_max_alerts_size_bytes" json:"alertmanager_max_alerts_size_bytes"`
}

// RegisterFlags adds the flags required to config this to the given FlagSet.
//
// Each flag is bound directly to a field of l, so parsing f writes the parsed
// values into l. Flags registered with f.Var take no default argument; where a
// non-zero default is wanted it is seeded by calling Set on the field just
// before the flag is registered.
func (l *Limits) RegisterFlags(f *flag.FlagSet) {
	f.IntVar(&l.IngestionTenantShardSize, "distributor.ingestion-tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set both on ingesters and distributors. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.Float64Var(&l.IngestionRate, "distributor.ingestion-rate-limit", 25000, "Per-user ingestion rate limit in samples per second.")
	f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "local", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).")
	f.IntVar(&l.IngestionBurstSize, "distributor.ingestion-burst-size", 50000, "Per-user allowed ingestion burst size (in number of samples).")
	f.BoolVar(&l.AcceptHASamples, "distributor.ha-tracker.enable-for-all-users", false, "Flag to enable, for all users, handling of samples with external labels identifying replicas in an HA Prometheus setup.")
	f.StringVar(&l.HAClusterLabel, "distributor.ha-tracker.cluster", "cluster", "Prometheus label to look for in samples to identify a Prometheus HA cluster.")
	f.StringVar(&l.HAReplicaLabel, "distributor.ha-tracker.replica", "__replica__", "Prometheus label to look for in samples to identify a Prometheus HA replica.")
	f.IntVar(&l.HAMaxClusters, "distributor.ha-tracker.max-clusters", 0, "Maximum number of clusters that HA tracker will keep track of for single user. 0 to disable the limit.")
	f.Var(&l.DropLabels, "distributor.drop-label", "This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.")
	f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names")
	f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name")
	f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.")
	f.IntVar(&l.MaxMetadataLength, "validation.max-metadata-length", 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT.")
	f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", false, "Reject old samples.")
	// Seed the default before registering; the input is a fixed literal and the
	// error returned by Set is deliberately discarded.
	_ = l.RejectOldSamplesMaxAge.Set("14d")
	f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.")
	// Same default-seeding pattern as above.
	_ = l.CreationGracePeriod.Set("10m")
	f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.")
	f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.")
	f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.")

	f.IntVar(&l.MaxSeriesPerQuery, "ingester.max-series-per-query", 100000, "The maximum number of series for which a query can fetch samples from each ingester. This limit is enforced only in the ingesters (when querying samples not flushed to the storage yet) and it's a per-instance limit. This limit is ignored when running the Cortex blocks storage. When running Cortex with blocks storage use -querier.max-fetched-series-per-query limit instead.")
	f.IntVar(&l.MaxSamplesPerQuery, "ingester.max-samples-per-query", 1000000, "The maximum number of samples that a query can return. This limit only applies when running the Cortex chunks storage with -querier.ingester-streaming=false.")
	f.IntVar(&l.MaxLocalSeriesPerUser, "ingester.max-series-per-user", 5000000, "The maximum number of active series per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalSeriesPerMetric, "ingester.max-series-per-metric", 50000, "The maximum number of active series per metric name, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalSeriesPerUser, "ingester.max-global-series-per-user", 0, "The maximum number of active series per user, across the cluster before replication. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalSeriesPerMetric, "ingester.max-global-series-per-metric", 0, "The maximum number of active series per metric name, across the cluster before replication. 0 to disable.")
	f.IntVar(&l.MinChunkLength, "ingester.min-chunk-length", 0, "Minimum number of samples in an idle chunk to flush it to the store. Use with care, if chunks are less than this size they will be discarded. This option is ignored when running the Cortex blocks storage. 0 to disable.")

	f.IntVar(&l.MaxLocalMetricsWithMetadataPerUser, "ingester.max-metadata-per-user", 8000, "The maximum number of active metrics with metadata per user, per ingester. 0 to disable.")
	f.IntVar(&l.MaxLocalMetadataPerMetric, "ingester.max-metadata-per-metric", 10, "The maximum number of metadata per metric, per ingester. 0 to disable.")
	f.IntVar(&l.MaxGlobalMetricsWithMetadataPerUser, "ingester.max-global-metadata-per-user", 0, "The maximum number of active metrics with metadata per user, across the cluster. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.")
	f.IntVar(&l.MaxGlobalMetadataPerMetric, "ingester.max-global-metadata-per-metric", 0, "The maximum number of metadata per metric, across the cluster. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQueryFromStore, "store.query-chunk-limit", 2e6, "Deprecated. Use -querier.max-fetched-chunks-per-query CLI flag and its respective YAML config option instead. Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage only. When running the Cortex chunks storage, this limit is enforced in the querier and ruler, while when running the Cortex blocks storage this limit is enforced in the querier, ruler and store-gateway. 0 to disable.")
	f.IntVar(&l.MaxChunksPerQuery, "querier.max-fetched-chunks-per-query", 0, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage. This limit is enforced in the querier, ruler and store-gateway. Takes precedence over the deprecated -store.query-chunk-limit. 0 to disable.")
	f.IntVar(&l.MaxFetchedSeriesPerQuery, "querier.max-fetched-series-per-query", 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable")
	f.IntVar(&l.MaxFetchedChunkBytesPerQuery, "querier.max-fetched-chunk-bytes-per-query", 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and storage. This limit is enforced in the querier and ruler only when running Cortex with blocks storage. 0 to disable.")
	f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 0 to disable.")
	f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.")
	f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.")
	f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.")
	// Same default-seeding pattern as for the other f.Var flags above.
	_ = l.MaxCacheFreshness.Set("1m")
	f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.")
	f.IntVar(&l.MaxQueriersPerTenant, "frontend.max-queriers-per-tenant", 0, "Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.")

	f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.")
	f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")
	f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.")
	f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.")

	f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.")
	f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")

	// Store-gateway.
	f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.")

	// Alertmanager.
	f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.")
	f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.")

	f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. Negative value = no notifications are allowed.")

	// Give the flag a non-nil map to work with when the caller has not set one.
	if l.NotificationRateLimitPerIntegration == nil {
		l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{}
	}
	f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".")
	f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplatesCount, "alertmanager.max-templates-count", 0, "Maximum number of templates in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxTemplateSizeBytes, "alertmanager.max-template-size-bytes", 0, "Maximum size of single template in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
	f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.")
}

// Validate the limits config and returns an error if the validation
// doesn't pass.
//
// The only check performed is that MaxGlobalSeriesPerUser is not positive
// while shardByAllLabels is false; in that case
// errMaxGlobalSeriesPerUserValidation is returned, otherwise nil.
func (l *Limits) Validate(shardByAllLabels bool) error {
	// The ingester.max-global-series-per-user limit is not supported
	// if shard-by-all-labels is disabled
	if l.MaxGlobalSeriesPerUser > 0 && !shardByAllLabels {
		return errMaxGlobalSeriesPerUserValidation
	}

	return nil
}

// UnmarshalYAML implements the yaml.Unmarshaler interface.
//
// When package-level default limits have been set, l is first overwritten with
// them and the input is then decoded on top, so keys absent from the input
// keep the default values.
func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// We want to set l to the defaults and then overwrite it with the input.
	// To make unmarshal fill the plain data struct rather than calling UnmarshalYAML
	// again, we have to hide it using a type indirection. See prometheus/config.

	// During startup we won't have a default value so we don't want to overwrite them
	if defaultLimits != nil {
		*l = *defaultLimits
		// Make copy of default limits. Otherwise unmarshalling would modify map in default limits.
		l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration)
	}
	type plain Limits
	return unmarshal((*plain)(l))
}

// UnmarshalJSON implements the json.Unmarshaler interface.
227 func (l *Limits) UnmarshalJSON(data []byte) error { 228 // Like the YAML method above, we want to set l to the defaults and then overwrite 229 // it with the input. We prevent an infinite loop of calling UnmarshalJSON by hiding 230 // behind type indirection. 231 if defaultLimits != nil { 232 *l = *defaultLimits 233 // Make copy of default limits. Otherwise unmarshalling would modify map in default limits. 234 l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration) 235 } 236 237 type plain Limits 238 dec := json.NewDecoder(bytes.NewReader(data)) 239 dec.DisallowUnknownFields() 240 241 return dec.Decode((*plain)(l)) 242 } 243 244 func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) { 245 l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults)) 246 for k, v := range defaults { 247 l.NotificationRateLimitPerIntegration[k] = v 248 } 249 } 250 251 // When we load YAML from disk, we want the various per-customer limits 252 // to default to any values specified on the command line, not default 253 // command line values. This global contains those values. I (Tom) cannot 254 // find a nicer way I'm afraid. 255 var defaultLimits *Limits 256 257 // SetDefaultLimitsForYAMLUnmarshalling sets global default limits, used when loading 258 // Limits from YAML files. This is used to ensure per-tenant limits are defaulted to 259 // those values. 
260 func SetDefaultLimitsForYAMLUnmarshalling(defaults Limits) { 261 defaultLimits = &defaults 262 } 263 264 // TenantLimits exposes per-tenant limit overrides to various resource usage limits 265 type TenantLimits interface { 266 // ByUserID gets limits specific to a particular tenant or nil if there are none 267 ByUserID(userID string) *Limits 268 269 // AllByUserID gets a mapping of all tenant IDs and limits for that user 270 AllByUserID() map[string]*Limits 271 } 272 273 // Overrides periodically fetch a set of per-user overrides, and provides convenience 274 // functions for fetching the correct value. 275 type Overrides struct { 276 defaultLimits *Limits 277 tenantLimits TenantLimits 278 } 279 280 // NewOverrides makes a new Overrides. 281 func NewOverrides(defaults Limits, tenantLimits TenantLimits) (*Overrides, error) { 282 return &Overrides{ 283 tenantLimits: tenantLimits, 284 defaultLimits: &defaults, 285 }, nil 286 } 287 288 // IngestionRate returns the limit on ingester rate (samples per second). 289 func (o *Overrides) IngestionRate(userID string) float64 { 290 return o.getOverridesForUser(userID).IngestionRate 291 } 292 293 // IngestionRateStrategy returns whether the ingestion rate limit should be individually applied 294 // to each distributor instance (local) or evenly shared across the cluster (global). 295 func (o *Overrides) IngestionRateStrategy() string { 296 // The ingestion rate strategy can't be overridden on a per-tenant basis 297 return o.defaultLimits.IngestionRateStrategy 298 } 299 300 // IngestionBurstSize returns the burst size for ingestion rate. 301 func (o *Overrides) IngestionBurstSize(userID string) int { 302 return o.getOverridesForUser(userID).IngestionBurstSize 303 } 304 305 // AcceptHASamples returns whether the distributor should track and accept samples from HA replicas for this user. 
306 func (o *Overrides) AcceptHASamples(userID string) bool { 307 return o.getOverridesForUser(userID).AcceptHASamples 308 } 309 310 // HAClusterLabel returns the cluster label to look for when deciding whether to accept a sample from a Prometheus HA replica. 311 func (o *Overrides) HAClusterLabel(userID string) string { 312 return o.getOverridesForUser(userID).HAClusterLabel 313 } 314 315 // HAReplicaLabel returns the replica label to look for when deciding whether to accept a sample from a Prometheus HA replica. 316 func (o *Overrides) HAReplicaLabel(userID string) string { 317 return o.getOverridesForUser(userID).HAReplicaLabel 318 } 319 320 // DropLabels returns the list of labels to be dropped when ingesting HA samples for the user. 321 func (o *Overrides) DropLabels(userID string) flagext.StringSlice { 322 return o.getOverridesForUser(userID).DropLabels 323 } 324 325 // MaxLabelNameLength returns maximum length a label name can be. 326 func (o *Overrides) MaxLabelNameLength(userID string) int { 327 return o.getOverridesForUser(userID).MaxLabelNameLength 328 } 329 330 // MaxLabelValueLength returns maximum length a label value can be. This also is 331 // the maximum length of a metric name. 332 func (o *Overrides) MaxLabelValueLength(userID string) int { 333 return o.getOverridesForUser(userID).MaxLabelValueLength 334 } 335 336 // MaxLabelNamesPerSeries returns maximum number of label/value pairs timeseries. 337 func (o *Overrides) MaxLabelNamesPerSeries(userID string) int { 338 return o.getOverridesForUser(userID).MaxLabelNamesPerSeries 339 } 340 341 // MaxMetadataLength returns maximum length metadata can be. Metadata refers 342 // to the Metric Name, HELP and UNIT. 343 func (o *Overrides) MaxMetadataLength(userID string) int { 344 return o.getOverridesForUser(userID).MaxMetadataLength 345 } 346 347 // RejectOldSamples returns true when we should reject samples older than certain 348 // age. 
349 func (o *Overrides) RejectOldSamples(userID string) bool { 350 return o.getOverridesForUser(userID).RejectOldSamples 351 } 352 353 // RejectOldSamplesMaxAge returns the age at which samples should be rejected. 354 func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration { 355 return time.Duration(o.getOverridesForUser(userID).RejectOldSamplesMaxAge) 356 } 357 358 // CreationGracePeriod is misnamed, and actually returns how far into the future 359 // we should accept samples. 360 func (o *Overrides) CreationGracePeriod(userID string) time.Duration { 361 return time.Duration(o.getOverridesForUser(userID).CreationGracePeriod) 362 } 363 364 // MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit. 365 func (o *Overrides) MaxSeriesPerQuery(userID string) int { 366 return o.getOverridesForUser(userID).MaxSeriesPerQuery 367 } 368 369 // MaxSamplesPerQuery returns the maximum number of samples in a query (from the ingester). 370 func (o *Overrides) MaxSamplesPerQuery(userID string) int { 371 return o.getOverridesForUser(userID).MaxSamplesPerQuery 372 } 373 374 // MaxLocalSeriesPerUser returns the maximum number of series a user is allowed to store in a single ingester. 375 func (o *Overrides) MaxLocalSeriesPerUser(userID string) int { 376 return o.getOverridesForUser(userID).MaxLocalSeriesPerUser 377 } 378 379 // MaxLocalSeriesPerMetric returns the maximum number of series allowed per metric in a single ingester. 380 func (o *Overrides) MaxLocalSeriesPerMetric(userID string) int { 381 return o.getOverridesForUser(userID).MaxLocalSeriesPerMetric 382 } 383 384 // MaxGlobalSeriesPerUser returns the maximum number of series a user is allowed to store across the cluster. 385 func (o *Overrides) MaxGlobalSeriesPerUser(userID string) int { 386 return o.getOverridesForUser(userID).MaxGlobalSeriesPerUser 387 } 388 389 // MaxGlobalSeriesPerMetric returns the maximum number of series allowed per metric across the cluster. 
390 func (o *Overrides) MaxGlobalSeriesPerMetric(userID string) int { 391 return o.getOverridesForUser(userID).MaxGlobalSeriesPerMetric 392 } 393 394 // MaxChunksPerQueryFromStore returns the maximum number of chunks allowed per query when fetching 395 // chunks from the long-term storage. 396 func (o *Overrides) MaxChunksPerQueryFromStore(userID string) int { 397 // If the new config option is set, then it should take precedence. 398 if value := o.getOverridesForUser(userID).MaxChunksPerQuery; value > 0 { 399 return value 400 } 401 402 // Fallback to the deprecated config option. 403 return o.getOverridesForUser(userID).MaxChunksPerQueryFromStore 404 } 405 406 func (o *Overrides) MaxChunksPerQuery(userID string) int { 407 return o.getOverridesForUser(userID).MaxChunksPerQuery 408 } 409 410 // MaxFetchedSeriesPerQuery returns the maximum number of series allowed per query when fetching 411 // chunks from ingesters and blocks storage. 412 func (o *Overrides) MaxFetchedSeriesPerQuery(userID string) int { 413 return o.getOverridesForUser(userID).MaxFetchedSeriesPerQuery 414 } 415 416 // MaxFetchedChunkBytesPerQuery returns the maximum number of bytes for chunks allowed per query when fetching 417 // chunks from ingesters and blocks storage. 418 func (o *Overrides) MaxFetchedChunkBytesPerQuery(userID string) int { 419 return o.getOverridesForUser(userID).MaxFetchedChunkBytesPerQuery 420 } 421 422 // MaxQueryLookback returns the max lookback period of queries. 423 func (o *Overrides) MaxQueryLookback(userID string) time.Duration { 424 return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback) 425 } 426 427 // MaxQueryLength returns the limit of the length (in time) of a query. 428 func (o *Overrides) MaxQueryLength(userID string) time.Duration { 429 return time.Duration(o.getOverridesForUser(userID).MaxQueryLength) 430 } 431 432 // MaxCacheFreshness returns the period after which results are cacheable, 433 // to prevent caching of very recent results. 
434 func (o *Overrides) MaxCacheFreshness(userID string) time.Duration { 435 return time.Duration(o.getOverridesForUser(userID).MaxCacheFreshness) 436 } 437 438 // MaxQueriersPerUser returns the maximum number of queriers that can handle requests for this user. 439 func (o *Overrides) MaxQueriersPerUser(userID string) int { 440 return o.getOverridesForUser(userID).MaxQueriersPerTenant 441 } 442 443 // MaxQueryParallelism returns the limit to the number of split queries the 444 // frontend will process in parallel. 445 func (o *Overrides) MaxQueryParallelism(userID string) int { 446 return o.getOverridesForUser(userID).MaxQueryParallelism 447 } 448 449 // EnforceMetricName whether to enforce the presence of a metric name. 450 func (o *Overrides) EnforceMetricName(userID string) bool { 451 return o.getOverridesForUser(userID).EnforceMetricName 452 } 453 454 // EnforceMetadataMetricName whether to enforce the presence of a metric name on metadata. 455 func (o *Overrides) EnforceMetadataMetricName(userID string) bool { 456 return o.getOverridesForUser(userID).EnforceMetadataMetricName 457 } 458 459 // CardinalityLimit returns the maximum number of timeseries allowed in a query. 460 func (o *Overrides) CardinalityLimit(userID string) int { 461 return o.getOverridesForUser(userID).CardinalityLimit 462 } 463 464 // MinChunkLength returns the minimum size of chunk that will be saved by ingesters 465 func (o *Overrides) MinChunkLength(userID string) int { 466 return o.getOverridesForUser(userID).MinChunkLength 467 } 468 469 // MaxLocalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store in a single ingester. 470 func (o *Overrides) MaxLocalMetricsWithMetadataPerUser(userID string) int { 471 return o.getOverridesForUser(userID).MaxLocalMetricsWithMetadataPerUser 472 } 473 474 // MaxLocalMetadataPerMetric returns the maximum number of metadata allowed per metric in a single ingester. 
475 func (o *Overrides) MaxLocalMetadataPerMetric(userID string) int { 476 return o.getOverridesForUser(userID).MaxLocalMetadataPerMetric 477 } 478 479 // MaxGlobalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store across the cluster. 480 func (o *Overrides) MaxGlobalMetricsWithMetadataPerUser(userID string) int { 481 return o.getOverridesForUser(userID).MaxGlobalMetricsWithMetadataPerUser 482 } 483 484 // MaxGlobalMetadataPerMetric returns the maximum number of metadata allowed per metric across the cluster. 485 func (o *Overrides) MaxGlobalMetadataPerMetric(userID string) int { 486 return o.getOverridesForUser(userID).MaxGlobalMetadataPerMetric 487 } 488 489 // IngestionTenantShardSize returns the ingesters shard size for a given user. 490 func (o *Overrides) IngestionTenantShardSize(userID string) int { 491 return o.getOverridesForUser(userID).IngestionTenantShardSize 492 } 493 494 // EvaluationDelay returns the rules evaluation delay for a given user. 495 func (o *Overrides) EvaluationDelay(userID string) time.Duration { 496 return time.Duration(o.getOverridesForUser(userID).RulerEvaluationDelay) 497 } 498 499 // CompactorBlocksRetentionPeriod returns the retention period for a given user. 500 func (o *Overrides) CompactorBlocksRetentionPeriod(userID string) time.Duration { 501 return time.Duration(o.getOverridesForUser(userID).CompactorBlocksRetentionPeriod) 502 } 503 504 // CompactorTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. 505 func (o *Overrides) CompactorTenantShardSize(userID string) int { 506 return o.getOverridesForUser(userID).CompactorTenantShardSize 507 } 508 509 // MetricRelabelConfigs returns the metric relabel configs for a given user. 
510 func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { 511 return o.getOverridesForUser(userID).MetricRelabelConfigs 512 } 513 514 // RulerTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. 515 func (o *Overrides) RulerTenantShardSize(userID string) int { 516 return o.getOverridesForUser(userID).RulerTenantShardSize 517 } 518 519 // RulerMaxRulesPerRuleGroup returns the maximum number of rules per rule group for a given user. 520 func (o *Overrides) RulerMaxRulesPerRuleGroup(userID string) int { 521 return o.getOverridesForUser(userID).RulerMaxRulesPerRuleGroup 522 } 523 524 // RulerMaxRuleGroupsPerTenant returns the maximum number of rule groups for a given user. 525 func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int { 526 return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant 527 } 528 529 // StoreGatewayTenantShardSize returns the store-gateway shard size for a given user. 530 func (o *Overrides) StoreGatewayTenantShardSize(userID string) int { 531 return o.getOverridesForUser(userID).StoreGatewayTenantShardSize 532 } 533 534 // MaxHAClusters returns maximum number of clusters that HA tracker will track for a user. 535 func (o *Overrides) MaxHAClusters(user string) int { 536 return o.getOverridesForUser(user).HAMaxClusters 537 } 538 539 // S3SSEType returns the per-tenant S3 SSE type. 540 func (o *Overrides) S3SSEType(user string) string { 541 return o.getOverridesForUser(user).S3SSEType 542 } 543 544 // S3SSEKMSKeyID returns the per-tenant S3 KMS-SSE key id. 545 func (o *Overrides) S3SSEKMSKeyID(user string) string { 546 return o.getOverridesForUser(user).S3SSEKMSKeyID 547 } 548 549 // S3SSEKMSEncryptionContext returns the per-tenant S3 KMS-SSE encryption context. 
550 func (o *Overrides) S3SSEKMSEncryptionContext(user string) string { 551 return o.getOverridesForUser(user).S3SSEKMSEncryptionContext 552 } 553 554 // AlertmanagerReceiversBlockCIDRNetworks returns the list of network CIDRs that should be blocked 555 // in the Alertmanager receivers for the given user. 556 func (o *Overrides) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR { 557 return o.getOverridesForUser(user).AlertmanagerReceiversBlockCIDRNetworks 558 } 559 560 // AlertmanagerReceiversBlockPrivateAddresses returns true if private addresses should be blocked 561 // in the Alertmanager receivers for the given user. 562 func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool { 563 return o.getOverridesForUser(user).AlertmanagerReceiversBlockPrivateAddresses 564 } 565 566 // Notification limits are special. Limits are returned in following order: 567 // 1. per-tenant limits for given integration 568 // 2. default limits for given integration 569 // 3. per-tenant limits 570 // 4. default limits 571 func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 { 572 u := o.getOverridesForUser(user) 573 if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok { 574 return n 575 } 576 577 return u.NotificationRateLimit 578 } 579 580 func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit { 581 l := o.getNotificationLimitForUser(user, integration) 582 if l == 0 || math.IsInf(l, 1) { 583 return rate.Inf // No rate limit. 584 } 585 586 if l < 0 { 587 l = 0 // No notifications will be sent. 588 } 589 return rate.Limit(l) 590 } 591 592 const maxInt = int(^uint(0) >> 1) 593 594 func (o *Overrides) NotificationBurstSize(user string, integration string) int { 595 // Burst size is computed from rate limit. Rate limit is already normalized to [0, +inf), where 0 means disabled. 
596 l := o.NotificationRateLimit(user, integration) 597 if l == 0 { 598 return 0 599 } 600 601 // floats can be larger than max int. This also handles case where l == rate.Inf. 602 if float64(l) >= float64(maxInt) { 603 return maxInt 604 } 605 606 // For values between (0, 1), allow single notification per second (every 1/limit seconds). 607 if l < 1 { 608 return 1 609 } 610 611 return int(l) 612 } 613 614 func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int { 615 return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes 616 } 617 618 func (o *Overrides) AlertmanagerMaxTemplatesCount(userID string) int { 619 return o.getOverridesForUser(userID).AlertmanagerMaxTemplatesCount 620 } 621 622 func (o *Overrides) AlertmanagerMaxTemplateSize(userID string) int { 623 return o.getOverridesForUser(userID).AlertmanagerMaxTemplateSizeBytes 624 } 625 626 func (o *Overrides) AlertmanagerMaxDispatcherAggregationGroups(userID string) int { 627 return o.getOverridesForUser(userID).AlertmanagerMaxDispatcherAggregationGroups 628 } 629 630 func (o *Overrides) AlertmanagerMaxAlertsCount(userID string) int { 631 return o.getOverridesForUser(userID).AlertmanagerMaxAlertsCount 632 } 633 634 func (o *Overrides) AlertmanagerMaxAlertsSizeBytes(userID string) int { 635 return o.getOverridesForUser(userID).AlertmanagerMaxAlertsSizeBytes 636 } 637 638 func (o *Overrides) getOverridesForUser(userID string) *Limits { 639 if o.tenantLimits != nil { 640 l := o.tenantLimits.ByUserID(userID) 641 if l != nil { 642 return l 643 } 644 } 645 return o.defaultLimits 646 } 647 648 // SmallestPositiveIntPerTenant is returning the minimal positive value of the 649 // supplied limit function for all given tenants. 
func SmallestPositiveIntPerTenant(tenantIDs []string, f func(string) int) int {
	// NOTE: despite the name, no filtering is applied here: the plain minimum
	// of f over all tenants is returned (zero and negative values included),
	// and 0 is returned when tenantIDs is empty.
	smallest, seen := 0, false
	for _, tenantID := range tenantIDs {
		if v := f(tenantID); !seen || v < smallest {
			smallest, seen = v, true
		}
	}
	return smallest
}

// SmallestPositiveNonZeroIntPerTenant is returning the minimal positive and
// non-zero value of the supplied limit function for all given tenants. In many
// limits a value of 0 means unlimited so the method will return 0 only if no
// input has a positive limit or an empty tenant list is given.
func SmallestPositiveNonZeroIntPerTenant(tenantIDs []string, f func(string) int) int {
	// Only strictly positive values are ever stored, so 0 doubles as the
	// "nothing found yet" marker and as the final fallback result.
	smallest := 0
	for _, tenantID := range tenantIDs {
		if v := f(tenantID); v > 0 && (smallest == 0 || v < smallest) {
			smallest = v
		}
	}
	return smallest
}

// SmallestPositiveNonZeroDurationPerTenant is returning the minimal positive
// and non-zero value of the supplied limit function for all given tenants. In
// many limits a value of 0 means unlimited so the method will return 0 only if
// no input has a positive limit or an empty tenant list is given.
func SmallestPositiveNonZeroDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
	// Only strictly positive values are ever stored, so 0 doubles as the
	// "nothing found yet" marker and as the final fallback result.
	var smallest time.Duration
	for _, tenantID := range tenantIDs {
		if v := f(tenantID); v > 0 && (smallest == 0 || v < smallest) {
			smallest = v
		}
	}
	return smallest
}

// MaxDurationPerTenant is returning the maximum duration per tenant. Without
// tenants given it will return a time.Duration(0).
func MaxDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
	// Start from the zero duration: it is the result for an empty tenant list
	// and also the floor when every tenant reports a non-positive value.
	var longest time.Duration
	for i := range tenantIDs {
		if d := f(tenantIDs[i]); longest < d {
			longest = d
		}
	}
	return longest
}