github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/util/validation/limits.go (about) 1 package validation 2 3 import ( 4 "bytes" 5 "encoding/json" 6 "errors" 7 "flag" 8 "math" 9 "strings" 10 "time" 11 12 "github.com/grafana/dskit/flagext" 13 "github.com/prometheus/common/model" 14 "github.com/prometheus/prometheus/pkg/relabel" 15 "golang.org/x/time/rate" 16 ) 17 18 var errMaxGlobalSeriesPerUserValidation = errors.New("The ingester.max-global-series-per-user limit is unsupported if distributor.shard-by-all-labels is disabled") 19 20 // Supported values for enum limits 21 const ( 22 LocalIngestionRateStrategy = "local" 23 GlobalIngestionRateStrategy = "global" 24 ) 25 26 // LimitError are errors that do not comply with the limits specified. 27 type LimitError string 28 29 func (e LimitError) Error() string { 30 return string(e) 31 } 32 33 // Limits describe all the limits for users; can be used to describe global default 34 // limits via flags, or per-user limits via yaml config. 35 type Limits struct { 36 // Distributor enforced limits. 
37 IngestionRate float64 `yaml:"ingestion_rate" json:"ingestion_rate"` 38 IngestionRateStrategy string `yaml:"ingestion_rate_strategy" json:"ingestion_rate_strategy"` 39 IngestionBurstSize int `yaml:"ingestion_burst_size" json:"ingestion_burst_size"` 40 AcceptHASamples bool `yaml:"accept_ha_samples" json:"accept_ha_samples"` 41 HAClusterLabel string `yaml:"ha_cluster_label" json:"ha_cluster_label"` 42 HAReplicaLabel string `yaml:"ha_replica_label" json:"ha_replica_label"` 43 HAMaxClusters int `yaml:"ha_max_clusters" json:"ha_max_clusters"` 44 DropLabels flagext.StringSlice `yaml:"drop_labels" json:"drop_labels"` 45 MaxLabelNameLength int `yaml:"max_label_name_length" json:"max_label_name_length"` 46 MaxLabelValueLength int `yaml:"max_label_value_length" json:"max_label_value_length"` 47 MaxLabelNamesPerSeries int `yaml:"max_label_names_per_series" json:"max_label_names_per_series"` 48 MaxMetadataLength int `yaml:"max_metadata_length" json:"max_metadata_length"` 49 RejectOldSamples bool `yaml:"reject_old_samples" json:"reject_old_samples"` 50 RejectOldSamplesMaxAge model.Duration `yaml:"reject_old_samples_max_age" json:"reject_old_samples_max_age"` 51 CreationGracePeriod model.Duration `yaml:"creation_grace_period" json:"creation_grace_period"` 52 EnforceMetadataMetricName bool `yaml:"enforce_metadata_metric_name" json:"enforce_metadata_metric_name"` 53 EnforceMetricName bool `yaml:"enforce_metric_name" json:"enforce_metric_name"` 54 IngestionTenantShardSize int `yaml:"ingestion_tenant_shard_size" json:"ingestion_tenant_shard_size"` 55 MetricRelabelConfigs []*relabel.Config `yaml:"metric_relabel_configs,omitempty" json:"metric_relabel_configs,omitempty" doc:"nocli|description=List of metric relabel configurations. Note that in most situations, it is more effective to use metrics relabeling directly in the Prometheus server, e.g. remote_write.write_relabel_configs."` 56 57 // Ingester enforced limits. 
58 // Series 59 MaxSeriesPerQuery int `yaml:"max_series_per_query" json:"max_series_per_query"` 60 MaxSamplesPerQuery int `yaml:"max_samples_per_query" json:"max_samples_per_query"` 61 MaxLocalSeriesPerUser int `yaml:"max_series_per_user" json:"max_series_per_user"` 62 MaxLocalSeriesPerMetric int `yaml:"max_series_per_metric" json:"max_series_per_metric"` 63 MaxGlobalSeriesPerUser int `yaml:"max_global_series_per_user" json:"max_global_series_per_user"` 64 MaxGlobalSeriesPerMetric int `yaml:"max_global_series_per_metric" json:"max_global_series_per_metric"` 65 MinChunkLength int `yaml:"min_chunk_length" json:"min_chunk_length"` 66 // Metadata 67 MaxLocalMetricsWithMetadataPerUser int `yaml:"max_metadata_per_user" json:"max_metadata_per_user"` 68 MaxLocalMetadataPerMetric int `yaml:"max_metadata_per_metric" json:"max_metadata_per_metric"` 69 MaxGlobalMetricsWithMetadataPerUser int `yaml:"max_global_metadata_per_user" json:"max_global_metadata_per_user"` 70 MaxGlobalMetadataPerMetric int `yaml:"max_global_metadata_per_metric" json:"max_global_metadata_per_metric"` 71 72 // Querier enforced limits. 73 MaxChunksPerQueryFromStore int `yaml:"max_chunks_per_query" json:"max_chunks_per_query"` // TODO Remove in Cortex 1.12. 
74 MaxChunksPerQuery int `yaml:"max_fetched_chunks_per_query" json:"max_fetched_chunks_per_query"` 75 MaxFetchedSeriesPerQuery int `yaml:"max_fetched_series_per_query" json:"max_fetched_series_per_query"` 76 MaxFetchedChunkBytesPerQuery int `yaml:"max_fetched_chunk_bytes_per_query" json:"max_fetched_chunk_bytes_per_query"` 77 MaxQueryLookback model.Duration `yaml:"max_query_lookback" json:"max_query_lookback"` 78 MaxQueryLength model.Duration `yaml:"max_query_length" json:"max_query_length"` 79 MaxQueryParallelism int `yaml:"max_query_parallelism" json:"max_query_parallelism"` 80 CardinalityLimit int `yaml:"cardinality_limit" json:"cardinality_limit"` 81 MaxCacheFreshness model.Duration `yaml:"max_cache_freshness" json:"max_cache_freshness"` 82 MaxQueriersPerTenant int `yaml:"max_queriers_per_tenant" json:"max_queriers_per_tenant"` 83 84 // Ruler defaults and limits. 85 RulerEvaluationDelay model.Duration `yaml:"ruler_evaluation_delay_duration" json:"ruler_evaluation_delay_duration"` 86 RulerTenantShardSize int `yaml:"ruler_tenant_shard_size" json:"ruler_tenant_shard_size"` 87 RulerMaxRulesPerRuleGroup int `yaml:"ruler_max_rules_per_rule_group" json:"ruler_max_rules_per_rule_group"` 88 RulerMaxRuleGroupsPerTenant int `yaml:"ruler_max_rule_groups_per_tenant" json:"ruler_max_rule_groups_per_tenant"` 89 90 // Store-gateway. 91 StoreGatewayTenantShardSize int `yaml:"store_gateway_tenant_shard_size" json:"store_gateway_tenant_shard_size"` 92 93 // Compactor. 94 CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` 95 96 // This config doesn't have a CLI flag registered here because they're registered in 97 // their own original config struct. 98 S3SSEType string `yaml:"s3_sse_type" json:"s3_sse_type" doc:"nocli|description=S3 server-side encryption type. Required to enable server-side encryption overrides for a specific tenant. 
If not set, the default S3 client settings are used."` 99 S3SSEKMSKeyID string `yaml:"s3_sse_kms_key_id" json:"s3_sse_kms_key_id" doc:"nocli|description=S3 server-side encryption KMS Key ID. Ignored if the SSE type override is not set."` 100 S3SSEKMSEncryptionContext string `yaml:"s3_sse_kms_encryption_context" json:"s3_sse_kms_encryption_context" doc:"nocli|description=S3 server-side encryption KMS encryption context. If unset and the key ID override is set, the encryption context will not be provided to S3. Ignored if the SSE type override is not set."` 101 102 // Alertmanager. 103 AlertmanagerReceiversBlockCIDRNetworks flagext.CIDRSliceCSV `yaml:"alertmanager_receivers_firewall_block_cidr_networks" json:"alertmanager_receivers_firewall_block_cidr_networks"` 104 AlertmanagerReceiversBlockPrivateAddresses bool `yaml:"alertmanager_receivers_firewall_block_private_addresses" json:"alertmanager_receivers_firewall_block_private_addresses"` 105 106 NotificationRateLimit float64 `yaml:"alertmanager_notification_rate_limit" json:"alertmanager_notification_rate_limit"` 107 NotificationRateLimitPerIntegration NotificationRateLimitMap `yaml:"alertmanager_notification_rate_limit_per_integration" json:"alertmanager_notification_rate_limit_per_integration"` 108 109 AlertmanagerMaxConfigSizeBytes int `yaml:"alertmanager_max_config_size_bytes" json:"alertmanager_max_config_size_bytes"` 110 AlertmanagerMaxTemplatesCount int `yaml:"alertmanager_max_templates_count" json:"alertmanager_max_templates_count"` 111 AlertmanagerMaxTemplateSizeBytes int `yaml:"alertmanager_max_template_size_bytes" json:"alertmanager_max_template_size_bytes"` 112 AlertmanagerMaxDispatcherAggregationGroups int `yaml:"alertmanager_max_dispatcher_aggregation_groups" json:"alertmanager_max_dispatcher_aggregation_groups"` 113 AlertmanagerMaxAlertsCount int `yaml:"alertmanager_max_alerts_count" json:"alertmanager_max_alerts_count"` 114 AlertmanagerMaxAlertsSizeBytes int `yaml:"alertmanager_max_alerts_size_bytes" 
json:"alertmanager_max_alerts_size_bytes"` 115 } 116 117 // RegisterFlags adds the flags required to config this to the given FlagSet 118 func (l *Limits) RegisterFlags(f *flag.FlagSet) { 119 f.IntVar(&l.IngestionTenantShardSize, "distributor.ingestion-tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set both on ingesters and distributors. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") 120 f.Float64Var(&l.IngestionRate, "distributor.ingestion-rate-limit", 25000, "Per-user ingestion rate limit in samples per second.") 121 f.StringVar(&l.IngestionRateStrategy, "distributor.ingestion-rate-limit-strategy", "local", "Whether the ingestion rate limit should be applied individually to each distributor instance (local), or evenly shared across the cluster (global).") 122 f.IntVar(&l.IngestionBurstSize, "distributor.ingestion-burst-size", 50000, "Per-user allowed ingestion burst size (in number of samples).") 123 f.BoolVar(&l.AcceptHASamples, "distributor.ha-tracker.enable-for-all-users", false, "Flag to enable, for all users, handling of samples with external labels identifying replicas in an HA Prometheus setup.") 124 f.StringVar(&l.HAClusterLabel, "distributor.ha-tracker.cluster", "cluster", "Prometheus label to look for in samples to identify a Prometheus HA cluster.") 125 f.StringVar(&l.HAReplicaLabel, "distributor.ha-tracker.replica", "__replica__", "Prometheus label to look for in samples to identify a Prometheus HA replica.") 126 f.IntVar(&l.HAMaxClusters, "distributor.ha-tracker.max-clusters", 0, "Maximum number of clusters that HA tracker will keep track of for single user. 
0 to disable the limit.") 127 f.Var(&l.DropLabels, "distributor.drop-label", "This flag can be used to specify label names that to drop during sample ingestion within the distributor and can be repeated in order to drop multiple labels.") 128 f.IntVar(&l.MaxLabelNameLength, "validation.max-length-label-name", 1024, "Maximum length accepted for label names") 129 f.IntVar(&l.MaxLabelValueLength, "validation.max-length-label-value", 2048, "Maximum length accepted for label value. This setting also applies to the metric name") 130 f.IntVar(&l.MaxLabelNamesPerSeries, "validation.max-label-names-per-series", 30, "Maximum number of label names per series.") 131 f.IntVar(&l.MaxMetadataLength, "validation.max-metadata-length", 1024, "Maximum length accepted for metric metadata. Metadata refers to Metric Name, HELP and UNIT.") 132 f.BoolVar(&l.RejectOldSamples, "validation.reject-old-samples", false, "Reject old samples.") 133 _ = l.RejectOldSamplesMaxAge.Set("14d") 134 f.Var(&l.RejectOldSamplesMaxAge, "validation.reject-old-samples.max-age", "Maximum accepted sample age before rejecting.") 135 _ = l.CreationGracePeriod.Set("10m") 136 f.Var(&l.CreationGracePeriod, "validation.create-grace-period", "Duration which table will be created/deleted before/after it's needed; we won't accept sample from before this time.") 137 f.BoolVar(&l.EnforceMetricName, "validation.enforce-metric-name", true, "Enforce every sample has a metric name.") 138 f.BoolVar(&l.EnforceMetadataMetricName, "validation.enforce-metadata-metric-name", true, "Enforce every metadata has a metric name.") 139 140 f.IntVar(&l.MaxSeriesPerQuery, "ingester.max-series-per-query", 100000, "The maximum number of series for which a query can fetch samples from each ingester. This limit is enforced only in the ingesters (when querying samples not flushed to the storage yet) and it's a per-instance limit. This limit is ignored when running the Cortex blocks storage. 
When running Cortex with blocks storage use -querier.max-fetched-series-per-query limit instead.") 141 f.IntVar(&l.MaxSamplesPerQuery, "ingester.max-samples-per-query", 1000000, "The maximum number of samples that a query can return. This limit only applies when running the Cortex chunks storage with -querier.ingester-streaming=false.") 142 f.IntVar(&l.MaxLocalSeriesPerUser, "ingester.max-series-per-user", 5000000, "The maximum number of active series per user, per ingester. 0 to disable.") 143 f.IntVar(&l.MaxLocalSeriesPerMetric, "ingester.max-series-per-metric", 50000, "The maximum number of active series per metric name, per ingester. 0 to disable.") 144 f.IntVar(&l.MaxGlobalSeriesPerUser, "ingester.max-global-series-per-user", 0, "The maximum number of active series per user, across the cluster before replication. 0 to disable. Supported only if -distributor.shard-by-all-labels is true.") 145 f.IntVar(&l.MaxGlobalSeriesPerMetric, "ingester.max-global-series-per-metric", 0, "The maximum number of active series per metric name, across the cluster before replication. 0 to disable.") 146 f.IntVar(&l.MinChunkLength, "ingester.min-chunk-length", 0, "Minimum number of samples in an idle chunk to flush it to the store. Use with care, if chunks are less than this size they will be discarded. This option is ignored when running the Cortex blocks storage. 0 to disable.") 147 148 f.IntVar(&l.MaxLocalMetricsWithMetadataPerUser, "ingester.max-metadata-per-user", 8000, "The maximum number of active metrics with metadata per user, per ingester. 0 to disable.") 149 f.IntVar(&l.MaxLocalMetadataPerMetric, "ingester.max-metadata-per-metric", 10, "The maximum number of metadata per metric, per ingester. 0 to disable.") 150 f.IntVar(&l.MaxGlobalMetricsWithMetadataPerUser, "ingester.max-global-metadata-per-user", 0, "The maximum number of active metrics with metadata per user, across the cluster. 0 to disable. 
Supported only if -distributor.shard-by-all-labels is true.") 151 f.IntVar(&l.MaxGlobalMetadataPerMetric, "ingester.max-global-metadata-per-metric", 0, "The maximum number of metadata per metric, across the cluster. 0 to disable.") 152 f.IntVar(&l.MaxChunksPerQueryFromStore, "store.query-chunk-limit", 2e6, "Deprecated. Use -querier.max-fetched-chunks-per-query CLI flag and its respective YAML config option instead. Maximum number of chunks that can be fetched in a single query. This limit is enforced when fetching chunks from the long-term storage only. When running the Cortex chunks storage, this limit is enforced in the querier and ruler, while when running the Cortex blocks storage this limit is enforced in the querier, ruler and store-gateway. 0 to disable.") 153 f.IntVar(&l.MaxChunksPerQuery, "querier.max-fetched-chunks-per-query", 0, "Maximum number of chunks that can be fetched in a single query from ingesters and long-term storage. This limit is enforced in the querier, ruler and store-gateway. Takes precedence over the deprecated -store.query-chunk-limit. 0 to disable.") 154 f.IntVar(&l.MaxFetchedSeriesPerQuery, "querier.max-fetched-series-per-query", 0, "The maximum number of unique series for which a query can fetch samples from each ingesters and blocks storage. This limit is enforced in the querier only when running Cortex with blocks storage. 0 to disable") 155 f.IntVar(&l.MaxFetchedChunkBytesPerQuery, "querier.max-fetched-chunk-bytes-per-query", 0, "The maximum size of all chunks in bytes that a query can fetch from each ingester and storage. This limit is enforced in the querier and ruler only when running Cortex with blocks storage. 0 to disable.") 156 f.Var(&l.MaxQueryLength, "store.max-query-length", "Limit the query time range (end - start time). This limit is enforced in the query-frontend (on the received query), in the querier (on the query possibly split by the query-frontend) and in the chunks storage. 
0 to disable.") 157 f.Var(&l.MaxQueryLookback, "querier.max-query-lookback", "Limit how long back data (series and metadata) can be queried, up until <lookback> duration ago. This limit is enforced in the query-frontend, querier and ruler. If the requested time range is outside the allowed range, the request will not fail but will be manipulated to only query data within the allowed time range. 0 to disable.") 158 f.IntVar(&l.MaxQueryParallelism, "querier.max-query-parallelism", 14, "Maximum number of split queries will be scheduled in parallel by the frontend.") 159 f.IntVar(&l.CardinalityLimit, "store.cardinality-limit", 1e5, "Cardinality limit for index queries. This limit is ignored when running the Cortex blocks storage. 0 to disable.") 160 _ = l.MaxCacheFreshness.Set("1m") 161 f.Var(&l.MaxCacheFreshness, "frontend.max-cache-freshness", "Most recent allowed cacheable result per-tenant, to prevent caching very recent results that might still be in flux.") 162 f.IntVar(&l.MaxQueriersPerTenant, "frontend.max-queriers-per-tenant", 0, "Maximum number of queriers that can handle requests for a single tenant. If set to 0 or value higher than number of available queriers, *all* queriers will handle requests for the tenant. Each frontend (or query-scheduler, if used) will select the same set of queriers for the same tenant (given that all queriers are connected to all frontends / query-schedulers). This option only works with queriers connecting to the query-frontend / query-scheduler, not when using downstream URL.") 163 164 f.Var(&l.RulerEvaluationDelay, "ruler.evaluation-delay-duration", "Duration to delay the evaluation of rules to ensure the underlying metrics have been pushed to Cortex.") 165 f.IntVar(&l.RulerTenantShardSize, "ruler.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by ruler. 
When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") 166 f.IntVar(&l.RulerMaxRulesPerRuleGroup, "ruler.max-rules-per-rule-group", 0, "Maximum number of rules per rule group per-tenant. 0 to disable.") 167 f.IntVar(&l.RulerMaxRuleGroupsPerTenant, "ruler.max-rule-groups-per-tenant", 0, "Maximum number of rule groups per-tenant. 0 to disable.") 168 169 f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.") 170 171 // Store-gateway. 172 f.IntVar(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") 173 174 // Alertmanager. 175 f.Var(&l.AlertmanagerReceiversBlockCIDRNetworks, "alertmanager.receivers-firewall-block-cidr-networks", "Comma-separated list of network CIDRs to block in Alertmanager receiver integrations.") 176 f.BoolVar(&l.AlertmanagerReceiversBlockPrivateAddresses, "alertmanager.receivers-firewall-block-private-addresses", false, "True to block private and local addresses in Alertmanager receiver integrations. It blocks private addresses defined by RFC 1918 (IPv4 addresses) and RFC 4193 (IPv6 addresses), as well as loopback, local unicast and local multicast addresses.") 177 178 f.Float64Var(&l.NotificationRateLimit, "alertmanager.notification-rate-limit", 0, "Per-user rate limit for sending notifications from Alertmanager in notifications/sec. 0 = rate limit disabled. 
Negative value = no notifications are allowed.") 179 180 if l.NotificationRateLimitPerIntegration == nil { 181 l.NotificationRateLimitPerIntegration = NotificationRateLimitMap{} 182 } 183 f.Var(&l.NotificationRateLimitPerIntegration, "alertmanager.notification-rate-limit-per-integration", "Per-integration notification rate limits. Value is a map, where each key is integration name and value is a rate-limit (float). On command line, this map is given in JSON format. Rate limit has the same meaning as -alertmanager.notification-rate-limit, but only applies for specific integration. Allowed integration names: "+strings.Join(allowedIntegrationNames, ", ")+".") 184 f.IntVar(&l.AlertmanagerMaxConfigSizeBytes, "alertmanager.max-config-size-bytes", 0, "Maximum size of configuration file for Alertmanager that tenant can upload via Alertmanager API. 0 = no limit.") 185 f.IntVar(&l.AlertmanagerMaxTemplatesCount, "alertmanager.max-templates-count", 0, "Maximum number of templates in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.") 186 f.IntVar(&l.AlertmanagerMaxTemplateSizeBytes, "alertmanager.max-template-size-bytes", 0, "Maximum size of single template in tenant's Alertmanager configuration uploaded via Alertmanager API. 0 = no limit.") 187 f.IntVar(&l.AlertmanagerMaxDispatcherAggregationGroups, "alertmanager.max-dispatcher-aggregation-groups", 0, "Maximum number of aggregation groups in Alertmanager's dispatcher that a tenant can have. Each active aggregation group uses single goroutine. When the limit is reached, dispatcher will not dispatch alerts that belong to additional aggregation groups, but existing groups will keep working properly. 0 = no limit.") 188 f.IntVar(&l.AlertmanagerMaxAlertsCount, "alertmanager.max-alerts-count", 0, "Maximum number of alerts that a single user can have. Inserting more alerts will fail with a log message and metric increment. 
0 = no limit.") 189 f.IntVar(&l.AlertmanagerMaxAlertsSizeBytes, "alertmanager.max-alerts-size-bytes", 0, "Maximum total size of alerts that a single user can have, alert size is the sum of the bytes of its labels, annotations and generatorURL. Inserting more alerts will fail with a log message and metric increment. 0 = no limit.") 190 } 191 192 // Validate the limits config and returns an error if the validation 193 // doesn't pass 194 func (l *Limits) Validate(shardByAllLabels bool) error { 195 // The ingester.max-global-series-per-user metric is not supported 196 // if shard-by-all-labels is disabled 197 if l.MaxGlobalSeriesPerUser > 0 && !shardByAllLabels { 198 return errMaxGlobalSeriesPerUserValidation 199 } 200 201 return nil 202 } 203 204 // UnmarshalYAML implements the yaml.Unmarshaler interface. 205 func (l *Limits) UnmarshalYAML(unmarshal func(interface{}) error) error { 206 // We want to set l to the defaults and then overwrite it with the input. 207 // To make unmarshal fill the plain data struct rather than calling UnmarshalYAML 208 // again, we have to hide it using a type indirection. See prometheus/config. 209 210 // During startup we wont have a default value so we don't want to overwrite them 211 if defaultLimits != nil { 212 *l = *defaultLimits 213 // Make copy of default limits. Otherwise unmarshalling would modify map in default limits. 214 l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration) 215 } 216 type plain Limits 217 return unmarshal((*plain)(l)) 218 } 219 220 // UnmarshalJSON implements the json.Unmarshaler interface. 221 func (l *Limits) UnmarshalJSON(data []byte) error { 222 // Like the YAML method above, we want to set l to the defaults and then overwrite 223 // it with the input. We prevent an infinite loop of calling UnmarshalJSON by hiding 224 // behind type indirection. 225 if defaultLimits != nil { 226 *l = *defaultLimits 227 // Make copy of default limits. 
Otherwise unmarshalling would modify map in default limits. 228 l.copyNotificationIntegrationLimits(defaultLimits.NotificationRateLimitPerIntegration) 229 } 230 231 type plain Limits 232 dec := json.NewDecoder(bytes.NewReader(data)) 233 dec.DisallowUnknownFields() 234 235 return dec.Decode((*plain)(l)) 236 } 237 238 func (l *Limits) copyNotificationIntegrationLimits(defaults NotificationRateLimitMap) { 239 l.NotificationRateLimitPerIntegration = make(map[string]float64, len(defaults)) 240 for k, v := range defaults { 241 l.NotificationRateLimitPerIntegration[k] = v 242 } 243 } 244 245 // When we load YAML from disk, we want the various per-customer limits 246 // to default to any values specified on the command line, not default 247 // command line values. This global contains those values. I (Tom) cannot 248 // find a nicer way I'm afraid. 249 var defaultLimits *Limits 250 251 // SetDefaultLimitsForYAMLUnmarshalling sets global default limits, used when loading 252 // Limits from YAML files. This is used to ensure per-tenant limits are defaulted to 253 // those values. 254 func SetDefaultLimitsForYAMLUnmarshalling(defaults Limits) { 255 defaultLimits = &defaults 256 } 257 258 // TenantLimits exposes per-tenant limit overrides to various resource usage limits 259 type TenantLimits interface { 260 // ByUserID gets limits specific to a particular tenant or nil if there are none 261 ByUserID(userID string) *Limits 262 263 // AllByUserID gets a mapping of all tenant IDs and limits for that user 264 AllByUserID() map[string]*Limits 265 } 266 267 // Overrides periodically fetch a set of per-user overrides, and provides convenience 268 // functions for fetching the correct value. 269 type Overrides struct { 270 defaultLimits *Limits 271 tenantLimits TenantLimits 272 } 273 274 // NewOverrides makes a new Overrides. 
275 func NewOverrides(defaults Limits, tenantLimits TenantLimits) (*Overrides, error) { 276 return &Overrides{ 277 tenantLimits: tenantLimits, 278 defaultLimits: &defaults, 279 }, nil 280 } 281 282 // IngestionRate returns the limit on ingester rate (samples per second). 283 func (o *Overrides) IngestionRate(userID string) float64 { 284 return o.getOverridesForUser(userID).IngestionRate 285 } 286 287 // IngestionRateStrategy returns whether the ingestion rate limit should be individually applied 288 // to each distributor instance (local) or evenly shared across the cluster (global). 289 func (o *Overrides) IngestionRateStrategy() string { 290 // The ingestion rate strategy can't be overridden on a per-tenant basis 291 return o.defaultLimits.IngestionRateStrategy 292 } 293 294 // IngestionBurstSize returns the burst size for ingestion rate. 295 func (o *Overrides) IngestionBurstSize(userID string) int { 296 return o.getOverridesForUser(userID).IngestionBurstSize 297 } 298 299 // AcceptHASamples returns whether the distributor should track and accept samples from HA replicas for this user. 300 func (o *Overrides) AcceptHASamples(userID string) bool { 301 return o.getOverridesForUser(userID).AcceptHASamples 302 } 303 304 // HAClusterLabel returns the cluster label to look for when deciding whether to accept a sample from a Prometheus HA replica. 305 func (o *Overrides) HAClusterLabel(userID string) string { 306 return o.getOverridesForUser(userID).HAClusterLabel 307 } 308 309 // HAReplicaLabel returns the replica label to look for when deciding whether to accept a sample from a Prometheus HA replica. 310 func (o *Overrides) HAReplicaLabel(userID string) string { 311 return o.getOverridesForUser(userID).HAReplicaLabel 312 } 313 314 // DropLabels returns the list of labels to be dropped when ingesting HA samples for the user. 
315 func (o *Overrides) DropLabels(userID string) flagext.StringSlice { 316 return o.getOverridesForUser(userID).DropLabels 317 } 318 319 // MaxLabelNameLength returns maximum length a label name can be. 320 func (o *Overrides) MaxLabelNameLength(userID string) int { 321 return o.getOverridesForUser(userID).MaxLabelNameLength 322 } 323 324 // MaxLabelValueLength returns maximum length a label value can be. This also is 325 // the maximum length of a metric name. 326 func (o *Overrides) MaxLabelValueLength(userID string) int { 327 return o.getOverridesForUser(userID).MaxLabelValueLength 328 } 329 330 // MaxLabelNamesPerSeries returns maximum number of label/value pairs timeseries. 331 func (o *Overrides) MaxLabelNamesPerSeries(userID string) int { 332 return o.getOverridesForUser(userID).MaxLabelNamesPerSeries 333 } 334 335 // MaxMetadataLength returns maximum length metadata can be. Metadata refers 336 // to the Metric Name, HELP and UNIT. 337 func (o *Overrides) MaxMetadataLength(userID string) int { 338 return o.getOverridesForUser(userID).MaxMetadataLength 339 } 340 341 // RejectOldSamples returns true when we should reject samples older than certain 342 // age. 343 func (o *Overrides) RejectOldSamples(userID string) bool { 344 return o.getOverridesForUser(userID).RejectOldSamples 345 } 346 347 // RejectOldSamplesMaxAge returns the age at which samples should be rejected. 348 func (o *Overrides) RejectOldSamplesMaxAge(userID string) time.Duration { 349 return time.Duration(o.getOverridesForUser(userID).RejectOldSamplesMaxAge) 350 } 351 352 // CreationGracePeriod is misnamed, and actually returns how far into the future 353 // we should accept samples. 354 func (o *Overrides) CreationGracePeriod(userID string) time.Duration { 355 return time.Duration(o.getOverridesForUser(userID).CreationGracePeriod) 356 } 357 358 // MaxSeriesPerQuery returns the maximum number of series a query is allowed to hit. 
359 func (o *Overrides) MaxSeriesPerQuery(userID string) int { 360 return o.getOverridesForUser(userID).MaxSeriesPerQuery 361 } 362 363 // MaxSamplesPerQuery returns the maximum number of samples in a query (from the ingester). 364 func (o *Overrides) MaxSamplesPerQuery(userID string) int { 365 return o.getOverridesForUser(userID).MaxSamplesPerQuery 366 } 367 368 // MaxLocalSeriesPerUser returns the maximum number of series a user is allowed to store in a single ingester. 369 func (o *Overrides) MaxLocalSeriesPerUser(userID string) int { 370 return o.getOverridesForUser(userID).MaxLocalSeriesPerUser 371 } 372 373 // MaxLocalSeriesPerMetric returns the maximum number of series allowed per metric in a single ingester. 374 func (o *Overrides) MaxLocalSeriesPerMetric(userID string) int { 375 return o.getOverridesForUser(userID).MaxLocalSeriesPerMetric 376 } 377 378 // MaxGlobalSeriesPerUser returns the maximum number of series a user is allowed to store across the cluster. 379 func (o *Overrides) MaxGlobalSeriesPerUser(userID string) int { 380 return o.getOverridesForUser(userID).MaxGlobalSeriesPerUser 381 } 382 383 // MaxGlobalSeriesPerMetric returns the maximum number of series allowed per metric across the cluster. 384 func (o *Overrides) MaxGlobalSeriesPerMetric(userID string) int { 385 return o.getOverridesForUser(userID).MaxGlobalSeriesPerMetric 386 } 387 388 // MaxChunksPerQueryFromStore returns the maximum number of chunks allowed per query when fetching 389 // chunks from the long-term storage. 390 func (o *Overrides) MaxChunksPerQueryFromStore(userID string) int { 391 // If the new config option is set, then it should take precedence. 392 if value := o.getOverridesForUser(userID).MaxChunksPerQuery; value > 0 { 393 return value 394 } 395 396 // Fallback to the deprecated config option. 
397 return o.getOverridesForUser(userID).MaxChunksPerQueryFromStore 398 } 399 400 func (o *Overrides) MaxChunksPerQuery(userID string) int { 401 return o.getOverridesForUser(userID).MaxChunksPerQuery 402 } 403 404 // MaxFetchedSeriesPerQuery returns the maximum number of series allowed per query when fetching 405 // chunks from ingesters and blocks storage. 406 func (o *Overrides) MaxFetchedSeriesPerQuery(userID string) int { 407 return o.getOverridesForUser(userID).MaxFetchedSeriesPerQuery 408 } 409 410 // MaxFetchedChunkBytesPerQuery returns the maximum number of bytes for chunks allowed per query when fetching 411 // chunks from ingesters and blocks storage. 412 func (o *Overrides) MaxFetchedChunkBytesPerQuery(userID string) int { 413 return o.getOverridesForUser(userID).MaxFetchedChunkBytesPerQuery 414 } 415 416 // MaxQueryLookback returns the max lookback period of queries. 417 func (o *Overrides) MaxQueryLookback(userID string) time.Duration { 418 return time.Duration(o.getOverridesForUser(userID).MaxQueryLookback) 419 } 420 421 // MaxQueryLength returns the limit of the length (in time) of a query. 422 func (o *Overrides) MaxQueryLength(userID string) time.Duration { 423 return time.Duration(o.getOverridesForUser(userID).MaxQueryLength) 424 } 425 426 // MaxCacheFreshness returns the period after which results are cacheable, 427 // to prevent caching of very recent results. 428 func (o *Overrides) MaxCacheFreshness(userID string) time.Duration { 429 return time.Duration(o.getOverridesForUser(userID).MaxCacheFreshness) 430 } 431 432 // MaxQueriersPerUser returns the maximum number of queriers that can handle requests for this user. 433 func (o *Overrides) MaxQueriersPerUser(userID string) int { 434 return o.getOverridesForUser(userID).MaxQueriersPerTenant 435 } 436 437 // MaxQueryParallelism returns the limit to the number of split queries the 438 // frontend will process in parallel. 
439 func (o *Overrides) MaxQueryParallelism(userID string) int { 440 return o.getOverridesForUser(userID).MaxQueryParallelism 441 } 442 443 // EnforceMetricName whether to enforce the presence of a metric name. 444 func (o *Overrides) EnforceMetricName(userID string) bool { 445 return o.getOverridesForUser(userID).EnforceMetricName 446 } 447 448 // EnforceMetadataMetricName whether to enforce the presence of a metric name on metadata. 449 func (o *Overrides) EnforceMetadataMetricName(userID string) bool { 450 return o.getOverridesForUser(userID).EnforceMetadataMetricName 451 } 452 453 // CardinalityLimit returns the maximum number of timeseries allowed in a query. 454 func (o *Overrides) CardinalityLimit(userID string) int { 455 return o.getOverridesForUser(userID).CardinalityLimit 456 } 457 458 // MinChunkLength returns the minimum size of chunk that will be saved by ingesters 459 func (o *Overrides) MinChunkLength(userID string) int { 460 return o.getOverridesForUser(userID).MinChunkLength 461 } 462 463 // MaxLocalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store in a single ingester. 464 func (o *Overrides) MaxLocalMetricsWithMetadataPerUser(userID string) int { 465 return o.getOverridesForUser(userID).MaxLocalMetricsWithMetadataPerUser 466 } 467 468 // MaxLocalMetadataPerMetric returns the maximum number of metadata allowed per metric in a single ingester. 469 func (o *Overrides) MaxLocalMetadataPerMetric(userID string) int { 470 return o.getOverridesForUser(userID).MaxLocalMetadataPerMetric 471 } 472 473 // MaxGlobalMetricsWithMetadataPerUser returns the maximum number of metrics with metadata a user is allowed to store across the cluster. 
474 func (o *Overrides) MaxGlobalMetricsWithMetadataPerUser(userID string) int { 475 return o.getOverridesForUser(userID).MaxGlobalMetricsWithMetadataPerUser 476 } 477 478 // MaxGlobalMetadataPerMetric returns the maximum number of metadata allowed per metric across the cluster. 479 func (o *Overrides) MaxGlobalMetadataPerMetric(userID string) int { 480 return o.getOverridesForUser(userID).MaxGlobalMetadataPerMetric 481 } 482 483 // IngestionTenantShardSize returns the ingesters shard size for a given user. 484 func (o *Overrides) IngestionTenantShardSize(userID string) int { 485 return o.getOverridesForUser(userID).IngestionTenantShardSize 486 } 487 488 // EvaluationDelay returns the rules evaluation delay for a given user. 489 func (o *Overrides) EvaluationDelay(userID string) time.Duration { 490 return time.Duration(o.getOverridesForUser(userID).RulerEvaluationDelay) 491 } 492 493 // CompactorBlocksRetentionPeriod returns the retention period for a given user. 494 func (o *Overrides) CompactorBlocksRetentionPeriod(userID string) time.Duration { 495 return time.Duration(o.getOverridesForUser(userID).CompactorBlocksRetentionPeriod) 496 } 497 498 // MetricRelabelConfigs returns the metric relabel configs for a given user. 499 func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { 500 return o.getOverridesForUser(userID).MetricRelabelConfigs 501 } 502 503 // RulerTenantShardSize returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. 504 func (o *Overrides) RulerTenantShardSize(userID string) int { 505 return o.getOverridesForUser(userID).RulerTenantShardSize 506 } 507 508 // RulerMaxRulesPerRuleGroup returns the maximum number of rules per rule group for a given user. 
509 func (o *Overrides) RulerMaxRulesPerRuleGroup(userID string) int { 510 return o.getOverridesForUser(userID).RulerMaxRulesPerRuleGroup 511 } 512 513 // RulerMaxRuleGroupsPerTenant returns the maximum number of rule groups for a given user. 514 func (o *Overrides) RulerMaxRuleGroupsPerTenant(userID string) int { 515 return o.getOverridesForUser(userID).RulerMaxRuleGroupsPerTenant 516 } 517 518 // StoreGatewayTenantShardSize returns the store-gateway shard size for a given user. 519 func (o *Overrides) StoreGatewayTenantShardSize(userID string) int { 520 return o.getOverridesForUser(userID).StoreGatewayTenantShardSize 521 } 522 523 // MaxHAClusters returns maximum number of clusters that HA tracker will track for a user. 524 func (o *Overrides) MaxHAClusters(user string) int { 525 return o.getOverridesForUser(user).HAMaxClusters 526 } 527 528 // S3SSEType returns the per-tenant S3 SSE type. 529 func (o *Overrides) S3SSEType(user string) string { 530 return o.getOverridesForUser(user).S3SSEType 531 } 532 533 // S3SSEKMSKeyID returns the per-tenant S3 KMS-SSE key id. 534 func (o *Overrides) S3SSEKMSKeyID(user string) string { 535 return o.getOverridesForUser(user).S3SSEKMSKeyID 536 } 537 538 // S3SSEKMSEncryptionContext returns the per-tenant S3 KMS-SSE encryption context. 539 func (o *Overrides) S3SSEKMSEncryptionContext(user string) string { 540 return o.getOverridesForUser(user).S3SSEKMSEncryptionContext 541 } 542 543 // AlertmanagerReceiversBlockCIDRNetworks returns the list of network CIDRs that should be blocked 544 // in the Alertmanager receivers for the given user. 545 func (o *Overrides) AlertmanagerReceiversBlockCIDRNetworks(user string) []flagext.CIDR { 546 return o.getOverridesForUser(user).AlertmanagerReceiversBlockCIDRNetworks 547 } 548 549 // AlertmanagerReceiversBlockPrivateAddresses returns true if private addresses should be blocked 550 // in the Alertmanager receivers for the given user. 
551 func (o *Overrides) AlertmanagerReceiversBlockPrivateAddresses(user string) bool { 552 return o.getOverridesForUser(user).AlertmanagerReceiversBlockPrivateAddresses 553 } 554 555 // Notification limits are special. Limits are returned in following order: 556 // 1. per-tenant limits for given integration 557 // 2. default limits for given integration 558 // 3. per-tenant limits 559 // 4. default limits 560 func (o *Overrides) getNotificationLimitForUser(user, integration string) float64 { 561 u := o.getOverridesForUser(user) 562 if n, ok := u.NotificationRateLimitPerIntegration[integration]; ok { 563 return n 564 } 565 566 return u.NotificationRateLimit 567 } 568 569 func (o *Overrides) NotificationRateLimit(user string, integration string) rate.Limit { 570 l := o.getNotificationLimitForUser(user, integration) 571 if l == 0 || math.IsInf(l, 1) { 572 return rate.Inf // No rate limit. 573 } 574 575 if l < 0 { 576 l = 0 // No notifications will be sent. 577 } 578 return rate.Limit(l) 579 } 580 581 const maxInt = int(^uint(0) >> 1) 582 583 func (o *Overrides) NotificationBurstSize(user string, integration string) int { 584 // Burst size is computed from rate limit. Rate limit is already normalized to [0, +inf), where 0 means disabled. 585 l := o.NotificationRateLimit(user, integration) 586 if l == 0 { 587 return 0 588 } 589 590 // floats can be larger than max int. This also handles case where l == rate.Inf. 591 if float64(l) >= float64(maxInt) { 592 return maxInt 593 } 594 595 // For values between (0, 1), allow single notification per second (every 1/limit seconds). 
596 if l < 1 { 597 return 1 598 } 599 600 return int(l) 601 } 602 603 func (o *Overrides) AlertmanagerMaxConfigSize(userID string) int { 604 return o.getOverridesForUser(userID).AlertmanagerMaxConfigSizeBytes 605 } 606 607 func (o *Overrides) AlertmanagerMaxTemplatesCount(userID string) int { 608 return o.getOverridesForUser(userID).AlertmanagerMaxTemplatesCount 609 } 610 611 func (o *Overrides) AlertmanagerMaxTemplateSize(userID string) int { 612 return o.getOverridesForUser(userID).AlertmanagerMaxTemplateSizeBytes 613 } 614 615 func (o *Overrides) AlertmanagerMaxDispatcherAggregationGroups(userID string) int { 616 return o.getOverridesForUser(userID).AlertmanagerMaxDispatcherAggregationGroups 617 } 618 619 func (o *Overrides) AlertmanagerMaxAlertsCount(userID string) int { 620 return o.getOverridesForUser(userID).AlertmanagerMaxAlertsCount 621 } 622 623 func (o *Overrides) AlertmanagerMaxAlertsSizeBytes(userID string) int { 624 return o.getOverridesForUser(userID).AlertmanagerMaxAlertsSizeBytes 625 } 626 627 func (o *Overrides) getOverridesForUser(userID string) *Limits { 628 if o.tenantLimits != nil { 629 l := o.tenantLimits.ByUserID(userID) 630 if l != nil { 631 return l 632 } 633 } 634 return o.defaultLimits 635 } 636 637 // SmallestPositiveIntPerTenant is returning the minimal positive value of the 638 // supplied limit function for all given tenants. 639 func SmallestPositiveIntPerTenant(tenantIDs []string, f func(string) int) int { 640 var result *int 641 for _, tenantID := range tenantIDs { 642 v := f(tenantID) 643 if result == nil || v < *result { 644 result = &v 645 } 646 } 647 if result == nil { 648 return 0 649 } 650 return *result 651 } 652 653 // SmallestPositiveNonZeroIntPerTenant is returning the minimal positive and 654 // non-zero value of the supplied limit function for all given tenants. In many 655 // limits a value of 0 means unlimted so the method will return 0 only if all 656 // inputs have a limit of 0 or an empty tenant list is given. 
func SmallestPositiveNonZeroIntPerTenant(tenantIDs []string, f func(string) int) int {
	// Only values greater than zero are ever stored, so a result of 0 doubles
	// as the "nothing found yet" marker.
	result := 0
	for _, tenantID := range tenantIDs {
		v := f(tenantID)
		if v > 0 && (result == 0 || v < result) {
			result = v
		}
	}
	return result
}

// SmallestPositiveNonZeroDurationPerTenant is returning the minimal positive
// and non-zero value of the supplied limit function for all given tenants. In
// many limits a value of 0 means unlimited so the method will return 0 only if
// all inputs have a limit of 0 or an empty tenant list is given.
func SmallestPositiveNonZeroDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
	// Only values greater than zero are ever stored, so a result of 0 doubles
	// as the "nothing found yet" marker.
	result := time.Duration(0)
	for _, tenantID := range tenantIDs {
		v := f(tenantID)
		if v > 0 && (result == 0 || v < result) {
			result = v
		}
	}
	return result
}

// MaxDurationPerTenant is returning the maximum duration per tenant. Without
// tenants given it will return a time.Duration(0). Because the running maximum
// starts at zero, negative durations never win and 0 is returned when no
// tenant has a positive value.
func MaxDurationPerTenant(tenantIDs []string, f func(string) time.Duration) time.Duration {
	result := time.Duration(0)
	for _, tenantID := range tenantIDs {
		if v := f(tenantID); v > result {
			result = v
		}
	}
	return result
}