github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/distributor/distributor_test.go (about) 1 package distributor 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "io" 8 "math" 9 "net/http" 10 "sort" 11 "strconv" 12 "strings" 13 "sync" 14 "testing" 15 "time" 16 17 "github.com/go-kit/log" 18 "github.com/grafana/dskit/flagext" 19 "github.com/grafana/dskit/kv" 20 "github.com/grafana/dskit/kv/consul" 21 "github.com/grafana/dskit/ring" 22 ring_client "github.com/grafana/dskit/ring/client" 23 "github.com/grafana/dskit/services" 24 "github.com/prometheus/client_golang/prometheus" 25 "github.com/prometheus/client_golang/prometheus/testutil" 26 "github.com/prometheus/common/model" 27 "github.com/prometheus/prometheus/pkg/labels" 28 "github.com/prometheus/prometheus/pkg/relabel" 29 "github.com/stretchr/testify/assert" 30 "github.com/stretchr/testify/require" 31 "github.com/weaveworks/common/httpgrpc" 32 "github.com/weaveworks/common/user" 33 "google.golang.org/grpc" 34 "google.golang.org/grpc/health/grpc_health_v1" 35 "google.golang.org/grpc/status" 36 37 "github.com/cortexproject/cortex/pkg/chunk/encoding" 38 "github.com/cortexproject/cortex/pkg/cortexpb" 39 "github.com/cortexproject/cortex/pkg/ingester/client" 40 "github.com/cortexproject/cortex/pkg/prom1/storage/metric" 41 "github.com/cortexproject/cortex/pkg/tenant" 42 "github.com/cortexproject/cortex/pkg/util" 43 "github.com/cortexproject/cortex/pkg/util/chunkcompat" 44 "github.com/cortexproject/cortex/pkg/util/limiter" 45 util_math "github.com/cortexproject/cortex/pkg/util/math" 46 "github.com/cortexproject/cortex/pkg/util/test" 47 "github.com/cortexproject/cortex/pkg/util/validation" 48 ) 49 50 var ( 51 errFail = httpgrpc.Errorf(http.StatusInternalServerError, "Fail") 52 emptyResponse = &cortexpb.WriteResponse{} 53 ) 54 55 func TestConfig_Validate(t *testing.T) { 56 tests := map[string]struct { 57 initConfig func(*Config) 58 initLimits func(*validation.Limits) 59 expected error 60 }{ 61 "default config 
should pass": { 62 initConfig: func(_ *Config) {}, 63 initLimits: func(_ *validation.Limits) {}, 64 expected: nil, 65 }, 66 "should fail on invalid sharding strategy": { 67 initConfig: func(cfg *Config) { 68 cfg.ShardingStrategy = "xxx" 69 }, 70 initLimits: func(_ *validation.Limits) {}, 71 expected: errInvalidShardingStrategy, 72 }, 73 "should fail if the default shard size is 0 on when sharding strategy = shuffle-sharding": { 74 initConfig: func(cfg *Config) { 75 cfg.ShardingStrategy = "shuffle-sharding" 76 }, 77 initLimits: func(limits *validation.Limits) { 78 limits.IngestionTenantShardSize = 0 79 }, 80 expected: errInvalidTenantShardSize, 81 }, 82 "should pass if the default shard size > 0 on when sharding strategy = shuffle-sharding": { 83 initConfig: func(cfg *Config) { 84 cfg.ShardingStrategy = "shuffle-sharding" 85 }, 86 initLimits: func(limits *validation.Limits) { 87 limits.IngestionTenantShardSize = 3 88 }, 89 expected: nil, 90 }, 91 } 92 93 for testName, testData := range tests { 94 t.Run(testName, func(t *testing.T) { 95 cfg := Config{} 96 limits := validation.Limits{} 97 flagext.DefaultValues(&cfg, &limits) 98 99 testData.initConfig(&cfg) 100 testData.initLimits(&limits) 101 102 assert.Equal(t, testData.expected, cfg.Validate(limits)) 103 }) 104 } 105 } 106 107 func TestDistributor_Push(t *testing.T) { 108 // Metrics to assert on. 
109 lastSeenTimestamp := "cortex_distributor_latest_seen_sample_timestamp_seconds" 110 distributorAppend := "cortex_distributor_ingester_appends_total" 111 distributorAppendFailure := "cortex_distributor_ingester_append_failures_total" 112 ctx := user.InjectOrgID(context.Background(), "user") 113 114 type samplesIn struct { 115 num int 116 startTimestampMs int64 117 } 118 for name, tc := range map[string]struct { 119 metricNames []string 120 numIngesters int 121 happyIngesters int 122 samples samplesIn 123 metadata int 124 expectedResponse *cortexpb.WriteResponse 125 expectedError error 126 expectedMetrics string 127 ingesterError error 128 }{ 129 "A push of no samples shouldn't block or return error, even if ingesters are sad": { 130 numIngesters: 3, 131 happyIngesters: 0, 132 expectedResponse: emptyResponse, 133 }, 134 "A push to 3 happy ingesters should succeed": { 135 numIngesters: 3, 136 happyIngesters: 3, 137 samples: samplesIn{num: 5, startTimestampMs: 123456789000}, 138 metadata: 5, 139 expectedResponse: emptyResponse, 140 metricNames: []string{lastSeenTimestamp}, 141 expectedMetrics: ` 142 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 143 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 144 cortex_distributor_latest_seen_sample_timestamp_seconds{user="user"} 123456789.004 145 `, 146 }, 147 "A push to 2 happy ingesters should succeed": { 148 numIngesters: 3, 149 happyIngesters: 2, 150 samples: samplesIn{num: 5, startTimestampMs: 123456789000}, 151 metadata: 5, 152 expectedResponse: emptyResponse, 153 metricNames: []string{lastSeenTimestamp}, 154 expectedMetrics: ` 155 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 
156 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 157 cortex_distributor_latest_seen_sample_timestamp_seconds{user="user"} 123456789.004 158 `, 159 }, 160 "A push to 1 happy ingesters should fail": { 161 numIngesters: 3, 162 happyIngesters: 1, 163 samples: samplesIn{num: 10, startTimestampMs: 123456789000}, 164 expectedError: errFail, 165 metricNames: []string{lastSeenTimestamp}, 166 expectedMetrics: ` 167 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 168 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 169 cortex_distributor_latest_seen_sample_timestamp_seconds{user="user"} 123456789.009 170 `, 171 }, 172 "A push to 0 happy ingesters should fail": { 173 numIngesters: 3, 174 happyIngesters: 0, 175 samples: samplesIn{num: 10, startTimestampMs: 123456789000}, 176 expectedError: errFail, 177 metricNames: []string{lastSeenTimestamp}, 178 expectedMetrics: ` 179 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 180 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 181 cortex_distributor_latest_seen_sample_timestamp_seconds{user="user"} 123456789.009 182 `, 183 }, 184 "A push exceeding burst size should fail": { 185 numIngesters: 3, 186 happyIngesters: 3, 187 samples: samplesIn{num: 25, startTimestampMs: 123456789000}, 188 metadata: 5, 189 expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (20) exceeded while adding 25 samples and 5 metadata"), 190 metricNames: []string{lastSeenTimestamp}, 191 expectedMetrics: ` 192 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 
193 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 194 cortex_distributor_latest_seen_sample_timestamp_seconds{user="user"} 123456789.024 195 `, 196 }, 197 "A push to ingesters should report the correct metrics with no metadata": { 198 numIngesters: 3, 199 happyIngesters: 2, 200 samples: samplesIn{num: 1, startTimestampMs: 123456789000}, 201 metadata: 0, 202 metricNames: []string{distributorAppend, distributorAppendFailure}, 203 expectedResponse: emptyResponse, 204 expectedMetrics: ` 205 # HELP cortex_distributor_ingester_append_failures_total The total number of failed batch appends sent to ingesters. 206 # TYPE cortex_distributor_ingester_append_failures_total counter 207 cortex_distributor_ingester_append_failures_total{ingester="2",status="5xx",type="samples"} 1 208 # HELP cortex_distributor_ingester_appends_total The total number of batch appends sent to ingesters. 209 # TYPE cortex_distributor_ingester_appends_total counter 210 cortex_distributor_ingester_appends_total{ingester="0",type="samples"} 1 211 cortex_distributor_ingester_appends_total{ingester="1",type="samples"} 1 212 cortex_distributor_ingester_appends_total{ingester="2",type="samples"} 1 213 `, 214 }, 215 "A push to ingesters should report the correct metrics with no samples": { 216 numIngesters: 3, 217 happyIngesters: 2, 218 samples: samplesIn{num: 0, startTimestampMs: 123456789000}, 219 metadata: 1, 220 metricNames: []string{distributorAppend, distributorAppendFailure}, 221 expectedResponse: emptyResponse, 222 ingesterError: httpgrpc.Errorf(http.StatusInternalServerError, "Fail"), 223 expectedMetrics: ` 224 # HELP cortex_distributor_ingester_append_failures_total The total number of failed batch appends sent to ingesters. 
225 # TYPE cortex_distributor_ingester_append_failures_total counter 226 cortex_distributor_ingester_append_failures_total{ingester="2",status="5xx",type="metadata"} 1 227 # HELP cortex_distributor_ingester_appends_total The total number of batch appends sent to ingesters. 228 # TYPE cortex_distributor_ingester_appends_total counter 229 cortex_distributor_ingester_appends_total{ingester="0",type="metadata"} 1 230 cortex_distributor_ingester_appends_total{ingester="1",type="metadata"} 1 231 cortex_distributor_ingester_appends_total{ingester="2",type="metadata"} 1 232 `, 233 }, 234 "A push to overloaded ingesters should report the correct metrics": { 235 numIngesters: 3, 236 happyIngesters: 2, 237 samples: samplesIn{num: 0, startTimestampMs: 123456789000}, 238 metadata: 1, 239 metricNames: []string{distributorAppend, distributorAppendFailure}, 240 expectedResponse: emptyResponse, 241 ingesterError: httpgrpc.Errorf(http.StatusTooManyRequests, "Fail"), 242 expectedMetrics: ` 243 # HELP cortex_distributor_ingester_append_failures_total The total number of failed batch appends sent to ingesters. 244 # TYPE cortex_distributor_ingester_append_failures_total counter 245 cortex_distributor_ingester_append_failures_total{ingester="2",status="4xx",type="metadata"} 1 246 # HELP cortex_distributor_ingester_appends_total The total number of batch appends sent to ingesters. 
247 # TYPE cortex_distributor_ingester_appends_total counter 248 cortex_distributor_ingester_appends_total{ingester="0",type="metadata"} 1 249 cortex_distributor_ingester_appends_total{ingester="1",type="metadata"} 1 250 cortex_distributor_ingester_appends_total{ingester="2",type="metadata"} 1 251 `, 252 }, 253 } { 254 for _, shardByAllLabels := range []bool{true, false} { 255 t.Run(fmt.Sprintf("[%s](shardByAllLabels=%v)", name, shardByAllLabels), func(t *testing.T) { 256 limits := &validation.Limits{} 257 flagext.DefaultValues(limits) 258 limits.IngestionRate = 20 259 limits.IngestionBurstSize = 20 260 261 ds, _, regs := prepare(t, prepConfig{ 262 numIngesters: tc.numIngesters, 263 happyIngesters: tc.happyIngesters, 264 numDistributors: 1, 265 shardByAllLabels: shardByAllLabels, 266 limits: limits, 267 errFail: tc.ingesterError, 268 }) 269 270 request := makeWriteRequest(tc.samples.startTimestampMs, tc.samples.num, tc.metadata) 271 response, err := ds[0].Push(ctx, request) 272 assert.Equal(t, tc.expectedResponse, response) 273 assert.Equal(t, tc.expectedError, err) 274 275 // Check tracked Prometheus metrics. Since the Push() response is sent as soon as the quorum 276 // is reached, when we reach this point the 3rd ingester may not have received series/metadata 277 // yet. To avoid flaky test we retry metrics assertion until we hit the desired state (no error) 278 // within a reasonable timeout. 279 if tc.expectedMetrics != "" { 280 test.Poll(t, time.Second, nil, func() interface{} { 281 return testutil.GatherAndCompare(regs[0], strings.NewReader(tc.expectedMetrics), tc.metricNames...) 
282 }) 283 } 284 }) 285 } 286 } 287 } 288 289 func TestDistributor_MetricsCleanup(t *testing.T) { 290 dists, _, regs := prepare(t, prepConfig{ 291 numDistributors: 1, 292 }) 293 d := dists[0] 294 reg := regs[0] 295 296 metrics := []string{ 297 "cortex_distributor_received_samples_total", 298 "cortex_distributor_received_exemplars_total", 299 "cortex_distributor_received_metadata_total", 300 "cortex_distributor_deduped_samples_total", 301 "cortex_distributor_samples_in_total", 302 "cortex_distributor_exemplars_in_total", 303 "cortex_distributor_metadata_in_total", 304 "cortex_distributor_non_ha_samples_received_total", 305 "cortex_distributor_latest_seen_sample_timestamp_seconds", 306 } 307 308 d.receivedSamples.WithLabelValues("userA").Add(5) 309 d.receivedSamples.WithLabelValues("userB").Add(10) 310 d.receivedExemplars.WithLabelValues("userA").Add(5) 311 d.receivedExemplars.WithLabelValues("userB").Add(10) 312 d.receivedMetadata.WithLabelValues("userA").Add(5) 313 d.receivedMetadata.WithLabelValues("userB").Add(10) 314 d.incomingSamples.WithLabelValues("userA").Add(5) 315 d.incomingExemplars.WithLabelValues("userA").Add(5) 316 d.incomingMetadata.WithLabelValues("userA").Add(5) 317 d.nonHASamples.WithLabelValues("userA").Add(5) 318 d.dedupedSamples.WithLabelValues("userA", "cluster1").Inc() // We cannot clean this metric 319 d.latestSeenSampleTimestampPerUser.WithLabelValues("userA").Set(1111) 320 321 require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 322 # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. 323 # TYPE cortex_distributor_deduped_samples_total counter 324 cortex_distributor_deduped_samples_total{cluster="cluster1",user="userA"} 1 325 326 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 
327 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 328 cortex_distributor_latest_seen_sample_timestamp_seconds{user="userA"} 1111 329 330 # HELP cortex_distributor_metadata_in_total The total number of metadata the have come in to the distributor, including rejected. 331 # TYPE cortex_distributor_metadata_in_total counter 332 cortex_distributor_metadata_in_total{user="userA"} 5 333 334 # HELP cortex_distributor_non_ha_samples_received_total The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels. 335 # TYPE cortex_distributor_non_ha_samples_received_total counter 336 cortex_distributor_non_ha_samples_received_total{user="userA"} 5 337 338 # HELP cortex_distributor_received_metadata_total The total number of received metadata, excluding rejected. 339 # TYPE cortex_distributor_received_metadata_total counter 340 cortex_distributor_received_metadata_total{user="userA"} 5 341 cortex_distributor_received_metadata_total{user="userB"} 10 342 343 # HELP cortex_distributor_received_samples_total The total number of received samples, excluding rejected and deduped samples. 344 # TYPE cortex_distributor_received_samples_total counter 345 cortex_distributor_received_samples_total{user="userA"} 5 346 cortex_distributor_received_samples_total{user="userB"} 10 347 348 # HELP cortex_distributor_received_exemplars_total The total number of received exemplars, excluding rejected and deduped exemplars. 349 # TYPE cortex_distributor_received_exemplars_total counter 350 cortex_distributor_received_exemplars_total{user="userA"} 5 351 cortex_distributor_received_exemplars_total{user="userB"} 10 352 353 # HELP cortex_distributor_samples_in_total The total number of samples that have come in to the distributor, including rejected or deduped samples. 
354 # TYPE cortex_distributor_samples_in_total counter 355 cortex_distributor_samples_in_total{user="userA"} 5 356 357 # HELP cortex_distributor_exemplars_in_total The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. 358 # TYPE cortex_distributor_exemplars_in_total counter 359 cortex_distributor_exemplars_in_total{user="userA"} 5 360 `), metrics...)) 361 362 d.cleanupInactiveUser("userA") 363 364 require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(` 365 # HELP cortex_distributor_deduped_samples_total The total number of deduplicated samples. 366 # TYPE cortex_distributor_deduped_samples_total counter 367 368 # HELP cortex_distributor_latest_seen_sample_timestamp_seconds Unix timestamp of latest received sample per user. 369 # TYPE cortex_distributor_latest_seen_sample_timestamp_seconds gauge 370 371 # HELP cortex_distributor_metadata_in_total The total number of metadata the have come in to the distributor, including rejected. 372 # TYPE cortex_distributor_metadata_in_total counter 373 374 # HELP cortex_distributor_non_ha_samples_received_total The total number of received samples for a user that has HA tracking turned on, but the sample didn't contain both HA labels. 375 # TYPE cortex_distributor_non_ha_samples_received_total counter 376 377 # HELP cortex_distributor_received_metadata_total The total number of received metadata, excluding rejected. 378 # TYPE cortex_distributor_received_metadata_total counter 379 cortex_distributor_received_metadata_total{user="userB"} 10 380 381 # HELP cortex_distributor_received_samples_total The total number of received samples, excluding rejected and deduped samples. 382 # TYPE cortex_distributor_received_samples_total counter 383 cortex_distributor_received_samples_total{user="userB"} 10 384 385 # HELP cortex_distributor_received_exemplars_total The total number of received exemplars, excluding rejected and deduped exemplars. 
386 # TYPE cortex_distributor_received_exemplars_total counter 387 cortex_distributor_received_exemplars_total{user="userB"} 10 388 389 # HELP cortex_distributor_samples_in_total The total number of samples that have come in to the distributor, including rejected or deduped samples. 390 # TYPE cortex_distributor_samples_in_total counter 391 392 # HELP cortex_distributor_exemplars_in_total The total number of exemplars that have come in to the distributor, including rejected or deduped exemplars. 393 # TYPE cortex_distributor_exemplars_in_total counter 394 `), metrics...)) 395 } 396 397 func TestDistributor_PushIngestionRateLimiter(t *testing.T) { 398 type testPush struct { 399 samples int 400 metadata int 401 expectedError error 402 } 403 404 ctx := user.InjectOrgID(context.Background(), "user") 405 tests := map[string]struct { 406 distributors int 407 ingestionRateStrategy string 408 ingestionRate float64 409 ingestionBurstSize int 410 pushes []testPush 411 }{ 412 "local strategy: limit should be set to each distributor": { 413 distributors: 2, 414 ingestionRateStrategy: validation.LocalIngestionRateStrategy, 415 ingestionRate: 10, 416 ingestionBurstSize: 10, 417 pushes: []testPush{ 418 {samples: 4, expectedError: nil}, 419 {metadata: 1, expectedError: nil}, 420 {samples: 6, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (10) exceeded while adding 6 samples and 0 metadata")}, 421 {samples: 4, metadata: 1, expectedError: nil}, 422 {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (10) exceeded while adding 1 samples and 0 metadata")}, 423 {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (10) exceeded while adding 0 samples and 1 metadata")}, 424 }, 425 }, 426 "global strategy: limit should be evenly shared across distributors": { 427 distributors: 2, 428 ingestionRateStrategy: validation.GlobalIngestionRateStrategy, 429 ingestionRate: 10, 430 
ingestionBurstSize: 5, 431 pushes: []testPush{ 432 {samples: 2, expectedError: nil}, 433 {samples: 1, expectedError: nil}, 434 {samples: 2, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 2 samples and 1 metadata")}, 435 {samples: 2, expectedError: nil}, 436 {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 1 samples and 0 metadata")}, 437 {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 0 samples and 1 metadata")}, 438 }, 439 }, 440 "global strategy: burst should set to each distributor": { 441 distributors: 2, 442 ingestionRateStrategy: validation.GlobalIngestionRateStrategy, 443 ingestionRate: 10, 444 ingestionBurstSize: 20, 445 pushes: []testPush{ 446 {samples: 10, expectedError: nil}, 447 {samples: 5, expectedError: nil}, 448 {samples: 5, metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 5 samples and 1 metadata")}, 449 {samples: 5, expectedError: nil}, 450 {samples: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 1 samples and 0 metadata")}, 451 {metadata: 1, expectedError: httpgrpc.Errorf(http.StatusTooManyRequests, "ingestion rate limit (5) exceeded while adding 0 samples and 1 metadata")}, 452 }, 453 }, 454 } 455 456 for testName, testData := range tests { 457 testData := testData 458 459 t.Run(testName, func(t *testing.T) { 460 limits := &validation.Limits{} 461 flagext.DefaultValues(limits) 462 limits.IngestionRateStrategy = testData.ingestionRateStrategy 463 limits.IngestionRate = testData.ingestionRate 464 limits.IngestionBurstSize = testData.ingestionBurstSize 465 466 // Start all expected distributors 467 distributors, _, _ := prepare(t, prepConfig{ 468 numIngesters: 3, 469 happyIngesters: 3, 470 
numDistributors: testData.distributors, 471 shardByAllLabels: true, 472 limits: limits, 473 }) 474 475 // Push samples in multiple requests to the first distributor 476 for _, push := range testData.pushes { 477 request := makeWriteRequest(0, push.samples, push.metadata) 478 response, err := distributors[0].Push(ctx, request) 479 480 if push.expectedError == nil { 481 assert.Equal(t, emptyResponse, response) 482 assert.Nil(t, err) 483 } else { 484 assert.Nil(t, response) 485 assert.Equal(t, push.expectedError, err) 486 } 487 } 488 }) 489 } 490 } 491 492 func TestDistributor_PushInstanceLimits(t *testing.T) { 493 494 type testPush struct { 495 samples int 496 metadata int 497 expectedError error 498 } 499 500 ctx := user.InjectOrgID(context.Background(), "user") 501 tests := map[string]struct { 502 preInflight int 503 preRateSamples int // initial rate before first push 504 pushes []testPush // rate is recomputed after each push 505 506 // limits 507 inflightLimit int 508 ingestionRateLimit float64 509 510 metricNames []string 511 expectedMetrics string 512 }{ 513 "no limits limit": { 514 preInflight: 100, 515 preRateSamples: 1000, 516 517 pushes: []testPush{ 518 {samples: 100, expectedError: nil}, 519 }, 520 521 metricNames: []string{instanceLimitsMetric}, 522 expectedMetrics: ` 523 # HELP cortex_distributor_instance_limits Instance limits used by this distributor. 524 # TYPE cortex_distributor_instance_limits gauge 525 cortex_distributor_instance_limits{limit="max_inflight_push_requests"} 0 526 cortex_distributor_instance_limits{limit="max_ingestion_rate"} 0 527 `, 528 }, 529 "below inflight limit": { 530 preInflight: 100, 531 inflightLimit: 101, 532 pushes: []testPush{ 533 {samples: 100, expectedError: nil}, 534 }, 535 536 metricNames: []string{instanceLimitsMetric, "cortex_distributor_inflight_push_requests"}, 537 expectedMetrics: ` 538 # HELP cortex_distributor_inflight_push_requests Current number of inflight push requests in distributor. 
539 # TYPE cortex_distributor_inflight_push_requests gauge 540 cortex_distributor_inflight_push_requests 100 541 542 # HELP cortex_distributor_instance_limits Instance limits used by this distributor. 543 # TYPE cortex_distributor_instance_limits gauge 544 cortex_distributor_instance_limits{limit="max_inflight_push_requests"} 101 545 cortex_distributor_instance_limits{limit="max_ingestion_rate"} 0 546 `, 547 }, 548 "hits inflight limit": { 549 preInflight: 101, 550 inflightLimit: 101, 551 pushes: []testPush{ 552 {samples: 100, expectedError: errTooManyInflightPushRequests}, 553 }, 554 }, 555 "below ingestion rate limit": { 556 preRateSamples: 500, 557 ingestionRateLimit: 1000, 558 559 pushes: []testPush{ 560 {samples: 1000, expectedError: nil}, 561 }, 562 563 metricNames: []string{instanceLimitsMetric, "cortex_distributor_ingestion_rate_samples_per_second"}, 564 expectedMetrics: ` 565 # HELP cortex_distributor_ingestion_rate_samples_per_second Current ingestion rate in samples/sec that distributor is using to limit access. 566 # TYPE cortex_distributor_ingestion_rate_samples_per_second gauge 567 cortex_distributor_ingestion_rate_samples_per_second 600 568 569 # HELP cortex_distributor_instance_limits Instance limits used by this distributor. 
570 # TYPE cortex_distributor_instance_limits gauge 571 cortex_distributor_instance_limits{limit="max_inflight_push_requests"} 0 572 cortex_distributor_instance_limits{limit="max_ingestion_rate"} 1000 573 `, 574 }, 575 "hits rate limit on first request, but second request can proceed": { 576 preRateSamples: 1200, 577 ingestionRateLimit: 1000, 578 579 pushes: []testPush{ 580 {samples: 100, expectedError: errMaxSamplesPushRateLimitReached}, 581 {samples: 100, expectedError: nil}, 582 }, 583 }, 584 585 "below rate limit on first request, but hits the rate limit afterwards": { 586 preRateSamples: 500, 587 ingestionRateLimit: 1000, 588 589 pushes: []testPush{ 590 {samples: 5000, expectedError: nil}, // after push, rate = 500 + 0.2*(5000-500) = 1400 591 {samples: 5000, expectedError: errMaxSamplesPushRateLimitReached}, // after push, rate = 1400 + 0.2*(0 - 1400) = 1120 592 {samples: 5000, expectedError: errMaxSamplesPushRateLimitReached}, // after push, rate = 1120 + 0.2*(0 - 1120) = 896 593 {samples: 5000, expectedError: nil}, // 896 is below 1000, so this push succeeds, new rate = 896 + 0.2*(5000-896) = 1716.8 594 }, 595 }, 596 } 597 598 for testName, testData := range tests { 599 testData := testData 600 601 t.Run(testName, func(t *testing.T) { 602 limits := &validation.Limits{} 603 flagext.DefaultValues(limits) 604 605 // Start all expected distributors 606 distributors, _, regs := prepare(t, prepConfig{ 607 numIngesters: 3, 608 happyIngesters: 3, 609 numDistributors: 1, 610 shardByAllLabels: true, 611 limits: limits, 612 maxInflightRequests: testData.inflightLimit, 613 maxIngestionRate: testData.ingestionRateLimit, 614 }) 615 616 d := distributors[0] 617 d.inflightPushRequests.Add(int64(testData.preInflight)) 618 d.ingestionRate.Add(int64(testData.preRateSamples)) 619 620 d.ingestionRate.Tick() 621 622 for _, push := range testData.pushes { 623 request := makeWriteRequest(0, push.samples, push.metadata) 624 _, err := d.Push(ctx, request) 625 626 if 
push.expectedError == nil { 627 assert.Nil(t, err) 628 } else { 629 assert.Equal(t, push.expectedError, err) 630 } 631 632 d.ingestionRate.Tick() 633 634 if testData.expectedMetrics != "" { 635 assert.NoError(t, testutil.GatherAndCompare(regs[0], strings.NewReader(testData.expectedMetrics), testData.metricNames...)) 636 } 637 } 638 }) 639 } 640 } 641 642 func TestDistributor_PushHAInstances(t *testing.T) { 643 ctx := user.InjectOrgID(context.Background(), "user") 644 645 for i, tc := range []struct { 646 enableTracker bool 647 acceptedReplica string 648 testReplica string 649 cluster string 650 samples int 651 expectedResponse *cortexpb.WriteResponse 652 expectedCode int32 653 }{ 654 { 655 enableTracker: true, 656 acceptedReplica: "instance0", 657 testReplica: "instance0", 658 cluster: "cluster0", 659 samples: 5, 660 expectedResponse: emptyResponse, 661 }, 662 // The 202 indicates that we didn't accept this sample. 663 { 664 enableTracker: true, 665 acceptedReplica: "instance2", 666 testReplica: "instance0", 667 cluster: "cluster0", 668 samples: 5, 669 expectedCode: 202, 670 }, 671 // If the HA tracker is disabled we should still accept samples that have both labels. 672 { 673 enableTracker: false, 674 acceptedReplica: "instance0", 675 testReplica: "instance0", 676 cluster: "cluster0", 677 samples: 5, 678 expectedResponse: emptyResponse, 679 }, 680 // Using very long replica label value results in validation error. 
681 { 682 enableTracker: true, 683 acceptedReplica: "instance0", 684 testReplica: "instance1234567890123456789012345678901234567890", 685 cluster: "cluster0", 686 samples: 5, 687 expectedResponse: emptyResponse, 688 expectedCode: 400, 689 }, 690 } { 691 for _, shardByAllLabels := range []bool{true, false} { 692 t.Run(fmt.Sprintf("[%d](shardByAllLabels=%v)", i, shardByAllLabels), func(t *testing.T) { 693 var limits validation.Limits 694 flagext.DefaultValues(&limits) 695 limits.AcceptHASamples = true 696 limits.MaxLabelValueLength = 15 697 698 ds, _, _ := prepare(t, prepConfig{ 699 numIngesters: 3, 700 happyIngesters: 3, 701 numDistributors: 1, 702 shardByAllLabels: shardByAllLabels, 703 limits: &limits, 704 enableTracker: tc.enableTracker, 705 }) 706 707 d := ds[0] 708 709 userID, err := tenant.TenantID(ctx) 710 assert.NoError(t, err) 711 err = d.HATracker.checkReplica(ctx, userID, tc.cluster, tc.acceptedReplica, time.Now()) 712 assert.NoError(t, err) 713 714 request := makeWriteRequestHA(tc.samples, tc.testReplica, tc.cluster) 715 response, err := d.Push(ctx, request) 716 assert.Equal(t, tc.expectedResponse, response) 717 718 httpResp, ok := httpgrpc.HTTPResponseFromError(err) 719 if ok { 720 assert.Equal(t, tc.expectedCode, httpResp.Code) 721 } else if tc.expectedCode != 0 { 722 assert.Fail(t, "expected HTTP status code", tc.expectedCode) 723 } 724 }) 725 } 726 } 727 } 728 729 func TestDistributor_PushQuery(t *testing.T) { 730 const shuffleShardSize = 5 731 732 ctx := user.InjectOrgID(context.Background(), "user") 733 nameMatcher := mustEqualMatcher(model.MetricNameLabel, "foo") 734 barMatcher := mustEqualMatcher("bar", "baz") 735 736 type testcase struct { 737 name string 738 numIngesters int 739 happyIngesters int 740 samples int 741 metadata int 742 matchers []*labels.Matcher 743 expectedIngesters int 744 expectedResponse model.Matrix 745 expectedError error 746 shardByAllLabels bool 747 shuffleShardEnabled bool 748 } 749 750 // We'll programmatically build 
the test cases now, as we want complete 751 // coverage along quite a few different axis. 752 testcases := []testcase{} 753 754 // Run every test in both sharding modes. 755 for _, shardByAllLabels := range []bool{true, false} { 756 757 // Test with between 2 and 10 ingesters. 758 for numIngesters := 2; numIngesters < 10; numIngesters++ { 759 760 // Test with between 0 and numIngesters "happy" ingesters. 761 for happyIngesters := 0; happyIngesters <= numIngesters; happyIngesters++ { 762 763 // Test either with shuffle-sharding enabled or disabled. 764 for _, shuffleShardEnabled := range []bool{false, true} { 765 scenario := fmt.Sprintf("shardByAllLabels=%v, numIngester=%d, happyIngester=%d, shuffleSharding=%v)", shardByAllLabels, numIngesters, happyIngesters, shuffleShardEnabled) 766 767 // The number of ingesters we expect to query depends whether shuffle sharding and/or 768 // shard by all labels are enabled. 769 var expectedIngesters int 770 if shuffleShardEnabled { 771 expectedIngesters = util_math.Min(shuffleShardSize, numIngesters) 772 } else if shardByAllLabels { 773 expectedIngesters = numIngesters 774 } else { 775 expectedIngesters = 3 // Replication factor 776 } 777 778 // When we're not sharding by metric name, queriers with more than one 779 // failed ingester should fail. 780 if shardByAllLabels && numIngesters-happyIngesters > 1 { 781 testcases = append(testcases, testcase{ 782 name: fmt.Sprintf("ExpectFail(%s)", scenario), 783 numIngesters: numIngesters, 784 happyIngesters: happyIngesters, 785 matchers: []*labels.Matcher{nameMatcher, barMatcher}, 786 expectedError: errFail, 787 shardByAllLabels: shardByAllLabels, 788 shuffleShardEnabled: shuffleShardEnabled, 789 }) 790 continue 791 } 792 793 // When we have less ingesters than replication factor, any failed ingester 794 // will cause a failure. 
795 if numIngesters < 3 && happyIngesters < 2 { 796 testcases = append(testcases, testcase{ 797 name: fmt.Sprintf("ExpectFail(%s)", scenario), 798 numIngesters: numIngesters, 799 happyIngesters: happyIngesters, 800 matchers: []*labels.Matcher{nameMatcher, barMatcher}, 801 expectedError: errFail, 802 shardByAllLabels: shardByAllLabels, 803 shuffleShardEnabled: shuffleShardEnabled, 804 }) 805 continue 806 } 807 808 // If we're sharding by metric name and we have failed ingesters, we can't 809 // tell ahead of time if the query will succeed, as we don't know which 810 // ingesters will hold the results for the query. 811 if !shardByAllLabels && numIngesters-happyIngesters > 1 { 812 continue 813 } 814 815 // Reading all the samples back should succeed. 816 testcases = append(testcases, testcase{ 817 name: fmt.Sprintf("ReadAll(%s)", scenario), 818 numIngesters: numIngesters, 819 happyIngesters: happyIngesters, 820 samples: 10, 821 matchers: []*labels.Matcher{nameMatcher, barMatcher}, 822 expectedResponse: expectedResponse(0, 10), 823 expectedIngesters: expectedIngesters, 824 shardByAllLabels: shardByAllLabels, 825 shuffleShardEnabled: shuffleShardEnabled, 826 }) 827 828 // As should reading none of the samples back. 829 testcases = append(testcases, testcase{ 830 name: fmt.Sprintf("ReadNone(%s)", scenario), 831 numIngesters: numIngesters, 832 happyIngesters: happyIngesters, 833 samples: 10, 834 matchers: []*labels.Matcher{nameMatcher, mustEqualMatcher("not", "found")}, 835 expectedResponse: expectedResponse(0, 0), 836 expectedIngesters: expectedIngesters, 837 shardByAllLabels: shardByAllLabels, 838 shuffleShardEnabled: shuffleShardEnabled, 839 }) 840 841 // And reading each sample individually. 
842 for i := 0; i < 10; i++ { 843 testcases = append(testcases, testcase{ 844 name: fmt.Sprintf("ReadOne(%s, sample=%d)", scenario, i), 845 numIngesters: numIngesters, 846 happyIngesters: happyIngesters, 847 samples: 10, 848 matchers: []*labels.Matcher{nameMatcher, mustEqualMatcher("sample", strconv.Itoa(i))}, 849 expectedResponse: expectedResponse(i, i+1), 850 expectedIngesters: expectedIngesters, 851 shardByAllLabels: shardByAllLabels, 852 shuffleShardEnabled: shuffleShardEnabled, 853 }) 854 } 855 } 856 } 857 } 858 } 859 860 for _, tc := range testcases { 861 t.Run(tc.name, func(t *testing.T) { 862 ds, ingesters, _ := prepare(t, prepConfig{ 863 numIngesters: tc.numIngesters, 864 happyIngesters: tc.happyIngesters, 865 numDistributors: 1, 866 shardByAllLabels: tc.shardByAllLabels, 867 shuffleShardEnabled: tc.shuffleShardEnabled, 868 shuffleShardSize: shuffleShardSize, 869 }) 870 871 request := makeWriteRequest(0, tc.samples, tc.metadata) 872 writeResponse, err := ds[0].Push(ctx, request) 873 assert.Equal(t, &cortexpb.WriteResponse{}, writeResponse) 874 assert.Nil(t, err) 875 876 response, err := ds[0].Query(ctx, 0, 10, tc.matchers...) 877 sort.Sort(response) 878 assert.Equal(t, tc.expectedResponse, response) 879 assert.Equal(t, tc.expectedError, err) 880 881 series, err := ds[0].QueryStream(ctx, 0, 10, tc.matchers...) 882 assert.Equal(t, tc.expectedError, err) 883 884 if series == nil { 885 response, err = chunkcompat.SeriesChunksToMatrix(0, 10, nil) 886 } else { 887 response, err = chunkcompat.SeriesChunksToMatrix(0, 10, series.Chunkseries) 888 } 889 assert.NoError(t, err) 890 assert.Equal(t, tc.expectedResponse.String(), response.String()) 891 892 // Check how many ingesters have been queried. 893 // Due to the quorum the distributor could cancel the last request towards ingesters 894 // if all other ones are successful, so we're good either has been queried X or X-1 895 // ingesters. 
896 if tc.expectedError == nil { 897 assert.Contains(t, []int{tc.expectedIngesters, tc.expectedIngesters - 1}, countMockIngestersCalls(ingesters, "Query")) 898 assert.Contains(t, []int{tc.expectedIngesters, tc.expectedIngesters - 1}, countMockIngestersCalls(ingesters, "QueryStream")) 899 } 900 }) 901 } 902 } 903
// TestDistributor_QueryStream_ShouldReturnErrorIfMaxChunksPerQueryLimitIsReached
// checks the max-chunks limit on QueryStream: a query returning exactly as many
// chunks as the limit allows succeeds, while a query returning more fails with
// the "max number of chunks limit" error.
func TestDistributor_QueryStream_ShouldReturnErrorIfMaxChunksPerQueryLimitIsReached(t *testing.T) {
	const maxChunksLimit = 30 // Chunks are duplicated due to replication factor.

	tenantLimits := &validation.Limits{}
	flagext.DefaultValues(tenantLimits)
	tenantLimits.MaxChunksPerQuery = maxChunksLimit

	// A single distributor in front of 3 ingesters, all of them happy.
	distributors, _, _ := prepare(t, prepConfig{
		numIngesters:     3,
		happyIngesters:   3,
		numDistributors:  1,
		shardByAllLabels: true,
		limits:           tenantLimits,
	})

	// The context carries both the tenant ID and a query limiter built with
	// maxChunksLimit.
	ctx := limiter.AddQueryLimiterToContext(
		user.InjectOrgID(context.Background(), "user"),
		limiter.NewQueryLimiter(0, 0, maxChunksLimit),
	)

	// push sends req through the distributor and checks that it was accepted.
	push := func(req *cortexpb.WriteRequest) {
		res, err := distributors[0].Push(ctx, req)
		assert.Equal(t, &cortexpb.WriteResponse{}, res)
		assert.Nil(t, err)
	}

	matchEverything := []*labels.Matcher{
		labels.MustNewMatcher(labels.MatchRegexp, model.MetricNameLabel, ".+"),
	}

	// Start with a number of series below the max chunks limit. Each series
	// has 1 sample, so expect 1 chunk per series when querying back.
	initialSeries := maxChunksLimit / 3
	push(makeWriteRequest(0, initialSeries, 0))

	// The number of series (and thus chunks) is equal to the limit but does
	// not exceed it, so a query over all series must succeed.
	streamed, err := distributors[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, matchEverything...)
	require.NoError(t, err)
	assert.Len(t, streamed.Chunkseries, initialSeries)

	// Push more series, so that querying back all series exceeds the limit.
	extra := &cortexpb.WriteRequest{}
	for i := 0; i < maxChunksLimit; i++ {
		seriesName := fmt.Sprintf("another_series_%d", i)
		extra.Timeseries = append(extra.Timeseries,
			makeWriteRequestTimeseries([]cortexpb.LabelAdapter{{Name: model.MetricNameLabel, Value: seriesName}}, 0, 0),
		)
	}
	push(extra)

	// The number of series (and thus chunks) now exceeds the limit, so the
	// same query must fail.
	_, err = distributors[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, matchEverything...)
	require.Error(t, err)
	assert.Contains(t, err.Error(), "the query hit the max number of chunks limit")
}
959 960 func TestDistributor_QueryStream_ShouldReturnErrorIfMaxSeriesPerQueryLimitIsReached(t *testing.T) { 961 const maxSeriesLimit = 10 962 963 ctx := user.InjectOrgID(context.Background(), "user") 964 limits := &validation.Limits{} 965 flagext.DefaultValues(limits) 966 ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(maxSeriesLimit, 0, 0)) 967 968 // Prepare distributors. 969 ds, _, _ := prepare(t, prepConfig{ 970 numIngesters: 3, 971 happyIngesters: 3, 972 numDistributors: 1, 973 shardByAllLabels: true, 974 limits: limits, 975 }) 976 977 // Push a number of series below the max series limit. 978 initialSeries := maxSeriesLimit 979 writeReq := makeWriteRequest(0, initialSeries, 0) 980 writeRes, err := ds[0].Push(ctx, writeReq) 981 assert.Equal(t, &cortexpb.WriteResponse{}, writeRes) 982 assert.Nil(t, err) 983 984 allSeriesMatchers := []*labels.Matcher{ 985 labels.MustNewMatcher(labels.MatchRegexp, model.MetricNameLabel, ".+"), 986 } 987 988 // Since the number of series is equal to the limit (but doesn't 989 // exceed it), we expect a query running on all series to succeed. 990 queryRes, err := ds[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, allSeriesMatchers...)
991 require.NoError(t, err) 992 assert.Len(t, queryRes.Chunkseries, initialSeries) 993 994 // Push more series to exceed the limit once we'll query back all series. 995 writeReq = &cortexpb.WriteRequest{} 996 writeReq.Timeseries = append(writeReq.Timeseries, 997 makeWriteRequestTimeseries([]cortexpb.LabelAdapter{{Name: model.MetricNameLabel, Value: "another_series"}}, 0, 0), 998 ) 999 1000 writeRes, err = ds[0].Push(ctx, writeReq) 1001 assert.Equal(t, &cortexpb.WriteResponse{}, writeRes) 1002 assert.Nil(t, err) 1003 1004 // Since the number of series is exceeding the limit, we expect 1005 // a query running on all series to fail. 1006 _, err = ds[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, allSeriesMatchers...) 1007 require.Error(t, err) 1008 assert.Contains(t, err.Error(), "max number of series limit") 1009 1010 } 1011 1012 func TestDistributor_QueryStream_ShouldReturnErrorIfMaxChunkBytesPerQueryLimitIsReached(t *testing.T) { 1013 const seriesToAdd = 10 1014 1015 ctx := user.InjectOrgID(context.Background(), "user") 1016 limits := &validation.Limits{} 1017 flagext.DefaultValues(limits) 1018 1019 // Prepare distributors. 1020 // Use replication factor of 2 to always read all the chunks from both ingesters, 1021 // this guarantees us to always read the same chunks and have a stable test. 1022 ds, _, _ := prepare(t, prepConfig{ 1023 numIngesters: 2, 1024 happyIngesters: 2, 1025 numDistributors: 1, 1026 shardByAllLabels: true, 1027 limits: limits, 1028 replicationFactor: 2, 1029 }) 1030 1031 allSeriesMatchers := []*labels.Matcher{ 1032 labels.MustNewMatcher(labels.MatchRegexp, model.MetricNameLabel, ".+"), 1033 } 1034 // Push a single series to allow us to calculate the chunk size to calculate the limit for the test. 
1035 writeReq := &cortexpb.WriteRequest{} 1036 writeReq.Timeseries = append(writeReq.Timeseries, 1037 makeWriteRequestTimeseries([]cortexpb.LabelAdapter{{Name: model.MetricNameLabel, Value: "another_series"}}, 0, 0), 1038 ) 1039 writeRes, err := ds[0].Push(ctx, writeReq) 1040 assert.Equal(t, &cortexpb.WriteResponse{}, writeRes) 1041 assert.Nil(t, err) 1042 chunkSizeResponse, err := ds[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, allSeriesMatchers...) 1043 require.NoError(t, err) 1044 1045 // Use the resulting chunks size to calculate the limit as (series to add + our test series) * the response chunk size. 1046 var responseChunkSize = chunkSizeResponse.ChunksSize() 1047 var maxBytesLimit = (seriesToAdd) * responseChunkSize 1048 1049 // Update the limiter with the calculated limits. 1050 ctx = limiter.AddQueryLimiterToContext(ctx, limiter.NewQueryLimiter(0, maxBytesLimit, 0)) 1051 1052 // Push a number of series below the max chunk bytes limit. Subtract one for the series added above. 1053 writeReq = makeWriteRequest(0, seriesToAdd-1, 0) 1054 writeRes, err = ds[0].Push(ctx, writeReq) 1055 assert.Equal(t, &cortexpb.WriteResponse{}, writeRes) 1056 assert.Nil(t, err) 1057 1058 // Since the number of chunk bytes is equal to the limit (but doesn't 1059 // exceed it), we expect a query running on all series to succeed. 1060 queryRes, err := ds[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, allSeriesMatchers...) 1061 require.NoError(t, err) 1062 assert.Len(t, queryRes.Chunkseries, seriesToAdd) 1063 1064 // Push another series to exceed the chunk bytes limit once we'll query back all series. 
1065 writeReq = &cortexpb.WriteRequest{} 1066 writeReq.Timeseries = append(writeReq.Timeseries, 1067 makeWriteRequestTimeseries([]cortexpb.LabelAdapter{{Name: model.MetricNameLabel, Value: "another_series_1"}}, 0, 0), 1068 ) 1069 1070 writeRes, err = ds[0].Push(ctx, writeReq) 1071 assert.Equal(t, &cortexpb.WriteResponse{}, writeRes) 1072 assert.Nil(t, err) 1073 1074 // Since the aggregated chunk size is exceeding the limit, we expect 1075 // a query running on all series to fail. 1076 _, err = ds[0].QueryStream(ctx, math.MinInt32, math.MaxInt32, allSeriesMatchers...) 1077 require.Error(t, err) 1078 assert.Equal(t, err, validation.LimitError(fmt.Sprintf(limiter.ErrMaxChunkBytesHit, maxBytesLimit))) 1079 } 1080 1081 func TestDistributor_Push_LabelRemoval(t *testing.T) { 1082 ctx := user.InjectOrgID(context.Background(), "user") 1083 1084 type testcase struct { 1085 inputSeries labels.Labels 1086 expectedSeries labels.Labels 1087 removeReplica bool 1088 removeLabels []string 1089 } 1090 1091 cases := []testcase{ 1092 // Remove both cluster and replica label. 1093 { 1094 removeReplica: true, 1095 removeLabels: []string{"cluster"}, 1096 inputSeries: labels.Labels{ 1097 {Name: "__name__", Value: "some_metric"}, 1098 {Name: "cluster", Value: "one"}, 1099 {Name: "__replica__", Value: "two"}, 1100 }, 1101 expectedSeries: labels.Labels{ 1102 {Name: "__name__", Value: "some_metric"}, 1103 }, 1104 }, 1105 // Remove multiple labels and replica. 1106 { 1107 removeReplica: true, 1108 removeLabels: []string{"foo", "some"}, 1109 inputSeries: labels.Labels{ 1110 {Name: "__name__", Value: "some_metric"}, 1111 {Name: "cluster", Value: "one"}, 1112 {Name: "__replica__", Value: "two"}, 1113 {Name: "foo", Value: "bar"}, 1114 {Name: "some", Value: "thing"}, 1115 }, 1116 expectedSeries: labels.Labels{ 1117 {Name: "__name__", Value: "some_metric"}, 1118 {Name: "cluster", Value: "one"}, 1119 }, 1120 }, 1121 // Don't remove any labels. 
1122 { 1123 removeReplica: false, 1124 inputSeries: labels.Labels{ 1125 {Name: "__name__", Value: "some_metric"}, 1126 {Name: "__replica__", Value: "two"}, 1127 {Name: "cluster", Value: "one"}, 1128 }, 1129 expectedSeries: labels.Labels{ 1130 {Name: "__name__", Value: "some_metric"}, 1131 {Name: "__replica__", Value: "two"}, 1132 {Name: "cluster", Value: "one"}, 1133 }, 1134 }, 1135 } 1136 1137 for _, tc := range cases { 1138 var err error 1139 var limits validation.Limits 1140 flagext.DefaultValues(&limits) 1141 limits.DropLabels = tc.removeLabels 1142 limits.AcceptHASamples = tc.removeReplica 1143 1144 ds, ingesters, _ := prepare(t, prepConfig{ 1145 numIngesters: 2, 1146 happyIngesters: 2, 1147 numDistributors: 1, 1148 shardByAllLabels: true, 1149 limits: &limits, 1150 }) 1151 1152 // Push the series to the distributor 1153 req := mockWriteRequest(tc.inputSeries, 1, 1) 1154 _, err = ds[0].Push(ctx, req) 1155 require.NoError(t, err) 1156 1157 // Since each test pushes only 1 series, we do expect the ingester 1158 // to have received exactly 1 series 1159 for i := range ingesters { 1160 timeseries := ingesters[i].series() 1161 assert.Equal(t, 1, len(timeseries)) 1162 for _, v := range timeseries { 1163 assert.Equal(t, tc.expectedSeries, cortexpb.FromLabelAdaptersToLabels(v.Labels)) 1164 } 1165 } 1166 } 1167 } 1168 1169 func TestDistributor_Push_ShouldGuaranteeShardingTokenConsistencyOverTheTime(t *testing.T) { 1170 ctx := user.InjectOrgID(context.Background(), "user") 1171 tests := map[string]struct { 1172 inputSeries labels.Labels 1173 expectedSeries labels.Labels 1174 expectedToken uint32 1175 }{ 1176 "metric_1 with value_1": { 1177 inputSeries: labels.Labels{ 1178 {Name: "__name__", Value: "metric_1"}, 1179 {Name: "cluster", Value: "cluster_1"}, 1180 {Name: "key", Value: "value_1"}, 1181 }, 1182 expectedSeries: labels.Labels{ 1183 {Name: "__name__", Value: "metric_1"}, 1184 {Name: "cluster", Value: "cluster_1"}, 1185 {Name: "key", Value: "value_1"}, 1186 }, 1187 
expectedToken: 0xec0a2e9d, 1188 }, 1189 "metric_1 with value_1 and dropped label due to config": { 1190 inputSeries: labels.Labels{ 1191 {Name: "__name__", Value: "metric_1"}, 1192 {Name: "cluster", Value: "cluster_1"}, 1193 {Name: "key", Value: "value_1"}, 1194 {Name: "dropped", Value: "unused"}, // will be dropped, doesn't need to be in correct order 1195 }, 1196 expectedSeries: labels.Labels{ 1197 {Name: "__name__", Value: "metric_1"}, 1198 {Name: "cluster", Value: "cluster_1"}, 1199 {Name: "key", Value: "value_1"}, 1200 }, 1201 expectedToken: 0xec0a2e9d, 1202 }, 1203 "metric_1 with value_1 and dropped HA replica label": { 1204 inputSeries: labels.Labels{ 1205 {Name: "__name__", Value: "metric_1"}, 1206 {Name: "cluster", Value: "cluster_1"}, 1207 {Name: "key", Value: "value_1"}, 1208 {Name: "__replica__", Value: "replica_1"}, 1209 }, 1210 expectedSeries: labels.Labels{ 1211 {Name: "__name__", Value: "metric_1"}, 1212 {Name: "cluster", Value: "cluster_1"}, 1213 {Name: "key", Value: "value_1"}, 1214 }, 1215 expectedToken: 0xec0a2e9d, 1216 }, 1217 "metric_2 with value_1": { 1218 inputSeries: labels.Labels{ 1219 {Name: "__name__", Value: "metric_2"}, 1220 {Name: "key", Value: "value_1"}, 1221 }, 1222 expectedSeries: labels.Labels{ 1223 {Name: "__name__", Value: "metric_2"}, 1224 {Name: "key", Value: "value_1"}, 1225 }, 1226 expectedToken: 0xa60906f2, 1227 }, 1228 "metric_1 with value_2": { 1229 inputSeries: labels.Labels{ 1230 {Name: "__name__", Value: "metric_1"}, 1231 {Name: "key", Value: "value_2"}, 1232 }, 1233 expectedSeries: labels.Labels{ 1234 {Name: "__name__", Value: "metric_1"}, 1235 {Name: "key", Value: "value_2"}, 1236 }, 1237 expectedToken: 0x18abc8a2, 1238 }, 1239 } 1240 1241 var limits validation.Limits 1242 flagext.DefaultValues(&limits) 1243 limits.DropLabels = []string{"dropped"} 1244 limits.AcceptHASamples = true 1245 1246 for testName, testData := range tests { 1247 t.Run(testName, func(t *testing.T) { 1248 ds, ingesters, _ := prepare(t, 
prepConfig{ 1249 numIngesters: 2, 1250 happyIngesters: 2, 1251 numDistributors: 1, 1252 shardByAllLabels: true, 1253 limits: &limits, 1254 }) 1255 1256 // Push the series to the distributor 1257 req := mockWriteRequest(testData.inputSeries, 1, 1) 1258 _, err := ds[0].Push(ctx, req) 1259 require.NoError(t, err) 1260 1261 // Since each test pushes only 1 series, we do expect the ingester 1262 // to have received exactly 1 series 1263 for i := range ingesters { 1264 timeseries := ingesters[i].series() 1265 assert.Equal(t, 1, len(timeseries)) 1266 1267 series, ok := timeseries[testData.expectedToken] 1268 require.True(t, ok) 1269 assert.Equal(t, testData.expectedSeries, cortexpb.FromLabelAdaptersToLabels(series.Labels)) 1270 } 1271 }) 1272 } 1273 } 1274 1275 func TestDistributor_Push_LabelNameValidation(t *testing.T) { 1276 inputLabels := labels.Labels{ 1277 {Name: model.MetricNameLabel, Value: "foo"}, 1278 {Name: "999.illegal", Value: "baz"}, 1279 } 1280 ctx := user.InjectOrgID(context.Background(), "user") 1281 1282 tests := map[string]struct { 1283 inputLabels labels.Labels 1284 skipLabelNameValidationCfg bool 1285 skipLabelNameValidationReq bool 1286 errExpected bool 1287 errMessage string 1288 }{ 1289 "label name validation is on by default": { 1290 inputLabels: inputLabels, 1291 errExpected: true, 1292 errMessage: `sample invalid label: "999.illegal" metric "foo{999.illegal=\"baz\"}"`, 1293 }, 1294 "label name validation can be skipped via config": { 1295 inputLabels: inputLabels, 1296 skipLabelNameValidationCfg: true, 1297 errExpected: false, 1298 }, 1299 "label name validation can be skipped via WriteRequest parameter": { 1300 inputLabels: inputLabels, 1301 skipLabelNameValidationReq: true, 1302 errExpected: false, 1303 }, 1304 } 1305 1306 for testName, tc := range tests { 1307 t.Run(testName, func(t *testing.T) { 1308 ds, _, _ := prepare(t, prepConfig{ 1309 numIngesters: 2, 1310 happyIngesters: 2, 1311 numDistributors: 1, 1312 shuffleShardSize: 1, 1313 
skipLabelNameValidation: tc.skipLabelNameValidationCfg, 1314 }) 1315 req := mockWriteRequest(tc.inputLabels, 42, 100000) 1316 req.SkipLabelNameValidation = tc.skipLabelNameValidationReq 1317 _, err := ds[0].Push(ctx, req) 1318 if tc.errExpected { 1319 fromError, _ := status.FromError(err) 1320 assert.Equal(t, tc.errMessage, fromError.Message()) 1321 } else { 1322 assert.Nil(t, err) 1323 } 1324 }) 1325 } 1326 } 1327
// TestDistributor_Push_ExemplarValidation pushes write requests carrying an
// exemplar and checks that Push either succeeds or fails with a status whose
// message contains the expected text, depending on the exemplar and series
// labels of each case.
func TestDistributor_Push_ExemplarValidation(t *testing.T) {
	// seriesLabels returns a fresh name/value list for the "test" metric,
	// followed by any extra name/value strings.
	seriesLabels := func(extra ...string) []string {
		return append([]string{model.MetricNameLabel, "test"}, extra...)
	}

	// Series labels made of the metric name plus 30 further name/value pairs.
	manyLabels := seriesLabels()
	for i := 1; i <= 30; i++ {
		manyLabels = append(manyLabels, fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i))
	}

	// Exemplar label value used by the "too long labelset" case.
	longValue := strings.Repeat("0", 126)

	type scenario struct {
		req    *cortexpb.WriteRequest
		errMsg string // empty means the push is expected to succeed
	}

	scenarios := map[string]scenario{
		"valid exemplar": {
			req: makeWriteRequestExemplar(seriesLabels(), 1000, []string{"foo", "bar"}),
		},
		"rejects exemplar with no labels": {
			req:    makeWriteRequestExemplar(seriesLabels(), 1000, []string{}),
			errMsg: `exemplar missing labels, timestamp: 1000 series: {__name__="test"} labels: {}`,
		},
		"rejects exemplar with no timestamp": {
			req:    makeWriteRequestExemplar(seriesLabels(), 0, []string{"foo", "bar"}),
			errMsg: `exemplar missing timestamp, timestamp: 0 series: {__name__="test"} labels: {foo="bar"}`,
		},
		"rejects exemplar with too long labelset": {
			req:    makeWriteRequestExemplar(seriesLabels(), 1000, []string{"foo", longValue}),
			errMsg: fmt.Sprintf(`exemplar combined labelset exceeds 128 characters, timestamp: 1000 series: {__name__="test"} labels: {foo="%s"}`, longValue),
		},
		"rejects exemplar with too many series labels": {
			req:    makeWriteRequestExemplar(manyLabels, 0, nil),
			errMsg: "series has too many labels",
		},
		"rejects exemplar with duplicate series labels": {
			req:    makeWriteRequestExemplar(seriesLabels("foo", "bar", "foo", "bar"), 0, nil),
			errMsg: "duplicate label name",
		},
		"rejects exemplar with empty series label name": {
			req:    makeWriteRequestExemplar(seriesLabels("", "bar"), 0, nil),
			errMsg: "invalid label",
		},
	}

	ctx := user.InjectOrgID(context.Background(), "user")

	for name, sc := range scenarios {
		t.Run(name, func(t *testing.T) {
			ds, _, _ := prepare(t, prepConfig{
				numIngesters:     2,
				happyIngesters:   2,
				numDistributors:  1,
				shuffleShardSize: 1,
			})

			_, err := ds[0].Push(ctx, sc.req)
			if sc.errMsg == "" {
				assert.Nil(t, err)
				return
			}

			// Only the status message is inspected; whether err actually
			// wrapped a status is deliberately not checked here.
			st, _ := status.FromError(err)
			assert.Contains(t, st.Message(), sc.errMsg)
		})
	}
}
1386 1387 func BenchmarkDistributor_Push(b *testing.B) { 1388 const ( 1389 numSeriesPerRequest = 1000 1390 ) 1391 ctx := user.InjectOrgID(context.Background(), "user") 1392 1393 tests := map[string]struct { 1394 prepareConfig func(limits *validation.Limits) 1395 prepareSeries func() ([]labels.Labels, []cortexpb.Sample) 1396 expectedErr string 1397 }{ 1398 "all samples successfully pushed": { 1399 prepareConfig: func(limits *validation.Limits) {}, 1400 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1401 metrics := make([]labels.Labels, numSeriesPerRequest) 1402 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1403 1404 for i := 0; i < numSeriesPerRequest; i++ { 1405 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1406 for i := 0; i < 10; i++ { 1407 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1408 } 1409 1410 metrics[i] = lbls.Labels() 1411 samples[i] = cortexpb.Sample{ 1412 Value: float64(i), 1413 TimestampMs: time.Now().UnixNano() / int64(time.Millisecond), 1414 } 1415 } 1416 1417 return metrics, samples 1418 }, 1419 expectedErr: "", 1420 },
1421 "ingestion rate limit reached": { 1422 prepareConfig: func(limits *validation.Limits) { 1423 limits.IngestionRate = 1 1424 limits.IngestionBurstSize = 1 1425 }, 1426 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1427 metrics := make([]labels.Labels, numSeriesPerRequest) 1428 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1429 1430 for i := 0; i < numSeriesPerRequest; i++ { 1431 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1432 for i := 0; i < 10; i++ { 1433 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1434 } 1435 1436 metrics[i] = lbls.Labels() 1437 samples[i] = cortexpb.Sample{ 1438 Value: float64(i), 1439 TimestampMs: time.Now().UnixNano() / int64(time.Millisecond), 1440 } 1441 } 1442 1443 return metrics, samples 1444 }, 1445 expectedErr: "ingestion rate limit", 1446 }, 1447 "too many labels limit reached": { 1448 prepareConfig: func(limits *validation.Limits) { 1449 limits.MaxLabelNamesPerSeries = 30 1450 }, 1451 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1452 metrics := make([]labels.Labels, numSeriesPerRequest) 1453 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1454 1455 for i := 0; i < numSeriesPerRequest; i++ { 1456 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1457 for i := 1; i < 31; i++ { 1458 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1459 } 1460 1461 metrics[i] = lbls.Labels() 1462 samples[i] = cortexpb.Sample{ 1463 Value: float64(i), 1464 TimestampMs: time.Now().UnixNano() / int64(time.Millisecond), 1465 } 1466 } 1467 1468 return metrics, samples 1469 }, 1470 expectedErr: "series has too many labels", 1471 }, 1472 "max label name length limit reached": { 1473 prepareConfig: func(limits *validation.Limits) { 1474 limits.MaxLabelNameLength = 1024 1475 }, 1476 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1477 metrics := make([]labels.Labels, numSeriesPerRequest) 
1478 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1479 1480 for i := 0; i < numSeriesPerRequest; i++ { 1481 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1482 for i := 0; i < 10; i++ { 1483 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1484 } 1485 1486 // Add a label with a very long name. 1487 lbls.Set(fmt.Sprintf("xxx_%0.2000d", 1), "xxx") 1488 1489 metrics[i] = lbls.Labels() 1490 samples[i] = cortexpb.Sample{ 1491 Value: float64(i), 1492 TimestampMs: time.Now().UnixNano() / int64(time.Millisecond), 1493 } 1494 } 1495 1496 return metrics, samples 1497 }, 1498 expectedErr: "label name too long", 1499 }, 1500 "max label value length limit reached": { 1501 prepareConfig: func(limits *validation.Limits) { 1502 limits.MaxLabelValueLength = 1024 1503 }, 1504 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1505 metrics := make([]labels.Labels, numSeriesPerRequest) 1506 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1507 1508 for i := 0; i < numSeriesPerRequest; i++ { 1509 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1510 for i := 0; i < 10; i++ { 1511 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1512 } 1513 1514 // Add a label with a very long value. 
1515 lbls.Set("xxx", fmt.Sprintf("xxx_%0.2000d", 1)) 1516 1517 metrics[i] = lbls.Labels() 1518 samples[i] = cortexpb.Sample{ 1519 Value: float64(i), 1520 TimestampMs: time.Now().UnixNano() / int64(time.Millisecond), 1521 } 1522 } 1523 1524 return metrics, samples 1525 }, 1526 expectedErr: "label value too long", 1527 }, 1528 "timestamp too old": { 1529 prepareConfig: func(limits *validation.Limits) { 1530 limits.RejectOldSamples = true 1531 limits.RejectOldSamplesMaxAge = model.Duration(time.Hour) 1532 }, 1533 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1534 metrics := make([]labels.Labels, numSeriesPerRequest) 1535 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1536 1537 for i := 0; i < numSeriesPerRequest; i++ { 1538 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1539 for i := 0; i < 10; i++ { 1540 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1541 } 1542 1543 metrics[i] = lbls.Labels() 1544 samples[i] = cortexpb.Sample{ 1545 Value: float64(i), 1546 TimestampMs: time.Now().Add(-2*time.Hour).UnixNano() / int64(time.Millisecond), 1547 } 1548 } 1549 1550 return metrics, samples 1551 }, 1552 expectedErr: "timestamp too old", 1553 }, 1554 "timestamp too new": { 1555 prepareConfig: func(limits *validation.Limits) { 1556 limits.CreationGracePeriod = model.Duration(time.Minute) 1557 }, 1558 prepareSeries: func() ([]labels.Labels, []cortexpb.Sample) { 1559 metrics := make([]labels.Labels, numSeriesPerRequest) 1560 samples := make([]cortexpb.Sample, numSeriesPerRequest) 1561 1562 for i := 0; i < numSeriesPerRequest; i++ { 1563 lbls := labels.NewBuilder(labels.Labels{{Name: model.MetricNameLabel, Value: "foo"}}) 1564 for i := 0; i < 10; i++ { 1565 lbls.Set(fmt.Sprintf("name_%d", i), fmt.Sprintf("value_%d", i)) 1566 } 1567 1568 metrics[i] = lbls.Labels() 1569 samples[i] = cortexpb.Sample{ 1570 Value: float64(i), 1571 TimestampMs: time.Now().Add(time.Hour).UnixNano() / int64(time.Millisecond), 
1572 } 1573 } 1574 1575 return metrics, samples 1576 }, 1577 expectedErr: "timestamp too new", 1578 }, 1579 } 1580 1581 for testName, testData := range tests { 1582 b.Run(testName, func(b *testing.B) { 1583 1584 // Create an in-memory KV store for the ring with 1 ingester registered. 1585 kvStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) 1586 b.Cleanup(func() { assert.NoError(b, closer.Close()) }) 1587 1588 err := kvStore.CAS(context.Background(), ring.IngesterRingKey, 1589 func(_ interface{}) (interface{}, bool, error) { 1590 d := &ring.Desc{} 1591 d.AddIngester("ingester-1", "127.0.0.1", "", ring.GenerateTokens(128, nil), ring.ACTIVE, time.Now()) 1592 return d, true, nil 1593 }, 1594 ) 1595 require.NoError(b, err) 1596 1597 ingestersRing, err := ring.New(ring.Config{ 1598 KVStore: kv.Config{Mock: kvStore}, 1599 HeartbeatTimeout: 60 * time.Minute, 1600 ReplicationFactor: 1, 1601 }, ring.IngesterRingKey, ring.IngesterRingKey, nil, nil) 1602 require.NoError(b, err) 1603 require.NoError(b, services.StartAndAwaitRunning(context.Background(), ingestersRing)) 1604 b.Cleanup(func() { 1605 require.NoError(b, services.StopAndAwaitTerminated(context.Background(), ingestersRing)) 1606 }) 1607 1608 test.Poll(b, time.Second, 1, func() interface{} { 1609 return ingestersRing.InstancesCount() 1610 }) 1611 1612 // Prepare the distributor configuration. 1613 var distributorCfg Config 1614 var clientConfig client.Config 1615 limits := validation.Limits{} 1616 flagext.DefaultValues(&distributorCfg, &clientConfig, &limits) 1617 1618 limits.IngestionRate = 0 // Unlimited. 1619 testData.prepareConfig(&limits) 1620 1621 distributorCfg.ShardByAllLabels = true 1622 distributorCfg.IngesterClientFactory = func(addr string) (ring_client.PoolClient, error) { 1623 return &noopIngester{}, nil 1624 } 1625 1626 overrides, err := validation.NewOverrides(limits, nil) 1627 require.NoError(b, err) 1628 1629 // Start the distributor. 
1630 distributor, err := New(distributorCfg, clientConfig, overrides, ingestersRing, true, nil, log.NewNopLogger()) 1631 require.NoError(b, err) 1632 require.NoError(b, services.StartAndAwaitRunning(context.Background(), distributor)) 1633 1634 b.Cleanup(func() { 1635 require.NoError(b, services.StopAndAwaitTerminated(context.Background(), distributor)) 1636 }) 1637 1638 // Prepare the series to remote write before starting the benchmark. 1639 metrics, samples := testData.prepareSeries() 1640 1641 // Run the benchmark. 1642 b.ReportAllocs() 1643 b.ResetTimer() 1644 1645 for n := 0; n < b.N; n++ { 1646 _, err := distributor.Push(ctx, cortexpb.ToWriteRequest(metrics, samples, nil, cortexpb.API)) 1647 1648 if testData.expectedErr == "" && err != nil { 1649 b.Fatalf("no error expected but got %v", err) 1650 } 1651 if testData.expectedErr != "" && (err == nil || !strings.Contains(err.Error(), testData.expectedErr)) { 1652 b.Fatalf("expected %v error but got %v", testData.expectedErr, err) 1653 } 1654 } 1655 }) 1656 } 1657 } 1658 1659 func TestSlowQueries(t *testing.T) { 1660 ctx := user.InjectOrgID(context.Background(), "user") 1661 nameMatcher := mustEqualMatcher(model.MetricNameLabel, "foo") 1662 nIngesters := 3 1663 for _, shardByAllLabels := range []bool{true, false} { 1664 for happy := 0; happy <= nIngesters; happy++ { 1665 t.Run(fmt.Sprintf("%t/%d", shardByAllLabels, happy), func(t *testing.T) { 1666 var expectedErr error 1667 if nIngesters-happy > 1 { 1668 expectedErr = errFail 1669 } 1670 1671 ds, _, _ := prepare(t, prepConfig{ 1672 numIngesters: nIngesters, 1673 happyIngesters: happy, 1674 numDistributors: 1, 1675 queryDelay: 100 * time.Millisecond, 1676 shardByAllLabels: shardByAllLabels, 1677 }) 1678 1679 _, err := ds[0].Query(ctx, 0, 10, nameMatcher) 1680 assert.Equal(t, expectedErr, err) 1681 1682 _, err = ds[0].QueryStream(ctx, 0, 10, nameMatcher) 1683 assert.Equal(t, expectedErr, err) 1684 }) 1685 } 1686 } 1687 } 1688 1689 func 
TestDistributor_MetricsForLabelMatchers(t *testing.T) { 1690 const numIngesters = 5 1691 1692 fixtures := []struct { 1693 lbls labels.Labels 1694 value float64 1695 timestamp int64 1696 }{ 1697 {labels.Labels{{Name: labels.MetricName, Value: "test_1"}, {Name: "status", Value: "200"}}, 1, 100000}, 1698 {labels.Labels{{Name: labels.MetricName, Value: "test_1"}, {Name: "status", Value: "500"}}, 1, 110000}, 1699 {labels.Labels{{Name: labels.MetricName, Value: "test_2"}}, 2, 200000}, 1700 // The two following series have the same FastFingerprint=e002a3a451262627 1701 {labels.Labels{{Name: labels.MetricName, Value: "fast_fingerprint_collision"}, {Name: "app", Value: "l"}, {Name: "uniq0", Value: "0"}, {Name: "uniq1", Value: "1"}}, 1, 300000}, 1702 {labels.Labels{{Name: labels.MetricName, Value: "fast_fingerprint_collision"}, {Name: "app", Value: "m"}, {Name: "uniq0", Value: "1"}, {Name: "uniq1", Value: "1"}}, 1, 300000}, 1703 } 1704 1705 tests := map[string]struct { 1706 shuffleShardEnabled bool 1707 shuffleShardSize int 1708 matchers []*labels.Matcher 1709 expectedResult []metric.Metric 1710 expectedIngesters int 1711 }{ 1712 "should return an empty response if no metric match": { 1713 matchers: []*labels.Matcher{ 1714 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "unknown"), 1715 }, 1716 expectedResult: []metric.Metric{}, 1717 expectedIngesters: numIngesters, 1718 }, 1719 "should filter metrics by single matcher": { 1720 matchers: []*labels.Matcher{ 1721 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_1"), 1722 }, 1723 expectedResult: []metric.Metric{ 1724 {Metric: util.LabelsToMetric(fixtures[0].lbls)}, 1725 {Metric: util.LabelsToMetric(fixtures[1].lbls)}, 1726 }, 1727 expectedIngesters: numIngesters, 1728 }, 1729 "should filter metrics by multiple matchers": { 1730 matchers: []*labels.Matcher{ 1731 mustNewMatcher(labels.MatchEqual, "status", "200"), 1732 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_1"), 1733 }, 1734 
expectedResult: []metric.Metric{ 1735 {Metric: util.LabelsToMetric(fixtures[0].lbls)}, 1736 }, 1737 expectedIngesters: numIngesters, 1738 }, 1739 "should return all matching metrics even if their FastFingerprint collide": { 1740 matchers: []*labels.Matcher{ 1741 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "fast_fingerprint_collision"), 1742 }, 1743 expectedResult: []metric.Metric{ 1744 {Metric: util.LabelsToMetric(fixtures[3].lbls)}, 1745 {Metric: util.LabelsToMetric(fixtures[4].lbls)}, 1746 }, 1747 expectedIngesters: numIngesters, 1748 }, 1749 "should query only ingesters belonging to tenant's subring if shuffle sharding is enabled": { 1750 shuffleShardEnabled: true, 1751 shuffleShardSize: 3, 1752 matchers: []*labels.Matcher{ 1753 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_1"), 1754 }, 1755 expectedResult: []metric.Metric{ 1756 {Metric: util.LabelsToMetric(fixtures[0].lbls)}, 1757 {Metric: util.LabelsToMetric(fixtures[1].lbls)}, 1758 }, 1759 expectedIngesters: 3, 1760 }, 1761 "should query all ingesters if shuffle sharding is enabled but shard size is 0": { 1762 shuffleShardEnabled: true, 1763 shuffleShardSize: 0, 1764 matchers: []*labels.Matcher{ 1765 mustNewMatcher(labels.MatchEqual, model.MetricNameLabel, "test_1"), 1766 }, 1767 expectedResult: []metric.Metric{ 1768 {Metric: util.LabelsToMetric(fixtures[0].lbls)}, 1769 {Metric: util.LabelsToMetric(fixtures[1].lbls)}, 1770 }, 1771 expectedIngesters: numIngesters, 1772 }, 1773 } 1774 1775 for testName, testData := range tests { 1776 t.Run(testName, func(t *testing.T) { 1777 now := model.Now() 1778 1779 // Create distributor 1780 ds, ingesters, _ := prepare(t, prepConfig{ 1781 numIngesters: numIngesters, 1782 happyIngesters: numIngesters, 1783 numDistributors: 1, 1784 shardByAllLabels: true, 1785 shuffleShardEnabled: testData.shuffleShardEnabled, 1786 shuffleShardSize: testData.shuffleShardSize, 1787 }) 1788 1789 // Push fixtures 1790 ctx := user.InjectOrgID(context.Background(), 
"test") 1791 1792 for _, series := range fixtures { 1793 req := mockWriteRequest(series.lbls, series.value, series.timestamp) 1794 _, err := ds[0].Push(ctx, req) 1795 require.NoError(t, err) 1796 } 1797 1798 metrics, err := ds[0].MetricsForLabelMatchers(ctx, now, now, testData.matchers...) 1799 require.NoError(t, err) 1800 assert.ElementsMatch(t, testData.expectedResult, metrics) 1801 1802 // Check how many ingesters have been queried. 1803 // Due to the quorum the distributor could cancel the last request towards ingesters 1804 // if all other ones are successful, so we're good either has been queried X or X-1 1805 // ingesters. 1806 assert.Contains(t, []int{testData.expectedIngesters, testData.expectedIngesters - 1}, countMockIngestersCalls(ingesters, "MetricsForLabelMatchers")) 1807 }) 1808 } 1809 } 1810 1811 func TestDistributor_MetricsMetadata(t *testing.T) { 1812 const numIngesters = 5 1813 1814 tests := map[string]struct { 1815 shuffleShardEnabled bool 1816 shuffleShardSize int 1817 expectedIngesters int 1818 }{ 1819 "should query all ingesters if shuffle sharding is disabled": { 1820 shuffleShardEnabled: false, 1821 expectedIngesters: numIngesters, 1822 }, 1823 "should query all ingesters if shuffle sharding is enabled but shard size is 0": { 1824 shuffleShardEnabled: true, 1825 shuffleShardSize: 0, 1826 expectedIngesters: numIngesters, 1827 }, 1828 "should query only ingesters belonging to tenant's subring if shuffle sharding is enabled": { 1829 shuffleShardEnabled: true, 1830 shuffleShardSize: 3, 1831 expectedIngesters: 3, 1832 }, 1833 } 1834 1835 for testName, testData := range tests { 1836 t.Run(testName, func(t *testing.T) { 1837 // Create distributor 1838 ds, ingesters, _ := prepare(t, prepConfig{ 1839 numIngesters: numIngesters, 1840 happyIngesters: numIngesters, 1841 numDistributors: 1, 1842 shardByAllLabels: true, 1843 shuffleShardEnabled: testData.shuffleShardEnabled, 1844 shuffleShardSize: testData.shuffleShardSize, 1845 limits: nil, 1846 }) 1847 
1848 // Push metadata 1849 ctx := user.InjectOrgID(context.Background(), "test") 1850 1851 req := makeWriteRequest(0, 0, 10) 1852 _, err := ds[0].Push(ctx, req) 1853 require.NoError(t, err) 1854 1855 // Assert on metric metadata 1856 metadata, err := ds[0].MetricsMetadata(ctx) 1857 require.NoError(t, err) 1858 assert.Equal(t, 10, len(metadata)) 1859 1860 // Check how many ingesters have been queried. 1861 // Due to the quorum the distributor could cancel the last request towards ingesters 1862 // if all other ones are successful, so we're good either has been queried X or X-1 1863 // ingesters. 1864 assert.Contains(t, []int{testData.expectedIngesters, testData.expectedIngesters - 1}, countMockIngestersCalls(ingesters, "MetricsMetadata")) 1865 }) 1866 } 1867 } 1868 1869 func mustNewMatcher(t labels.MatchType, n, v string) *labels.Matcher { 1870 m, err := labels.NewMatcher(t, n, v) 1871 if err != nil { 1872 panic(err) 1873 } 1874 1875 return m 1876 } 1877 1878 func mockWriteRequest(lbls labels.Labels, value float64, timestampMs int64) *cortexpb.WriteRequest { 1879 samples := []cortexpb.Sample{ 1880 { 1881 TimestampMs: timestampMs, 1882 Value: value, 1883 }, 1884 } 1885 1886 return cortexpb.ToWriteRequest([]labels.Labels{lbls}, samples, nil, cortexpb.API) 1887 } 1888 1889 type prepConfig struct { 1890 numIngesters, happyIngesters int 1891 queryDelay time.Duration 1892 shardByAllLabels bool 1893 shuffleShardEnabled bool 1894 shuffleShardSize int 1895 limits *validation.Limits 1896 numDistributors int 1897 skipLabelNameValidation bool 1898 maxInflightRequests int 1899 maxIngestionRate float64 1900 replicationFactor int 1901 enableTracker bool 1902 errFail error 1903 } 1904 1905 func prepare(t *testing.T, cfg prepConfig) ([]*Distributor, []mockIngester, []*prometheus.Registry) { 1906 ingesters := []mockIngester{} 1907 for i := 0; i < cfg.happyIngesters; i++ { 1908 ingesters = append(ingesters, mockIngester{ 1909 happy: true, 1910 queryDelay: cfg.queryDelay, 1911 }) 1912 
} 1913 for i := cfg.happyIngesters; i < cfg.numIngesters; i++ { 1914 miError := errFail 1915 if cfg.errFail != nil { 1916 miError = cfg.errFail 1917 } 1918 1919 ingesters = append(ingesters, mockIngester{ 1920 queryDelay: cfg.queryDelay, 1921 failResp: miError, 1922 }) 1923 } 1924 1925 // Use a real ring with a mock KV store to test ring RF logic. 1926 ingesterDescs := map[string]ring.InstanceDesc{} 1927 ingestersByAddr := map[string]*mockIngester{} 1928 for i := range ingesters { 1929 addr := fmt.Sprintf("%d", i) 1930 ingesterDescs[addr] = ring.InstanceDesc{ 1931 Addr: addr, 1932 Zone: "", 1933 State: ring.ACTIVE, 1934 Timestamp: time.Now().Unix(), 1935 RegisteredTimestamp: time.Now().Add(-2 * time.Hour).Unix(), 1936 Tokens: []uint32{uint32((math.MaxUint32 / cfg.numIngesters) * i)}, 1937 } 1938 ingestersByAddr[addr] = &ingesters[i] 1939 } 1940 1941 kvStore, closer := consul.NewInMemoryClient(ring.GetCodec(), log.NewNopLogger(), nil) 1942 t.Cleanup(func() { assert.NoError(t, closer.Close()) }) 1943 1944 err := kvStore.CAS(context.Background(), ring.IngesterRingKey, 1945 func(_ interface{}) (interface{}, bool, error) { 1946 return &ring.Desc{ 1947 Ingesters: ingesterDescs, 1948 }, true, nil 1949 }, 1950 ) 1951 require.NoError(t, err) 1952 1953 // Use a default replication factor of 3 if there isn't a provided replication factor. 
1954 rf := cfg.replicationFactor 1955 if rf == 0 { 1956 rf = 3 1957 } 1958 1959 ingestersRing, err := ring.New(ring.Config{ 1960 KVStore: kv.Config{ 1961 Mock: kvStore, 1962 }, 1963 HeartbeatTimeout: 60 * time.Minute, 1964 ReplicationFactor: rf, 1965 }, ring.IngesterRingKey, ring.IngesterRingKey, nil, nil) 1966 require.NoError(t, err) 1967 require.NoError(t, services.StartAndAwaitRunning(context.Background(), ingestersRing)) 1968 1969 test.Poll(t, time.Second, cfg.numIngesters, func() interface{} { 1970 return ingestersRing.InstancesCount() 1971 }) 1972 1973 factory := func(addr string) (ring_client.PoolClient, error) { 1974 return ingestersByAddr[addr], nil 1975 } 1976 1977 distributors := make([]*Distributor, 0, cfg.numDistributors) 1978 registries := make([]*prometheus.Registry, 0, cfg.numDistributors) 1979 for i := 0; i < cfg.numDistributors; i++ { 1980 if cfg.limits == nil { 1981 cfg.limits = &validation.Limits{} 1982 flagext.DefaultValues(cfg.limits) 1983 } 1984 1985 var distributorCfg Config 1986 var clientConfig client.Config 1987 flagext.DefaultValues(&distributorCfg, &clientConfig) 1988 1989 distributorCfg.IngesterClientFactory = factory 1990 distributorCfg.ShardByAllLabels = cfg.shardByAllLabels 1991 distributorCfg.ExtraQueryDelay = 50 * time.Millisecond 1992 distributorCfg.DistributorRing.HeartbeatPeriod = 100 * time.Millisecond 1993 distributorCfg.DistributorRing.InstanceID = strconv.Itoa(i) 1994 distributorCfg.DistributorRing.KVStore.Mock = kvStore 1995 distributorCfg.DistributorRing.InstanceAddr = "127.0.0.1" 1996 distributorCfg.SkipLabelNameValidation = cfg.skipLabelNameValidation 1997 distributorCfg.InstanceLimits.MaxInflightPushRequests = cfg.maxInflightRequests 1998 distributorCfg.InstanceLimits.MaxIngestionRate = cfg.maxIngestionRate 1999 2000 if cfg.shuffleShardEnabled { 2001 distributorCfg.ShardingStrategy = util.ShardingStrategyShuffle 2002 distributorCfg.ShuffleShardingLookbackPeriod = time.Hour 2003 2004 cfg.limits.IngestionTenantShardSize 
= cfg.shuffleShardSize 2005 } 2006 2007 if cfg.enableTracker { 2008 codec := GetReplicaDescCodec() 2009 ringStore, closer := consul.NewInMemoryClient(codec, log.NewNopLogger(), nil) 2010 t.Cleanup(func() { assert.NoError(t, closer.Close()) }) 2011 mock := kv.PrefixClient(ringStore, "prefix") 2012 distributorCfg.HATrackerConfig = HATrackerConfig{ 2013 EnableHATracker: true, 2014 KVStore: kv.Config{Mock: mock}, 2015 UpdateTimeout: 100 * time.Millisecond, 2016 FailoverTimeout: time.Second, 2017 } 2018 cfg.limits.HAMaxClusters = 100 2019 } 2020 2021 overrides, err := validation.NewOverrides(*cfg.limits, nil) 2022 require.NoError(t, err) 2023 2024 reg := prometheus.NewPedanticRegistry() 2025 d, err := New(distributorCfg, clientConfig, overrides, ingestersRing, true, reg, log.NewNopLogger()) 2026 require.NoError(t, err) 2027 require.NoError(t, services.StartAndAwaitRunning(context.Background(), d)) 2028 2029 distributors = append(distributors, d) 2030 registries = append(registries, reg) 2031 } 2032 2033 // If the distributors ring is setup, wait until the first distributor 2034 // updates to the expected size 2035 if distributors[0].distributorsRing != nil { 2036 test.Poll(t, time.Second, cfg.numDistributors, func() interface{} { 2037 return distributors[0].distributorsLifeCycler.HealthyInstancesCount() 2038 }) 2039 } 2040 2041 t.Cleanup(func() { stopAll(distributors, ingestersRing) }) 2042 2043 return distributors, ingesters, registries 2044 } 2045 2046 func stopAll(ds []*Distributor, r *ring.Ring) { 2047 for _, d := range ds { 2048 services.StopAndAwaitTerminated(context.Background(), d) //nolint:errcheck 2049 } 2050 2051 // Mock consul doesn't stop quickly, so don't wait. 
2052 r.StopAsync() 2053 } 2054 2055 func makeWriteRequest(startTimestampMs int64, samples int, metadata int) *cortexpb.WriteRequest { 2056 request := &cortexpb.WriteRequest{} 2057 for i := 0; i < samples; i++ { 2058 request.Timeseries = append(request.Timeseries, makeWriteRequestTimeseries( 2059 []cortexpb.LabelAdapter{ 2060 {Name: model.MetricNameLabel, Value: "foo"}, 2061 {Name: "bar", Value: "baz"}, 2062 {Name: "sample", Value: fmt.Sprintf("%d", i)}, 2063 }, startTimestampMs+int64(i), float64(i))) 2064 } 2065 2066 for i := 0; i < metadata; i++ { 2067 m := &cortexpb.MetricMetadata{ 2068 MetricFamilyName: fmt.Sprintf("metric_%d", i), 2069 Type: cortexpb.COUNTER, 2070 Help: fmt.Sprintf("a help for metric_%d", i), 2071 } 2072 request.Metadata = append(request.Metadata, m) 2073 } 2074 2075 return request 2076 } 2077 2078 func makeWriteRequestTimeseries(labels []cortexpb.LabelAdapter, ts int64, value float64) cortexpb.PreallocTimeseries { 2079 return cortexpb.PreallocTimeseries{ 2080 TimeSeries: &cortexpb.TimeSeries{ 2081 Labels: labels, 2082 Samples: []cortexpb.Sample{ 2083 { 2084 Value: value, 2085 TimestampMs: ts, 2086 }, 2087 }, 2088 }, 2089 } 2090 } 2091 2092 func makeWriteRequestHA(samples int, replica, cluster string) *cortexpb.WriteRequest { 2093 request := &cortexpb.WriteRequest{} 2094 for i := 0; i < samples; i++ { 2095 ts := cortexpb.PreallocTimeseries{ 2096 TimeSeries: &cortexpb.TimeSeries{ 2097 Labels: []cortexpb.LabelAdapter{ 2098 {Name: "__name__", Value: "foo"}, 2099 {Name: "__replica__", Value: replica}, 2100 {Name: "bar", Value: "baz"}, 2101 {Name: "cluster", Value: cluster}, 2102 {Name: "sample", Value: fmt.Sprintf("%d", i)}, 2103 }, 2104 }, 2105 } 2106 ts.Samples = []cortexpb.Sample{ 2107 { 2108 Value: float64(i), 2109 TimestampMs: int64(i), 2110 }, 2111 } 2112 request.Timeseries = append(request.Timeseries, ts) 2113 } 2114 return request 2115 } 2116 2117 func makeWriteRequestExemplar(seriesLabels []string, timestamp int64, exemplarLabels []string) 
*cortexpb.WriteRequest { 2118 return &cortexpb.WriteRequest{ 2119 Timeseries: []cortexpb.PreallocTimeseries{ 2120 { 2121 TimeSeries: &cortexpb.TimeSeries{ 2122 // Labels: []cortexpb.LabelAdapter{{Name: model.MetricNameLabel, Value: "test"}}, 2123 Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings(seriesLabels...)), 2124 Exemplars: []cortexpb.Exemplar{ 2125 { 2126 Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings(exemplarLabels...)), 2127 TimestampMs: timestamp, 2128 }, 2129 }, 2130 }, 2131 }, 2132 }, 2133 } 2134 } 2135 2136 func expectedResponse(start, end int) model.Matrix { 2137 result := model.Matrix{} 2138 for i := start; i < end; i++ { 2139 result = append(result, &model.SampleStream{ 2140 Metric: model.Metric{ 2141 model.MetricNameLabel: "foo", 2142 "bar": "baz", 2143 "sample": model.LabelValue(fmt.Sprintf("%d", i)), 2144 }, 2145 Values: []model.SamplePair{ 2146 { 2147 Value: model.SampleValue(i), 2148 Timestamp: model.Time(i), 2149 }, 2150 }, 2151 }) 2152 } 2153 return result 2154 } 2155 2156 func mustEqualMatcher(k, v string) *labels.Matcher { 2157 m, err := labels.NewMatcher(labels.MatchEqual, k, v) 2158 if err != nil { 2159 panic(err) 2160 } 2161 return m 2162 } 2163 2164 type mockIngester struct { 2165 sync.Mutex 2166 client.IngesterClient 2167 grpc_health_v1.HealthClient 2168 happy bool 2169 failResp error 2170 stats client.UsersStatsResponse 2171 timeseries map[uint32]*cortexpb.PreallocTimeseries 2172 metadata map[uint32]map[cortexpb.MetricMetadata]struct{} 2173 queryDelay time.Duration 2174 calls map[string]int 2175 } 2176 2177 func (i *mockIngester) series() map[uint32]*cortexpb.PreallocTimeseries { 2178 i.Lock() 2179 defer i.Unlock() 2180 2181 result := map[uint32]*cortexpb.PreallocTimeseries{} 2182 for k, v := range i.timeseries { 2183 result[k] = v 2184 } 2185 return result 2186 } 2187 2188 func (i *mockIngester) Check(ctx context.Context, in *grpc_health_v1.HealthCheckRequest, opts ...grpc.CallOption) 
(*grpc_health_v1.HealthCheckResponse, error) { 2189 i.Lock() 2190 defer i.Unlock() 2191 2192 i.trackCall("Check") 2193 2194 return &grpc_health_v1.HealthCheckResponse{}, nil 2195 } 2196 2197 func (i *mockIngester) Close() error { 2198 return nil 2199 } 2200 2201 func (i *mockIngester) Push(ctx context.Context, req *cortexpb.WriteRequest, opts ...grpc.CallOption) (*cortexpb.WriteResponse, error) { 2202 i.Lock() 2203 defer i.Unlock() 2204 2205 i.trackCall("Push") 2206 2207 if !i.happy { 2208 return nil, i.failResp 2209 } 2210 2211 if i.timeseries == nil { 2212 i.timeseries = map[uint32]*cortexpb.PreallocTimeseries{} 2213 } 2214 2215 if i.metadata == nil { 2216 i.metadata = map[uint32]map[cortexpb.MetricMetadata]struct{}{} 2217 } 2218 2219 orgid, err := tenant.TenantID(ctx) 2220 if err != nil { 2221 return nil, err 2222 } 2223 2224 for j := range req.Timeseries { 2225 series := req.Timeseries[j] 2226 hash := shardByAllLabels(orgid, series.Labels) 2227 existing, ok := i.timeseries[hash] 2228 if !ok { 2229 // Make a copy because the request Timeseries are reused 2230 item := cortexpb.TimeSeries{ 2231 Labels: make([]cortexpb.LabelAdapter, len(series.TimeSeries.Labels)), 2232 Samples: make([]cortexpb.Sample, len(series.TimeSeries.Samples)), 2233 } 2234 2235 copy(item.Labels, series.TimeSeries.Labels) 2236 copy(item.Samples, series.TimeSeries.Samples) 2237 2238 i.timeseries[hash] = &cortexpb.PreallocTimeseries{TimeSeries: &item} 2239 } else { 2240 existing.Samples = append(existing.Samples, series.Samples...) 
2241 } 2242 } 2243 2244 for _, m := range req.Metadata { 2245 hash := shardByMetricName(orgid, m.MetricFamilyName) 2246 set, ok := i.metadata[hash] 2247 if !ok { 2248 set = map[cortexpb.MetricMetadata]struct{}{} 2249 i.metadata[hash] = set 2250 } 2251 set[*m] = struct{}{} 2252 } 2253 2254 return &cortexpb.WriteResponse{}, nil 2255 } 2256 2257 func (i *mockIngester) Query(ctx context.Context, req *client.QueryRequest, opts ...grpc.CallOption) (*client.QueryResponse, error) { 2258 time.Sleep(i.queryDelay) 2259 2260 i.Lock() 2261 defer i.Unlock() 2262 2263 i.trackCall("Query") 2264 2265 if !i.happy { 2266 return nil, errFail 2267 } 2268 2269 _, _, matchers, err := client.FromQueryRequest(req) 2270 if err != nil { 2271 return nil, err 2272 } 2273 2274 response := client.QueryResponse{} 2275 for _, ts := range i.timeseries { 2276 if match(ts.Labels, matchers) { 2277 response.Timeseries = append(response.Timeseries, *ts.TimeSeries) 2278 } 2279 } 2280 return &response, nil 2281 } 2282 2283 func (i *mockIngester) QueryStream(ctx context.Context, req *client.QueryRequest, opts ...grpc.CallOption) (client.Ingester_QueryStreamClient, error) { 2284 time.Sleep(i.queryDelay) 2285 2286 i.Lock() 2287 defer i.Unlock() 2288 2289 i.trackCall("QueryStream") 2290 2291 if !i.happy { 2292 return nil, errFail 2293 } 2294 2295 _, _, matchers, err := client.FromQueryRequest(req) 2296 if err != nil { 2297 return nil, err 2298 } 2299 2300 results := []*client.QueryStreamResponse{} 2301 for _, ts := range i.timeseries { 2302 if !match(ts.Labels, matchers) { 2303 continue 2304 } 2305 2306 c := encoding.New() 2307 chunks := []encoding.Chunk{c} 2308 for _, sample := range ts.Samples { 2309 newChunk, err := c.Add(model.SamplePair{ 2310 Timestamp: model.Time(sample.TimestampMs), 2311 Value: model.SampleValue(sample.Value), 2312 }) 2313 if err != nil { 2314 panic(err) 2315 } 2316 if newChunk != nil { 2317 c = newChunk 2318 chunks = append(chunks, newChunk) 2319 } 2320 } 2321 2322 wireChunks := 
[]client.Chunk{} 2323 for _, c := range chunks { 2324 var buf bytes.Buffer 2325 chunk := client.Chunk{ 2326 Encoding: int32(c.Encoding()), 2327 } 2328 if err := c.Marshal(&buf); err != nil { 2329 panic(err) 2330 } 2331 chunk.Data = buf.Bytes() 2332 wireChunks = append(wireChunks, chunk) 2333 } 2334 2335 results = append(results, &client.QueryStreamResponse{ 2336 Chunkseries: []client.TimeSeriesChunk{ 2337 { 2338 Labels: ts.Labels, 2339 Chunks: wireChunks, 2340 }, 2341 }, 2342 }) 2343 } 2344 return &stream{ 2345 results: results, 2346 }, nil 2347 } 2348 2349 func (i *mockIngester) MetricsForLabelMatchers(ctx context.Context, req *client.MetricsForLabelMatchersRequest, opts ...grpc.CallOption) (*client.MetricsForLabelMatchersResponse, error) { 2350 i.Lock() 2351 defer i.Unlock() 2352 2353 i.trackCall("MetricsForLabelMatchers") 2354 2355 if !i.happy { 2356 return nil, errFail 2357 } 2358 2359 _, _, multiMatchers, err := client.FromMetricsForLabelMatchersRequest(req) 2360 if err != nil { 2361 return nil, err 2362 } 2363 2364 response := client.MetricsForLabelMatchersResponse{} 2365 for _, matchers := range multiMatchers { 2366 for _, ts := range i.timeseries { 2367 if match(ts.Labels, matchers) { 2368 response.Metric = append(response.Metric, &cortexpb.Metric{Labels: ts.Labels}) 2369 } 2370 } 2371 } 2372 return &response, nil 2373 } 2374 2375 func (i *mockIngester) MetricsMetadata(ctx context.Context, req *client.MetricsMetadataRequest, opts ...grpc.CallOption) (*client.MetricsMetadataResponse, error) { 2376 i.Lock() 2377 defer i.Unlock() 2378 2379 i.trackCall("MetricsMetadata") 2380 2381 if !i.happy { 2382 return nil, errFail 2383 } 2384 2385 resp := &client.MetricsMetadataResponse{} 2386 for _, sets := range i.metadata { 2387 for m := range sets { 2388 resp.Metadata = append(resp.Metadata, &m) 2389 } 2390 } 2391 2392 return resp, nil 2393 } 2394 2395 func (i *mockIngester) trackCall(name string) { 2396 if i.calls == nil { 2397 i.calls = map[string]int{} 2398 } 2399 
2400 i.calls[name]++ 2401 } 2402 2403 func (i *mockIngester) countCalls(name string) int { 2404 i.Lock() 2405 defer i.Unlock() 2406 2407 return i.calls[name] 2408 } 2409 2410 // noopIngester is a mocked ingester which does nothing. 2411 type noopIngester struct { 2412 client.IngesterClient 2413 grpc_health_v1.HealthClient 2414 } 2415 2416 func (i *noopIngester) Close() error { 2417 return nil 2418 } 2419 2420 func (i *noopIngester) Push(ctx context.Context, req *cortexpb.WriteRequest, opts ...grpc.CallOption) (*cortexpb.WriteResponse, error) { 2421 return nil, nil 2422 } 2423 2424 type stream struct { 2425 grpc.ClientStream 2426 i int 2427 results []*client.QueryStreamResponse 2428 } 2429 2430 func (*stream) CloseSend() error { 2431 return nil 2432 } 2433 2434 func (s *stream) Recv() (*client.QueryStreamResponse, error) { 2435 if s.i >= len(s.results) { 2436 return nil, io.EOF 2437 } 2438 result := s.results[s.i] 2439 s.i++ 2440 return result, nil 2441 } 2442 2443 func (i *mockIngester) AllUserStats(ctx context.Context, in *client.UserStatsRequest, opts ...grpc.CallOption) (*client.UsersStatsResponse, error) { 2444 return &i.stats, nil 2445 } 2446 2447 func match(labels []cortexpb.LabelAdapter, matchers []*labels.Matcher) bool { 2448 outer: 2449 for _, matcher := range matchers { 2450 for _, labels := range labels { 2451 if matcher.Name == labels.Name && matcher.Matches(labels.Value) { 2452 continue outer 2453 } 2454 } 2455 return false 2456 } 2457 return true 2458 } 2459 2460 func TestDistributorValidation(t *testing.T) { 2461 ctx := user.InjectOrgID(context.Background(), "1") 2462 now := model.Now() 2463 future, past := now.Add(5*time.Hour), now.Add(-25*time.Hour) 2464 2465 for i, tc := range []struct { 2466 metadata []*cortexpb.MetricMetadata 2467 labels []labels.Labels 2468 samples []cortexpb.Sample 2469 err error 2470 }{ 2471 // Test validation passes. 
2472 { 2473 metadata: []*cortexpb.MetricMetadata{{MetricFamilyName: "testmetric", Help: "a test metric.", Unit: "", Type: cortexpb.COUNTER}}, 2474 labels: []labels.Labels{{{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}}}, 2475 samples: []cortexpb.Sample{{ 2476 TimestampMs: int64(now), 2477 Value: 1, 2478 }}, 2479 }, 2480 // Test validation fails for very old samples. 2481 { 2482 labels: []labels.Labels{{{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}}}, 2483 samples: []cortexpb.Sample{{ 2484 TimestampMs: int64(past), 2485 Value: 2, 2486 }}, 2487 err: httpgrpc.Errorf(http.StatusBadRequest, `timestamp too old: %d metric: "testmetric"`, past), 2488 }, 2489 2490 // Test validation fails for samples from the future. 2491 { 2492 labels: []labels.Labels{{{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}}}, 2493 samples: []cortexpb.Sample{{ 2494 TimestampMs: int64(future), 2495 Value: 4, 2496 }}, 2497 err: httpgrpc.Errorf(http.StatusBadRequest, `timestamp too new: %d metric: "testmetric"`, future), 2498 }, 2499 2500 // Test maximum labels names per series. 2501 { 2502 labels: []labels.Labels{{{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}, {Name: "foo2", Value: "bar2"}}}, 2503 samples: []cortexpb.Sample{{ 2504 TimestampMs: int64(now), 2505 Value: 2, 2506 }}, 2507 err: httpgrpc.Errorf(http.StatusBadRequest, `series has too many labels (actual: 3, limit: 2) series: 'testmetric{foo2="bar2", foo="bar"}'`), 2508 }, 2509 // Test multiple validation fails return the first one. 
2510 { 2511 labels: []labels.Labels{ 2512 {{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}, {Name: "foo2", Value: "bar2"}}, 2513 {{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}}, 2514 }, 2515 samples: []cortexpb.Sample{ 2516 {TimestampMs: int64(now), Value: 2}, 2517 {TimestampMs: int64(past), Value: 2}, 2518 }, 2519 err: httpgrpc.Errorf(http.StatusBadRequest, `series has too many labels (actual: 3, limit: 2) series: 'testmetric{foo2="bar2", foo="bar"}'`), 2520 }, 2521 // Test metadata validation fails 2522 { 2523 metadata: []*cortexpb.MetricMetadata{{MetricFamilyName: "", Help: "a test metric.", Unit: "", Type: cortexpb.COUNTER}}, 2524 labels: []labels.Labels{{{Name: labels.MetricName, Value: "testmetric"}, {Name: "foo", Value: "bar"}}}, 2525 samples: []cortexpb.Sample{{ 2526 TimestampMs: int64(now), 2527 Value: 1, 2528 }}, 2529 err: httpgrpc.Errorf(http.StatusBadRequest, `metadata missing metric name`), 2530 }, 2531 } { 2532 t.Run(strconv.Itoa(i), func(t *testing.T) { 2533 var limits validation.Limits 2534 flagext.DefaultValues(&limits) 2535 2536 limits.CreationGracePeriod = model.Duration(2 * time.Hour) 2537 limits.RejectOldSamples = true 2538 limits.RejectOldSamplesMaxAge = model.Duration(24 * time.Hour) 2539 limits.MaxLabelNamesPerSeries = 2 2540 2541 ds, _, _ := prepare(t, prepConfig{ 2542 numIngesters: 3, 2543 happyIngesters: 3, 2544 numDistributors: 1, 2545 shardByAllLabels: true, 2546 limits: &limits, 2547 }) 2548 2549 _, err := ds[0].Push(ctx, cortexpb.ToWriteRequest(tc.labels, tc.samples, tc.metadata, cortexpb.API)) 2550 require.Equal(t, tc.err, err) 2551 }) 2552 } 2553 } 2554 2555 func TestRemoveReplicaLabel(t *testing.T) { 2556 replicaLabel := "replica" 2557 clusterLabel := "cluster" 2558 cases := []struct { 2559 labelsIn []cortexpb.LabelAdapter 2560 labelsOut []cortexpb.LabelAdapter 2561 }{ 2562 // Replica label is present 2563 { 2564 labelsIn: []cortexpb.LabelAdapter{ 2565 {Name: "__name__", 
Value: "foo"}, 2566 {Name: "bar", Value: "baz"}, 2567 {Name: "sample", Value: "1"}, 2568 {Name: "replica", Value: replicaLabel}, 2569 }, 2570 labelsOut: []cortexpb.LabelAdapter{ 2571 {Name: "__name__", Value: "foo"}, 2572 {Name: "bar", Value: "baz"}, 2573 {Name: "sample", Value: "1"}, 2574 }, 2575 }, 2576 // Replica label is not present 2577 { 2578 labelsIn: []cortexpb.LabelAdapter{ 2579 {Name: "__name__", Value: "foo"}, 2580 {Name: "bar", Value: "baz"}, 2581 {Name: "sample", Value: "1"}, 2582 {Name: "cluster", Value: clusterLabel}, 2583 }, 2584 labelsOut: []cortexpb.LabelAdapter{ 2585 {Name: "__name__", Value: "foo"}, 2586 {Name: "bar", Value: "baz"}, 2587 {Name: "sample", Value: "1"}, 2588 {Name: "cluster", Value: clusterLabel}, 2589 }, 2590 }, 2591 } 2592 2593 for _, c := range cases { 2594 removeLabel(replicaLabel, &c.labelsIn) 2595 assert.Equal(t, c.labelsOut, c.labelsIn) 2596 } 2597 } 2598 2599 // This is not great, but we deal with unsorted labels when validating labels. 2600 func TestShardByAllLabelsReturnsWrongResultsForUnsortedLabels(t *testing.T) { 2601 val1 := shardByAllLabels("test", []cortexpb.LabelAdapter{ 2602 {Name: "__name__", Value: "foo"}, 2603 {Name: "bar", Value: "baz"}, 2604 {Name: "sample", Value: "1"}, 2605 }) 2606 2607 val2 := shardByAllLabels("test", []cortexpb.LabelAdapter{ 2608 {Name: "__name__", Value: "foo"}, 2609 {Name: "sample", Value: "1"}, 2610 {Name: "bar", Value: "baz"}, 2611 }) 2612 2613 assert.NotEqual(t, val1, val2) 2614 } 2615 2616 func TestSortLabels(t *testing.T) { 2617 sorted := []cortexpb.LabelAdapter{ 2618 {Name: "__name__", Value: "foo"}, 2619 {Name: "bar", Value: "baz"}, 2620 {Name: "cluster", Value: "cluster"}, 2621 {Name: "sample", Value: "1"}, 2622 } 2623 2624 // no allocations if input is already sorted 2625 require.Equal(t, 0.0, testing.AllocsPerRun(100, func() { 2626 sortLabelsIfNeeded(sorted) 2627 })) 2628 2629 unsorted := []cortexpb.LabelAdapter{ 2630 {Name: "__name__", Value: "foo"}, 2631 {Name: "sample", 
Value: "1"}, 2632 {Name: "cluster", Value: "cluster"}, 2633 {Name: "bar", Value: "baz"}, 2634 } 2635 2636 sortLabelsIfNeeded(unsorted) 2637 2638 sort.SliceIsSorted(unsorted, func(i, j int) bool { 2639 return strings.Compare(unsorted[i].Name, unsorted[j].Name) < 0 2640 }) 2641 } 2642 2643 func TestDistributor_Push_Relabel(t *testing.T) { 2644 ctx := user.InjectOrgID(context.Background(), "user") 2645 2646 type testcase struct { 2647 inputSeries labels.Labels 2648 expectedSeries labels.Labels 2649 metricRelabelConfigs []*relabel.Config 2650 } 2651 2652 cases := []testcase{ 2653 // No relabel config. 2654 { 2655 inputSeries: labels.Labels{ 2656 {Name: "__name__", Value: "foo"}, 2657 {Name: "cluster", Value: "one"}, 2658 }, 2659 expectedSeries: labels.Labels{ 2660 {Name: "__name__", Value: "foo"}, 2661 {Name: "cluster", Value: "one"}, 2662 }, 2663 }, 2664 { 2665 inputSeries: labels.Labels{ 2666 {Name: "__name__", Value: "foo"}, 2667 {Name: "cluster", Value: "one"}, 2668 }, 2669 expectedSeries: labels.Labels{ 2670 {Name: "__name__", Value: "foo"}, 2671 {Name: "cluster", Value: "two"}, 2672 }, 2673 metricRelabelConfigs: []*relabel.Config{ 2674 { 2675 SourceLabels: []model.LabelName{"cluster"}, 2676 Action: relabel.DefaultRelabelConfig.Action, 2677 Regex: relabel.DefaultRelabelConfig.Regex, 2678 TargetLabel: "cluster", 2679 Replacement: "two", 2680 }, 2681 }, 2682 }, 2683 } 2684 2685 for _, tc := range cases { 2686 var err error 2687 var limits validation.Limits 2688 flagext.DefaultValues(&limits) 2689 limits.MetricRelabelConfigs = tc.metricRelabelConfigs 2690 2691 ds, ingesters, _ := prepare(t, prepConfig{ 2692 numIngesters: 2, 2693 happyIngesters: 2, 2694 numDistributors: 1, 2695 shardByAllLabels: true, 2696 limits: &limits, 2697 }) 2698 2699 // Push the series to the distributor 2700 req := mockWriteRequest(tc.inputSeries, 1, 1) 2701 _, err = ds[0].Push(ctx, req) 2702 require.NoError(t, err) 2703 2704 // Since each test pushes only 1 series, we do expect the ingester 
2705 // to have received exactly 1 series 2706 for i := range ingesters { 2707 timeseries := ingesters[i].series() 2708 assert.Equal(t, 1, len(timeseries)) 2709 for _, v := range timeseries { 2710 assert.Equal(t, tc.expectedSeries, cortexpb.FromLabelAdaptersToLabels(v.Labels)) 2711 } 2712 } 2713 } 2714 } 2715 2716 func countMockIngestersCalls(ingesters []mockIngester, name string) int { 2717 count := 0 2718 for i := 0; i < len(ingesters); i++ { 2719 if ingesters[i].countCalls(name) > 0 { 2720 count++ 2721 } 2722 } 2723 return count 2724 }