github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/pkg/canary/comparator/comparator_test.go (about) 1 package comparator 2 3 import ( 4 "bytes" 5 "fmt" 6 "sync" 7 "testing" 8 "time" 9 10 "github.com/prometheus/client_golang/prometheus" 11 io_prometheus_client "github.com/prometheus/client_model/go" 12 "github.com/stretchr/testify/assert" 13 ) 14 15 func TestComparatorEntryReceivedOutOfOrder(t *testing.T) { 16 outOfOrderEntries = &mockCounter{} 17 wsMissingEntries = &mockCounter{} 18 unexpectedEntries = &mockCounter{} 19 duplicateEntries = &mockCounter{} 20 21 actual := &bytes.Buffer{} 22 c := NewComparator(actual, 1*time.Hour, 1*time.Hour, 1*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), nil, false) 23 24 t1 := time.Now() 25 t2 := t1.Add(1 * time.Second) 26 t3 := t2.Add(1 * time.Second) 27 t4 := t3.Add(1 * time.Second) 28 29 c.entrySent(t1) 30 c.entrySent(t2) 31 c.entrySent(t3) 32 c.entrySent(t4) 33 34 c.entryReceived(t1) 35 assert.Equal(t, 3, c.Size()) 36 c.entryReceived(t4) 37 assert.Equal(t, 2, c.Size()) 38 c.entryReceived(t2) 39 c.entryReceived(t3) 40 assert.Equal(t, 0, c.Size()) 41 42 expected := fmt.Sprintf(ErrOutOfOrderEntry, t4, []time.Time{t2, t3}) 43 assert.Equal(t, expected, actual.String()) 44 45 assert.Equal(t, 1, outOfOrderEntries.(*mockCounter).count) 46 assert.Equal(t, 0, unexpectedEntries.(*mockCounter).count) 47 assert.Equal(t, 0, wsMissingEntries.(*mockCounter).count) 48 assert.Equal(t, 0, duplicateEntries.(*mockCounter).count) 49 50 // This avoids a panic on subsequent test execution, 51 // seems ugly but was easy, and multiple instantiations 52 // of the comparator should be an error 53 prometheus.Unregister(responseLatency) 54 } 55 56 func TestComparatorEntryReceivedNotExpected(t *testing.T) { 57 outOfOrderEntries = &mockCounter{} 58 wsMissingEntries = &mockCounter{} 59 unexpectedEntries = &mockCounter{} 60 duplicateEntries = &mockCounter{} 61 62 actual := &bytes.Buffer{} 63 c := NewComparator(actual, 1*time.Hour, 1*time.Hour, 1*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), nil, false) 64 65 t1 := time.Now() 66 t2 := t1.Add(1 * time.Second) 67 t3 := t2.Add(1 * time.Second) 68 t4 := t3.Add(1 * time.Second) 69 70 c.entrySent(t2) 71 c.entrySent(t3) 72 c.entrySent(t4) 73 74 c.entryReceived(t2) 75 assert.Equal(t, 2, c.Size()) 76 c.entryReceived(t1) 77 assert.Equal(t, 2, c.Size()) 78 c.entryReceived(t3) 79 assert.Equal(t, 1, c.Size()) 80 c.entryReceived(t4) 81 assert.Equal(t, 0, c.Size()) 82 83 expected := fmt.Sprintf(ErrUnexpectedEntry, t1.UnixNano()) 84 assert.Equal(t, expected, actual.String()) 85 86 assert.Equal(t, 0, outOfOrderEntries.(*mockCounter).count) 87 assert.Equal(t, 1, unexpectedEntries.(*mockCounter).count) 88 assert.Equal(t, 0, wsMissingEntries.(*mockCounter).count) 89 assert.Equal(t, 0, duplicateEntries.(*mockCounter).count) 90 91 // This avoids a panic on subsequent test execution, 92 // seems ugly but was easy, and multiple instantiations 93 // of the comparator should be an error 94 prometheus.Unregister(responseLatency) 95 } 96 97 func TestComparatorEntryReceivedDuplicate(t *testing.T) { 98 outOfOrderEntries = &mockCounter{} 99 wsMissingEntries = &mockCounter{} 100 unexpectedEntries = &mockCounter{} 101 duplicateEntries = &mockCounter{} 102 103 actual := &bytes.Buffer{} 104 c := NewComparator(actual, 1*time.Hour, 1*time.Hour, 1*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), nil, false) 105 106 t1 := time.Unix(0, 0) 107 t2 := t1.Add(1 * time.Second) 108 t3 := t2.Add(1 * time.Second) 109 t4 := t3.Add(1 * time.Second) 110 111 c.entrySent(t1) 112 c.entrySent(t2) 113 c.entrySent(t3) 114 c.entrySent(t4) 115 116 c.entryReceived(t1) 117 assert.Equal(t, 3, c.Size()) 118 c.entryReceived(t2) 119 assert.Equal(t, 2, c.Size()) 120 c.entryReceived(t2) 121 assert.Equal(t, 2, c.Size()) 122 c.entryReceived(t3) 123 assert.Equal(t, 1, c.Size()) 124 c.entryReceived(t4) 125 assert.Equal(t, 0, c.Size()) 126 127 expected := fmt.Sprintf(ErrDuplicateEntry, t2.UnixNano()) 128 assert.Equal(t, expected, actual.String()) 129 130 assert.Equal(t, 0, outOfOrderEntries.(*mockCounter).count) 131 assert.Equal(t, 0, unexpectedEntries.(*mockCounter).count) 132 assert.Equal(t, 0, wsMissingEntries.(*mockCounter).count) 133 assert.Equal(t, 1, duplicateEntries.(*mockCounter).count) 134 135 // This avoids a panic on subsequent test execution, 136 // seems ugly but was easy, and multiple instantiations 137 // of the comparator should be an error 138 prometheus.Unregister(responseLatency) 139 } 140 141 func TestEntryNeverReceived(t *testing.T) { 142 outOfOrderEntries = &mockCounter{} 143 wsMissingEntries = &mockCounter{} 144 missingEntries = &mockCounter{} 145 unexpectedEntries = &mockCounter{} 146 duplicateEntries = &mockCounter{} 147 148 actual := &bytes.Buffer{} 149 150 t1 := time.Unix(10, 0) 151 t2 := time.Unix(20, 0) 152 t3 := time.Unix(30, 0) 153 t4 := time.Unix(40, 0) 154 t5 := time.Unix(50, 0) 155 156 found := []time.Time{t1, t3, t4, t5} 157 158 mr := &mockReader{resp: found} 159 wait := 60 * time.Second 160 maxWait := 300 * time.Second 161 //We set the prune interval timer to a huge value here so that it never runs, instead we call pruneEntries manually below 162 c := NewComparator(actual, wait, maxWait, 50*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), mr, false) 163 164 c.entrySent(t1) 165 c.entrySent(t2) 166 c.entrySent(t3) 167 c.entrySent(t4) 168 c.entrySent(t5) 169 170 assert.Equal(t, 5, c.Size()) 171 172 c.entryReceived(t1) 173 c.entryReceived(t3) 174 c.entryReceived(t5) 175 176 assert.Equal(t, 2, c.Size()) 177 178 //Set the time to 120s, this would be more than one wait (60s) and enough to go looking for the missing entries 179 c1Time := time.Unix(120, 0) 180 c.pruneEntries(c1Time) 181 assert.Equal(t, 1, len(c.missingEntries)) // One of the entries was found so only one should be missing 182 183 //Now set the time to 2x maxWait which should guarnatee we stopped looking for the other missing entry 184 c2Time := t1.Add(2 * maxWait) 185 c.pruneEntries(c2Time) 186 187 expected := fmt.Sprintf(ErrOutOfOrderEntry+ErrOutOfOrderEntry+ // 1 Out of order because we missed entries 188 ErrEntryNotReceivedWs+ErrEntryNotReceivedWs+ // 2 Log that entries weren't received over websocket 189 DebugWebsocketMissingEntry+DebugWebsocketMissingEntry+ // 3 List entries we are missing 190 DebugQueryResult+DebugQueryResult+DebugQueryResult+DebugQueryResult+ // 4 List entries we got back from Loki 191 DebugEntryFound+ // 5 We log when t4 was found on followup query 192 DebugWebsocketMissingEntry+ // 6 Log missing entries on second run of pruneEntries 193 DebugQueryResult+DebugQueryResult+DebugQueryResult+DebugQueryResult+ // 7 Because we call pruneEntries twice we get the confirmation query results back twice 194 ErrEntryNotReceived, // 8 List entry we never received and is missing completely 195 196 t3, []time.Time{t2}, t5, []time.Time{t2, t4}, // 1 Out of order entry params 197 t2.UnixNano(), wait.Seconds(), t4.UnixNano(), wait.Seconds(), // 2 Entry not received over websocket params 198 t2.UnixNano(), t4.UnixNano(), // 3 Missing entries 199 t1.UnixNano(), t3.UnixNano(), t4.UnixNano(), t5.UnixNano(), // 4 Confirmation query results first run 200 t4.UnixNano(), c1Time.Sub(t4).Seconds(), // 5 t4 Entry found in follow up query 201 t2.UnixNano(), // 6 Missing Entry 202 t1.UnixNano(), t3.UnixNano(), t4.UnixNano(), t5.UnixNano(), // 7 Confirmation query results second run 203 t2.UnixNano(), maxWait.Seconds()) // 8 Entry never found 204 205 assert.Equal(t, expected, actual.String()) 206 assert.Equal(t, 0, c.Size()) 207 208 assert.Equal(t, 2, outOfOrderEntries.(*mockCounter).count) 209 assert.Equal(t, 0, unexpectedEntries.(*mockCounter).count) 210 assert.Equal(t, 2, wsMissingEntries.(*mockCounter).count) 211 assert.Equal(t, 1, missingEntries.(*mockCounter).count) 212 assert.Equal(t, 0, duplicateEntries.(*mockCounter).count) 213 214 // This avoids a panic on subsequent test execution, 215 // seems ugly but was easy, and multiple instantiations 216 // of the comparator should be an error 217 prometheus.Unregister(responseLatency) 218 219 } 220 221 // Ensure that if confirmMissing calls pile up and run concurrently, this doesn't cause a panic 222 func TestConcurrentConfirmMissing(t *testing.T) { 223 found := []time.Time{ 224 time.Unix(0, 0), 225 time.UnixMilli(1), 226 time.UnixMilli(2), 227 } 228 mr := &mockReader{resp: found} 229 230 output := &bytes.Buffer{} 231 232 wait := 30 * time.Millisecond 233 maxWait := 30 * time.Millisecond 234 235 c := NewComparator(output, wait, maxWait, 50*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), mr, false) 236 237 for _, t := range found { 238 tCopy := t 239 c.missingEntries = append(c.missingEntries, &tCopy) 240 } 241 242 wg := sync.WaitGroup{} 243 for i := 0; i < 10; i++ { 244 wg.Add(1) 245 go func() { 246 defer wg.Done() 247 assert.NotPanics(t, func() { c.confirmMissing(time.UnixMilli(3)) }) 248 }() 249 } 250 assert.Eventually( 251 t, 252 func() bool { 253 wg.Wait() 254 return true 255 }, 256 time.Second, 257 10*time.Millisecond, 258 ) 259 } 260 261 func TestPruneAckdEntires(t *testing.T) { 262 actual := &bytes.Buffer{} 263 wait := 30 * time.Millisecond 264 maxWait := 30 * time.Millisecond 265 //We set the prune interval timer to a huge value here so that it never runs, instead we call pruneEntries manually below 266 c := NewComparator(actual, wait, maxWait, 50*time.Hour, 15*time.Minute, 4*time.Hour, 4*time.Hour, 0, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), nil, false) 267 268 t1 := time.Unix(0, 0) 269 t2 := t1.Add(1 * time.Millisecond) 270 t3 := t2.Add(1 * time.Millisecond) 271 t4 := t3.Add(100 * time.Second) 272 273 assert.Equal(t, 0, len(c.ackdEntries)) 274 275 c.entrySent(t1) 276 c.entrySent(t2) 277 c.entrySent(t3) 278 c.entrySent(t4) 279 280 assert.Equal(t, 4, c.Size()) 281 assert.Equal(t, 0, len(c.ackdEntries)) 282 283 c.entryReceived(t1) 284 c.entryReceived(t2) 285 c.entryReceived(t3) 286 c.entryReceived(t4) 287 288 assert.Equal(t, 0, c.Size()) 289 assert.Equal(t, 4, len(c.ackdEntries)) 290 291 // Wait a couple maxWaits to make sure the first 3 timestamps get pruned from the ackdEntries, 292 // the fourth should still remain because its much much newer and we only prune things older than wait 293 c.pruneEntries(t1.Add(2 * maxWait)) 294 295 assert.Equal(t, 1, len(c.ackdEntries)) 296 assert.Equal(t, t4, *c.ackdEntries[0]) 297 298 } 299 300 func TestSpotCheck(t *testing.T) { 301 spotCheckMissing = &mockCounter{} 302 spotCheckEntries = &mockCounter{} 303 304 actual := &bytes.Buffer{} 305 306 t1 := time.Unix(0, 0) 307 entries := []time.Time{} 308 found := []time.Time{} 309 entries = append(entries, t1) 310 for i := 1; i <= 20; i++ { 311 t := entries[i-1].Add(1 * time.Millisecond) 312 entries = append(entries, t) 313 // Don't add the last entry so we get one error in spot check 314 if i != 20 { 315 found = append(found, t) 316 } 317 } 318 319 mr := &mockReader{resp: found} 320 spotCheck := 10 * time.Millisecond 321 spotCheckMax := 20 * time.Millisecond 322 //We set the prune interval timer to a huge value here so that it never runs, instead we call spotCheckEntries manually below 323 c := NewComparator(actual, 1*time.Hour, 1*time.Hour, 50*time.Hour, spotCheck, spotCheckMax, 4*time.Hour, 3*time.Millisecond, 1*time.Minute, 0, 0, 1, make(chan time.Time), make(chan time.Time), mr, false) 324 325 // Send all the entries 326 for i := range entries { 327 c.entrySent(entries[i]) 328 } 329 330 // Should include the following entries 331 assert.Equal(t, 3, len(c.spotCheck)) 332 assert.Equal(t, time.Unix(0, 0), *c.spotCheck[0]) 333 assert.Equal(t, time.Unix(0, 10*time.Millisecond.Nanoseconds()), *c.spotCheck[1]) 334 assert.Equal(t, time.Unix(0, 20*time.Millisecond.Nanoseconds()), *c.spotCheck[2]) 335 336 // Run with "current time" 1ms after start which is less than spotCheckWait so nothing should be checked 337 c.spotCheckEntries(time.Unix(0, 2*time.Millisecond.Nanoseconds())) 338 assert.Equal(t, 3, len(c.spotCheck)) 339 assert.Equal(t, 0, spotCheckEntries.(*mockCounter).count) 340 341 // Run with "current time" at 25ms, the first entry should be pruned, the second entry should be found, and the last entry should come back as missing 342 c.spotCheckEntries(time.Unix(0, 25*time.Millisecond.Nanoseconds())) 343 344 // First entry should have been pruned, second and third entries have not expired yet 345 assert.Equal(t, 2, len(c.spotCheck)) 346 347 expected := fmt.Sprintf(ErrSpotCheckEntryNotReceived, // List entry not received from Loki 348 entries[20].UnixNano(), "5ms") 349 350 // We didn't send the last entry and our initial counter did not start at 0 so we should get back entries 1-19 351 for i := 1; i < 20; i++ { 352 expected = expected + fmt.Sprintf(DebugQueryResult, entries[i].UnixNano()) 353 } 354 355 assert.Equal(t, expected, actual.String()) 356 357 assert.Equal(t, 2, spotCheckEntries.(*mockCounter).count) 358 assert.Equal(t, 1, spotCheckMissing.(*mockCounter).count) 359 360 prometheus.Unregister(responseLatency) 361 } 362 363 func TestMetricTest(t *testing.T) { 364 metricTestActual = &mockGauge{} 365 metricTestExpected = &mockGauge{} 366 367 actual := &bytes.Buffer{} 368 369 writeInterval := 500 * time.Millisecond 370 371 mr := &mockReader{} 372 metricTestRange := 30 * time.Second 373 //We set the prune interval timer to a huge value here so that it never runs, instead we call spotCheckEntries manually below 374 c := NewComparator(actual, 1*time.Hour, 1*time.Hour, 50*time.Hour, 0, 0, 4*time.Hour, 0, 10*time.Minute, metricTestRange, writeInterval, 1, make(chan time.Time), make(chan time.Time), mr, false) 375 // Force the start time to a known value 376 c.startTime = time.Unix(10, 0) 377 378 // Run test at time 20s which is 10s after start 379 mr.countOverTime = float64((10 * time.Second).Milliseconds()) / float64(writeInterval.Milliseconds()) 380 c.metricTest(time.Unix(0, 20*time.Second.Nanoseconds())) 381 // We want to look back 30s but have only been running from time 10s to time 20s so the query range should be adjusted to 10s 382 assert.Equal(t, "10s", mr.queryRange) 383 // Should be no deviation we set countOverTime to the runtime/writeinterval which should be what metrictTest expected 384 assert.Equal(t, float64(20), metricTestExpected.(*mockGauge).val) 385 assert.Equal(t, float64(20), metricTestActual.(*mockGauge).val) 386 387 // Run test at time 30s which is 20s after start 388 mr.countOverTime = float64((20 * time.Second).Milliseconds()) / float64(writeInterval.Milliseconds()) 389 c.metricTest(time.Unix(0, 30*time.Second.Nanoseconds())) 390 // We want to look back 30s but have only been running from time 10s to time 20s so the query range should be adjusted to 10s 391 assert.Equal(t, "20s", mr.queryRange) 392 // Gauge should be equal to the countOverTime value 393 assert.Equal(t, float64(40), metricTestExpected.(*mockGauge).val) 394 assert.Equal(t, float64(40), metricTestActual.(*mockGauge).val) 395 396 // Run test 60s after start, we should now be capping the query range to 30s and expecting only 30s of counts 397 mr.countOverTime = float64((30 * time.Second).Milliseconds()) / float64(writeInterval.Milliseconds()) 398 c.metricTest(time.Unix(0, 60*time.Second.Nanoseconds())) 399 // We want to look back 30s but have only been running from time 10s to time 20s so the query range should be adjusted to 10s 400 assert.Equal(t, "30s", mr.queryRange) 401 // Gauge should be equal to the countOverTime value 402 assert.Equal(t, float64(60), metricTestExpected.(*mockGauge).val) 403 assert.Equal(t, float64(60), metricTestActual.(*mockGauge).val) 404 405 prometheus.Unregister(responseLatency) 406 } 407 408 func Test_pruneList(t *testing.T) { 409 t1 := time.Unix(0, 0) 410 t2 := time.Unix(1, 0) 411 t3 := time.Unix(2, 0) 412 t4 := time.Unix(3, 0) 413 t5 := time.Unix(4, 0) 414 415 list := []*time.Time{&t1, &t2, &t3, &t4, &t5} 416 417 outList := pruneList(list, func(_ int, ts *time.Time) bool { 418 // Sorry t2, nobody likes you 419 return ts.Equal(t2) 420 }, func(i int, ts *time.Time) { 421 assert.Equal(t, 1, i) 422 assert.Equal(t, t2, *ts) 423 }) 424 425 assert.Equal(t, []*time.Time{&t1, &t3, &t4, &t5}, outList) 426 } 427 428 type mockCounter struct { 429 cLck sync.Mutex 430 count int 431 } 432 433 func (m *mockCounter) Desc() *prometheus.Desc { 434 panic("implement me") 435 } 436 437 func (m *mockCounter) Write(*io_prometheus_client.Metric) error { 438 panic("implement me") 439 } 440 441 func (m *mockCounter) Describe(chan<- *prometheus.Desc) { 442 panic("implement me") 443 } 444 445 func (m *mockCounter) Collect(chan<- prometheus.Metric) { 446 panic("implement me") 447 } 448 449 func (m *mockCounter) Add(float64) { 450 panic("implement me") 451 } 452 453 func (m *mockCounter) Inc() { 454 m.cLck.Lock() 455 defer m.cLck.Unlock() 456 m.count++ 457 } 458 459 type mockGauge struct { 460 cLck sync.Mutex 461 val float64 462 } 463 464 func (m *mockGauge) Desc() *prometheus.Desc { 465 panic("implement me") 466 } 467 468 func (m *mockGauge) Write(*io_prometheus_client.Metric) error { 469 panic("implement me") 470 } 471 472 func (m *mockGauge) Describe(chan<- *prometheus.Desc) { 473 panic("implement me") 474 } 475 476 func (m *mockGauge) Collect(chan<- prometheus.Metric) { 477 panic("implement me") 478 } 479 480 func (m *mockGauge) Set(v float64) { 481 m.cLck.Lock() 482 m.val = v 483 m.cLck.Unlock() 484 } 485 486 func (m *mockGauge) Inc() { 487 panic("implement me") 488 } 489 490 func (m *mockGauge) Dec() { 491 panic("implement me") 492 } 493 494 func (m *mockGauge) Add(float64) { 495 panic("implement me") 496 } 497 498 func (m *mockGauge) Sub(float64) { 499 panic("implement me") 500 } 501 502 func (m *mockGauge) SetToCurrentTime() { 503 panic("implement me") 504 } 505 506 type mockReader struct { 507 resp []time.Time 508 countOverTime float64 509 queryRange string 510 } 511 512 func (r *mockReader) Query(start time.Time, end time.Time) ([]time.Time, error) { 513 return r.resp, nil 514 } 515 516 func (r *mockReader) QueryCountOverTime(queryRange string) (float64, error) { 517 r.queryRange = queryRange 518 return r.countOverTime, nil 519 }