github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/internal/client/requestbatcher/batcher_test.go

// Copyright 2019 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package requestbatcher

import (
	"context"
	"fmt"
	"runtime"
	"sync"
	"sync/atomic"
	"testing"
	"time"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/testutils"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/timeutil"
	"github.com/stretchr/testify/assert"
	"golang.org/x/sync/errgroup"
)

type batchResp struct {
	// TODO(ajwerner): we never actually test that this result is what we expect
	// it to be. We should add a test that does so.
	br *roachpb.BatchResponse
	pe *roachpb.Error
}

type batchSend struct {
	ctx      context.Context
	ba       roachpb.BatchRequest
	respChan chan<- batchResp
}

type chanSender chan batchSend

func (c chanSender) Send(
	ctx context.Context, ba roachpb.BatchRequest,
) (*roachpb.BatchResponse, *roachpb.Error) {
	respChan := make(chan batchResp, 1)
	select {
	case c <- batchSend{ctx: ctx, ba: ba, respChan: respChan}:
	case <-ctx.Done():
		return nil, roachpb.NewError(ctx.Err())
	}
	select {
	case resp := <-respChan:
		return resp.br, resp.pe
	case <-ctx.Done():
		return nil, roachpb.NewError(ctx.Err())
	}
}

type senderGroup struct {
	b *RequestBatcher
	g errgroup.Group
}

func (g *senderGroup) Send(rangeID roachpb.RangeID, request roachpb.Request) {
	g.g.Go(func() error {
		_, err := g.b.Send(context.Background(), rangeID, request)
		return err
	})
}

func (g *senderGroup) Wait() error {
	return g.g.Wait()
}

func TestBatcherSendOnSizeWithReset(t *testing.T) {
	// This test ensures that when a single batch ends up being sent due to size
	// constraints, its timer is successfully canceled and does not lead to a
	// nil panic due to an attempt to send a batch when the old timer fires.
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	// The challenge with populating this timeout is that if we set it too short
	// then there's a chance that the batcher will send based on time and not
	// size, which somewhat defeats the purpose of the test in the first place.
	// If we set the timeout too long then the test will take a long time for no
	// good reason. Instead of erring on the side of being conservative with the
	// timeout we instead allow the test to pass successfully even if it doesn't
	// exercise the path we intended. This is better than having the test block
	// forever or fail. We don't expect that it will take 5ms in the common case
	// to send two messages on a channel, and if it does, oh well, the logic below
	// deals with that too and at least the test doesn't fail or hang forever.
	const wait = 5 * time.Millisecond
	b := New(Config{
		MaxIdle:         wait,
		MaxWait:         wait,
		MaxMsgsPerBatch: 2,
		Sender:          sc,
		Stopper:         stopper,
	})
	g := senderGroup{b: b}
	g.Send(1, &roachpb.GetRequest{})
	g.Send(1, &roachpb.GetRequest{})
	s := <-sc
	s.respChan <- batchResp{}
	// See the comment above wait. In rare cases the batch will be sent before the
	// second request can be added. In this case we need to expect that another
	// request will be sent and handle it so that the test does not block forever.
	if len(s.ba.Requests) == 1 {
		t.Logf("batch was sent due to time rather than size constraints, passing anyway")
		s := <-sc
		s.respChan <- batchResp{}
	} else {
		time.Sleep(wait)
	}
	if err := g.Wait(); err != nil {
		t.Fatalf("Failed to send: %v", err)
	}
}

// TestBatchesAtTheSameTime attempts to test that batches which seem to occur at
// exactly the same moment are eventually sent. Sometimes it may be the case
// that this test fails to exercise that path if the channel send to the
// goroutine happens to take more than 10ms but in that case both batches will
// definitely get sent and the test will pass. This test was added to account
// for a bug where the internal timer would not get set if two batches had the
// same deadline. This test failed regularly before that bug was fixed.
func TestBatchesAtTheSameTime(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	start := timeutil.Now()
	then := start.Add(10 * time.Millisecond)
	b := New(Config{
		MaxIdle: 20 * time.Millisecond,
		Sender:  sc,
		Stopper: stopper,
		NowFunc: func() time.Time { return then },
	})
	const N = 20
	sendChan := make(chan Response, N)
	for i := 0; i < N; i++ {
		assert.Nil(t, b.SendWithChan(context.Background(), sendChan, roachpb.RangeID(i), &roachpb.GetRequest{}))
	}
	for i := 0; i < N; i++ {
		bs := <-sc
		bs.respChan <- batchResp{}
	}
}

func TestBackpressure(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	b := New(Config{
		MaxIdle:                   50 * time.Millisecond,
		MaxWait:                   50 * time.Millisecond,
		MaxMsgsPerBatch:           1,
		Sender:                    sc,
		Stopper:                   stopper,
		InFlightBackpressureLimit: 3,
	})

	// These 3 should all send without blocking but should put the batcher into
	// back pressure.
	sendChan := make(chan Response, 6)
	assert.Nil(t, b.SendWithChan(context.Background(), sendChan, 1, &roachpb.GetRequest{}))
	assert.Nil(t, b.SendWithChan(context.Background(), sendChan, 2, &roachpb.GetRequest{}))
	assert.Nil(t, b.SendWithChan(context.Background(), sendChan, 3, &roachpb.GetRequest{}))
	var sent int64
	send := func() {
		assert.Nil(t, b.SendWithChan(context.Background(), sendChan, 4, &roachpb.GetRequest{}))
		atomic.AddInt64(&sent, 1)
	}
	go send()
	go send()
	canReply := make(chan struct{})
	reply := func(bs batchSend) {
		<-canReply
		bs.respChan <- batchResp{}
	}
	for i := 0; i < 3; i++ {
		bs := <-sc
		go reply(bs)
		// We don't expect either of the calls to send to have finished yet.
		assert.Equal(t, int64(0), atomic.LoadInt64(&sent))
	}
	// Allow one reply to fly which should not unblock the requests.
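	// (Going from 3 to 2 in-flight batches is not expected to be enough to lift
	// back pressure; the expectations below imply that the batcher only resumes
	// accepting new work once the in-flight count falls below a recovery
	// threshold that sits somewhat below InFlightBackpressureLimit.)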
	canReply <- struct{}{}
	runtime.Gosched() // tickle the runtime in case there might be a timing bug
	assert.Equal(t, int64(0), atomic.LoadInt64(&sent))
	canReply <- struct{}{} // now the two requests should send
	defer func() {
		if t.Failed() {
			close(canReply)
		}
	}()
	testutils.SucceedsSoon(t, func() error {
		if numSent := atomic.LoadInt64(&sent); numSent != 2 {
			return fmt.Errorf("expected %d to have been sent, so far %d", 2, numSent)
		}
		return nil
	})
	close(canReply)
	reply(<-sc)
	reply(<-sc)
}

func TestBatcherSend(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	b := New(Config{
		MaxIdle:         50 * time.Millisecond,
		MaxWait:         50 * time.Millisecond,
		MaxMsgsPerBatch: 3,
		Sender:          sc,
		Stopper:         stopper,
	})
	// Send 3 requests to range 2 and 2 to range 1.
	// The 3rd range 2 request will trigger immediate sending due to the
	// MaxMsgsPerBatch configuration. The range 1 batch will be sent after the
	// MaxWait timeout expires.
	g := senderGroup{b: b}
	g.Send(1, &roachpb.GetRequest{})
	g.Send(2, &roachpb.GetRequest{})
	g.Send(1, &roachpb.GetRequest{})
	g.Send(2, &roachpb.GetRequest{})
	g.Send(2, &roachpb.GetRequest{})
	// Wait for the range 2 request and ensure it contains 3 requests.
	s := <-sc
	assert.Len(t, s.ba.Requests, 3)
	s.respChan <- batchResp{}
	// Wait for the range 1 request and ensure it contains 2 requests.
	s = <-sc
	assert.Len(t, s.ba.Requests, 2)
	s.respChan <- batchResp{}
	// Make sure everything gets a response.
	if err := g.Wait(); err != nil {
		t.Fatalf("expected no errors, got %v", err)
	}
}

func TestSendAfterStopped(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	sc := make(chanSender)
	b := New(Config{
		Sender:  sc,
		Stopper: stopper,
	})
	stopper.Stop(context.Background())
	_, err := b.Send(context.Background(), 1, &roachpb.GetRequest{})
	assert.Equal(t, err, stop.ErrUnavailable)
}

func TestSendAfterCanceled(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sc := make(chanSender)
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	b := New(Config{
		Sender:  sc,
		Stopper: stopper,
	})
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	_, err := b.Send(ctx, 1, &roachpb.GetRequest{})
	assert.Equal(t, err, ctx.Err())
}

// TestStopDuringSend ensures that in-flight requests are canceled when the
// RequestBatcher's stopper indicates that it should quiesce.
func TestStopDuringSend(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	sc := make(chanSender, 1)
	b := New(Config{
		Sender:          sc,
		Stopper:         stopper,
		MaxMsgsPerBatch: 1,
	})
	errChan := make(chan error)
	go func() {
		_, err := b.Send(context.Background(), 1, &roachpb.GetRequest{})
		errChan <- err
	}()
	// Wait for the request to get sent.
	<-sc
	stopper.Stop(context.Background())
	// Depending on the ordering of when channels close the sender might
	// get one of two errors.
	assert.True(t, testutils.IsError(<-errChan,
		stop.ErrUnavailable.Error()+"|"+context.Canceled.Error()))
}

func TestPanicWithNilStopper(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer func() {
		if r := recover(); r == nil {
			t.Fatalf("failed to panic with a nil Stopper")
		}
	}()
	New(Config{Sender: make(chanSender)})
}

// TestBatchTimeout verifies that the RequestBatcher uses the context with the
// deadline from the latest call to send.
func TestBatchTimeout(t *testing.T) {
	defer leaktest.AfterTest(t)()
	const timeout = 5 * time.Millisecond
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	t.Run("WithTimeout", func(t *testing.T) {
		b := New(Config{
			// MaxMsgsPerBatch of 1 is chosen so that the first call to Send will
			// immediately lead to a batch being sent.
			MaxMsgsPerBatch: 1,
			Sender:          sc,
			Stopper:         stopper,
		})
		// This test attempts to verify that a batch containing a request with a
		// timeout will be sent with that timeout. The test faces timing
		// challenges. There are several different phases at which the timeout
		// may fire: the request may time out before it has been sent to the
		// batcher, it may time out while it is being sent, or it may not time
		// out until after it has been sent. Each of these cases is handled and
		// verified to ensure that the request was indeed sent with a timeout.
		ctx, cancel := context.WithTimeout(context.Background(), timeout)
		defer cancel()
		respChan := make(chan Response, 1)
		if err := b.SendWithChan(ctx, respChan, 1, &roachpb.GetRequest{}); err != nil {
			testutils.IsError(err, context.DeadlineExceeded.Error())
			return
		}
		select {
		case s := <-sc:
			deadline, hasDeadline := s.ctx.Deadline()
			assert.True(t, hasDeadline)
			assert.True(t, timeutil.Until(deadline) < timeout)
			s.respChan <- batchResp{}
		case resp := <-respChan:
			assert.Nil(t, resp.Resp)
			testutils.IsError(resp.Err, context.DeadlineExceeded.Error())
		}
	})
	t.Run("NoTimeout", func(t *testing.T) {
		b := New(Config{
			// MaxMsgsPerBatch of 2 is chosen so that the second call to Send will
			// immediately lead to a batch being sent.
			MaxMsgsPerBatch: 2,
			Sender:          sc,
			Stopper:         stopper,
		})
		// This test attempts to verify that a batch of two requests, only one of
		// which carries a timeout, is sent without a timeout. There is a hazard
		// that the goroutine whose context is being canceled is not able to send
		// its request to the batcher before its deadline expires, in which case
		// the batch is never sent due to size constraints. The test still passes
		// in this scenario, after logging and cleaning up.
		ctx1, cancel1 := context.WithTimeout(context.Background(), timeout)
		defer cancel1()
		ctx2, cancel2 := context.WithCancel(context.Background())
		defer cancel2()
		var wg sync.WaitGroup
		wg.Add(2)
		var err1, err2 error
		err1Chan := make(chan error, 1)
		go func() {
			_, err1 = b.Send(ctx1, 1, &roachpb.GetRequest{})
			err1Chan <- err1
			wg.Done()
		}()
		go func() { _, err2 = b.Send(ctx2, 1, &roachpb.GetRequest{}); wg.Done() }()
		select {
		case s := <-sc:
			assert.Len(t, s.ba.Requests, 2)
			s.respChan <- batchResp{}
		case <-err1Chan:
			// This case implies that the test did not exercise what was intended,
			// but that's okay; clean up the other request and return.
			assert.Equal(t, context.DeadlineExceeded, err1)
			t.Logf("canceled goroutine failed to send within %v, passing", timeout)
			cancel2()
			wg.Wait()
			return
		}
		wg.Wait()
		testutils.IsError(err1, context.DeadlineExceeded.Error())
		assert.Nil(t, err2)
	})
}

// TestIdleAndMaxTimeoutDisabled exercises the RequestBatcher when it is
// configured to send only based on batch size policies.
func TestIdleAndMaxTimeoutDisabled(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	b := New(Config{
		MaxMsgsPerBatch: 2,
		Sender:          sc,
		Stopper:         stopper,
	})
	// Send 2 requests to range 1. Even with an arbitrarily large delay between
	// the requests, they should only be sent when the MaxMsgsPerBatch limit is
	// reached, because no MaxWait timeout is configured.
	g := senderGroup{b: b}
	g.Send(1, &roachpb.GetRequest{})
	select {
	case <-sc:
		t.Fatalf("RequestBatcher should not send based on time")
	case <-time.After(10 * time.Millisecond):
	}
	g.Send(1, &roachpb.GetRequest{})
	s := <-sc
	assert.Len(t, s.ba.Requests, 2)
	s.respChan <- batchResp{}
	// Make sure everything gets a response.
	if err := g.Wait(); err != nil {
		t.Fatalf("expected no errors, got %v", err)
	}
}

// TestMaxKeysPerBatchReq exercises the RequestBatcher when it is configured to
// assign each request a MaxSpanRequestKeys limit. When such a limit is used,
// the RequestBatcher may receive partial responses to the requests that it
// issues, so it needs to be prepared to paginate requests and combine partial
// responses.
func TestMaxKeysPerBatchReq(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	b := New(Config{
		MaxMsgsPerBatch:    3,
		MaxKeysPerBatchReq: 5,
		Sender:             sc,
		Stopper:            stopper,
	})
	// Send 3 ResolveIntentRange requests. The requests are limited so
	// pagination will be required.
	// The following sequence of partial results will be returned:
	//  send([{d-g}, {a-d}, {b-m}]) ->
	//   scans from [a, c) before hitting limit
	//   returns [{d-g}, {c-d}, {c-m}]
	//  send([{d-g}, {c-d}, {c-m}]) ->
	//   scans from [c, e) before hitting limit
	//   returns [{e-g}, {}, {e-m}]
	//  send([{e-g}, {e-m}]) ->
	//   scans from [e, h) before hitting limit
	//   returns [{}, {h-m}]
	//  send([{h-m}]) ->
	//   scans from [h, m) without hitting limit
	//   returns [{}]
	type span [2]string // [key, endKey]
	type spanMap map[span]span
	var nilResumeSpan span
	makeReq := func(sp span) *roachpb.ResolveIntentRangeRequest {
		var req roachpb.ResolveIntentRangeRequest
		req.Key = roachpb.Key(sp[0])
		req.EndKey = roachpb.Key(sp[1])
		return &req
	}
	makeResp := func(ba *roachpb.BatchRequest, resumeSpans spanMap) *roachpb.BatchResponse {
		br := ba.CreateReply()
		for i, ru := range ba.Requests {
			req := ru.GetResolveIntentRange()
			reqSp := span{string(req.Key), string(req.EndKey)}
			resumeSp, ok := resumeSpans[reqSp]
			if !ok {
				t.Fatalf("unexpected request: %+v", req)
			}
			if resumeSp == nilResumeSpan {
				continue
			}
			resp := br.Responses[i].GetResolveIntentRange()
			resp.ResumeSpan = &roachpb.Span{
				Key: roachpb.Key(resumeSp[0]), EndKey: roachpb.Key(resumeSp[1]),
			}
			resp.ResumeReason = roachpb.RESUME_KEY_LIMIT
		}
		return br
	}
	g := senderGroup{b: b}
	g.Send(1, makeReq(span{"d", "g"}))
	g.Send(1, makeReq(span{"a", "d"}))
	g.Send(1, makeReq(span{"b", "m"}))
	// send([{d-g}, {a-d}, {b-m}]) ->
	//  scans from [a, c) before hitting limit
	//  returns [{d-g}, {c-d}, {c-m}]
	s := <-sc
	assert.Equal(t, int64(5), s.ba.MaxSpanRequestKeys)
	assert.Len(t, s.ba.Requests, 3)
	br := makeResp(&s.ba, spanMap{
		{"d", "g"}: {"d", "g"},
		{"a", "d"}: {"c", "d"},
		{"b", "m"}: {"c", "m"},
	})
	s.respChan <- batchResp{br: br}
	// send([{d-g}, {c-d}, {c-m}]) ->
	//  scans from [c, e) before hitting limit
	//  returns [{e-g}, {}, {e-m}]
	s = <-sc
	assert.Equal(t, int64(5), s.ba.MaxSpanRequestKeys)
	assert.Len(t, s.ba.Requests, 3)
	br = makeResp(&s.ba, spanMap{
		{"d", "g"}: {"e", "g"},
		{"c", "d"}: nilResumeSpan,
		{"c", "m"}: {"e", "m"},
	})
	s.respChan <- batchResp{br: br}
	// send([{e-g}, {e-m}]) ->
	//  scans from [e, h) before hitting limit
	//  returns [{}, {h-m}]
	s = <-sc
	assert.Equal(t, int64(5), s.ba.MaxSpanRequestKeys)
	assert.Len(t, s.ba.Requests, 2)
	br = makeResp(&s.ba, spanMap{
		{"e", "g"}: nilResumeSpan,
		{"e", "m"}: {"h", "m"},
	})
	s.respChan <- batchResp{br: br}
	// send([{h-m}]) ->
	//  scans from [h, m) without hitting limit
	//  returns [{}]
	s = <-sc
	assert.Equal(t, int64(5), s.ba.MaxSpanRequestKeys)
	assert.Len(t, s.ba.Requests, 1)
	br = makeResp(&s.ba, spanMap{
		{"h", "m"}: nilResumeSpan,
	})
	s.respChan <- batchResp{br: br}
	// Make sure everything gets a response.
	if err := g.Wait(); err != nil {
		t.Fatalf("expected no errors, got %v", err)
	}
}

func TestPanicWithNilSender(t *testing.T) {
	defer leaktest.AfterTest(t)()
	defer func() {
		if r := recover(); r == nil {
			t.Fatalf("failed to panic with a nil Sender")
		}
	}()
	New(Config{Stopper: stop.NewStopper()})
}
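
// The TODO on batchResp notes that no test asserts the contents of the result
// delivered back to callers. The sketch below is one possible shape for such a
// test; it is not part of the original file and its name is hypothetical. It
// assumes only what the other tests here already rely on: that SendWithChan
// delivers a Response on the provided channel, and that Response.Resp carries
// the per-request response unpacked from the mock sender's BatchResponse.
func TestResponsePropagationSketch(t *testing.T) {
	defer leaktest.AfterTest(t)()
	stopper := stop.NewStopper()
	defer stopper.Stop(context.Background())
	sc := make(chanSender)
	b := New(Config{
		// A batch size of 1 makes the single request below flush immediately.
		MaxMsgsPerBatch: 1,
		Sender:          sc,
		Stopper:         stopper,
	})
	respChan := make(chan Response, 1)
	assert.Nil(t, b.SendWithChan(context.Background(), respChan, 1, &roachpb.GetRequest{}))
	// Reply to the batch with a well-formed BatchResponse built from the
	// request itself, as TestMaxKeysPerBatchReq does via CreateReply.
	s := <-sc
	assert.Len(t, s.ba.Requests, 1)
	s.respChan <- batchResp{br: s.ba.CreateReply()}
	// The caller should see a successful per-request response, not an error.
	resp := <-respChan
	assert.Nil(t, resp.Err)
	if assert.NotNil(t, resp.Resp) {
		_, ok := resp.Resp.(*roachpb.GetResponse)
		assert.True(t, ok, "expected a *roachpb.GetResponse, got %T", resp.Resp)
	}
}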