// Copyright (C) 2016 The GoHBase Authors. All rights reserved.
// This file is part of GoHBase.
// Use of this source code is governed by the Apache License 2.0
// that can be found in the COPYING file.

package gohbase

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"math"
	"strconv"
	"time"

	"github.com/tsuna/gohbase/hrpc"
	"github.com/tsuna/gohbase/internal/observability"
	"github.com/tsuna/gohbase/pb"
	"github.com/tsuna/gohbase/region"
	"github.com/tsuna/gohbase/zk"
	"go.opentelemetry.io/otel/attribute"
	"go.opentelemetry.io/otel/codes"
	"go.opentelemetry.io/otel/trace"
	"google.golang.org/protobuf/proto"
)

// regionInfoAndAddr pairs a region with the address of the region server
// hosting it, as parsed from an hbase:meta row.
type regionInfoAndAddr struct {
	regionInfo hrpc.RegionInfo
	addr       string
}

// Constants
var (
	// Name of the meta region.
	metaTableName = []byte("hbase:meta")

	// infoFamily selects the whole "info" column family, where
	// hbase:meta keeps region locations.
	infoFamily = map[string][]string{
		"info": nil,
	}

	// TableNotFound is returned when attempting to access a table that
	// doesn't exist on this cluster.
	TableNotFound = errors.New("table not found")

	// ErrCannotFindRegion is returned when it took too many tries to find a
	// region for the request. It's likely that hbase:meta has overlaps or some other
	// inconsistency.
	ErrCannotFindRegion = errors.New("cannot find region for the rpc")

	// ErrClientClosed is returned when the gohbase client has been closed
	ErrClientClosed = errors.New("client is closed")
)

const (
	// maxFindRegionTries is the maximum number of times to try to send an RPC
	maxFindRegionTries = 10

	// backoffStart is the initial retry backoff; it is grown by
	// sleepAndIncreaseBackoff on each retry.
	backoffStart = 16 * time.Millisecond
)

// getRegionForRpc returns the region that should serve rpc, consulting the
// region cache first and falling back to a meta lookup via findRegion.
// findRegion can return (nil, nil) when a racing lookup already populated
// the cache, hence the bounded retry loop; after maxFindRegionTries
// attempts ErrCannotFindRegion is returned.
func (c *client) getRegionForRpc(ctx context.Context, rpc hrpc.Call) (hrpc.RegionInfo, error) {
	for i := 0; i < maxFindRegionTries; i++ {
		// Check the cache for a region that can handle this request
		if reg := c.getRegionFromCache(rpc.Table(), rpc.Key()); reg != nil {
			return reg, nil
		}

		if reg, err := c.findRegion(ctx, rpc.Table(), rpc.Key()); reg != nil {
			return reg, nil
		} else if err != nil {
			return nil, err
		}
	}
	return nil, ErrCannotFindRegion
}

// SendRPC sends an RPC to the region server hosting the region the RPC is
// targeted at, retrying on retryable/server/not-serving-region errors until
// it succeeds, hits a non-retryable error, or the RPC's context is done.
// The named results are captured by the deferred func to record the span
// status and operation-duration metric.
func (c *client) SendRPC(rpc hrpc.Call) (msg proto.Message, err error) {
	start := time.Now()
	description := rpc.Description()
	ctx, sp := observability.StartSpan(rpc.Context(), description,
		trace.WithAttributes(
			attribute.String("table", strconv.Quote(string(rpc.Table()))),
			attribute.String("key", strconv.Quote(string(rpc.Key()))),
		))
	defer func() {
		result := "ok"
		if err != nil {
			result = "error"
			sp.SetStatus(codes.Error, err.Error())
		}

		o := operationDurationSeconds.WithLabelValues(description, result)

		observability.ObserveWithTrace(ctx, o, time.Since(start).Seconds())
		sp.End()
	}()

	backoff := backoffStart
	serverErrorCount := 0
	for {
		var rc hrpc.RegionClient
		rc, err = c.getRegionAndClientForRPC(ctx, rpc)
		if err != nil {
			return nil, err
		}
		rpcStart := time.Now()
		msg, err = c.sendRPCToRegionClient(ctx, rpc, rc)
		switch err.(type) {
		case region.RetryableError:
			if scan, ok := rpc.(*hrpc.Scan); ok {
				c.scanRpcScanStats(scan, msg, err, true, rpcStart, time.Now())
			}
			sp.AddEvent("retrySleep")
			backoff, err = sleepAndIncreaseBackoff(ctx, backoff)
			if err != nil {
				return msg, err
			}
			continue // retry
		case region.ServerError:
			if scan, ok := rpc.(*hrpc.Scan); ok {
				c.scanRpcScanStats(scan, msg, err, true, rpcStart, time.Now())
			}
			// Retry ServerError immediately, as we want failover fast to
			// another server. But if HBase keep sending us ServerError, we
			// should start to backoff. We don't want to overwhelm HBase.
			if serverErrorCount > 1 {
				sp.AddEvent("retrySleep")
				backoff, err = sleepAndIncreaseBackoff(ctx, backoff)
				if err != nil {
					return msg, err
				}
			}
			serverErrorCount++
			continue // retry
		case region.NotServingRegionError:
			if scan, ok := rpc.(*hrpc.Scan); ok {
				c.scanRpcScanStats(scan, msg, err, true, rpcStart, time.Now())
			}
			continue // retry
		}
		if scan, ok := rpc.(*hrpc.Scan); ok {
			c.scanRpcScanStats(scan, msg, err, false, rpcStart, time.Now())
		}
		return msg, err
	}
}

// scanRpcScanStats builds an hrpc.ScanStats for one SendRPC attempt of a
// Scan and delivers it to the scan's ScanStatsHandler, if one is set.
// start/end bound the single RPC attempt; retry records whether the
// attempt hit a retryable error.
func (c *client) scanRpcScanStats(scan *hrpc.Scan, resp proto.Message, err error,
	retry bool, start, end time.Time) {
	if scan.ScanStatsHandler() != nil {
		stats := &hrpc.ScanStats{}
		// Update the ScanMetrics if they are being tracked. For ScanStats, these ScanMetrics
		// are collected per call to SendRPC and therefore may not be reflective of the entire
		// result of the Scan request if the results are split across multiple calls to
		// Scanner.Next().
		if scan.TrackScanMetrics() && resp != nil {
			scanres, ok := resp.(*pb.ScanResponse)
			if !ok {
				c.logger.Debug("got non ScanResponse for ScanRequest, no ScanMetrics to add")
			} else {
				if scanres.ScanMetrics != nil {
					stats.ScanMetrics = make(map[string]int64)
					for _, m := range scanres.ScanMetrics.GetMetrics() {
						stats.ScanMetrics[m.GetName()] = m.GetValue()
					}
				}
			}
		}

		stats.Table = scan.Table()
		stats.StartRow = scan.StartRow()
		stats.EndRow = scan.StopRow()
		if reg := scan.Region(); reg != nil {
			stats.RegionID = reg.ID()
			if cl := reg.Client(); cl != nil {
				stats.RegionServer = cl.Addr()
			}
		}
		stats.ScannerID = scan.ScannerId()
		stats.ScanStatsID = scan.ScanStatsID()
		stats.Start = start
		stats.End = end
		if err != nil {
			stats.Error = true
		}
		stats.Retryable = retry
		stats.ResponseSize = scan.ResponseSize
		scan.ScanStatsHandler()(stats)
	}
}

// getRegionAndClientForRPC resolves the region for rpc, waits for the
// region to become available if necessary, and returns a region client
// connected to the hosting region server. It loops until it has a usable
// client, the context is done, or the gohbase client is closed. On
// success the region is attached to the rpc via SetRegion.
func (c *client) getRegionAndClientForRPC(ctx context.Context, rpc hrpc.Call) (
	hrpc.RegionClient, error) {
	for {
		reg, err := c.getRegionForRpc(ctx, rpc)
		if err != nil {
			return nil, err
		}
		if ch := reg.AvailabilityChan(); ch != nil { // region is currently unavailable
			select {
			case <-ctx.Done():
				return nil, ctx.Err()
			case <-c.done:
				return nil, ErrClientClosed
			case <-ch:
			}
		}

		client := reg.Client()
		if client == nil {
			// There was an error getting the region client. Mark the
			// region as unavailable.
			if reg.MarkUnavailable() {
				// If this was the first goroutine to mark the region as
				// unavailable, start a goroutine to reestablish a connection
				go c.reestablishRegion(reg)
			}
			if ch := reg.AvailabilityChan(); ch != nil {
				select {
				case <-ctx.Done():
					return nil, ctx.Err()
				case <-c.done:
					return nil, ErrClientClosed
				case <-ch:
				}
			}
			if reg.Context().Err() != nil {
				// region is dead because it was split or merged,
				// retry lookup
				continue
			}
			client = reg.Client()
			if client == nil {
				// still no client after waiting; retry the lookup
				continue
			}
		}
		rpc.SetRegion(reg)
		return client, nil
	}
}

var (
	// NotExecutedError is returned when an RPC in a batch is not
	// executed due to encountering a different error in the batch.
	NotExecutedError = errors.New(
		"RPC in batch not executed due to another error")
)

// SendBatch will execute all the Calls in batch. Every Call must have
// the same table and must be Batchable.
//
// SendBatch will discover the correct region and region server for
// each Call and dispatch the Calls accordingly. SendBatch is not an
// atomic operation. Some calls may fail and others succeed. Calls
// sharing a region will execute in the order passed into SendBatch.
//
// SendBatch returns a slice of [hrpc.RPCResult] each containing a
// response and an error. The results will be returned in the same
// order as the Calls in the batch, in other words the i'th result
// will be for the i'th call. A nil error means the Call executed
// successfully. allOK is true if all calls completed successfully,
// and false if any calls failed and the errors in the results need to
// be checked.
//
// SendBatch will continue retrying each RPC in batch until it
// succeeds, fails with a non-retryable error, or the context is
// canceled.
270 func (c *client) SendBatch(ctx context.Context, batch []hrpc.Call) ( 271 res []hrpc.RPCResult, allOK bool) { 272 if len(batch) == 0 { 273 return nil, true 274 } 275 276 allOK = true 277 278 start := time.Now() 279 description := "SendBatch" 280 ctx, sp := observability.StartSpan(ctx, description) 281 defer func() { 282 result := "ok" 283 if !allOK { 284 result = "error" 285 sp.SetStatus(codes.Error, "batch error") 286 } 287 288 o := operationDurationSeconds.WithLabelValues(description, result) 289 290 observability.ObserveWithTrace(ctx, o, time.Since(start).Seconds()) 291 sp.End() 292 }() 293 294 table := batch[0].Table() 295 res = make([]hrpc.RPCResult, len(batch)) 296 rpcToRes := make(map[hrpc.Call]int, len(batch)) 297 for i, rpc := range batch { 298 // map Call to index in res so that we can set the correct 299 // result as Calls complete 300 if j, dup := rpcToRes[rpc]; dup { 301 res[i].Error = fmt.Errorf("duplicate call in batch at index %d", j) 302 allOK = false 303 continue 304 } 305 rpcToRes[rpc] = i 306 307 // Initialize res with NotExecutedError. As RPCs are executed this 308 // will be replaced by a more specific error or nil if no error 309 // occurs. 310 res[i].Error = NotExecutedError 311 312 if !bytes.Equal(rpc.Table(), table) { 313 res[i].Error = fmt.Errorf("multiple tables in batch request: %q and %q", 314 string(table), string(rpc.Table())) 315 allOK = false 316 } else if !hrpc.CanBatch(rpc) { 317 res[i].Error = errors.New("non-batchable call passed to SendBatch") 318 allOK = false 319 } 320 } 321 if !allOK { 322 return res, allOK 323 } 324 325 // Send and wait for responses loop. This loop will partition the 326 // batch per-regionserver batches, send those batches to the 327 // region server and wait for results. Any RPCs that hit retryable 328 // errors will be made into a new batch and passed through this 329 // loop again. 330 331 // unretryableErrorSeen set to true when any RPC in the batch hits 332 // an error that is not retryable. 
This is used to remember to 333 // return allOK=false even after we retry RPCs that hit retryable 334 // errors and those all succeed. 335 var unretryableErrorSeen bool 336 var retries []hrpc.Call 337 backoff := backoffStart 338 339 for { 340 rpcByClient, ok := c.findClients(ctx, batch, res) 341 if !ok { 342 return res, false 343 } 344 sendBatchSplitCount.Observe(float64(len(rpcByClient))) 345 346 // Send each group of RPCs to region client to be executed. 347 type clientAndRPCs struct { 348 client hrpc.RegionClient 349 rpcs []hrpc.Call 350 } 351 // keep track of the order requests are queued so that we can wait 352 // for their responses in the same order. 353 cAndRs := make([]clientAndRPCs, 0, len(rpcByClient)) 354 for client, rpcs := range rpcByClient { 355 client.QueueBatch(ctx, rpcs) 356 cAndRs = append(cAndRs, clientAndRPCs{client, rpcs}) 357 } 358 359 // batch wil be used to hold any RPCs that need to be retried 360 batch = batch[:0] 361 var needBackoff bool 362 363 func() { // func used to scope the span 364 ctx, sp := observability.StartSpan(ctx, "waitForResult") 365 defer sp.End() 366 for _, cAndR := range cAndRs { 367 shouldRetry, shouldBackoff, unretryableError, ok := c.waitForCompletion( 368 ctx, cAndR.client, cAndR.rpcs, res, rpcToRes) 369 if !ok { 370 allOK = false 371 retries = append(retries, shouldRetry...) 372 needBackoff = needBackoff || shouldBackoff 373 unretryableErrorSeen = unretryableErrorSeen || unretryableError 374 } 375 } 376 }() 377 378 // Exit retry loop if no RPCs are retryable because they all 379 // succeeded or hit unretryable errors (this is true if 380 // retries is empty), or the context is done. 
381 if len(retries) == 0 || ctx.Err() != nil { 382 break 383 } 384 if needBackoff { 385 sp.AddEvent("retrySleep") 386 var err error 387 backoff, err = sleepAndIncreaseBackoff(ctx, backoff) 388 if err != nil { 389 break 390 } 391 } else { 392 sp.AddEvent("retry") 393 } 394 // Set state for next loop iteration 395 batch = retries 396 retries = retries[:0] 397 allOK = !unretryableErrorSeen 398 } 399 400 return res, allOK 401 } 402 403 // findClients takes a batch of rpcs and discovers the region and 404 // region client associated with each. A map is returned with rpcs 405 // grouped by their region client. If any error is encountered, the 406 // corresponding slot in res will be updated with that error and a 407 // BatchError is returned. 408 // 409 // findClients will not return on the first errror encountered. It 410 // will iterate through all the RPCs to ensure that all unknown 411 // regions encountered in the batch will start being initialized. 412 func (c *client) findClients(ctx context.Context, batch []hrpc.Call, res []hrpc.RPCResult) ( 413 map[hrpc.RegionClient][]hrpc.Call, bool) { 414 415 rpcByClient := make(map[hrpc.RegionClient][]hrpc.Call) 416 ok := true 417 for i, rpc := range batch { 418 rc, err := c.getRegionAndClientForRPC(ctx, rpc) 419 if err != nil { 420 res[i].Error = err 421 ok = false 422 continue // see if any more RPCs are missing regions 423 } 424 rpcByClient[rc] = append(rpcByClient[rc], rpc) 425 } 426 return rpcByClient, ok 427 } 428 429 // waitForCompletion waits for the completion of all rpcs, updating 430 // the appropriate index in results with the help of rpcToRes. If all 431 // rpcs succeed then ok will return true, otherwise: 432 // - ok will be false 433 // - retryables will contain RPCs that can be retried 434 // - shouldBackoff will be true if any retryable RPCs need a backoff before retrying 435 // - unretryableError will be true if there were errors seen on RPCs 436 // that were not retryable. 
// It communicates that retryables does
// not contain all the RPCs that failed, so even though those
// retryable RPCs may eventually succeed we need to return !ok to
// the caller of SendBatch.
func (c *client) waitForCompletion(ctx context.Context, rc hrpc.RegionClient,
	rpcs []hrpc.Call, results []hrpc.RPCResult, rpcToRes map[hrpc.Call]int) (
	retryables []hrpc.Call, shouldBackoff, unretryableError, ok bool) {

	ok = true
	// canceledIndex marks where the blocking loop stopped if the
	// context was canceled; rpcs[canceledIndex:] still need draining.
	canceledIndex := len(rpcs)

loop:
	for i, rpc := range rpcs {
		select {
		case res := <-rpc.ResultChan():
			results[rpcToRes[rpc]] = res
			if res.Error != nil {
				c.handleResultError(res.Error, rpc.Region(), rc)
				ok = false
				switch res.Error.(type) {
				case region.RetryableError:
					// only RetryableError requires a backoff before retrying
					shouldBackoff = true
					retryables = append(retryables, rpc)
				case region.ServerError, region.NotServingRegionError:
					retryables = append(retryables, rpc)
				default:
					unretryableError = true
				}
			}

		case <-ctx.Done():
			canceledIndex = i
			ok = false
			break loop
		}
	}

	// If the context was canceled we may have exited the loop above
	// without checking for every result. Do a non-blocking read of
	// the ResultChan for the remaining RPCs. If not ready the result
	// will be the context error.
	for _, rpc := range rpcs[canceledIndex:] {
		select {
		case res := <-rpc.ResultChan():
			results[rpcToRes[rpc]] = res
			if res.Error != nil {
				c.handleResultError(res.Error, rpc.Region(), rc)
			}
		default:
			results[rpcToRes[rpc]].Error = ctx.Err()
		}
	}

	return retryables, shouldBackoff, unretryableError, ok
}

// handleResultErrorOverride can be used to override behaviour of handleResultError in testing
var handleResultErrorOverride func(err error, reg hrpc.RegionInfo, rc hrpc.RegionClient)

// handleResultError reacts to an RPC failure by marking the affected
// region (NotServingRegionError) or the whole region client
// (ServerError) as down, kicking off reconnection goroutines. Other
// error types require no region/client maintenance and are ignored here.
func (c *client) handleResultError(err error, reg hrpc.RegionInfo, rc hrpc.RegionClient) {
	if handleResultErrorOverride != nil {
		handleResultErrorOverride(err, reg, rc)
		return
	}

	// Check for errors
	switch err.(type) {
	case region.NotServingRegionError:
		// There's an error specific to this region, but
		// our region client is fine. Mark this region as
		// unavailable (as opposed to all regions sharing
		// the client), and start a goroutine to reestablish
		// it.
		if reg.MarkUnavailable() {
			go c.reestablishRegion(reg)
		}
	case region.ServerError:
		// If it was an unrecoverable error, the region client is
		// considered dead.
		if reg == c.adminRegionInfo {
			// If this is the admin client, mark the region
			// as unavailable and start up a goroutine to
			// reconnect if it wasn't already marked as such.
			if reg.MarkUnavailable() {
				go c.reestablishRegion(reg)
			}
		} else {
			c.clientDown(rc, reg)
		}
	}
}

// sendBlocking queues rpc on the given region client and blocks until
// a result arrives or the context is done. Note that on cancellation
// it returns the error of the rpc's own context, not of ctx.
func sendBlocking(ctx context.Context, rc hrpc.RegionClient, rpc hrpc.Call) (
	hrpc.RPCResult, error) {
	rc.QueueRPC(rpc)

	ctx, sp := observability.StartSpan(ctx, "waitForResult")
	defer sp.End()
	var res hrpc.RPCResult
	// Wait for the response
	select {
	case res = <-rpc.ResultChan():
		return res, nil
	case <-ctx.Done():
		return res, rpc.Context().Err()
	}
}

// sendRPCToRegionClient sends one rpc via sendBlocking and routes any
// per-RPC error through handleResultError so region/client health is
// kept up to date.
func (c *client) sendRPCToRegionClient(ctx context.Context, rpc hrpc.Call, rc hrpc.RegionClient) (
	proto.Message, error) {
	res, err := sendBlocking(ctx, rc, rpc)
	if err != nil {
		return nil, err
	}
	if res.Error != nil {
		c.handleResultError(res.Error, rpc.Region(), rc)
	}
	return res.Msg, res.Error
}

// clientDown removes client from cache and marks all the regions
// sharing this region's client as unavailable, and start a goroutine
// to reconnect for each of them.
//
// Due to races filling in the clients cache it may not be completely
// accurate. reg is the region we were trying to access when we saw an
// issue with the region client, so make sure it is marked unavailable
// even if it doesn't appear in the clients cache.
func (c *client) clientDown(client hrpc.RegionClient, reg hrpc.RegionInfo) {
	downregions := c.clients.clientDown(client)
	if reg.MarkUnavailable() {
		reg.SetClient(nil)
		go c.reestablishRegion(reg)
	}
	for downreg := range downregions {
		if downreg == reg {
			// already handled above
			continue
		}
		if downreg.MarkUnavailable() {
			downreg.SetClient(nil)
			go c.reestablishRegion(downreg)
		}
	}
}

// lookupRegion finds the region and region server address for the
// given table/key. The master is resolved via ZooKeeper, hbase:meta's
// host is resolved via ZooKeeper, and everything else goes through a
// meta table lookup. It retries with backoff until it succeeds, the
// table is unknown (TableNotFound), the client is closed, or ctx is
// done.
func (c *client) lookupRegion(ctx context.Context,
	table, key []byte) (hrpc.RegionInfo, string, error) {
	var reg hrpc.RegionInfo
	var addr string
	var err error
	backoff := backoffStart
	for {
		// If it takes longer than regionLookupTimeout, fail so that we can sleep
		lookupCtx, cancel := context.WithTimeout(ctx, c.regionLookupTimeout)
		if c.clientType == region.MasterClient {
			c.logger.Debug("looking up master", "resource", zk.Master)

			addr, err = c.zkLookup(lookupCtx, zk.Master)
			cancel()
			reg = c.adminRegionInfo
		} else if bytes.Equal(table, metaTableName) {
			c.logger.Debug("looking up region server of hbase:meta", "resource", zk.Meta)

			addr, err = c.zkLookup(lookupCtx, zk.Meta)
			cancel()
			reg = c.metaRegionInfo
		} else {
			c.logger.Debug("looking up region",
				"table", strconv.Quote(string(table)), "key", strconv.Quote(string(key)))

			reg, addr, err = c.metaLookup(lookupCtx, table, key)
			cancel()
			if err == TableNotFound {
				c.logger.Debug("hbase:meta does not know about this table/key",
					"table", strconv.Quote(string(table)),
					"key", strconv.Quote(string(key)), "err", err)

				return nil, "", err
			} else if err == ErrClientClosed {
				return nil, "", err
			}
		}
		if err == nil {
			c.logger.Debug("looked up a region", "table", strconv.Quote(string(table)),
				"key", strconv.Quote(string(key)), "region", reg, "addr", addr)

			return reg, addr, nil
		}

		c.logger.Error("failed looking up region", "table", strconv.Quote(string(table)),
			"key", strconv.Quote(string(key)), "backoff", backoff, "err", err)

		// This will be hit if there was an error locating the region
		backoff, err = sleepAndIncreaseBackoff(ctx, backoff)
		if err != nil {
			return nil, "", err
		}
	}
}

// lookupAllRegions returns every open region of table along with its
// region server address, by scanning hbase:meta. Like lookupRegion it
// retries with backoff until success, TableNotFound, client closure,
// or ctx cancellation.
func (c *client) lookupAllRegions(ctx context.Context,
	table []byte) ([]regionInfoAndAddr, error) {
	var regs []regionInfoAndAddr
	var err error
	backoff := backoffStart
	for {
		// If it takes longer than regionLookupTimeout, fail so that we can sleep
		lookupCtx, cancel := context.WithTimeout(ctx, c.regionLookupTimeout)
		c.logger.Debug("looking up regions", "table", strconv.Quote(string(table)))

		regs, err = c.metaLookupForTable(lookupCtx, table)
		cancel()
		if err == TableNotFound {
			c.logger.Debug("hbase:meta does not know about this table",
				"table", strconv.Quote(string(table)), "err", err)

			return nil, err
		} else if err == ErrClientClosed {
			return nil, err
		}

		if err == nil {
			c.logger.Debug("looked up all regions",
				"table", strconv.Quote(string(table)), "regionsAndAddr", regs)

			return regs, nil
		}

		c.logger.Error("failed looking up regions", "table", strconv.Quote(string(table)),
			"backoff", backoff, "err", err)

		// This will be hit if there was an error locating the region
		backoff, err = sleepAndIncreaseBackoff(ctx, backoff)
		if err != nil {
			return nil, err
		}
	}
}

// findAllRegions looks up all regions of table, inserts them into the
// region cache (evicting overlapping stale entries), and starts a
// goroutine per region to establish its client connection.
func (c *client) findAllRegions(ctx context.Context, table []byte) ([]regionInfoAndAddr, error) {
	regs, err := c.lookupAllRegions(ctx, table)
	if err != nil {
		return nil, err
	}
	for _, regaddr := range regs {
		reg, addr := regaddr.regionInfo, regaddr.addr
		// We looked this region up, so we own establishing it; it stays
		// unavailable until establishRegion succeeds.
		reg.MarkUnavailable()

		if reg != c.metaRegionInfo && reg != c.adminRegionInfo {
			// Check that the region wasn't added to
			// the cache while we were looking it up.
			overlaps, replaced := c.regions.put(reg)
			if !replaced {
				// the same or younger regions are already in cache
				continue
			}

			// otherwise, new region in cache, delete overlaps from client's cache
			for _, r := range overlaps {
				c.clients.del(r)
			}
		}

		// Start a goroutine to connect to the region
		go c.establishRegion(reg, addr)
	}

	return regs, nil
}

// findRegion performs a meta lookup for the region covering table/key,
// inserts it into the region cache and kicks off establishing its
// client. A (nil, nil) return means a racing caller already cached an
// equivalent region — the caller should re-check the cache.
func (c *client) findRegion(ctx context.Context, table, key []byte) (hrpc.RegionInfo, error) {
	// The region was not in the cache, it
	// must be looked up in the meta table
	reg, addr, err := c.lookupRegion(ctx, table, key)
	if err != nil {
		return nil, err
	}

	// We are the ones that looked up the region, so we need to
	// mark it unavailable and find a client for it.
	reg.MarkUnavailable()

	if reg != c.metaRegionInfo && reg != c.adminRegionInfo {
		// Check that the region wasn't added to
		// the cache while we were looking it up.
		overlaps, replaced := c.regions.put(reg)
		if !replaced {
			// the same or younger regions are already in cache, retry looking up in cache
			return nil, nil
		}

		// otherwise, new region in cache, delete overlaps from client's cache
		for _, r := range overlaps {
			c.clients.del(r)
		}
	}

	// Start a goroutine to connect to the region
	go c.establishRegion(reg, addr)

	// Wait for the new region to become
	// available, and then send the RPC
	return reg, nil
}

// getRegionFromCache searches in the regions cache for the region hosting the given row.
742 func (c *client) getRegionFromCache(table, key []byte) hrpc.RegionInfo { 743 if c.clientType == region.MasterClient { 744 return c.adminRegionInfo 745 } else if bytes.Equal(table, metaTableName) { 746 return c.metaRegionInfo 747 } 748 regionName := createRegionSearchKey(table, key) 749 _, region := c.regions.get(regionName) 750 if region == nil { 751 return nil 752 } 753 754 // make sure the returned region is for the same table 755 if !bytes.Equal(fullyQualifiedTable(region), table) { 756 // not the same table, can happen if we got the last region 757 return nil 758 } 759 760 if len(region.StopKey()) != 0 && 761 // If the stop key is an empty byte array, it means this region is the 762 // last region for this table and this key ought to be in that region. 763 bytes.Compare(key, region.StopKey()) >= 0 { 764 return nil 765 } 766 767 return region 768 } 769 770 // Creates the META key to search for in order to locate the given key. 771 func createRegionSearchKey(table, key []byte) []byte { 772 // Shorten the key such that the generated meta key is <= MAX_ROW_LENGTH (MaxInt16), otherwise 773 // HBase will throw an exception. 774 keylen := math.MaxInt16 - len(table) - 3 775 if len(key) < keylen { 776 keylen = len(key) 777 } 778 779 metaKey := make([]byte, 0, len(table)+keylen+3) 780 metaKey = append(metaKey, table...) 781 metaKey = append(metaKey, ',') 782 metaKey = append(metaKey, key[:keylen]...) 783 metaKey = append(metaKey, ',') 784 // ':' is the first byte greater than '9'. We always want to find the 785 // entry with the greatest timestamp, so by looking right before ':' 786 // we'll find it. 787 metaKey = append(metaKey, ':') 788 return metaKey 789 } 790 791 // metaLookup checks meta table for the region in which the given row key for the given table is. 
func (c *client) metaLookup(ctx context.Context,
	table, key []byte) (hrpc.RegionInfo, string, error) {
	metaKey := createRegionSearchKey(table, key)
	// Reversed scan of one row starting at metaKey: the first entry at
	// or before metaKey is the meta row of the region covering key.
	rpc, err := hrpc.NewScanRange(ctx, metaTableName, metaKey, table,
		hrpc.Families(infoFamily),
		hrpc.Reversed(),
		hrpc.CloseScanner(),
		hrpc.NumberOfRows(1))
	if err != nil {
		return nil, "", err
	}

	scanner := c.Scan(rpc)
	resp, err := scanner.Next()
	if err == io.EOF {
		// no entry at or before metaKey: the table doesn't exist
		return nil, "", TableNotFound
	}
	if err != nil {
		return nil, "", err
	}

	reg, addr, err := region.ParseRegionInfo(resp)
	if err != nil {
		return nil, "", err
	}
	if !bytes.Equal(table, fullyQualifiedTable(reg)) {
		// This would indicate a bug in HBase.
		return nil, "", fmt.Errorf("meta returned an entry for the wrong table!"+
			" Looked up table=%q key=%q got region=%s", table, key, reg)
	} else if len(reg.StopKey()) != 0 &&
		bytes.Compare(key, reg.StopKey()) >= 0 {
		// This would indicate a hole in the meta table.
		return nil, "", fmt.Errorf("meta returned an entry for the wrong region!"+
			" Looked up table=%q key=%q got region=%s", table, key, reg)
	}
	return reg, addr, nil
}

// Creates the META key to search for all regions
func createAllRegionSearchKey(table []byte) []byte {
	metaKey := make([]byte, 0, len(table)+1)
	metaKey = append(metaKey, table...)
	// '.' is the first byte greater than ','. Meta table entry has
	// the format table,key,timestamp. By adding '.' to the stop row
	// we scan all keys for table
	metaKey = append(metaKey, '.')
	return metaKey
}

// metaLookupForTable checks meta table for all the open table regions.
func (c *client) metaLookupForTable(ctx context.Context,
	table []byte) ([]regionInfoAndAddr, error) {
	// Scan hbase:meta from "table" up to (but excluding) "table."
	// which covers every "table,key,timestamp" row.
	metaKey := createAllRegionSearchKey(table)
	rpc, err := hrpc.NewScanRange(ctx, metaTableName, table, metaKey,
		hrpc.Families(infoFamily))
	if err != nil {
		return nil, err
	}

	var regions []regionInfoAndAddr
	scanner := c.Scan(rpc)
	for {
		resp, err := scanner.Next()
		if err == io.EOF {
			// scanned past the last region row
			break
		}
		if err != nil {
			return nil, err
		}

		reg, addr, err := region.ParseRegionInfo(resp)
		if err != nil {
			// Ignore error, but log if it's anything else than OfflineRegionError. This really
			// shouldn't happen unless HBase meta table is corrupted/changed format.
			if _, ok := err.(region.OfflineRegionError); !ok {
				c.logger.Debug("failed to parse region", "err", err)
			}
			continue
		}

		regions = append(regions, regionInfoAndAddr{regionInfo: reg, addr: addr})
	}

	if len(regions) == 0 {
		return nil, TableNotFound
	}
	return regions, nil
}

// fullyQualifiedTable returns the "namespace:table" name of the
// region's table, or just the table name when the region belongs to
// the default namespace (nil namespace).
func fullyQualifiedTable(reg hrpc.RegionInfo) []byte {
	namespace := reg.Namespace()
	table := reg.Table()
	if namespace == nil {
		return table
	}
	// non-default namespace table
	fqTable := make([]byte, 0, len(namespace)+1+len(table))
	fqTable = append(fqTable, namespace...)
	fqTable = append(fqTable, byte(':'))
	fqTable = append(fqTable, table...)
	return fqTable
}

// reestablishRegion re-runs establishRegion for a region whose client
// was lost, looking the address up again (empty addr), unless the
// gohbase client has been closed.
func (c *client) reestablishRegion(reg hrpc.RegionInfo) {
	select {
	case <-c.done:
		return
	default:
	}

	c.logger.Debug("reestablishing region", "region", reg)
	c.establishRegion(reg, "")
}

// probeKey returns a key in region that is unlikely to have data at it
// in order to test if the region is online.
This prevents the Get request 908 // to actually fetch the data from the storage which consumes resources 909 // of the region server 910 func probeKey(reg hrpc.RegionInfo) []byte { 911 // now we create a probe key: reg.StartKey() + 17 zeros 912 probe := make([]byte, len(reg.StartKey())+17) 913 copy(probe, reg.StartKey()) 914 return probe 915 } 916 917 // isRegionEstablished checks whether regionserver accepts rpcs for the region. 918 // Returns the cause if not established. 919 func isRegionEstablished(rc hrpc.RegionClient, reg hrpc.RegionInfo) error { 920 probe, err := hrpc.NewGet(context.Background(), fullyQualifiedTable(reg), probeKey(reg), 921 hrpc.SkipBatch()) 922 if err != nil { 923 panic(fmt.Sprintf("should not happen: %s", err)) 924 } 925 probe.ExistsOnly() 926 927 probe.SetRegion(reg) 928 res, err := sendBlocking(probe.Context(), rc, probe) 929 if err != nil { 930 panic(fmt.Sprintf("should not happen: %s", err)) 931 } 932 933 switch res.Error.(type) { 934 case region.ServerError, region.NotServingRegionError, region.RetryableError: 935 return res.Error 936 default: 937 return nil 938 } 939 } 940 941 // establishRegionOverride can be set by tests to override the 942 // behavior of establishRegion 943 var establishRegionOverride func(reg hrpc.RegionInfo, addr string) 944 945 func (c *client) establishRegion(reg hrpc.RegionInfo, addr string) { 946 if establishRegionOverride != nil { 947 establishRegionOverride(reg, addr) 948 return 949 } 950 951 var backoff time.Duration 952 var err error 953 for { 954 backoff, err = sleepAndIncreaseBackoff(reg.Context(), backoff) 955 if err != nil { 956 // region is dead 957 reg.MarkAvailable() 958 return 959 } 960 if addr == "" { 961 // need to look up region and address of the regionserver 962 originalReg := reg 963 // lookup region forever until we get it or we learn that it doesn't exist 964 reg, addr, err = c.lookupRegion(originalReg.Context(), 965 fullyQualifiedTable(originalReg), originalReg.StartKey()) 966 967 if 
err == TableNotFound { 968 // region doesn't exist, delete it from caches 969 c.regions.del(originalReg) 970 c.clients.del(originalReg) 971 originalReg.MarkAvailable() 972 973 c.logger.Info("region does not exist anymore", 974 "region", originalReg.String(), "err", err, "backoff", backoff) 975 976 return 977 } else if originalReg.Context().Err() != nil { 978 // region is dead 979 originalReg.MarkAvailable() 980 981 c.logger.Info("region became dead while establishing client for it", 982 "region", originalReg.String(), "err", err, "backoff", backoff) 983 984 return 985 } else if err == ErrClientClosed { 986 // client has been closed 987 return 988 } else if err != nil { 989 panic(fmt.Errorf("unknown error occurred when looking up region %q, "+ 990 "backoff=%s: %s", originalReg.String(), backoff, err)) 991 } 992 if !bytes.Equal(reg.Name(), originalReg.Name()) { 993 // put new region and remove overlapping ones. 994 // Should remove the original region as well. 995 reg.MarkUnavailable() 996 overlaps, replaced := c.regions.put(reg) 997 if !replaced { 998 // a region that is the same or younger is already in cache 999 reg.MarkAvailable() 1000 originalReg.MarkAvailable() 1001 return 1002 } 1003 // otherwise delete the overlapped regions in cache 1004 for _, r := range overlaps { 1005 c.clients.del(r) 1006 } 1007 // let rpcs know that they can retry and either get the newly 1008 // added region from cache or lookup the one they need 1009 originalReg.MarkAvailable() 1010 } else { 1011 // same region, discard the looked up one 1012 reg = originalReg 1013 } 1014 } 1015 1016 var client hrpc.RegionClient 1017 if reg == c.adminRegionInfo { 1018 // admin region is used for talking to master, so it only has one connection to 1019 // master that we don't add to the cache 1020 // TODO: consider combining this case with the regular regionserver path 1021 client = c.newRegionClientFn(addr, c.clientType, c.rpcQueueSize, c.flushInterval, 1022 c.effectiveUser, c.regionReadTimeout, nil, 
c.regionDialer, c.logger) 1023 } else { 1024 client = c.clients.put(addr, reg, func() hrpc.RegionClient { 1025 return c.newRegionClientFn(addr, c.clientType, c.rpcQueueSize, c.flushInterval, 1026 c.effectiveUser, c.regionReadTimeout, c.compressionCodec, 1027 c.regionDialer, c.logger) 1028 }) 1029 } 1030 1031 // connect to the region's regionserver. 1032 // only the first caller to Dial gets to actually connect, other concurrent calls 1033 // will block until connected or an error. 1034 dialCtx, cancel := context.WithTimeout(reg.Context(), c.regionLookupTimeout) 1035 err = client.Dial(dialCtx) 1036 cancel() 1037 1038 if err == nil { 1039 if reg == c.adminRegionInfo { 1040 reg.SetClient(client) 1041 reg.MarkAvailable() 1042 return 1043 } 1044 1045 if err = isRegionEstablished(client, reg); err == nil { 1046 // set region client so that as soon as we mark it available, 1047 // concurrent readers are able to find the client 1048 reg.SetClient(client) 1049 reg.MarkAvailable() 1050 return 1051 } else if _, ok := err.(region.ServerError); ok { 1052 // the client we got died 1053 c.clientDown(client, reg) 1054 } 1055 } else if err == context.Canceled { 1056 // region is dead 1057 reg.MarkAvailable() 1058 return 1059 } else { 1060 // otherwise Dial failed, purge the client and retry. 1061 // note that it's safer to reestablish all regions for this client as well 1062 // because they could have ended up setteling for the same client. 
			// Dial failed for some other reason: purge the client and retry.
			// Note that it's safer to reestablish all regions for this client
			// as well because they could have ended up settling for the same
			// client.
			c.clientDown(client, reg)
		}

		c.logger.Debug("region was not established, retrying",
			"region", reg, "backoff", backoff, "err", err)
		// reset address because we weren't able to connect to it
		// or regionserver says it's still offline, should look up again
		addr = ""
	}
}

// sleepAndIncreaseBackoffOverride can be set by tests to override the
// behavior of sleepAndIncreaseBackoff.
var sleepAndIncreaseBackoffOverride func(
	ctx context.Context, backoff time.Duration) (time.Duration, error)

// sleepAndIncreaseBackoff sleeps for the given backoff duration and returns
// the next backoff to use. A zero backoff means "first attempt": no sleep is
// performed and backoffStart is returned. Otherwise the backoff doubles until
// it reaches 5 seconds, then grows by 5 seconds per call until it caps at
// 30 seconds. If ctx is canceled while sleeping, (0, ctx.Err()) is returned
// and the sleep is not recorded in the retryBackoffDuration metric.
func sleepAndIncreaseBackoff(ctx context.Context, backoff time.Duration) (time.Duration, error) {
	// Test hook: when set, delegate entirely to the override.
	if sleepAndIncreaseBackoffOverride != nil {
		return sleepAndIncreaseBackoffOverride(ctx, backoff)
	}
	if backoff == 0 {
		return backoffStart, nil
	}

	select {
	case <-time.After(backoff):
	case <-ctx.Done():
		return 0, ctx.Err()
	}

	// Keep track of the amount of time spent sleeping in retry backoff.
	// Ignored if the context was canceled (we returned above).
	retryBackoffDuration.Observe(backoff.Seconds())

	// When changing this formula, update the buckets of the
	// retryBackoffDuration metric too.
	if backoff < 5*time.Second {
		return backoff * 2, nil
	} else if backoff < 30*time.Second {
		return backoff + 5*time.Second, nil
	}
	return backoff, nil
}

// zkResult contains the result of a ZooKeeper lookup (when we're looking for
// the meta region or the HMaster).
type zkResult struct {
	// addr is the "host:port" of the located resource. Empty on error.
	addr string
	// err is the lookup error, if any.
	err error
}

// zkLookup asynchronously looks up the meta region or HMaster in ZooKeeper.
// It returns as soon as the lookup completes or ctx is done, whichever comes
// first; on cancellation the lookup goroutine is left to finish on its own
// (it cannot block, see below).
func (c *client) zkLookup(ctx context.Context, resource zk.ResourceName) (string, error) {
	// We make this a buffered channel so that if we stop waiting due to a
	// timeout, we won't block the lookup goroutine that we start below.
	reschan := make(chan zkResult, 1)
	go func() {
		addr, err := c.zkClient.LocateResource(resource.Prepend(c.zkRoot))
		// This is guaranteed to never block as the channel is always buffered.
		reschan <- zkResult{addr, err}
	}()
	select {
	case res := <-reschan:
		return res.addr, res.err
	case <-ctx.Done():
		return "", ctx.Err()
	}
}