// Copyright (C) 2017 The GoHBase Authors. All rights reserved.
// This file is part of GoHBase.
// Use of this source code is governed by the Apache License 2.0
// that can be found in the COPYING file.

package gohbase

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math"
	"time"

	"github.com/tsuna/gohbase/hrpc"
	"github.com/tsuna/gohbase/pb"
	"google.golang.org/protobuf/proto"
)

const (
	// noScannerID is a sentinel scanner id meaning that no region scanner
	// is currently open.
	noScannerID = math.MaxUint64

	// Names of the server-reported scan metrics accumulated in
	// scanner.scanMetrics.
	rowsScanned  = "ROWS_SCANNED"
	rowsFiltered = "ROWS_FILTERED"
)

// rowPadding is used to pad the row key when constructing the row
// immediately preceding a region's start key for a reversed scan
// (see scanner.update).
var rowPadding = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}

// scanner drives a scan across regions: it opens a scanner on one region
// at a time, buffers the returned results, and hands rows out via Next.
type scanner struct {
	RPCClient
	// rpc is the original scan query
	rpc *hrpc.Scan
	// curRegionScannerID is the id of the scanner on the current region,
	// or noScannerID when no region scanner is open
	curRegionScannerID uint64
	// startRow is the start row in the current region
	startRow []byte
	// results buffers results fetched from the current region that have
	// not yet been consumed
	results []*pb.Result
	// closed is set once the scan is done and no more fetches will happen
	closed bool
	// scanMetrics accumulates server-side scan metrics; non-nil only when
	// TrackScanMetrics() is enabled on the scan
	scanMetrics map[string]int64

	logger *slog.Logger
	// renewCancel cancels the background lease-renewer goroutine, if any
	renewCancel context.CancelFunc
}

// fetch issues scan requests until it gets a non-empty batch of results,
// an error, or the scan completes. It never returns a nil error together
// with zero results (peek relies on this).
func (s *scanner) fetch() ([]*pb.Result, error) {
	// keep looping until we have error, some non-empty result or until close
	for {
		resp, region, err := s.request()
		if err != nil {
			s.Close()
			return nil, err
		}
		if s.rpc.TrackScanMetrics() && resp.ScanMetrics != nil {
			// fold the server-reported metrics into our running totals
			metrics := resp.ScanMetrics.GetMetrics()
			for _, m := range metrics {
				s.scanMetrics[m.GetName()] += m.GetValue()
			}
		}

		s.update(resp, region)
		if s.isDone(resp, region) {
			s.Close()
		}

		if rs := resp.Results; len(rs) > 0 {
			return rs, nil
		} else if s.closed {
			return nil, io.EOF
		}
	}
}

// peek returns the next buffered result without consuming it, fetching a
// new batch from the server when the buffer is empty. It returns io.EOF
// once the scan is exhausted.
func (s *scanner) peek() (*pb.Result, error) {
	if len(s.results) > 0 {
		return s.results[0], nil
	}

	if s.renewCancel != nil {
		// About to send new Scan request to HBase, cancel our
		// renewer.
		s.renewCancel()
		s.renewCancel = nil
	}

	if s.closed {
		// done scanning
		return nil, io.EOF
	}

	rs, err := s.fetch()
	if err != nil {
		return nil, err
	}
	if !s.closed && s.rpc.RenewInterval() > 0 {
		// Start up a renewer
		renewCtx, cancel := context.WithCancel(s.rpc.Context())
		s.renewCancel = cancel
		go s.renewLoop(renewCtx, s.startRow)
	}

	// fetch cannot return zero results
	s.results = rs
	return s.results[0], nil
}

// shift consumes the result at the head of the buffer.
func (s *scanner) shift() {
	if len(s.results) == 0 {
		return
	}
	// set to nil so that GC isn't blocked to clean up the result
	s.results[0] = nil
	s.results = s.results[1:]
}

// coalesce combines result with partial if they belong to the same row
// and returns the coalesced result and whether coalescing happened
func (s *scanner) coalesce(result, partial *pb.Result) (*pb.Result, bool) {
	if result == nil {
		// nothing accumulated yet; start with the partial
		return partial, true
	}
	if !result.GetPartial() {
		// results is not partial, shouldn't coalesce
		return result, false
	}

	if len(partial.Cell) > 0 && !bytes.Equal(result.Cell[0].Row, partial.Cell[0].Row) {
		// new row
		result.Partial = proto.Bool(false)
		return result, false
	}

	// same row, add the partial
	result.Cell = append(result.Cell, partial.Cell...)
	if partial.GetStale() {
		// a row coalesced from any stale partial is itself stale
		result.Stale = proto.Bool(partial.GetStale())
	}
	return result, true
}

// newScanner returns a scanner for the given scan request, allocating the
// metrics map only when metric tracking is enabled.
func newScanner(c RPCClient, rpc *hrpc.Scan, logger *slog.Logger) *scanner {
	var sm map[string]int64
	if rpc.TrackScanMetrics() {
		sm = make(map[string]int64)
	}
	return &scanner{
		RPCClient:          c,
		rpc:                rpc,
		startRow:           rpc.StartRow(),
		curRegionScannerID: noScannerID,
		scanMetrics:        sm,
		logger:             logger,
	}
}

// toLocalResult converts a protobuf result into an hrpc.Result,
// passing nil through unchanged.
func toLocalResult(r *pb.Result) *hrpc.Result {
	if r == nil {
		return nil
	}
	res := hrpc.ToLocalResult(r)
	return res
}

// Next returns the next row of the scan, or io.EOF once the scan is
// exhausted. Unless AllowPartialResults is set, partial results belonging
// to the same row are coalesced into one complete row before returning.
func (s *scanner) Next() (*hrpc.Result, error) {
	var (
		result, partial *pb.Result
		err             error
	)

	// bail out early if the scan's context has already been cancelled
	select {
	case <-s.rpc.Context().Done():
		s.Close()
		return nil, s.rpc.Context().Err()
	default:
	}

	if s.rpc.AllowPartialResults() {
		// if client handles partials, just return it
		result, err = s.peek()
		if err != nil {
			return nil, err
		}
		s.shift()
		return toLocalResult(result), nil
	}

	for {
		partial, err = s.peek()
		if err == io.EOF && result != nil {
			// no more results, return what we have.
			// Next call to the Next() will get EOF
			result.Partial = proto.Bool(false)
			return toLocalResult(result), nil
		}
		if err != nil {
			// return whatever we have so far and the error
			return toLocalResult(result), err
		}

		var done bool
		result, done = s.coalesce(result, partial)
		if done {
			s.shift()
		}
		if !result.GetPartial() {
			// if not partial anymore, return it
			return toLocalResult(result), nil
		}
	}
}

// request sends the next scan RPC: it opens a scanner on a new region when
// none is open, otherwise it continues scanning the current region. It
// returns the response together with the region that was scanned.
func (s *scanner) request() (*pb.ScanResponse, hrpc.RegionInfo, error) {
	var (
		rpc *hrpc.Scan
		err error
	)

	if s.isRegionScannerClosed() {
		// preserve ScanStatsID
		opts := append(s.rpc.Options(), hrpc.ScanStatsID(s.rpc.ScanStatsID()))

		// open a new region scan to scan on a new region
		rpc, err = hrpc.NewScanRange(
			s.rpc.Context(),
			s.rpc.Table(),
			s.startRow,
			s.rpc.StopRow(),
			opts...)
	} else {
		// continuing to scan current region
		rpc, err = hrpc.NewScanRange(s.rpc.Context(),
			s.rpc.Table(),
			s.startRow,
			nil,
			hrpc.ScannerID(s.curRegionScannerID),
			hrpc.NumberOfRows(s.rpc.NumberOfRows()),
			hrpc.Priority(s.rpc.Priority()),
			hrpc.RenewInterval(s.rpc.RenewInterval()),
			// preserve ScanStatsID
			hrpc.ScanStatsID(s.rpc.ScanStatsID()),
		)
	}
	if err != nil {
		return nil, nil, err
	}

	res, err := s.SendRPC(rpc)
	if err != nil {
		return nil, nil, err
	}
	scanres, ok := res.(*pb.ScanResponse)
	if !ok {
		return nil, nil, errors.New("got non-ScanResponse for scan request")
	}
	return scanres, rpc.Region(), nil
}

// update updates the scanner for the next scan request
func (s *scanner) update(resp *pb.ScanResponse, region hrpc.RegionInfo) {
	if s.isRegionScannerClosed() && resp.ScannerId != nil {
		// the server opened a new region scanner for us; remember its id
		s.openRegionScanner(resp.GetScannerId())
	}
	if !resp.GetMoreResultsInRegion() {
		// we are done with this region, prepare scan for next region
		s.curRegionScannerID = noScannerID

		// Normal Scan
		if !s.rpc.Reversed() {
			s.startRow = region.StopKey()
			return
		}

		// Reversed Scan
		// return if we are at the end
		if len(region.StartKey()) == 0 {
			s.startRow = region.StartKey()
			return
		}

		// create the nearest value lower than the current region startKey
		rsk := region.StartKey()
		// if last element is 0x0, just shorten the slice
		if rsk[len(rsk)-1] == 0x0 {
			s.startRow = rsk[:len(rsk)-1]
			return
		}

		// otherwise lower the last element byte value by 1 and pad with 0xffs
		tmp := make([]byte, len(rsk), len(rsk)+len(rowPadding))
		copy(tmp, rsk)
		tmp[len(tmp)-1] = tmp[len(tmp)-1] - 1
		s.startRow = append(tmp, rowPadding...)
	}
}

// Close releases the scanner's resources: it stops any renewer goroutine
// and closes the scanner open on the current region. It is safe to call
// more than once; subsequent calls are no-ops.
func (s *scanner) Close() error {
	if s.closed {
		return nil
	}
	if s.renewCancel != nil {
		s.renewCancel()
	}
	s.closed = true
	// close the last region scanner
	s.closeRegionScanner()
	return nil
}

// GetScanMetrics returns the scan metrics for the scanner.
// The scan metrics are non-nil only if the Scan has TrackScanMetrics() enabled.
// GetScanMetrics should only be called after the scanner has been closed with an io.EOF
// (there are no more rows left to be returned by calls to Next()).
315 func (s *scanner) GetScanMetrics() map[string]int64 { 316 return s.scanMetrics 317 } 318 319 // isDone check if this scanner is done fetching new results 320 func (s *scanner) isDone(resp *pb.ScanResponse, region hrpc.RegionInfo) bool { 321 if resp.MoreResults != nil && !*resp.MoreResults { 322 // or the filter for the whole scan has been exhausted, close the scanner 323 return true 324 } 325 326 if !s.isRegionScannerClosed() { 327 // not done with this region yet 328 return false 329 } 330 331 // Check to see if this region is the last we should scan because: 332 // (1) it's the last region 333 if len(region.StopKey()) == 0 && !s.rpc.Reversed() { 334 return true 335 } 336 if s.rpc.Reversed() && len(region.StartKey()) == 0 { 337 return true 338 } 339 // (3) because its stop_key is greater than or equal to the stop_key of this scanner, 340 // provided that (2) we're not trying to scan until the end of the table. 341 if !s.rpc.Reversed() { 342 return len(s.rpc.StopRow()) != 0 && // (2) 343 bytes.Compare(s.rpc.StopRow(), region.StopKey()) <= 0 // (3) 344 } 345 346 // Reversed Scanner 347 return len(s.rpc.StopRow()) != 0 && // (2) 348 bytes.Compare(s.rpc.StopRow(), region.StartKey()) >= 0 // (3) 349 } 350 351 func (s *scanner) isRegionScannerClosed() bool { 352 return s.curRegionScannerID == noScannerID 353 } 354 355 func (s *scanner) openRegionScanner(scannerId uint64) { 356 if !s.isRegionScannerClosed() { 357 panic(fmt.Sprintf("should not happen: previous region scanner was not closed")) 358 } 359 s.curRegionScannerID = scannerId 360 } 361 362 func (s *scanner) closeRegionScanner() { 363 if s.isRegionScannerClosed() { 364 return 365 } 366 if !s.rpc.IsClosing() { 367 // Not closed at server side 368 // if we are closing in the middle of scanning a region, 369 // send a close scanner request 370 // TODO: add a deadline 371 rpc, err := hrpc.NewScanRange(context.Background(), 372 s.rpc.Table(), s.startRow, nil, 373 hrpc.ScannerID(s.curRegionScannerID), 374 
hrpc.CloseScanner(), 375 hrpc.NumberOfRows(0), 376 hrpc.ScanStatsID(s.rpc.ScanStatsID())) 377 if err != nil { 378 panic(fmt.Sprintf("should not happen: %s", err)) 379 } 380 381 // If the request fails, the scanner lease will be expired 382 // and it will be closed automatically by hbase. 383 // No need to bother clients about that. 384 go s.SendRPC(rpc) 385 } 386 s.curRegionScannerID = noScannerID 387 } 388 389 // renews a scanner by resending scan request with renew = true 390 func (s *scanner) renew(ctx context.Context, startRow []byte) error { 391 if err := ctx.Err(); err != nil { 392 return err 393 } 394 rpc, err := hrpc.NewScanRange(ctx, 395 s.rpc.Table(), 396 startRow, 397 nil, 398 hrpc.ScannerID(s.curRegionScannerID), 399 hrpc.Priority(s.rpc.Priority()), 400 hrpc.RenewalScan(), 401 hrpc.ScanStatsID(s.rpc.ScanStatsID()), 402 ) 403 if err != nil { 404 return err 405 } 406 _, err = s.SendRPC(rpc) 407 return err 408 } 409 410 func (s *scanner) renewLoop(ctx context.Context, startRow []byte) { 411 scanRenewers.Inc() 412 t := time.NewTicker(s.rpc.RenewInterval()) 413 defer func() { 414 t.Stop() 415 scanRenewers.Dec() 416 }() 417 418 for { 419 select { 420 case <-t.C: 421 if err := s.renew(ctx, startRow); err != nil { 422 s.logger.Error("error renewing scanner", "err", err) 423 return 424 } 425 case <-ctx.Done(): 426 return 427 } 428 } 429 }