github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/scanner.go (about)

     1  // Copyright (C) 2017  The GoHBase Authors.  All rights reserved.
     2  // This file is part of GoHBase.
     3  // Use of this source code is governed by the Apache License 2.0
     4  // that can be found in the COPYING file.
     5  
     6  package gohbase
     7  
     8  import (
     9  	"bytes"
    10  	"context"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"log/slog"
    15  	"math"
    16  	"time"
    17  
    18  	"github.com/tsuna/gohbase/hrpc"
    19  	"github.com/tsuna/gohbase/pb"
    20  	"google.golang.org/protobuf/proto"
    21  )
    22  
const (
	// noScannerID is the sentinel value of scanner.curRegionScannerID
	// meaning no scanner is currently open on a region.
	noScannerID = math.MaxUint64

	// Scan metric names; presumably the names HBase reports in
	// ScanMetrics and therefore keys of scanner.scanMetrics —
	// NOTE(review): not referenced within this chunk, confirm usage.
	rowsScanned  = "ROWS_SCANNED"
	rowsFiltered = "ROWS_FILTERED"
)
    29  
// rowPadding is used to pad a row key when constructing, for a reversed
// scan, the closest row key strictly preceding a region's start key
// (see scanner.update).
var rowPadding = []byte{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}
    32  
// scanner iterates over the results of a scan, fetching batches from the
// server and moving from region to region as each is exhausted.
type scanner struct {
	RPCClient
	// rpc is original scan query
	rpc *hrpc.Scan
	// curRegionScannerID is the id of scanner on current region, or
	// noScannerID when no region scanner is open
	curRegionScannerID uint64
	// startRow is the start row in the current region
	startRow []byte
	// results buffers fetched results not yet consumed by Next
	results []*pb.Result
	// closed is set once the scan has finished or was aborted
	closed bool
	// scanMetrics accumulates server-reported metrics by name;
	// non-nil only when the scan has TrackScanMetrics enabled
	scanMetrics map[string]int64

	logger *slog.Logger
	// renewCancel stops the background lease-renewal goroutine, if any
	renewCancel context.CancelFunc
}
    48  
    49  func (s *scanner) fetch() ([]*pb.Result, error) {
    50  	// keep looping until we have error, some non-empty result or until close
    51  	for {
    52  		resp, region, err := s.request()
    53  		if err != nil {
    54  			s.Close()
    55  			return nil, err
    56  		}
    57  		if s.rpc.TrackScanMetrics() && resp.ScanMetrics != nil {
    58  			metrics := resp.ScanMetrics.GetMetrics()
    59  			for _, m := range metrics {
    60  				s.scanMetrics[m.GetName()] += m.GetValue()
    61  			}
    62  		}
    63  
    64  		s.update(resp, region)
    65  		if s.isDone(resp, region) {
    66  			s.Close()
    67  		}
    68  
    69  		if rs := resp.Results; len(rs) > 0 {
    70  			return rs, nil
    71  		} else if s.closed {
    72  			return nil, io.EOF
    73  		}
    74  	}
    75  }
    76  
    77  func (s *scanner) peek() (*pb.Result, error) {
    78  	if len(s.results) > 0 {
    79  		return s.results[0], nil
    80  	}
    81  
    82  	if s.renewCancel != nil {
    83  		// About to send new Scan request to HBase, cancel our
    84  		// renewer.
    85  		s.renewCancel()
    86  		s.renewCancel = nil
    87  	}
    88  
    89  	if s.closed {
    90  		// done scanning
    91  		return nil, io.EOF
    92  	}
    93  
    94  	rs, err := s.fetch()
    95  	if err != nil {
    96  		return nil, err
    97  	}
    98  	if !s.closed && s.rpc.RenewInterval() > 0 {
    99  		// Start up a renewer
   100  		renewCtx, cancel := context.WithCancel(s.rpc.Context())
   101  		s.renewCancel = cancel
   102  		go s.renewLoop(renewCtx, s.startRow)
   103  	}
   104  
   105  	// fetch cannot return zero results
   106  	s.results = rs
   107  	return s.results[0], nil
   108  }
   109  
   110  func (s *scanner) shift() {
   111  	if len(s.results) == 0 {
   112  		return
   113  	}
   114  	// set to nil so that GC isn't blocked to clean up the result
   115  	s.results[0] = nil
   116  	s.results = s.results[1:]
   117  }
   118  
   119  // coalesce combines result with partial if they belong to the same row
   120  // and returns the coalesced result and whether coalescing happened
   121  func (s *scanner) coalesce(result, partial *pb.Result) (*pb.Result, bool) {
   122  	if result == nil {
   123  		return partial, true
   124  	}
   125  	if !result.GetPartial() {
   126  		// results is not partial, shouldn't coalesce
   127  		return result, false
   128  	}
   129  
   130  	if len(partial.Cell) > 0 && !bytes.Equal(result.Cell[0].Row, partial.Cell[0].Row) {
   131  		// new row
   132  		result.Partial = proto.Bool(false)
   133  		return result, false
   134  	}
   135  
   136  	// same row, add the partial
   137  	result.Cell = append(result.Cell, partial.Cell...)
   138  	if partial.GetStale() {
   139  		result.Stale = proto.Bool(partial.GetStale())
   140  	}
   141  	return result, true
   142  }
   143  
   144  func newScanner(c RPCClient, rpc *hrpc.Scan, logger *slog.Logger) *scanner {
   145  	var sm map[string]int64
   146  	if rpc.TrackScanMetrics() {
   147  		sm = make(map[string]int64)
   148  	}
   149  	return &scanner{
   150  		RPCClient:          c,
   151  		rpc:                rpc,
   152  		startRow:           rpc.StartRow(),
   153  		curRegionScannerID: noScannerID,
   154  		scanMetrics:        sm,
   155  		logger:             logger,
   156  	}
   157  }
   158  
   159  func toLocalResult(r *pb.Result) *hrpc.Result {
   160  	if r == nil {
   161  		return nil
   162  	}
   163  	res := hrpc.ToLocalResult(r)
   164  	return res
   165  }
   166  
   167  func (s *scanner) Next() (*hrpc.Result, error) {
   168  	var (
   169  		result, partial *pb.Result
   170  		err             error
   171  	)
   172  
   173  	select {
   174  	case <-s.rpc.Context().Done():
   175  		s.Close()
   176  		return nil, s.rpc.Context().Err()
   177  	default:
   178  	}
   179  
   180  	if s.rpc.AllowPartialResults() {
   181  		// if client handles partials, just return it
   182  		result, err = s.peek()
   183  		if err != nil {
   184  			return nil, err
   185  		}
   186  		s.shift()
   187  		return toLocalResult(result), nil
   188  	}
   189  
   190  	for {
   191  		partial, err = s.peek()
   192  		if err == io.EOF && result != nil {
   193  			// no more results, return what we have. Next call to the Next() will get EOF
   194  			result.Partial = proto.Bool(false)
   195  			return toLocalResult(result), nil
   196  		}
   197  		if err != nil {
   198  			// return whatever we have so far and the error
   199  			return toLocalResult(result), err
   200  		}
   201  
   202  		var done bool
   203  		result, done = s.coalesce(result, partial)
   204  		if done {
   205  			s.shift()
   206  		}
   207  		if !result.GetPartial() {
   208  			// if not partial anymore, return it
   209  			return toLocalResult(result), nil
   210  		}
   211  	}
   212  }
   213  
   214  func (s *scanner) request() (*pb.ScanResponse, hrpc.RegionInfo, error) {
   215  	var (
   216  		rpc *hrpc.Scan
   217  		err error
   218  	)
   219  
   220  	if s.isRegionScannerClosed() {
   221  		// preserve ScanStatsID
   222  		opts := append(s.rpc.Options(), hrpc.ScanStatsID(s.rpc.ScanStatsID()))
   223  
   224  		// open a new region scan to scan on a new region
   225  		rpc, err = hrpc.NewScanRange(
   226  			s.rpc.Context(),
   227  			s.rpc.Table(),
   228  			s.startRow,
   229  			s.rpc.StopRow(),
   230  			opts...)
   231  	} else {
   232  		// continuing to scan current region
   233  		rpc, err = hrpc.NewScanRange(s.rpc.Context(),
   234  			s.rpc.Table(),
   235  			s.startRow,
   236  			nil,
   237  			hrpc.ScannerID(s.curRegionScannerID),
   238  			hrpc.NumberOfRows(s.rpc.NumberOfRows()),
   239  			hrpc.Priority(s.rpc.Priority()),
   240  			hrpc.RenewInterval(s.rpc.RenewInterval()),
   241  			// preserve ScanStatsID
   242  			hrpc.ScanStatsID(s.rpc.ScanStatsID()),
   243  		)
   244  	}
   245  	if err != nil {
   246  		return nil, nil, err
   247  	}
   248  
   249  	res, err := s.SendRPC(rpc)
   250  	if err != nil {
   251  		return nil, nil, err
   252  	}
   253  	scanres, ok := res.(*pb.ScanResponse)
   254  	if !ok {
   255  		return nil, nil, errors.New("got non-ScanResponse for scan request")
   256  	}
   257  	return scanres, rpc.Region(), nil
   258  }
   259  
   260  // update updates the scanner for the next scan request
   261  func (s *scanner) update(resp *pb.ScanResponse, region hrpc.RegionInfo) {
   262  	if s.isRegionScannerClosed() && resp.ScannerId != nil {
   263  		s.openRegionScanner(resp.GetScannerId())
   264  	}
   265  	if !resp.GetMoreResultsInRegion() {
   266  		// we are done with this region, prepare scan for next region
   267  		s.curRegionScannerID = noScannerID
   268  
   269  		// Normal Scan
   270  		if !s.rpc.Reversed() {
   271  			s.startRow = region.StopKey()
   272  			return
   273  		}
   274  
   275  		// Reversed Scan
   276  		// return if we are at the end
   277  		if len(region.StartKey()) == 0 {
   278  			s.startRow = region.StartKey()
   279  			return
   280  		}
   281  
   282  		// create the nearest value lower than the current region startKey
   283  		rsk := region.StartKey()
   284  		// if last element is 0x0, just shorten the slice
   285  		if rsk[len(rsk)-1] == 0x0 {
   286  			s.startRow = rsk[:len(rsk)-1]
   287  			return
   288  		}
   289  
   290  		// otherwise lower the last element byte value by 1 and pad with 0xffs
   291  		tmp := make([]byte, len(rsk), len(rsk)+len(rowPadding))
   292  		copy(tmp, rsk)
   293  		tmp[len(tmp)-1] = tmp[len(tmp)-1] - 1
   294  		s.startRow = append(tmp, rowPadding...)
   295  	}
   296  }
   297  
   298  func (s *scanner) Close() error {
   299  	if s.closed {
   300  		return nil
   301  	}
   302  	if s.renewCancel != nil {
   303  		s.renewCancel()
   304  	}
   305  	s.closed = true
   306  	// close the last region scanner
   307  	s.closeRegionScanner()
   308  	return nil
   309  }
   310  
// GetScanMetrics returns the scan metrics for the scanner.
// The scan metrics are non-nil only if the Scan has TrackScanMetrics() enabled.
// GetScanMetrics should only be called after the scanner has been closed with an io.EOF
// (there are no more rows left to be returned by calls to Next()).
// Note the returned map is the scanner's internal accumulator, not a copy;
// it should not be read while calls to Next() are still in progress.
func (s *scanner) GetScanMetrics() map[string]int64 {
	return s.scanMetrics
}
   318  
   319  // isDone check if this scanner is done fetching new results
   320  func (s *scanner) isDone(resp *pb.ScanResponse, region hrpc.RegionInfo) bool {
   321  	if resp.MoreResults != nil && !*resp.MoreResults {
   322  		// or the filter for the whole scan has been exhausted, close the scanner
   323  		return true
   324  	}
   325  
   326  	if !s.isRegionScannerClosed() {
   327  		// not done with this region yet
   328  		return false
   329  	}
   330  
   331  	// Check to see if this region is the last we should scan because:
   332  	// (1) it's the last region
   333  	if len(region.StopKey()) == 0 && !s.rpc.Reversed() {
   334  		return true
   335  	}
   336  	if s.rpc.Reversed() && len(region.StartKey()) == 0 {
   337  		return true
   338  	}
   339  	// (3) because its stop_key is greater than or equal to the stop_key of this scanner,
   340  	// provided that (2) we're not trying to scan until the end of the table.
   341  	if !s.rpc.Reversed() {
   342  		return len(s.rpc.StopRow()) != 0 && // (2)
   343  			bytes.Compare(s.rpc.StopRow(), region.StopKey()) <= 0 // (3)
   344  	}
   345  
   346  	//  Reversed Scanner
   347  	return len(s.rpc.StopRow()) != 0 && // (2)
   348  		bytes.Compare(s.rpc.StopRow(), region.StartKey()) >= 0 // (3)
   349  }
   350  
// isRegionScannerClosed reports whether no scanner is currently open on a
// region (curRegionScannerID holds the noScannerID sentinel).
func (s *scanner) isRegionScannerClosed() bool {
	return s.curRegionScannerID == noScannerID
}
   354  
   355  func (s *scanner) openRegionScanner(scannerId uint64) {
   356  	if !s.isRegionScannerClosed() {
   357  		panic(fmt.Sprintf("should not happen: previous region scanner was not closed"))
   358  	}
   359  	s.curRegionScannerID = scannerId
   360  }
   361  
   362  func (s *scanner) closeRegionScanner() {
   363  	if s.isRegionScannerClosed() {
   364  		return
   365  	}
   366  	if !s.rpc.IsClosing() {
   367  		// Not closed at server side
   368  		// if we are closing in the middle of scanning a region,
   369  		// send a close scanner request
   370  		// TODO: add a deadline
   371  		rpc, err := hrpc.NewScanRange(context.Background(),
   372  			s.rpc.Table(), s.startRow, nil,
   373  			hrpc.ScannerID(s.curRegionScannerID),
   374  			hrpc.CloseScanner(),
   375  			hrpc.NumberOfRows(0),
   376  			hrpc.ScanStatsID(s.rpc.ScanStatsID()))
   377  		if err != nil {
   378  			panic(fmt.Sprintf("should not happen: %s", err))
   379  		}
   380  
   381  		// If the request fails, the scanner lease will be expired
   382  		// and it will be closed automatically by hbase.
   383  		// No need to bother clients about that.
   384  		go s.SendRPC(rpc)
   385  	}
   386  	s.curRegionScannerID = noScannerID
   387  }
   388  
   389  // renews a scanner by resending scan request with renew = true
   390  func (s *scanner) renew(ctx context.Context, startRow []byte) error {
   391  	if err := ctx.Err(); err != nil {
   392  		return err
   393  	}
   394  	rpc, err := hrpc.NewScanRange(ctx,
   395  		s.rpc.Table(),
   396  		startRow,
   397  		nil,
   398  		hrpc.ScannerID(s.curRegionScannerID),
   399  		hrpc.Priority(s.rpc.Priority()),
   400  		hrpc.RenewalScan(),
   401  		hrpc.ScanStatsID(s.rpc.ScanStatsID()),
   402  	)
   403  	if err != nil {
   404  		return err
   405  	}
   406  	_, err = s.SendRPC(rpc)
   407  	return err
   408  }
   409  
   410  func (s *scanner) renewLoop(ctx context.Context, startRow []byte) {
   411  	scanRenewers.Inc()
   412  	t := time.NewTicker(s.rpc.RenewInterval())
   413  	defer func() {
   414  		t.Stop()
   415  		scanRenewers.Dec()
   416  	}()
   417  
   418  	for {
   419  		select {
   420  		case <-t.C:
   421  			if err := s.renew(ctx, startRow); err != nil {
   422  				s.logger.Error("error renewing scanner", "err", err)
   423  				return
   424  			}
   425  		case <-ctx.Done():
   426  			return
   427  		}
   428  	}
   429  }