github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/pingcap/go-hbase/scan.go (about)

     1  package hbase
     2  
     3  import (
     4  	"bytes"
     5  
     6  	pb "github.com/insionng/yougam/libraries/golang/protobuf/proto"
     7  	"github.com/insionng/yougam/libraries/juju/errors"
     8  	"github.com/insionng/yougam/libraries/ngaut/log"
     9  	"github.com/insionng/yougam/libraries/pingcap/go-hbase/proto"
    10  )
    11  
    12  // nextKey returns the next key in byte-order.
    13  // for example:
    14  // nil -> [0]
    15  // [] -> [0]
    16  // [0] -> [1]
    17  // [1, 2, 3] -> [1, 2, 4]
    18  // [1, 255] -> [2, 0]
    19  // [255] -> [0, 0]
    20  func nextKey(data []byte) []byte {
    21  	// nil or []byte{}
    22  	dataLen := len(data)
    23  	if dataLen == 0 {
    24  		return []byte{0}
    25  	}
    26  
    27  	// Check and process carry bit.
    28  	i := dataLen - 1
    29  	data[i]++
    30  	for i > 0 {
    31  		if data[i] == 0 {
    32  			i--
    33  			data[i]++
    34  		} else {
    35  			break
    36  		}
    37  	}
    38  
    39  	// Check whether need to add another byte for carry bit,
    40  	// like [255] -> [0, 0]
    41  	if data[i] == 0 {
    42  		data = append([]byte{0}, data...)
    43  	}
    44  
    45  	return data
    46  }
    47  
    48  const (
    49  	defaultScanMaxRetries = 3
    50  )
    51  
    52  type Scan struct {
    53  	client *client
    54  	id     uint64
    55  	table  []byte
    56  	// row key
    57  	StartRow     []byte
    58  	StopRow      []byte
    59  	families     [][]byte
    60  	qualifiers   [][][]byte
    61  	nextStartKey []byte
    62  	numCached    int
    63  	closed       bool
    64  	location     *RegionInfo
    65  	server       *connection
    66  	cache        []*ResultRow
    67  	attrs        map[string][]byte
    68  	MaxVersions  uint32
    69  	TsRangeFrom  uint64
    70  	TsRangeTo    uint64
    71  	lastResult   *ResultRow
    72  	// if region split, set startKey = lastResult.Row, but must skip the first
    73  	skipFirst  bool
    74  	maxRetries int
    75  }
    76  
    77  func NewScan(table []byte, batchSize int, c HBaseClient) *Scan {
    78  	if batchSize <= 0 {
    79  		batchSize = 100
    80  	}
    81  	return &Scan{
    82  		client:       c.(*client),
    83  		table:        table,
    84  		nextStartKey: nil,
    85  		families:     make([][]byte, 0),
    86  		qualifiers:   make([][][]byte, 0),
    87  		numCached:    batchSize,
    88  		closed:       false,
    89  		attrs:        make(map[string][]byte),
    90  		maxRetries:   defaultScanMaxRetries,
    91  	}
    92  }
    93  
    94  func (s *Scan) Close() error {
    95  	if s.closed {
    96  		return nil
    97  	}
    98  
    99  	err := s.closeScan(s.server, s.location, s.id)
   100  	if err != nil {
   101  		return errors.Trace(err)
   102  	}
   103  
   104  	s.closed = true
   105  	return nil
   106  }
   107  
   108  func (s *Scan) AddColumn(family, qual []byte) {
   109  	s.AddFamily(family)
   110  	pos := s.posOfFamily(family)
   111  	s.qualifiers[pos] = append(s.qualifiers[pos], qual)
   112  }
   113  
   114  func (s *Scan) AddStringColumn(family, qual string) {
   115  	s.AddColumn([]byte(family), []byte(qual))
   116  }
   117  
   118  func (s *Scan) AddFamily(family []byte) {
   119  	pos := s.posOfFamily(family)
   120  	if pos == -1 {
   121  		s.families = append(s.families, family)
   122  		s.qualifiers = append(s.qualifiers, make([][]byte, 0))
   123  	}
   124  }
   125  
   126  func (s *Scan) AddStringFamily(family string) {
   127  	s.AddFamily([]byte(family))
   128  }
   129  
   130  func (s *Scan) posOfFamily(family []byte) int {
   131  	for p, v := range s.families {
   132  		if bytes.Equal(family, v) {
   133  			return p
   134  		}
   135  	}
   136  	return -1
   137  }
   138  
   139  func (s *Scan) AddAttr(name string, val []byte) {
   140  	s.attrs[name] = val
   141  }
   142  
   143  func (s *Scan) AddTimeRange(from uint64, to uint64) {
   144  	s.TsRangeFrom = from
   145  	s.TsRangeTo = to
   146  }
   147  
   148  func (s *Scan) Closed() bool {
   149  	return s.closed
   150  }
   151  
   152  func (s *Scan) CreateGetFromScan(row []byte) *Get {
   153  	g := NewGet(row)
   154  	for i, family := range s.families {
   155  		if len(s.qualifiers[i]) > 0 {
   156  			for _, qual := range s.qualifiers[i] {
   157  				g.AddColumn(family, qual)
   158  			}
   159  		} else {
   160  			g.AddFamily(family)
   161  		}
   162  	}
   163  	return g
   164  }
   165  
   166  func (s *Scan) getData(startKey []byte, retries int) ([]*ResultRow, error) {
   167  	server, location, err := s.getServerAndLocation(s.table, startKey)
   168  	if err != nil {
   169  		return nil, errors.Trace(err)
   170  	}
   171  
   172  	req := &proto.ScanRequest{
   173  		Region: &proto.RegionSpecifier{
   174  			Type:  proto.RegionSpecifier_REGION_NAME.Enum(),
   175  			Value: []byte(location.Name),
   176  		},
   177  		NumberOfRows: pb.Uint32(uint32(s.numCached)),
   178  		Scan:         &proto.Scan{},
   179  	}
   180  
   181  	// set attributes
   182  	var attrs []*proto.NameBytesPair
   183  	for k, v := range s.attrs {
   184  		p := &proto.NameBytesPair{
   185  			Name:  pb.String(k),
   186  			Value: v,
   187  		}
   188  		attrs = append(attrs, p)
   189  	}
   190  	if len(attrs) > 0 {
   191  		req.Scan.Attribute = attrs
   192  	}
   193  
   194  	if s.id > 0 {
   195  		req.ScannerId = pb.Uint64(s.id)
   196  	}
   197  	req.Scan.StartRow = startKey
   198  	if s.StopRow != nil {
   199  		req.Scan.StopRow = s.StopRow
   200  	}
   201  	if s.MaxVersions > 0 {
   202  		req.Scan.MaxVersions = &s.MaxVersions
   203  	}
   204  	if s.TsRangeTo > s.TsRangeFrom {
   205  		req.Scan.TimeRange = &proto.TimeRange{
   206  			From: pb.Uint64(s.TsRangeFrom),
   207  			To:   pb.Uint64(s.TsRangeTo),
   208  		}
   209  	}
   210  
   211  	for i, v := range s.families {
   212  		req.Scan.Column = append(req.Scan.Column, &proto.Column{
   213  			Family:    v,
   214  			Qualifier: s.qualifiers[i],
   215  		})
   216  	}
   217  
   218  	cl := newCall(req)
   219  	err = server.call(cl)
   220  	if err != nil {
   221  		return nil, errors.Trace(err)
   222  	}
   223  
   224  	msg := <-cl.responseCh
   225  	rs, err := s.processResponse(msg)
   226  	if err != nil && (isNotInRegionError(err) || isUnknownScannerError(err)) {
   227  		if retries <= s.maxRetries {
   228  			// clean this table region cache and try again
   229  			s.client.CleanRegionCache(s.table)
   230  			// create new scanner and set startRow to lastResult
   231  			s.id = 0
   232  			if s.lastResult != nil {
   233  				startKey = s.lastResult.Row
   234  				s.skipFirst = true
   235  			}
   236  			s.server = nil
   237  			s.location = nil
   238  			log.Warnf("Retryint get data for %d time(s)", retries+1)
   239  			retrySleep(retries + 1)
   240  			return s.getData(startKey, retries+1)
   241  		}
   242  	}
   243  	return rs, nil
   244  }
   245  
   246  func (s *Scan) processResponse(response pb.Message) ([]*ResultRow, error) {
   247  	var res *proto.ScanResponse
   248  	switch r := response.(type) {
   249  	case *proto.ScanResponse:
   250  		res = r
   251  	case *exception:
   252  		return nil, errors.New(r.msg)
   253  	default:
   254  		return nil, errors.Errorf("Invalid response seen [response: %#v]", response)
   255  	}
   256  
   257  	// Check whether response is nil.
   258  	if res == nil {
   259  		return nil, errors.Errorf("Empty response: [table=%s] [StartRow=%q] [StopRow=%q] ", s.table, s.StartRow, s.StopRow)
   260  	}
   261  
   262  	nextRegion := true
   263  	s.nextStartKey = nil
   264  	s.id = res.GetScannerId()
   265  
   266  	results := res.GetResults()
   267  	n := len(results)
   268  
   269  	if (n == s.numCached) ||
   270  		len(s.location.EndKey) == 0 ||
   271  		(s.StopRow != nil && bytes.Compare(s.location.EndKey, s.StopRow) > 0 && n < s.numCached) ||
   272  		res.GetMoreResultsInRegion() {
   273  		nextRegion = false
   274  	}
   275  
   276  	var err error
   277  	if nextRegion {
   278  		s.nextStartKey = s.location.EndKey
   279  		err = s.closeScan(s.server, s.location, s.id)
   280  		if err != nil {
   281  			return nil, errors.Trace(err)
   282  		}
   283  		s.server = nil
   284  		s.location = nil
   285  		s.id = 0
   286  	}
   287  
   288  	if n == 0 && !nextRegion {
   289  		err = s.Close()
   290  		if err != nil {
   291  			return nil, errors.Trace(err)
   292  		}
   293  	}
   294  
   295  	if s.skipFirst {
   296  		results = results[1:]
   297  		s.skipFirst = false
   298  		n = len(results)
   299  	}
   300  
   301  	tbr := make([]*ResultRow, n)
   302  	for i, v := range results {
   303  		if v != nil {
   304  			tbr[i] = NewResultRow(v)
   305  		}
   306  	}
   307  
   308  	return tbr, nil
   309  }
   310  
   311  func (s *Scan) nextBatch() int {
   312  	startKey := s.nextStartKey
   313  	if startKey == nil {
   314  		startKey = s.StartRow
   315  	}
   316  
   317  	// Notice: ignore error here.
   318  	// TODO: add error check, now only add a log.
   319  	rs, err := s.getData(startKey, 0)
   320  	if err != nil {
   321  		log.Errorf("scan next batch failed - [startKey=%q], %v", startKey, errors.ErrorStack(err))
   322  	}
   323  
   324  	// Current region get 0 data, try switch to next region.
   325  	if len(rs) == 0 && len(s.nextStartKey) > 0 {
   326  		// TODO: add error check, now only add a log.
   327  		rs, err = s.getData(s.nextStartKey, 0)
   328  		if err != nil {
   329  			log.Errorf("scan next batch failed - [startKey=%q], %v", s.nextStartKey, errors.ErrorStack(err))
   330  		}
   331  	}
   332  
   333  	s.cache = rs
   334  	return len(s.cache)
   335  }
   336  
   337  func (s *Scan) Next() *ResultRow {
   338  	if s.closed {
   339  		return nil
   340  	}
   341  	var ret *ResultRow
   342  	if len(s.cache) == 0 {
   343  		n := s.nextBatch()
   344  		// no data returned
   345  		if n == 0 {
   346  			return nil
   347  		}
   348  	}
   349  
   350  	ret = s.cache[0]
   351  	s.lastResult = ret
   352  	s.cache = s.cache[1:]
   353  	return ret
   354  }
   355  
   356  func (s *Scan) closeScan(server *connection, location *RegionInfo, id uint64) error {
   357  	if server == nil || location == nil {
   358  		return nil
   359  	}
   360  
   361  	req := &proto.ScanRequest{
   362  		Region: &proto.RegionSpecifier{
   363  			Type:  proto.RegionSpecifier_REGION_NAME.Enum(),
   364  			Value: []byte(location.Name),
   365  		},
   366  		ScannerId:    pb.Uint64(id),
   367  		CloseScanner: pb.Bool(true),
   368  	}
   369  
   370  	cl := newCall(req)
   371  	err := server.call(cl)
   372  	if err != nil {
   373  		return errors.Trace(err)
   374  	}
   375  
   376  	// TODO: add exception check.
   377  	<-cl.responseCh
   378  	return nil
   379  }
   380  
   381  func (s *Scan) getServerAndLocation(table, startRow []byte) (*connection, *RegionInfo, error) {
   382  	if s.server != nil && s.location != nil {
   383  		return s.server, s.location, nil
   384  	}
   385  
   386  	var err error
   387  	s.location, err = s.client.LocateRegion(table, startRow, true)
   388  	if err != nil {
   389  		return nil, nil, errors.Trace(err)
   390  	}
   391  
   392  	s.server, err = s.client.getClientConn(s.location.Server)
   393  	if err != nil {
   394  		return nil, nil, errors.Trace(err)
   395  	}
   396  	return s.server, s.location, nil
   397  }