github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/pingcap/go-hbase/scan.go (about) 1 package hbase 2 3 import ( 4 "bytes" 5 6 pb "github.com/insionng/yougam/libraries/golang/protobuf/proto" 7 "github.com/insionng/yougam/libraries/juju/errors" 8 "github.com/insionng/yougam/libraries/ngaut/log" 9 "github.com/insionng/yougam/libraries/pingcap/go-hbase/proto" 10 ) 11 12 // nextKey returns the next key in byte-order. 13 // for example: 14 // nil -> [0] 15 // [] -> [0] 16 // [0] -> [1] 17 // [1, 2, 3] -> [1, 2, 4] 18 // [1, 255] -> [2, 0] 19 // [255] -> [0, 0] 20 func nextKey(data []byte) []byte { 21 // nil or []byte{} 22 dataLen := len(data) 23 if dataLen == 0 { 24 return []byte{0} 25 } 26 27 // Check and process carry bit. 28 i := dataLen - 1 29 data[i]++ 30 for i > 0 { 31 if data[i] == 0 { 32 i-- 33 data[i]++ 34 } else { 35 break 36 } 37 } 38 39 // Check whether need to add another byte for carry bit, 40 // like [255] -> [0, 0] 41 if data[i] == 0 { 42 data = append([]byte{0}, data...) 43 } 44 45 return data 46 } 47 48 const ( 49 defaultScanMaxRetries = 3 50 ) 51 52 type Scan struct { 53 client *client 54 id uint64 55 table []byte 56 // row key 57 StartRow []byte 58 StopRow []byte 59 families [][]byte 60 qualifiers [][][]byte 61 nextStartKey []byte 62 numCached int 63 closed bool 64 location *RegionInfo 65 server *connection 66 cache []*ResultRow 67 attrs map[string][]byte 68 MaxVersions uint32 69 TsRangeFrom uint64 70 TsRangeTo uint64 71 lastResult *ResultRow 72 // if region split, set startKey = lastResult.Row, but must skip the first 73 skipFirst bool 74 maxRetries int 75 } 76 77 func NewScan(table []byte, batchSize int, c HBaseClient) *Scan { 78 if batchSize <= 0 { 79 batchSize = 100 80 } 81 return &Scan{ 82 client: c.(*client), 83 table: table, 84 nextStartKey: nil, 85 families: make([][]byte, 0), 86 qualifiers: make([][][]byte, 0), 87 numCached: batchSize, 88 closed: false, 89 attrs: make(map[string][]byte), 90 maxRetries: defaultScanMaxRetries, 91 } 92 } 93 94 func (s *Scan) Close() error { 95 if s.closed { 96 return nil 97 } 98 99 err := s.closeScan(s.server, s.location, s.id) 100 if err != nil { 101 return errors.Trace(err) 102 } 103 104 s.closed = true 105 return nil 106 } 107 108 func (s *Scan) AddColumn(family, qual []byte) { 109 s.AddFamily(family) 110 pos := s.posOfFamily(family) 111 s.qualifiers[pos] = append(s.qualifiers[pos], qual) 112 } 113 114 func (s *Scan) AddStringColumn(family, qual string) { 115 s.AddColumn([]byte(family), []byte(qual)) 116 } 117 118 func (s *Scan) AddFamily(family []byte) { 119 pos := s.posOfFamily(family) 120 if pos == -1 { 121 s.families = append(s.families, family) 122 s.qualifiers = append(s.qualifiers, make([][]byte, 0)) 123 } 124 } 125 126 func (s *Scan) AddStringFamily(family string) { 127 s.AddFamily([]byte(family)) 128 } 129 130 func (s *Scan) posOfFamily(family []byte) int { 131 for p, v := range s.families { 132 if bytes.Equal(family, v) { 133 return p 134 } 135 } 136 return -1 137 } 138 139 func (s *Scan) AddAttr(name string, val []byte) { 140 s.attrs[name] = val 141 } 142 143 func (s *Scan) AddTimeRange(from uint64, to uint64) { 144 s.TsRangeFrom = from 145 s.TsRangeTo = to 146 } 147 148 func (s *Scan) Closed() bool { 149 return s.closed 150 } 151 152 func (s *Scan) CreateGetFromScan(row []byte) *Get { 153 g := NewGet(row) 154 for i, family := range s.families { 155 if len(s.qualifiers[i]) > 0 { 156 for _, qual := range s.qualifiers[i] { 157 g.AddColumn(family, qual) 158 } 159 } else { 160 g.AddFamily(family) 161 } 162 } 163 return g 164 } 165 166 func (s *Scan) getData(startKey []byte, retries int) ([]*ResultRow, error) { 167 server, location, err := s.getServerAndLocation(s.table, startKey) 168 if err != nil { 169 return nil, errors.Trace(err) 170 } 171 172 req := &proto.ScanRequest{ 173 Region: &proto.RegionSpecifier{ 174 Type: proto.RegionSpecifier_REGION_NAME.Enum(), 175 Value: []byte(location.Name), 176 }, 177 NumberOfRows: pb.Uint32(uint32(s.numCached)), 178 Scan: &proto.Scan{}, 179 } 180 181 // set attributes 182 var attrs []*proto.NameBytesPair 183 for k, v := range s.attrs { 184 p := &proto.NameBytesPair{ 185 Name: pb.String(k), 186 Value: v, 187 } 188 attrs = append(attrs, p) 189 } 190 if len(attrs) > 0 { 191 req.Scan.Attribute = attrs 192 } 193 194 if s.id > 0 { 195 req.ScannerId = pb.Uint64(s.id) 196 } 197 req.Scan.StartRow = startKey 198 if s.StopRow != nil { 199 req.Scan.StopRow = s.StopRow 200 } 201 if s.MaxVersions > 0 { 202 req.Scan.MaxVersions = &s.MaxVersions 203 } 204 if s.TsRangeTo > s.TsRangeFrom { 205 req.Scan.TimeRange = &proto.TimeRange{ 206 From: pb.Uint64(s.TsRangeFrom), 207 To: pb.Uint64(s.TsRangeTo), 208 } 209 } 210 211 for i, v := range s.families { 212 req.Scan.Column = append(req.Scan.Column, &proto.Column{ 213 Family: v, 214 Qualifier: s.qualifiers[i], 215 }) 216 } 217 218 cl := newCall(req) 219 err = server.call(cl) 220 if err != nil { 221 return nil, errors.Trace(err) 222 } 223 224 msg := <-cl.responseCh 225 rs, err := s.processResponse(msg) 226 if err != nil && (isNotInRegionError(err) || isUnknownScannerError(err)) { 227 if retries <= s.maxRetries { 228 // clean this table region cache and try again 229 s.client.CleanRegionCache(s.table) 230 // create new scanner and set startRow to lastResult 231 s.id = 0 232 if s.lastResult != nil { 233 startKey = s.lastResult.Row 234 s.skipFirst = true 235 } 236 s.server = nil 237 s.location = nil 238 log.Warnf("Retryint get data for %d time(s)", retries+1) 239 retrySleep(retries + 1) 240 return s.getData(startKey, retries+1) 241 } 242 } 243 return rs, nil 244 } 245 246 func (s *Scan) processResponse(response pb.Message) ([]*ResultRow, error) { 247 var res *proto.ScanResponse 248 switch r := response.(type) { 249 case *proto.ScanResponse: 250 res = r 251 case *exception: 252 return nil, errors.New(r.msg) 253 default: 254 return nil, errors.Errorf("Invalid response seen [response: %#v]", response) 255 } 256 257 // Check whether response is nil. 258 if res == nil { 259 return nil, errors.Errorf("Empty response: [table=%s] [StartRow=%q] [StopRow=%q] ", s.table, s.StartRow, s.StopRow) 260 } 261 262 nextRegion := true 263 s.nextStartKey = nil 264 s.id = res.GetScannerId() 265 266 results := res.GetResults() 267 n := len(results) 268 269 if (n == s.numCached) || 270 len(s.location.EndKey) == 0 || 271 (s.StopRow != nil && bytes.Compare(s.location.EndKey, s.StopRow) > 0 && n < s.numCached) || 272 res.GetMoreResultsInRegion() { 273 nextRegion = false 274 } 275 276 var err error 277 if nextRegion { 278 s.nextStartKey = s.location.EndKey 279 err = s.closeScan(s.server, s.location, s.id) 280 if err != nil { 281 return nil, errors.Trace(err) 282 } 283 s.server = nil 284 s.location = nil 285 s.id = 0 286 } 287 288 if n == 0 && !nextRegion { 289 err = s.Close() 290 if err != nil { 291 return nil, errors.Trace(err) 292 } 293 } 294 295 if s.skipFirst { 296 results = results[1:] 297 s.skipFirst = false 298 n = len(results) 299 } 300 301 tbr := make([]*ResultRow, n) 302 for i, v := range results { 303 if v != nil { 304 tbr[i] = NewResultRow(v) 305 } 306 } 307 308 return tbr, nil 309 } 310 311 func (s *Scan) nextBatch() int { 312 startKey := s.nextStartKey 313 if startKey == nil { 314 startKey = s.StartRow 315 } 316 317 // Notice: ignore error here. 318 // TODO: add error check, now only add a log. 319 rs, err := s.getData(startKey, 0) 320 if err != nil { 321 log.Errorf("scan next batch failed - [startKey=%q], %v", startKey, errors.ErrorStack(err)) 322 } 323 324 // Current region get 0 data, try switch to next region. 325 if len(rs) == 0 && len(s.nextStartKey) > 0 { 326 // TODO: add error check, now only add a log. 327 rs, err = s.getData(s.nextStartKey, 0) 328 if err != nil { 329 log.Errorf("scan next batch failed - [startKey=%q], %v", s.nextStartKey, errors.ErrorStack(err)) 330 } 331 } 332 333 s.cache = rs 334 return len(s.cache) 335 } 336 337 func (s *Scan) Next() *ResultRow { 338 if s.closed { 339 return nil 340 } 341 var ret *ResultRow 342 if len(s.cache) == 0 { 343 n := s.nextBatch() 344 // no data returned 345 if n == 0 { 346 return nil 347 } 348 } 349 350 ret = s.cache[0] 351 s.lastResult = ret 352 s.cache = s.cache[1:] 353 return ret 354 } 355 356 func (s *Scan) closeScan(server *connection, location *RegionInfo, id uint64) error { 357 if server == nil || location == nil { 358 return nil 359 } 360 361 req := &proto.ScanRequest{ 362 Region: &proto.RegionSpecifier{ 363 Type: proto.RegionSpecifier_REGION_NAME.Enum(), 364 Value: []byte(location.Name), 365 }, 366 ScannerId: pb.Uint64(id), 367 CloseScanner: pb.Bool(true), 368 } 369 370 cl := newCall(req) 371 err := server.call(cl) 372 if err != nil { 373 return errors.Trace(err) 374 } 375 376 // TODO: add exception check. 377 <-cl.responseCh 378 return nil 379 } 380 381 func (s *Scan) getServerAndLocation(table, startRow []byte) (*connection, *RegionInfo, error) { 382 if s.server != nil && s.location != nil { 383 return s.server, s.location, nil 384 } 385 386 var err error 387 s.location, err = s.client.LocateRegion(table, startRow, true) 388 if err != nil { 389 return nil, nil, errors.Trace(err) 390 } 391 392 s.server, err = s.client.getClientConn(s.location.Server) 393 if err != nil { 394 return nil, nil, errors.Trace(err) 395 } 396 return s.server, s.location, nil 397 }