github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/region/info.go (about) 1 // Copyright (C) 2015 The GoHBase Authors. All rights reserved. 2 // This file is part of GoHBase. 3 // Use of this source code is governed by the Apache License 2.0 4 // that can be found in the COPYING file. 5 6 // Package region contains data structures to represent HBase regions. 7 package region 8 9 import ( 10 "bytes" 11 "context" 12 "encoding/binary" 13 "encoding/json" 14 "fmt" 15 "strconv" 16 "sync" 17 18 "github.com/tsuna/gohbase/hrpc" 19 "github.com/tsuna/gohbase/pb" 20 "google.golang.org/protobuf/proto" 21 ) 22 23 var defaultNamespace = []byte("default") 24 25 // OfflineRegionError is returned if region is offline 26 type OfflineRegionError struct { 27 n string 28 } 29 30 func (e OfflineRegionError) Error() string { 31 return fmt.Sprintf("region %s is offline", e.n) 32 } 33 34 // info describes a region. 35 type info struct { 36 id uint64 // A timestamp when the region is created 37 namespace []byte 38 table []byte 39 name []byte 40 startKey []byte 41 stopKey []byte 42 specifier *pb.RegionSpecifier 43 ctx context.Context 44 cancel context.CancelFunc 45 46 // The attributes before this mutex are supposed to be immutable. 47 // The attributes defined below can be changed and accesses must 48 // be protected with this mutex. 49 m sync.RWMutex 50 51 client hrpc.RegionClient 52 53 // Once a region becomes unreachable, this channel is created, and any 54 // functions that wish to be notified when the region becomes available 55 // again can read from this channel, which will be closed when the region 56 // is available again 57 available chan struct{} 58 } 59 60 // NewInfo creates a new region info 61 func NewInfo(id uint64, namespace, table, name, startKey, stopKey []byte) hrpc.RegionInfo { 62 ctx, cancel := context.WithCancel(context.Background()) 63 return &info{ 64 id: id, 65 ctx: ctx, 66 cancel: cancel, 67 namespace: namespace, 68 table: table, 69 name: name, 70 startKey: startKey, 71 stopKey: stopKey, 72 specifier: &pb.RegionSpecifier{ 73 Type: pb.RegionSpecifier_REGION_NAME.Enum(), 74 Value: name, 75 }, 76 } 77 } 78 79 // infoFromCell parses a KeyValue from the meta table and creates the 80 // corresponding Info object. 81 func infoFromCell(cell *hrpc.Cell) (hrpc.RegionInfo, error) { 82 value := cell.Value 83 if len(value) == 0 { 84 return nil, fmt.Errorf("empty value in %q", cell) 85 } else if value[0] != 'P' { 86 return nil, fmt.Errorf("unsupported region info version %d in %q", value[0], cell) 87 } 88 const pbufMagic = 1346524486 // 4 bytes: "PBUF" 89 magic := binary.BigEndian.Uint32(value[:4]) 90 if magic != pbufMagic { 91 return nil, fmt.Errorf("invalid magic number in %q", cell) 92 } 93 var regInfo pb.RegionInfo 94 err := proto.Unmarshal(value[4:], ®Info) 95 if err != nil { 96 return nil, fmt.Errorf("failed to decode %q: %s", cell, err) 97 } 98 if regInfo.GetOffline() { 99 return nil, OfflineRegionError{n: string(cell.Row)} 100 } 101 var namespace []byte 102 if !bytes.Equal(regInfo.TableName.Namespace, defaultNamespace) { 103 // if default namespace, pretend there's no namespace 104 namespace = regInfo.TableName.Namespace 105 } 106 107 return NewInfo( 108 regInfo.GetRegionId(), 109 namespace, 110 regInfo.TableName.Qualifier, 111 cell.Row, 112 regInfo.StartKey, 113 regInfo.EndKey, 114 ), nil 115 } 116 117 // ParseRegionInfo parses the contents of a row from the meta table. 118 // It's guaranteed to return a region info and a host:port OR return an error. 119 func ParseRegionInfo(metaRow *hrpc.Result) (hrpc.RegionInfo, string, error) { 120 var reg hrpc.RegionInfo 121 var addr string 122 123 for _, cell := range metaRow.Cells { 124 switch string(cell.Qualifier) { 125 case "regioninfo": 126 var err error 127 reg, err = infoFromCell(cell) 128 if err != nil { 129 return nil, "", err 130 } 131 case "server": 132 value := cell.Value 133 if len(value) == 0 { 134 continue // Empty during NSRE. 135 } 136 addr = string(value) 137 default: 138 // Other kinds of qualifiers: ignore them. 139 // TODO: If this is the parent of a split region, there are two other 140 // KVs that could be useful: `info:splitA' and `info:splitB'. 141 // Need to investigate whether we can use those as a hint to update our 142 // regions_cache with the daughter regions of the split. 143 } 144 } 145 146 if reg == nil { 147 // There was no region in the row in meta, this is really not expected. 148 return nil, "", fmt.Errorf("meta seems to be broken, there was no region in %v", metaRow) 149 } 150 if len(addr) == 0 { 151 return nil, "", fmt.Errorf("meta doesn't have a server location in %v", metaRow) 152 } 153 return reg, addr, nil 154 } 155 156 // IsUnavailable returns true if this region has been marked as unavailable. 157 func (i *info) IsUnavailable() bool { 158 i.m.RLock() 159 res := i.available != nil 160 i.m.RUnlock() 161 return res 162 } 163 164 // AvailabilityChan returns a channel that can be used to wait on for 165 // notification that a connection to this region has been reestablished. 166 // If this region is not marked as unavailable, nil will be returned. 167 func (i *info) AvailabilityChan() <-chan struct{} { 168 i.m.RLock() 169 ch := i.available 170 i.m.RUnlock() 171 return ch 172 } 173 174 // MarkUnavailable will mark this region as unavailable, by creating the struct 175 // returned by AvailabilityChan. If this region was marked as available 176 // before this, true will be returned. 177 func (i *info) MarkUnavailable() bool { 178 created := false 179 i.m.Lock() 180 if i.available == nil { 181 i.available = make(chan struct{}) 182 created = true 183 } 184 i.m.Unlock() 185 return created 186 } 187 188 // MarkAvailable will mark this region as available again, by closing the struct 189 // returned by AvailabilityChan 190 func (i *info) MarkAvailable() { 191 i.m.Lock() 192 ch := i.available 193 i.available = nil 194 close(ch) 195 i.m.Unlock() 196 } 197 198 // MarkDead will mark this region as not useful anymore to notify everyone 199 // who's trying to use it that there's no point 200 func (i *info) MarkDead() { 201 i.cancel() 202 } 203 204 // Context to check if the region is dead 205 func (i *info) Context() context.Context { 206 return i.ctx 207 } 208 209 func (i *info) String() string { 210 return fmt.Sprintf( 211 "RegionInfo{Name: %q, ID: %d, Namespace: %q, Table: %q, StartKey: %q, StopKey: %q}", 212 i.name, i.id, i.namespace, i.table, i.startKey, i.stopKey) 213 } 214 215 // ID returns region's age 216 func (i *info) ID() uint64 { 217 return i.id 218 } 219 220 // Name returns region name 221 func (i *info) Name() []byte { 222 return i.name 223 } 224 225 // RegionSpecifier returns the RegionSpecifier proto for this region 226 func (i *info) RegionSpecifier() *pb.RegionSpecifier { 227 return i.specifier 228 } 229 230 // StopKey return region stop key 231 func (i *info) StopKey() []byte { 232 return i.stopKey 233 } 234 235 // StartKey return region start key 236 func (i *info) StartKey() []byte { 237 return i.startKey 238 } 239 240 // Namespace returns region table 241 func (i *info) Namespace() []byte { 242 return i.namespace 243 } 244 245 // Table returns region table 246 func (i *info) Table() []byte { 247 return i.table 248 } 249 250 // Client returns region client 251 func (i *info) Client() hrpc.RegionClient { 252 i.m.RLock() 253 c := i.client 254 i.m.RUnlock() 255 return c 256 } 257 258 // SetClient sets region client 259 func (i *info) SetClient(c hrpc.RegionClient) { 260 i.m.Lock() 261 i.client = c 262 i.m.Unlock() 263 } 264 265 // Compare compares two region names. 266 // We can't just use bytes.Compare() because it doesn't play nicely 267 // with the way META keys are built as the first region has an empty start 268 // key. Let's assume we know about those 2 regions in our cache: 269 // 270 // .META.,,1 271 // tableA,,1273018455182 272 // 273 // We're given an RPC to execute on "tableA", row "\x00" (1 byte row key 274 // containing a 0). If we use Compare() to sort the entries in the cache, 275 // when we search for the entry right before "tableA,\000,:" 276 // we'll erroneously find ".META.,,1" instead of the entry for first 277 // region of "tableA". 278 // 279 // Since this scheme breaks natural ordering, we need this comparator to 280 // implement a special version of comparison to handle this scenario. 281 func Compare(a, b []byte) int { 282 var length int 283 if la, lb := len(a), len(b); la < lb { 284 length = la 285 } else { 286 length = lb 287 } 288 // Reminder: region names are of the form: 289 // table_name,start_key,timestamp[.MD5.] 290 // First compare the table names. 291 var i int 292 for i = 0; i < length; i++ { 293 ai := a[i] // Saves one pointer deference every iteration. 294 bi := b[i] // Saves one pointer deference every iteration. 295 if ai != bi { // The name of the tables differ. 296 if ai == ',' { 297 return -1001 // `a' has a smaller table name. a < b 298 } else if bi == ',' { 299 return 1001 // `b' has a smaller table name. a > b 300 } 301 return int(ai) - int(bi) 302 } 303 if ai == ',' { // Remember: at this point ai == bi. 304 break // We're done comparing the table names. They're equal. 305 } 306 } 307 308 // Now find the last comma in both `a' and `b'. We need to start the 309 // search from the end as the row key could have an arbitrary number of 310 // commas and we don't know its length. 311 aComma := findCommaFromEnd(a, i) 312 bComma := findCommaFromEnd(b, i) 313 // If either `a' or `b' is followed immediately by another comma, then 314 // they are the first region (it's the empty start key). 315 i++ // No need to check against `length', there MUST be more bytes. 316 317 // Compare keys. 318 var firstComma int 319 if aComma < bComma { 320 firstComma = aComma 321 } else { 322 firstComma = bComma 323 } 324 for ; i < firstComma; i++ { 325 ai := a[i] 326 bi := b[i] 327 if ai != bi { // The keys differ. 328 return int(ai) - int(bi) 329 } 330 } 331 if aComma < bComma { 332 return -1002 // `a' has a shorter key. a < b 333 } else if bComma < aComma { 334 return 1002 // `b' has a shorter key. a > b 335 } 336 337 // Keys have the same length and have compared identical. Compare the 338 // rest, which essentially means: use start code as a tie breaker. 339 for ; /*nothing*/ i < length; i++ { 340 ai := a[i] 341 bi := b[i] 342 if ai != bi { // The start codes differ. 343 return int(ai) - int(bi) 344 } 345 } 346 347 return len(a) - len(b) 348 } 349 350 // Because there is no `LastIndexByte()' in the standard `bytes' package. 351 func findCommaFromEnd(b []byte, offset int) int { 352 for i := len(b) - 1; i > offset; i-- { 353 if b[i] == ',' { 354 return i 355 } 356 } 357 panic(fmt.Errorf("no comma found in %q after offset %d", b, offset)) 358 } 359 360 func (i *info) MarshalJSON() ([]byte, error) { 361 362 var ctxError, client string 363 364 if i.ctx != nil { 365 ctxError = fmt.Sprint(i.ctx.Err()) 366 } 367 if i.Client() != nil { 368 client = i.Client().String() 369 } 370 371 state := struct { 372 Id uint64 373 Namespace string 374 Table string 375 Name string 376 StartKey string 377 StopKey string 378 ContextInstance string 379 Err string 380 ClientPtr string 381 Client string 382 Available bool 383 }{ 384 Id: i.id, 385 Namespace: strconv.QuoteToASCII(string(i.namespace)), 386 Table: strconv.QuoteToASCII(string(i.table)), 387 Name: strconv.QuoteToASCII(string(i.name)), 388 StartKey: strconv.QuoteToASCII(string(i.startKey)), 389 StopKey: strconv.QuoteToASCII(string(i.stopKey)), 390 ContextInstance: fmt.Sprintf("%p", (i.ctx)), 391 Err: ctxError, 392 ClientPtr: fmt.Sprintf("%p", (i.Client())), 393 Client: client, 394 Available: !i.IsUnavailable(), 395 } 396 jsonVal, err := json.Marshal(state) 397 398 return jsonVal, err 399 }