github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/region/info.go (about)

     1  // Copyright (C) 2015  The GoHBase Authors.  All rights reserved.
     2  // This file is part of GoHBase.
     3  // Use of this source code is governed by the Apache License 2.0
     4  // that can be found in the COPYING file.
     5  
     6  // Package region contains data structures to represent HBase regions.
     7  package region
     8  
     9  import (
    10  	"bytes"
    11  	"context"
    12  	"encoding/binary"
    13  	"encoding/json"
    14  	"fmt"
    15  	"strconv"
    16  	"sync"
    17  
    18  	"github.com/tsuna/gohbase/hrpc"
    19  	"github.com/tsuna/gohbase/pb"
    20  	"google.golang.org/protobuf/proto"
    21  )
    22  
// defaultNamespace is the namespace name that infoFromCell treats as
// "no namespace": regions whose table lives in it get a nil namespace.
var defaultNamespace = []byte("default")
    24  
    25  // OfflineRegionError is returned if region is offline
    26  type OfflineRegionError struct {
    27  	n string
    28  }
    29  
    30  func (e OfflineRegionError) Error() string {
    31  	return fmt.Sprintf("region %s is offline", e.n)
    32  }
    33  
// info describes a region. Values are created by NewInfo; the struct holds
// a mutex and is used through a pointer.
type info struct {
	id        uint64 // A timestamp when the region is created
	namespace []byte // nil when infoFromCell saw the default namespace
	table     []byte
	name      []byte
	startKey  []byte
	stopKey   []byte
	specifier *pb.RegionSpecifier // built once in NewInfo from name
	ctx       context.Context     // cancelled by MarkDead
	cancel    context.CancelFunc  // cancels ctx

	// The attributes before this mutex are supposed to be immutable.
	// The attributes defined below can be changed and accesses must
	// be protected with this mutex.
	m sync.RWMutex

	// client is read with Client and replaced with SetClient.
	client hrpc.RegionClient

	// Once a region becomes unreachable, this channel is created, and any
	// functions that wish to be notified when the region becomes available
	// again can read from this channel, which will be closed when the region
	// is available again
	available chan struct{}
}
    59  
    60  // NewInfo creates a new region info
    61  func NewInfo(id uint64, namespace, table, name, startKey, stopKey []byte) hrpc.RegionInfo {
    62  	ctx, cancel := context.WithCancel(context.Background())
    63  	return &info{
    64  		id:        id,
    65  		ctx:       ctx,
    66  		cancel:    cancel,
    67  		namespace: namespace,
    68  		table:     table,
    69  		name:      name,
    70  		startKey:  startKey,
    71  		stopKey:   stopKey,
    72  		specifier: &pb.RegionSpecifier{
    73  			Type:  pb.RegionSpecifier_REGION_NAME.Enum(),
    74  			Value: name,
    75  		},
    76  	}
    77  }
    78  
    79  // infoFromCell parses a KeyValue from the meta table and creates the
    80  // corresponding Info object.
    81  func infoFromCell(cell *hrpc.Cell) (hrpc.RegionInfo, error) {
    82  	value := cell.Value
    83  	if len(value) == 0 {
    84  		return nil, fmt.Errorf("empty value in %q", cell)
    85  	} else if value[0] != 'P' {
    86  		return nil, fmt.Errorf("unsupported region info version %d in %q", value[0], cell)
    87  	}
    88  	const pbufMagic = 1346524486 // 4 bytes: "PBUF"
    89  	magic := binary.BigEndian.Uint32(value[:4])
    90  	if magic != pbufMagic {
    91  		return nil, fmt.Errorf("invalid magic number in %q", cell)
    92  	}
    93  	var regInfo pb.RegionInfo
    94  	err := proto.Unmarshal(value[4:], &regInfo)
    95  	if err != nil {
    96  		return nil, fmt.Errorf("failed to decode %q: %s", cell, err)
    97  	}
    98  	if regInfo.GetOffline() {
    99  		return nil, OfflineRegionError{n: string(cell.Row)}
   100  	}
   101  	var namespace []byte
   102  	if !bytes.Equal(regInfo.TableName.Namespace, defaultNamespace) {
   103  		// if default namespace, pretend there's no namespace
   104  		namespace = regInfo.TableName.Namespace
   105  	}
   106  
   107  	return NewInfo(
   108  		regInfo.GetRegionId(),
   109  		namespace,
   110  		regInfo.TableName.Qualifier,
   111  		cell.Row,
   112  		regInfo.StartKey,
   113  		regInfo.EndKey,
   114  	), nil
   115  }
   116  
   117  // ParseRegionInfo parses the contents of a row from the meta table.
   118  // It's guaranteed to return a region info and a host:port OR return an error.
   119  func ParseRegionInfo(metaRow *hrpc.Result) (hrpc.RegionInfo, string, error) {
   120  	var reg hrpc.RegionInfo
   121  	var addr string
   122  
   123  	for _, cell := range metaRow.Cells {
   124  		switch string(cell.Qualifier) {
   125  		case "regioninfo":
   126  			var err error
   127  			reg, err = infoFromCell(cell)
   128  			if err != nil {
   129  				return nil, "", err
   130  			}
   131  		case "server":
   132  			value := cell.Value
   133  			if len(value) == 0 {
   134  				continue // Empty during NSRE.
   135  			}
   136  			addr = string(value)
   137  		default:
   138  			// Other kinds of qualifiers: ignore them.
   139  			// TODO: If this is the parent of a split region, there are two other
   140  			// KVs that could be useful: `info:splitA' and `info:splitB'.
   141  			// Need to investigate whether we can use those as a hint to update our
   142  			// regions_cache with the daughter regions of the split.
   143  		}
   144  	}
   145  
   146  	if reg == nil {
   147  		// There was no region in the row in meta, this is really not expected.
   148  		return nil, "", fmt.Errorf("meta seems to be broken, there was no region in %v", metaRow)
   149  	}
   150  	if len(addr) == 0 {
   151  		return nil, "", fmt.Errorf("meta doesn't have a server location in %v", metaRow)
   152  	}
   153  	return reg, addr, nil
   154  }
   155  
   156  // IsUnavailable returns true if this region has been marked as unavailable.
   157  func (i *info) IsUnavailable() bool {
   158  	i.m.RLock()
   159  	res := i.available != nil
   160  	i.m.RUnlock()
   161  	return res
   162  }
   163  
   164  // AvailabilityChan returns a channel that can be used to wait on for
   165  // notification that a connection to this region has been reestablished.
   166  // If this region is not marked as unavailable, nil will be returned.
   167  func (i *info) AvailabilityChan() <-chan struct{} {
   168  	i.m.RLock()
   169  	ch := i.available
   170  	i.m.RUnlock()
   171  	return ch
   172  }
   173  
   174  // MarkUnavailable will mark this region as unavailable, by creating the struct
   175  // returned by AvailabilityChan. If this region was marked as available
   176  // before this, true will be returned.
   177  func (i *info) MarkUnavailable() bool {
   178  	created := false
   179  	i.m.Lock()
   180  	if i.available == nil {
   181  		i.available = make(chan struct{})
   182  		created = true
   183  	}
   184  	i.m.Unlock()
   185  	return created
   186  }
   187  
   188  // MarkAvailable will mark this region as available again, by closing the struct
   189  // returned by AvailabilityChan
   190  func (i *info) MarkAvailable() {
   191  	i.m.Lock()
   192  	ch := i.available
   193  	i.available = nil
   194  	close(ch)
   195  	i.m.Unlock()
   196  }
   197  
// MarkDead will mark this region as not useful anymore to notify everyone
// who's trying to use it that there's no point. It does so by cancelling
// the context returned by Context.
func (i *info) MarkDead() {
	i.cancel()
}
   203  
// Context returns the region's context, which can be used to check whether
// the region is dead: MarkDead cancels it.
func (i *info) Context() context.Context {
	return i.ctx
}
   208  
   209  func (i *info) String() string {
   210  	return fmt.Sprintf(
   211  		"RegionInfo{Name: %q, ID: %d, Namespace: %q, Table: %q, StartKey: %q, StopKey: %q}",
   212  		i.name, i.id, i.namespace, i.table, i.startKey, i.stopKey)
   213  }
   214  
// ID returns the region's ID, which is a timestamp of when the region
// was created.
func (i *info) ID() uint64 {
	return i.id
}
   219  
// Name returns the region name. The returned slice is the one held by the
// region, not a copy.
func (i *info) Name() []byte {
	return i.name
}
   224  
// RegionSpecifier returns the RegionSpecifier proto for this region.
// The same pointer is returned on every call.
func (i *info) RegionSpecifier() *pb.RegionSpecifier {
	return i.specifier
}
   229  
// StopKey returns the region's stop key. The returned slice is the one held
// by the region, not a copy.
func (i *info) StopKey() []byte {
	return i.stopKey
}
   234  
// StartKey returns the region's start key. The returned slice is the one
// held by the region, not a copy.
func (i *info) StartKey() []byte {
	return i.startKey
}
   239  
// Namespace returns the namespace of the region's table. The returned slice
// is the one held by the region, not a copy.
func (i *info) Namespace() []byte {
	return i.namespace
}
   244  
// Table returns the name of the table the region belongs to. The returned
// slice is the one held by the region, not a copy.
func (i *info) Table() []byte {
	return i.table
}
   249  
   250  // Client returns region client
   251  func (i *info) Client() hrpc.RegionClient {
   252  	i.m.RLock()
   253  	c := i.client
   254  	i.m.RUnlock()
   255  	return c
   256  }
   257  
   258  // SetClient sets region client
   259  func (i *info) SetClient(c hrpc.RegionClient) {
   260  	i.m.Lock()
   261  	i.client = c
   262  	i.m.Unlock()
   263  }
   264  
   265  // Compare compares two region names.
   266  // We can't just use bytes.Compare() because it doesn't play nicely
   267  // with the way META keys are built as the first region has an empty start
   268  // key.  Let's assume we know about those 2 regions in our cache:
   269  //
   270  //	.META.,,1
   271  //	tableA,,1273018455182
   272  //
   273  // We're given an RPC to execute on "tableA", row "\x00" (1 byte row key
   274  // containing a 0).  If we use Compare() to sort the entries in the cache,
   275  // when we search for the entry right before "tableA,\000,:"
   276  // we'll erroneously find ".META.,,1" instead of the entry for first
   277  // region of "tableA".
   278  //
   279  // Since this scheme breaks natural ordering, we need this comparator to
   280  // implement a special version of comparison to handle this scenario.
   281  func Compare(a, b []byte) int {
   282  	var length int
   283  	if la, lb := len(a), len(b); la < lb {
   284  		length = la
   285  	} else {
   286  		length = lb
   287  	}
   288  	// Reminder: region names are of the form:
   289  	//   table_name,start_key,timestamp[.MD5.]
   290  	// First compare the table names.
   291  	var i int
   292  	for i = 0; i < length; i++ {
   293  		ai := a[i]    // Saves one pointer deference every iteration.
   294  		bi := b[i]    // Saves one pointer deference every iteration.
   295  		if ai != bi { // The name of the tables differ.
   296  			if ai == ',' {
   297  				return -1001 // `a' has a smaller table name.  a < b
   298  			} else if bi == ',' {
   299  				return 1001 // `b' has a smaller table name.  a > b
   300  			}
   301  			return int(ai) - int(bi)
   302  		}
   303  		if ai == ',' { // Remember: at this point ai == bi.
   304  			break // We're done comparing the table names.  They're equal.
   305  		}
   306  	}
   307  
   308  	// Now find the last comma in both `a' and `b'.  We need to start the
   309  	// search from the end as the row key could have an arbitrary number of
   310  	// commas and we don't know its length.
   311  	aComma := findCommaFromEnd(a, i)
   312  	bComma := findCommaFromEnd(b, i)
   313  	// If either `a' or `b' is followed immediately by another comma, then
   314  	// they are the first region (it's the empty start key).
   315  	i++ // No need to check against `length', there MUST be more bytes.
   316  
   317  	// Compare keys.
   318  	var firstComma int
   319  	if aComma < bComma {
   320  		firstComma = aComma
   321  	} else {
   322  		firstComma = bComma
   323  	}
   324  	for ; i < firstComma; i++ {
   325  		ai := a[i]
   326  		bi := b[i]
   327  		if ai != bi { // The keys differ.
   328  			return int(ai) - int(bi)
   329  		}
   330  	}
   331  	if aComma < bComma {
   332  		return -1002 // `a' has a shorter key.  a < b
   333  	} else if bComma < aComma {
   334  		return 1002 // `b' has a shorter key.  a > b
   335  	}
   336  
   337  	// Keys have the same length and have compared identical.  Compare the
   338  	// rest, which essentially means: use start code as a tie breaker.
   339  	for ; /*nothing*/ i < length; i++ {
   340  		ai := a[i]
   341  		bi := b[i]
   342  		if ai != bi { // The start codes differ.
   343  			return int(ai) - int(bi)
   344  		}
   345  	}
   346  
   347  	return len(a) - len(b)
   348  }
   349  
   350  // Because there is no `LastIndexByte()' in the standard `bytes' package.
   351  func findCommaFromEnd(b []byte, offset int) int {
   352  	for i := len(b) - 1; i > offset; i-- {
   353  		if b[i] == ',' {
   354  			return i
   355  		}
   356  	}
   357  	panic(fmt.Errorf("no comma found in %q after offset %d", b, offset))
   358  }
   359  
   360  func (i *info) MarshalJSON() ([]byte, error) {
   361  
   362  	var ctxError, client string
   363  
   364  	if i.ctx != nil {
   365  		ctxError = fmt.Sprint(i.ctx.Err())
   366  	}
   367  	if i.Client() != nil {
   368  		client = i.Client().String()
   369  	}
   370  
   371  	state := struct {
   372  		Id              uint64
   373  		Namespace       string
   374  		Table           string
   375  		Name            string
   376  		StartKey        string
   377  		StopKey         string
   378  		ContextInstance string
   379  		Err             string
   380  		ClientPtr       string
   381  		Client          string
   382  		Available       bool
   383  	}{
   384  		Id:              i.id,
   385  		Namespace:       strconv.QuoteToASCII(string(i.namespace)),
   386  		Table:           strconv.QuoteToASCII(string(i.table)),
   387  		Name:            strconv.QuoteToASCII(string(i.name)),
   388  		StartKey:        strconv.QuoteToASCII(string(i.startKey)),
   389  		StopKey:         strconv.QuoteToASCII(string(i.stopKey)),
   390  		ContextInstance: fmt.Sprintf("%p", (i.ctx)),
   391  		Err:             ctxError,
   392  		ClientPtr:       fmt.Sprintf("%p", (i.Client())),
   393  		Client:          client,
   394  		Available:       !i.IsUnavailable(),
   395  	}
   396  	jsonVal, err := json.Marshal(state)
   397  
   398  	return jsonVal, err
   399  }