github.com/tsuna/gohbase@v0.0.0-20250731002811-4ffcadfba63e/caches.go (about)

     1  // Copyright (C) 2016  The GoHBase Authors.  All rights reserved.
     2  // This file is part of GoHBase.
     3  // Use of this source code is governed by the Apache License 2.0
     4  // that can be found in the COPYING file.
     5  
     6  package gohbase
     7  
     8  import (
     9  	"bytes"
    10  	"fmt"
    11  	"io"
    12  	"log/slog"
    13  	"sync"
    14  
    15  	"github.com/tsuna/gohbase/hrpc"
    16  	"modernc.org/b/v2"
    17  )
    18  
// clientRegionCache is client -> region cache. Used to quickly
// look up all the regioninfos that map to a specific client
type clientRegionCache struct {
	// m guards regions.
	m      sync.RWMutex
	logger *slog.Logger

	// regions maps each cached region client to the set of regions
	// it is currently known to serve (a set via empty-struct values).
	regions map[hrpc.RegionClient]map[hrpc.RegionInfo]struct{}
}
    27  
    28  // put associates a region with client for provided addrss. It returns the client if it's already
    29  // in cache or otherwise instantiates a new one by calling newClient.
    30  // TODO: obvious place for optimization (use map with address as key to lookup exisiting clients)
    31  func (rcc *clientRegionCache) put(addr string, r hrpc.RegionInfo,
    32  	newClient func() hrpc.RegionClient) hrpc.RegionClient {
    33  	rcc.m.Lock()
    34  	for existingClient, regions := range rcc.regions {
    35  		// check if client already exists, checking by host and port
    36  		// because concurrent callers might try to put the same client
    37  		if addr == existingClient.Addr() {
    38  			// check client already knows about the region, checking
    39  			// by pointer is enough because we make sure that there are
    40  			// no regions with the same name around
    41  			if _, ok := regions[r]; !ok {
    42  				regions[r] = struct{}{}
    43  			}
    44  			rcc.m.Unlock()
    45  
    46  			rcc.logger.Debug("region client is already in client's cache", "client", existingClient)
    47  			return existingClient
    48  		}
    49  	}
    50  
    51  	// no such client yet
    52  	c := newClient()
    53  	rcc.regions[c] = map[hrpc.RegionInfo]struct{}{r: {}}
    54  	rcc.m.Unlock()
    55  
    56  	rcc.logger.Info("added new region client", "client", c)
    57  	return c
    58  }
    59  
    60  func (rcc *clientRegionCache) del(r hrpc.RegionInfo) {
    61  	rcc.m.Lock()
    62  	c := r.Client()
    63  	if c != nil {
    64  		r.SetClient(nil)
    65  		regions := rcc.regions[c]
    66  		delete(regions, r)
    67  	}
    68  	rcc.m.Unlock()
    69  }
    70  
    71  func (rcc *clientRegionCache) closeAll() {
    72  	rcc.m.Lock()
    73  	for client, regions := range rcc.regions {
    74  		for region := range regions {
    75  			region.MarkUnavailable()
    76  			region.SetClient(nil)
    77  		}
    78  		client.Close()
    79  	}
    80  	rcc.m.Unlock()
    81  }
    82  
    83  func (rcc *clientRegionCache) clientDown(c hrpc.RegionClient) map[hrpc.RegionInfo]struct{} {
    84  	rcc.m.Lock()
    85  	downregions, ok := rcc.regions[c]
    86  	delete(rcc.regions, c)
    87  	rcc.m.Unlock()
    88  
    89  	if ok {
    90  		rcc.logger.Info("removed region client", "client", c)
    91  	}
    92  	return downregions
    93  }
    94  
    95  // Collects information about the clientRegion cache and appends them to the two maps to reduce
    96  // duplication of data. We do this in one function to avoid running the iterations twice
    97  func (rcc *clientRegionCache) debugInfo(
    98  	regions map[string]hrpc.RegionInfo,
    99  	clients map[string]hrpc.RegionClient) map[string][]string {
   100  
   101  	// key = RegionClient memory address , value = List of RegionInfo addresses
   102  	clientRegionCacheMap := map[string][]string{}
   103  
   104  	rcc.m.RLock()
   105  	for client, reginfos := range rcc.regions {
   106  		clientRegionInfoMap := make([]string, len(reginfos))
   107  		// put all the region infos in the client into the keyRegionInfosMap b/c its not
   108  		// guaranteed that rcc and krc will have the same infos
   109  		clients[fmt.Sprintf("%p", client)] = client
   110  
   111  		i := 0
   112  		for regionInfo := range reginfos {
   113  			clientRegionInfoMap[i] = fmt.Sprintf("%p", regionInfo)
   114  			regions[fmt.Sprintf("%p", regionInfo)] = regionInfo
   115  			i++
   116  		}
   117  
   118  		clientRegionCacheMap[fmt.Sprintf("%p", client)] = clientRegionInfoMap
   119  	}
   120  	rcc.m.RUnlock()
   121  
   122  	return clientRegionCacheMap
   123  }
   124  
// key -> region cache.
// keyRegionCache indexes regions by key in a B-tree so that the region
// responsible for a given row key can be found with an ordered search.
type keyRegionCache struct {
	// m guards regions.
	m      sync.RWMutex
	logger *slog.Logger

	// Maps a []byte of a region start key to a hrpc.RegionInfo
	regions *b.Tree[[]byte, hrpc.RegionInfo]
}
   133  
   134  func (krc *keyRegionCache) get(key []byte) ([]byte, hrpc.RegionInfo) {
   135  	krc.m.RLock()
   136  
   137  	enum, ok := krc.regions.Seek(key)
   138  	if ok {
   139  		krc.m.RUnlock()
   140  		panic(fmt.Errorf("WTF: got exact match for region search key %q", key))
   141  	}
   142  	k, v, err := enum.Prev()
   143  	enum.Close()
   144  
   145  	krc.m.RUnlock()
   146  
   147  	if err == io.EOF {
   148  		// we are the beginning of the tree
   149  		return nil, nil
   150  	}
   151  	return k, v
   152  }
   153  
   154  // reads whole b tree in keyRegionCache and gathers debug info.
   155  // We append that information in the given map
   156  func (krc *keyRegionCache) debugInfo(
   157  	regions map[string]hrpc.RegionInfo) map[string]string {
   158  	regionCacheMap := map[string]string{}
   159  
   160  	krc.m.RLock()
   161  	enum, err := krc.regions.SeekFirst()
   162  	if err != nil {
   163  		krc.m.RUnlock()
   164  		return regionCacheMap
   165  	}
   166  	krc.m.RUnlock()
   167  
   168  	for {
   169  		krc.m.RLock()
   170  		k, v, err := enum.Next()
   171  		// release lock after each iteration to allow other processes a chance to get it
   172  		krc.m.RUnlock()
   173  		if err == io.EOF {
   174  			break
   175  		}
   176  		regions[fmt.Sprintf("%p", v)] = v
   177  		regionCacheMap[string(k)] = fmt.Sprintf("%p", v)
   178  	}
   179  
   180  	return regionCacheMap
   181  }
   182  
   183  func isRegionOverlap(regA, regB hrpc.RegionInfo) bool {
   184  	// if region's stop key is empty, it's assumed to be the greatest key
   185  	return bytes.Equal(regA.Namespace(), regB.Namespace()) &&
   186  		bytes.Equal(regA.Table(), regB.Table()) &&
   187  		(len(regB.StopKey()) == 0 || bytes.Compare(regA.StartKey(), regB.StopKey()) < 0) &&
   188  		(len(regA.StopKey()) == 0 || bytes.Compare(regA.StopKey(), regB.StartKey()) > 0)
   189  }
   190  
// getOverlaps returns all cached regions whose key ranges overlap the
// range of reg (per isRegionOverlap). It walks the tree starting just
// before where reg's search key lands and scans forward while regions
// keep overlapping.
// NOTE(review): this method takes no lock itself; the only visible
// caller (put) holds krc.m — confirm before adding new callers.
func (krc *keyRegionCache) getOverlaps(reg hrpc.RegionInfo) []hrpc.RegionInfo {
	var overlaps []hrpc.RegionInfo
	var v hrpc.RegionInfo
	var err error

	// deal with empty tree in the beginning so that we don't have to check
	// EOF errors for enum later
	if krc.regions.Len() == 0 {
		return overlaps
	}

	// check if key created from new region falls into any cached regions
	key := createRegionSearchKey(fullyQualifiedTable(reg), reg.StartKey())
	enum, ok := krc.regions.Seek(key)
	if ok {
		// Search keys are constructed so they can never equal a stored
		// region name; an exact hit means an invariant was broken.
		panic(fmt.Errorf("WTF: found a region with exact name as the search key %q", key))
	}

	// case 1: landed before the first region in cache
	// enum.Prev() returns io.EOF
	// enum.Next() returns io.EOF
	// SeekFirst() + enum.Next() returns the first region, which has larger start key

	// case 2: landed before the second region in cache
	// enum.Prev() returns the first region X and moves pointer to -infinity
	// enum.Next() returns io.EOF
	// SeekFirst() + enum.Next() returns first region X, which has smaller start key

	// case 3: landed anywhere after the second region
	// enum.Prev() returns the region X before it landed, moves pointer to the region X - 1
	// enum.Next() returns X - 1 and move pointer to X, which has smaller start key

	// Step back then forward to distinguish the three cases above.
	_, _, _ = enum.Prev()
	_, _, err = enum.Next()
	if err == io.EOF {
		// we are in the beginning of tree, get new enum starting
		// from first region
		enum.Close()
		enum, err = krc.regions.SeekFirst()
		if err != nil {
			// Tree was verified non-empty above, so SeekFirst must succeed.
			panic(fmt.Errorf(
				"error seeking first region when getting overlaps for region %v: %v", reg, err))
		}
	}

	// Check the first candidate region (the one preceding, or first).
	_, v, err = enum.Next()
	if err != nil {
		panic(fmt.Errorf(
			"error accessing first region when getting overlaps for region %v: %v", reg, err))
	}
	if isRegionOverlap(v, reg) {
		overlaps = append(overlaps, v)
	}
	_, v, err = enum.Next()

	// now append all regions that overlap until the end of the tree
	// or until they don't overlap
	for err != io.EOF && isRegionOverlap(v, reg) {
		overlaps = append(overlaps, v)
		_, v, err = enum.Next()
	}
	enum.Close()
	return overlaps
}
   255  
// put looks up if there's already region with this name in regions cache
// and if there's, returns it in overlaps and doesn't modify the cache.
// Otherwise, it puts the region and removes all overlaps in case all of
// them are older. Returns a slice of overlapping regions and whether
// passed region was put in the cache.
func (krc *keyRegionCache) put(reg hrpc.RegionInfo) (overlaps []hrpc.RegionInfo, replaced bool) {
	krc.m.Lock()
	defer krc.m.Unlock()

	// Update region cache metric
	// (deferred so it reflects the net change after Put and Deletes below).
	beforeLen := krc.regions.Len()
	defer func() {
		afterLen := krc.regions.Len()
		cachedRegionTotal.Add(float64(afterLen - beforeLen))
	}()

	// The updater closure decides whether to insert reg; it sets the
	// named results overlaps/replaced as side effects.
	krc.regions.Put(reg.Name(), func(v hrpc.RegionInfo, exists bool) (hrpc.RegionInfo, bool) {
		if exists {
			// region is already in cache,
			// note: regions with the same name have the same age
			overlaps = []hrpc.RegionInfo{v}
			return nil, false
		}
		// find all entries that are overlapping with the range of the new region.
		overlaps = krc.getOverlaps(reg)
		for _, o := range overlaps {
			if o.ID() > reg.ID() {
				// overlapping region is younger,
				// don't replace any regions
				// TODO: figure out if there can a case where we might
				// have both older and younger overlapping regions, for
				// now we only replace if all overlaps are older
				return nil, false
			}
		}
		// all overlaps are older, put the new region
		replaced = true
		return reg, true
	})
	if !replaced {
		krc.logger.Debug("region is already in cache",
			"region", reg, "overlaps", overlaps, "replaced", replaced)
		return
	}
	// delete overlapping regions
	// TODO: in case overlaps are always either younger or older,
	// we can just greedily remove them in Put function
	for _, o := range overlaps {
		krc.regions.Delete(o.Name())
		// let region establishers know that they can give up
		o.MarkDead()
	}

	krc.logger.Info("added new region",
		"region", reg, "overlaps", overlaps, "replaced", replaced)
	return
}
   313  
   314  func (krc *keyRegionCache) del(reg hrpc.RegionInfo) bool {
   315  	krc.m.Lock()
   316  	success := krc.regions.Delete(reg.Name())
   317  	krc.m.Unlock()
   318  	// let region establishers know that they can give up
   319  	reg.MarkDead()
   320  
   321  	if success {
   322  		cachedRegionTotal.Dec()
   323  	}
   324  	krc.logger.Debug("removed region", "region", reg)
   325  	return success
   326  }