go-micro.dev/v5@v5.12.0/registry/cache/cache.go (about)

     1  // Package cache provides a registry cache
     2  package cache
     3  
     4  import (
     5  	"math"
     6  	"math/rand"
     7  	"sync"
     8  	"time"
     9  
    10  	"golang.org/x/sync/singleflight"
    11  
    12  	log "go-micro.dev/v5/logger"
    13  	"go-micro.dev/v5/registry"
    14  	util "go-micro.dev/v5/util/registry"
    15  )
    16  
// Cache is the registry cache interface. It exposes every method of
// registry.Registry and adds Stop.
type Cache interface {
	// Registry is embedded, so a Cache can be used wherever a
	// registry.Registry is expected.
	registry.Registry
	// Stop stops the cache watcher.
	Stop()
}
    24  
// Options holds the settings a cache is created with. New fills in the
// defaults before applying any Option.
type Options struct {
	// Logger is the logger the watcher loop writes its debug output to.
	Logger log.Logger
	// TTL is the cache TTL: cached services and their nodes are stamped with
	// an expiry of now+TTL whenever they are stored or refreshed.
	TTL time.Duration
}
    30  
// Option is a function that modifies Options. New calls each supplied Option
// in order on the default options.
type Option func(o *Options)
    32  
// cache is the Cache implementation returned by New. It embeds the wrapped
// registry.Registry and keeps looked-up services in memory.
type cache struct {
	// opts holds the options assembled in New.
	opts Options

	// Registry is the wrapped registry that lookups and watches are sent to.
	registry.Registry
	// status of the registry, as last recorded through setStatus.
	// It is used to hold onto the cache in failure state: while it is
	// non-nil, del leaves the cached entries in place.
	status error
	// sg is used to prevent cache breakdown: registry lookups go through
	// sg.Do keyed by service name.
	//
	// cache holds the cached services by service name, ttls the expiry time
	// of each cached entry, and watched the services that have been marked
	// as watched.
	sg      singleflight.Group
	cache   map[string][]*registry.Service
	ttls    map[string]time.Time
	nttls   map[string]map[string]time.Time // node ttls: service name -> node id -> expiry
	watched map[string]bool

	// exit is used to stop the cache; Stop closes it.
	exit chan bool

	// watchedRunning indicates, per service, whether a run loop is active.
	watchedRunning map[string]bool

	// RWMutex guards the registry cache state above.
	sync.RWMutex
}
    57  
    58  var (
    59  	DefaultTTL = time.Minute
    60  )
    61  
    62  func backoff(attempts int) time.Duration {
    63  	if attempts == 0 {
    64  		return time.Duration(0)
    65  	}
    66  	return time.Duration(math.Pow(10, float64(attempts))) * time.Millisecond
    67  }
    68  
    69  func (c *cache) getStatus() error {
    70  	c.RLock()
    71  	defer c.RUnlock()
    72  	return c.status
    73  }
    74  
    75  func (c *cache) setStatus(err error) {
    76  	c.Lock()
    77  	c.status = err
    78  	c.Unlock()
    79  }
    80  
    81  // isValid checks if the service is valid.
    82  func (c *cache) isValid(services []*registry.Service, ttl time.Time) bool {
    83  	// no services exist
    84  	if len(services) == 0 {
    85  		return false
    86  	}
    87  
    88  	// ttl is invalid
    89  	if ttl.IsZero() {
    90  		return false
    91  	}
    92  
    93  	// time since ttl is longer than timeout
    94  	if time.Since(ttl) > 0 {
    95  		return false
    96  	}
    97  
    98  	// a node did not get updated
    99  	for _, s := range services {
   100  		for _, n := range s.Nodes {
   101  			nttl := c.nttls[s.Name][n.Id]
   102  			if time.Since(nttl) > 0 {
   103  				return false
   104  			}
   105  		}
   106  	}
   107  
   108  	// ok
   109  	return true
   110  }
   111  
   112  func (c *cache) quit() bool {
   113  	select {
   114  	case <-c.exit:
   115  		return true
   116  	default:
   117  		return false
   118  	}
   119  }
   120  
   121  func (c *cache) del(service string) {
   122  	// don't blow away cache in error state
   123  	if err := c.status; err != nil {
   124  		return
   125  	}
   126  	// otherwise delete entries
   127  	delete(c.cache, service)
   128  	delete(c.ttls, service)
   129  	delete(c.nttls, service)
   130  }
   131  
   132  func (c *cache) get(service string) ([]*registry.Service, error) {
   133  	// read lock
   134  	c.RLock()
   135  
   136  	// check the cache first
   137  	services := c.cache[service]
   138  	// get cache ttl
   139  	ttl := c.ttls[service]
   140  	// make a copy
   141  	cp := util.Copy(services)
   142  
   143  	// got services, nodes && within ttl so return cache
   144  	if c.isValid(cp, ttl) {
   145  		c.RUnlock()
   146  		// return services
   147  		return cp, nil
   148  	}
   149  
   150  	// get does the actual request for a service and cache it
   151  	get := func(service string, cached []*registry.Service) ([]*registry.Service, error) {
   152  		// ask the registry
   153  		val, err, _ := c.sg.Do(service, func() (interface{}, error) {
   154  			return c.Registry.GetService(service)
   155  		})
   156  		services, _ := val.([]*registry.Service)
   157  		if err != nil {
   158  			// check the cache
   159  			if len(cached) > 0 {
   160  				// set the error status
   161  				c.setStatus(err)
   162  
   163  				// return the stale cache
   164  				return cached, nil
   165  			}
   166  			// otherwise return error
   167  			return nil, err
   168  		}
   169  
   170  		// reset the status
   171  		if err := c.getStatus(); err != nil {
   172  			c.setStatus(nil)
   173  		}
   174  
   175  		// cache results
   176  		cp := util.Copy(services)
   177  		c.Lock()
   178  		for _, s := range services {
   179  			c.updateNodeTTLs(service, s.Nodes)
   180  		}
   181  		c.set(service, services)
   182  		c.Unlock()
   183  
   184  		return cp, nil
   185  	}
   186  
   187  	// watch service if not watched
   188  	_, ok := c.watched[service]
   189  
   190  	// unlock the read lock
   191  	c.RUnlock()
   192  
   193  	// check if its being watched
   194  	if c.opts.TTL > 0 && !ok {
   195  		c.Lock()
   196  
   197  		// set to watched
   198  		c.watched[service] = true
   199  
   200  		// only kick it off if not running
   201  		if !c.watchedRunning[service] {
   202  			go c.run(service)
   203  		}
   204  
   205  		c.Unlock()
   206  	}
   207  
   208  	// get and return services
   209  	return get(service, cp)
   210  }
   211  
   212  func (c *cache) set(service string, services []*registry.Service) {
   213  	c.cache[service] = services
   214  	c.ttls[service] = time.Now().Add(c.opts.TTL)
   215  }
   216  
   217  func (c *cache) updateNodeTTLs(name string, nodes []*registry.Node) {
   218  	if c.nttls[name] == nil {
   219  		c.nttls[name] = make(map[string]time.Time)
   220  	}
   221  	for _, node := range nodes {
   222  		c.nttls[name][node.Id] = time.Now().Add(c.opts.TTL)
   223  	}
   224  	// clean up expired nodes
   225  	for nodeId, nttl := range c.nttls[name] {
   226  		if time.Since(nttl) > 0 {
   227  			delete(c.nttls[name], nodeId)
   228  		}
   229  	}
   230  }
   231  
   232  func (c *cache) update(res *registry.Result) {
   233  	if res == nil || res.Service == nil {
   234  		return
   235  	}
   236  
   237  	c.Lock()
   238  	defer c.Unlock()
   239  
   240  	// only save watched services
   241  	if _, ok := c.watched[res.Service.Name]; !ok {
   242  		return
   243  	}
   244  
   245  	services, ok := c.cache[res.Service.Name]
   246  	if !ok {
   247  		// we're not going to cache anything
   248  		// unless there was already a lookup
   249  		return
   250  	}
   251  
   252  	if len(res.Service.Nodes) == 0 {
   253  		switch res.Action {
   254  		case "delete":
   255  			c.del(res.Service.Name)
   256  		}
   257  		return
   258  	}
   259  
   260  	// existing service found
   261  	var service *registry.Service
   262  	var index int
   263  	for i, s := range services {
   264  		if s.Version == res.Service.Version {
   265  			service = s
   266  			index = i
   267  		}
   268  	}
   269  
   270  	switch res.Action {
   271  	case "create", "update":
   272  		c.updateNodeTTLs(res.Service.Name, res.Service.Nodes)
   273  		if service == nil {
   274  			c.set(res.Service.Name, append(services, res.Service))
   275  			return
   276  		}
   277  
   278  		// append old nodes to new service
   279  		for _, cur := range service.Nodes {
   280  			var seen bool
   281  			for _, node := range res.Service.Nodes {
   282  				if cur.Id == node.Id {
   283  					seen = true
   284  					break
   285  				}
   286  			}
   287  			if !seen {
   288  				res.Service.Nodes = append(res.Service.Nodes, cur)
   289  			}
   290  		}
   291  
   292  		services[index] = res.Service
   293  		c.set(res.Service.Name, services)
   294  	case "delete":
   295  		if service == nil {
   296  			return
   297  		}
   298  
   299  		var nodes []*registry.Node
   300  
   301  		// filter cur nodes to remove the dead one
   302  		for _, cur := range service.Nodes {
   303  			var seen bool
   304  			for _, del := range res.Service.Nodes {
   305  				if del.Id == cur.Id {
   306  					seen = true
   307  					break
   308  				}
   309  			}
   310  			if !seen {
   311  				nodes = append(nodes, cur)
   312  			}
   313  		}
   314  
   315  		// still got nodes, save and return
   316  		if len(nodes) > 0 {
   317  			service.Nodes = nodes
   318  			services[index] = service
   319  			c.set(service.Name, services)
   320  			return
   321  		}
   322  
   323  		// zero nodes left
   324  
   325  		// only have one thing to delete
   326  		// nuke the thing
   327  		if len(services) == 1 {
   328  			c.del(service.Name)
   329  			return
   330  		}
   331  
   332  		// still have more than 1 service
   333  		// check the version and keep what we know
   334  		var srvs []*registry.Service
   335  		for _, s := range services {
   336  			if s.Version != service.Version {
   337  				srvs = append(srvs, s)
   338  			}
   339  		}
   340  
   341  		// save
   342  		c.set(service.Name, srvs)
   343  	case "override":
   344  		if service == nil {
   345  			return
   346  		}
   347  
   348  		c.del(service.Name)
   349  	}
   350  }
   351  
   352  // run starts the cache watcher loop
   353  // it creates a new watcher if there's a problem.
   354  func (c *cache) run(service string) {
   355  	c.Lock()
   356  	c.watchedRunning[service] = true
   357  	c.Unlock()
   358  	logger := c.opts.Logger
   359  	// reset watcher on exit
   360  	defer func() {
   361  		c.Lock()
   362  		c.watched = make(map[string]bool)
   363  		c.watchedRunning[service] = false
   364  		c.Unlock()
   365  	}()
   366  
   367  	var a, b int
   368  
   369  	for {
   370  		// exit early if already dead
   371  		if c.quit() {
   372  			return
   373  		}
   374  
   375  		// jitter before starting
   376  		j := rand.Int63n(100)
   377  		time.Sleep(time.Duration(j) * time.Millisecond)
   378  
   379  		// create new watcher
   380  		w, err := c.Registry.Watch(registry.WatchService(service))
   381  		if err != nil {
   382  			if c.quit() {
   383  				return
   384  			}
   385  
   386  			d := backoff(a)
   387  			c.setStatus(err)
   388  
   389  			if a > 3 {
   390  				logger.Logf(log.DebugLevel, "rcache: ", err, " backing off ", d)
   391  				a = 0
   392  			}
   393  
   394  			time.Sleep(d)
   395  			a++
   396  
   397  			continue
   398  		}
   399  
   400  		// reset a
   401  		a = 0
   402  
   403  		// watch for events
   404  		if err := c.watch(w); err != nil {
   405  			if c.quit() {
   406  				return
   407  			}
   408  
   409  			d := backoff(b)
   410  			c.setStatus(err)
   411  
   412  			if b > 3 {
   413  				logger.Logf(log.DebugLevel, "rcache: ", err, " backing off ", d)
   414  				b = 0
   415  			}
   416  
   417  			time.Sleep(d)
   418  			b++
   419  
   420  			continue
   421  		}
   422  
   423  		// reset b
   424  		b = 0
   425  	}
   426  }
   427  
   428  // watch loops the next event and calls update
   429  // it returns if there's an error.
   430  func (c *cache) watch(w registry.Watcher) error {
   431  	// used to stop the watch
   432  	stop := make(chan bool)
   433  
   434  	// manage this loop
   435  	go func() {
   436  		defer w.Stop()
   437  
   438  		select {
   439  		// wait for exit
   440  		case <-c.exit:
   441  			return
   442  		// we've been stopped
   443  		case <-stop:
   444  			return
   445  		}
   446  	}()
   447  
   448  	for {
   449  		res, err := w.Next()
   450  		if err != nil {
   451  			close(stop)
   452  			return err
   453  		}
   454  
   455  		// reset the error status since we succeeded
   456  		if err := c.getStatus(); err != nil {
   457  			// reset status
   458  			c.setStatus(nil)
   459  		}
   460  
   461  		c.update(res)
   462  	}
   463  }
   464  
   465  func (c *cache) GetService(service string, opts ...registry.GetOption) ([]*registry.Service, error) {
   466  	// get the service
   467  	services, err := c.get(service)
   468  	if err != nil {
   469  		return nil, err
   470  	}
   471  
   472  	// if there's nothing return err
   473  	if len(services) == 0 {
   474  		return nil, registry.ErrNotFound
   475  	}
   476  
   477  	// return services
   478  	return services, nil
   479  }
   480  
   481  func (c *cache) Stop() {
   482  	c.Lock()
   483  	defer c.Unlock()
   484  
   485  	select {
   486  	case <-c.exit:
   487  		return
   488  	default:
   489  		close(c.exit)
   490  	}
   491  }
   492  
   493  func (c *cache) String() string {
   494  	return "cache"
   495  }
   496  
   497  // New returns a new cache.
   498  func New(r registry.Registry, opts ...Option) Cache {
   499  	rand.Seed(time.Now().UnixNano())
   500  
   501  	options := Options{
   502  		TTL:    DefaultTTL,
   503  		Logger: log.DefaultLogger,
   504  	}
   505  
   506  	for _, o := range opts {
   507  		o(&options)
   508  	}
   509  
   510  	return &cache{
   511  		Registry:       r,
   512  		opts:           options,
   513  		watched:        make(map[string]bool),
   514  		watchedRunning: make(map[string]bool),
   515  		cache:          make(map[string][]*registry.Service),
   516  		ttls:           make(map[string]time.Time),
   517  		nttls:          make(map[string]map[string]time.Time),
   518  		exit:           make(chan bool),
   519  	}
   520  }