github.com/xmidt-org/webpa-common@v1.11.9/device/rehasher/rehasher.go (about)

     1  package rehasher
     2  
     3  import (
     4  	"time"
     5  
     6  	"github.com/go-kit/kit/log"
     7  	"github.com/go-kit/kit/log/level"
     8  	"github.com/go-kit/kit/metrics"
     9  	"github.com/go-kit/kit/metrics/provider"
    10  
    11  	"github.com/xmidt-org/webpa-common/device"
    12  	"github.com/xmidt-org/webpa-common/logging"
    13  	"github.com/xmidt-org/webpa-common/service"
    14  	"github.com/xmidt-org/webpa-common/service/monitor"
    15  )
    16  
    17  const (
    18  	RehashError         = "rehash-error"
    19  	RehashOtherInstance = "rehash-other-instance"
    20  
    21  	ServiceDiscoveryError       = "service-discovery-error"
    22  	ServiceDiscoveryStopped     = "service-discovery-stopped"
    23  	ServiceDiscoveryNoInstances = "service-discovery-no-instances"
    24  )
    25  
    26  // Option is a configuration option for a rehasher
    27  type Option func(*rehasher)
    28  
    29  // WithLogger configures a rehasher with a logger, using the default logger if l is nil.
    30  func WithLogger(l log.Logger) Option {
    31  	return func(r *rehasher) {
    32  		if l == nil {
    33  			r.logger = logging.DefaultLogger()
    34  		} else {
    35  			r.logger = l
    36  		}
    37  	}
    38  }
    39  
    40  // WithAccessorFactory configures a rehasher with a specific factory for service.Accessor objects.
    41  // If af is nil, the default accessor factory is used.
    42  func WithAccessorFactory(af service.AccessorFactory) Option {
    43  	return func(r *rehasher) {
    44  		if af == nil {
    45  			r.accessorFactory = service.DefaultAccessorFactory
    46  		} else {
    47  			r.accessorFactory = af
    48  		}
    49  	}
    50  }
    51  
    52  // WithIsRegistered configures a rehasher with a strategy for determining if a discovered service instance
    53  // is registered as this process.  There is no default.
    54  func WithIsRegistered(f func(string) bool) Option {
    55  	return func(r *rehasher) {
    56  		r.isRegistered = f
    57  	}
    58  }
    59  
    60  // WithMetricsProvider configures a metrics subsystem the resulting rehasher will use to track things.
    61  // A nil provider passed to this option means to discard all metrics.
    62  func WithMetricsProvider(p provider.Provider) Option {
    63  	return func(r *rehasher) {
    64  		if p == nil {
    65  			p = provider.NewDiscardProvider()
    66  		}
    67  
    68  		r.keep = p.NewGauge(RehashKeepDevice)
    69  		r.disconnect = p.NewGauge(RehashDisconnectDevice)
    70  		r.disconnectAllCounter = p.NewCounter(RehashDisconnectAllCounter)
    71  		r.timestamp = p.NewGauge(RehashTimestamp)
    72  		r.duration = p.NewGauge(RehashDurationMilliseconds)
    73  	}
    74  }
    75  
    76  // New creates a monitor Listener which will rehash and disconnect devices in response to service discovery events
    77  // from a given set of services.
    78  // This function panics if the connector is nil, if no IsRegistered strategy is configured or if no services were
    79  // provided to filter events.
    80  //
    81  // If the returned listener encounters any service discovery error, all devices are disconnected.  Otherwise,
    82  // the IsRegistered strategy is used to determine which devices should still be connected to the Connector.  Devices
    83  // that hash to instances not registered in this environment are disconnected.
    84  func New(connector device.Connector, services []string, options ...Option) monitor.Listener {
    85  	if connector == nil {
    86  		panic("A device Connector is required.")
    87  	}
    88  
    89  	if len(services) < 1 {
    90  		panic("Services are required to avoid unintended reshashes.")
    91  	}
    92  
    93  	var (
    94  		defaultProvider = provider.NewDiscardProvider()
    95  
    96  		r = &rehasher{
    97  			logger:          logging.DefaultLogger(),
    98  			accessorFactory: service.DefaultAccessorFactory,
    99  			connector:       connector,
   100  			now:             time.Now,
   101  			services:        make(map[string]bool),
   102  
   103  			keep:                 defaultProvider.NewGauge(RehashKeepDevice),
   104  			disconnect:           defaultProvider.NewGauge(RehashDisconnectDevice),
   105  			disconnectAllCounter: defaultProvider.NewCounter(RehashDisconnectAllCounter),
   106  			timestamp:            defaultProvider.NewGauge(RehashTimestamp),
   107  			duration:             defaultProvider.NewGauge(RehashDurationMilliseconds),
   108  		}
   109  	)
   110  
   111  	for _, svc := range services {
   112  		r.services[svc] = true
   113  	}
   114  
   115  	for _, o := range options {
   116  		o(r)
   117  	}
   118  
   119  	if r.isRegistered == nil {
   120  		panic("No IsRegistered strategy configured.  Use WithIsRegistered or WithEnvironment.")
   121  	}
   122  
   123  	return r
   124  }
   125  
   126  // rehasher implements monitor.Listener and (1) disconnects all devices when any service discovery error occurs,
   127  // and (2) rehashes devices in response to updated instances.
   128  type rehasher struct {
   129  	logger          log.Logger
   130  	services        map[string]bool
   131  	accessorFactory service.AccessorFactory
   132  	isRegistered    func(string) bool
   133  	connector       device.Connector
   134  	now             func() time.Time
   135  
   136  	keep                 metrics.Gauge
   137  	disconnect           metrics.Gauge
   138  	disconnectAllCounter metrics.Counter
   139  	timestamp            metrics.Gauge
   140  	duration             metrics.Gauge
   141  }
   142  
   143  func (r *rehasher) rehash(svc string, logger log.Logger, accessor service.Accessor) {
   144  	logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "rehash starting")
   145  
   146  	start := r.now()
   147  	r.timestamp.With(service.ServiceLabel, svc).Set(float64(start.UTC().Unix()))
   148  
   149  	var (
   150  		keepCount = 0
   151  
   152  		disconnectCount = r.connector.DisconnectIf(func(candidate device.ID) (device.CloseReason, bool) {
   153  			instance, err := accessor.Get(candidate.Bytes())
   154  			switch {
   155  			case err != nil:
   156  				logger.Log(level.Key(), level.ErrorValue(),
   157  					logging.MessageKey(), "disconnecting device: error during rehash",
   158  					logging.ErrorKey(), err,
   159  					"id", candidate,
   160  				)
   161  
   162  				return device.CloseReason{Err: err, Text: RehashError}, true
   163  
   164  			case !r.isRegistered(instance):
   165  				logger.Log(level.Key(), level.InfoValue(),
   166  					logging.MessageKey(), "disconnecting device: rehashed to another instance",
   167  					"instance", instance,
   168  					"id", candidate,
   169  				)
   170  
   171  				return device.CloseReason{Text: RehashOtherInstance}, true
   172  
   173  			default:
   174  				logger.Log(level.Key(), level.DebugValue(), logging.MessageKey(), "device hashed to this instance", "id", candidate)
   175  				keepCount++
   176  				return device.CloseReason{}, false
   177  			}
   178  		})
   179  
   180  		duration = r.now().Sub(start)
   181  	)
   182  
   183  	r.keep.With(service.ServiceLabel, svc).Set(float64(keepCount))
   184  	r.disconnect.With(service.ServiceLabel, svc).Set(float64(disconnectCount))
   185  	r.duration.With(service.ServiceLabel, svc).Set(float64(duration / time.Millisecond))
   186  	logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "rehash complete", "disconnectCount", disconnectCount, "duration", duration)
   187  }
   188  
   189  func (r *rehasher) MonitorEvent(e monitor.Event) {
   190  	if !r.services[e.Service] {
   191  		return
   192  	}
   193  
   194  	logger := logging.Enrich(
   195  		log.With(
   196  			r.logger,
   197  			monitor.EventCountKey(), e.EventCount,
   198  		),
   199  		e.Instancer,
   200  	)
   201  
   202  	switch {
   203  	case e.Err != nil:
   204  		logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery error", logging.ErrorKey(), e.Err)
   205  		r.connector.DisconnectAll(device.CloseReason{Err: e.Err, Text: ServiceDiscoveryError})
   206  		r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryError).Add(1.0)
   207  
   208  	case e.Stopped:
   209  		logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery monitor being stopped")
   210  		r.connector.DisconnectAll(device.CloseReason{Text: ServiceDiscoveryStopped})
   211  		r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryStopped).Add(1.0)
   212  
   213  	case e.EventCount == 1:
   214  		logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "ignoring initial instances")
   215  
   216  	case len(e.Instances) > 0:
   217  		r.rehash(e.Service, logger, r.accessorFactory(e.Instances))
   218  
   219  	default:
   220  		logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery updated with no instances")
   221  		r.connector.DisconnectAll(device.CloseReason{Text: ServiceDiscoveryNoInstances})
   222  		r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryNoInstances).Add(1.0)
   223  	}
   224  }