github.com/xmidt-org/webpa-common@v1.11.9/device/rehasher/rehasher.go (about) 1 package rehasher 2 3 import ( 4 "time" 5 6 "github.com/go-kit/kit/log" 7 "github.com/go-kit/kit/log/level" 8 "github.com/go-kit/kit/metrics" 9 "github.com/go-kit/kit/metrics/provider" 10 11 "github.com/xmidt-org/webpa-common/device" 12 "github.com/xmidt-org/webpa-common/logging" 13 "github.com/xmidt-org/webpa-common/service" 14 "github.com/xmidt-org/webpa-common/service/monitor" 15 ) 16 17 const ( 18 RehashError = "rehash-error" 19 RehashOtherInstance = "rehash-other-instance" 20 21 ServiceDiscoveryError = "service-discovery-error" 22 ServiceDiscoveryStopped = "service-discovery-stopped" 23 ServiceDiscoveryNoInstances = "service-discovery-no-instances" 24 ) 25 26 // Option is a configuration option for a rehasher 27 type Option func(*rehasher) 28 29 // WithLogger configures a rehasher with a logger, using the default logger if l is nil. 30 func WithLogger(l log.Logger) Option { 31 return func(r *rehasher) { 32 if l == nil { 33 r.logger = logging.DefaultLogger() 34 } else { 35 r.logger = l 36 } 37 } 38 } 39 40 // WithAccessorFactory configures a rehasher with a specific factory for service.Accessor objects. 41 // If af is nil, the default accessor factory is used. 42 func WithAccessorFactory(af service.AccessorFactory) Option { 43 return func(r *rehasher) { 44 if af == nil { 45 r.accessorFactory = service.DefaultAccessorFactory 46 } else { 47 r.accessorFactory = af 48 } 49 } 50 } 51 52 // WithIsRegistered configures a rehasher with a strategy for determining if a discovered service instance 53 // is registered as this process. There is no default. 54 func WithIsRegistered(f func(string) bool) Option { 55 return func(r *rehasher) { 56 r.isRegistered = f 57 } 58 } 59 60 // WithMetricsProvider configures a metrics subsystem the resulting rehasher will use to track things. 61 // A nil provider passed to this option means to discard all metrics. 62 func WithMetricsProvider(p provider.Provider) Option { 63 return func(r *rehasher) { 64 if p == nil { 65 p = provider.NewDiscardProvider() 66 } 67 68 r.keep = p.NewGauge(RehashKeepDevice) 69 r.disconnect = p.NewGauge(RehashDisconnectDevice) 70 r.disconnectAllCounter = p.NewCounter(RehashDisconnectAllCounter) 71 r.timestamp = p.NewGauge(RehashTimestamp) 72 r.duration = p.NewGauge(RehashDurationMilliseconds) 73 } 74 } 75 76 // New creates a monitor Listener which will rehash and disconnect devices in response to service discovery events 77 // from a given set of services. 78 // This function panics if the connector is nil, if no IsRegistered strategy is configured or if no services were 79 // provided to filter events. 80 // 81 // If the returned listener encounters any service discovery error, all devices are disconnected. Otherwise, 82 // the IsRegistered strategy is used to determine which devices should still be connected to the Connector. Devices 83 // that hash to instances not registered in this environment are disconnected. 84 func New(connector device.Connector, services []string, options ...Option) monitor.Listener { 85 if connector == nil { 86 panic("A device Connector is required.") 87 } 88 89 if len(services) < 1 { 90 panic("Services are required to avoid unintended reshashes.") 91 } 92 93 var ( 94 defaultProvider = provider.NewDiscardProvider() 95 96 r = &rehasher{ 97 logger: logging.DefaultLogger(), 98 accessorFactory: service.DefaultAccessorFactory, 99 connector: connector, 100 now: time.Now, 101 services: make(map[string]bool), 102 103 keep: defaultProvider.NewGauge(RehashKeepDevice), 104 disconnect: defaultProvider.NewGauge(RehashDisconnectDevice), 105 disconnectAllCounter: defaultProvider.NewCounter(RehashDisconnectAllCounter), 106 timestamp: defaultProvider.NewGauge(RehashTimestamp), 107 duration: defaultProvider.NewGauge(RehashDurationMilliseconds), 108 } 109 ) 110 111 for _, svc := range services { 112 r.services[svc] = true 113 } 114 115 for _, o := range options { 116 o(r) 117 } 118 119 if r.isRegistered == nil { 120 panic("No IsRegistered strategy configured. Use WithIsRegistered or WithEnvironment.") 121 } 122 123 return r 124 } 125 126 // rehasher implements monitor.Listener and (1) disconnects all devices when any service discovery error occurs, 127 // and (2) rehashes devices in response to updated instances. 128 type rehasher struct { 129 logger log.Logger 130 services map[string]bool 131 accessorFactory service.AccessorFactory 132 isRegistered func(string) bool 133 connector device.Connector 134 now func() time.Time 135 136 keep metrics.Gauge 137 disconnect metrics.Gauge 138 disconnectAllCounter metrics.Counter 139 timestamp metrics.Gauge 140 duration metrics.Gauge 141 } 142 143 func (r *rehasher) rehash(svc string, logger log.Logger, accessor service.Accessor) { 144 logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "rehash starting") 145 146 start := r.now() 147 r.timestamp.With(service.ServiceLabel, svc).Set(float64(start.UTC().Unix())) 148 149 var ( 150 keepCount = 0 151 152 disconnectCount = r.connector.DisconnectIf(func(candidate device.ID) (device.CloseReason, bool) { 153 instance, err := accessor.Get(candidate.Bytes()) 154 switch { 155 case err != nil: 156 logger.Log(level.Key(), level.ErrorValue(), 157 logging.MessageKey(), "disconnecting device: error during rehash", 158 logging.ErrorKey(), err, 159 "id", candidate, 160 ) 161 162 return device.CloseReason{Err: err, Text: RehashError}, true 163 164 case !r.isRegistered(instance): 165 logger.Log(level.Key(), level.InfoValue(), 166 logging.MessageKey(), "disconnecting device: rehashed to another instance", 167 "instance", instance, 168 "id", candidate, 169 ) 170 171 return device.CloseReason{Text: RehashOtherInstance}, true 172 173 default: 174 logger.Log(level.Key(), level.DebugValue(), logging.MessageKey(), "device hashed to this instance", "id", candidate) 175 keepCount++ 176 return device.CloseReason{}, false 177 } 178 }) 179 180 duration = r.now().Sub(start) 181 ) 182 183 r.keep.With(service.ServiceLabel, svc).Set(float64(keepCount)) 184 r.disconnect.With(service.ServiceLabel, svc).Set(float64(disconnectCount)) 185 r.duration.With(service.ServiceLabel, svc).Set(float64(duration / time.Millisecond)) 186 logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "rehash complete", "disconnectCount", disconnectCount, "duration", duration) 187 } 188 189 func (r *rehasher) MonitorEvent(e monitor.Event) { 190 if !r.services[e.Service] { 191 return 192 } 193 194 logger := logging.Enrich( 195 log.With( 196 r.logger, 197 monitor.EventCountKey(), e.EventCount, 198 ), 199 e.Instancer, 200 ) 201 202 switch { 203 case e.Err != nil: 204 logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery error", logging.ErrorKey(), e.Err) 205 r.connector.DisconnectAll(device.CloseReason{Err: e.Err, Text: ServiceDiscoveryError}) 206 r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryError).Add(1.0) 207 208 case e.Stopped: 209 logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery monitor being stopped") 210 r.connector.DisconnectAll(device.CloseReason{Text: ServiceDiscoveryStopped}) 211 r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryStopped).Add(1.0) 212 213 case e.EventCount == 1: 214 logger.Log(level.Key(), level.InfoValue(), logging.MessageKey(), "ignoring initial instances") 215 216 case len(e.Instances) > 0: 217 r.rehash(e.Service, logger, r.accessorFactory(e.Instances)) 218 219 default: 220 logger.Log(level.Key(), level.ErrorValue(), logging.MessageKey(), "disconnecting all devices: service discovery updated with no instances") 221 r.connector.DisconnectAll(device.CloseReason{Text: ServiceDiscoveryNoInstances}) 222 r.disconnectAllCounter.With(service.ServiceLabel, e.Service, ReasonLabel, DisconnectAllServiceDiscoveryNoInstances).Add(1.0) 223 } 224 }