github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/core/raftlease/store.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package raftlease 5 6 import ( 7 "sync" 8 "sync/atomic" 9 "time" 10 11 "github.com/juju/clock" 12 "github.com/juju/errors" 13 "github.com/juju/loggo" 14 "github.com/juju/pubsub" 15 "github.com/prometheus/client_golang/prometheus" 16 17 "github.com/juju/juju/core/globalclock" 18 "github.com/juju/juju/core/lease" 19 ) 20 21 var logger = loggo.GetLogger("juju.core.raftlease") 22 23 // NotifyTarget defines methods needed to keep an external database 24 // updated with who holds leases. (In non-test code the notify target 25 // will generally be the state DB.) 26 type NotifyTarget interface { 27 // Claimed will be called when a new lease has been claimed. Not 28 // allowed to return an error because this is purely advisory - 29 // the lease claim has still occurred, whether or not the callback 30 // succeeds. 31 Claimed(lease.Key, string) 32 33 // Expired will be called when an existing lease has expired. Not 34 // allowed to return an error because this is purely advisory. 35 Expired(lease.Key) 36 } 37 38 // TrapdoorFunc returns a trapdoor to be attached to lease details for 39 // use by clients. This is intended to hold assertions that can be 40 // added to state transactions to ensure the lease is still held when 41 // the transaction is applied. 42 type TrapdoorFunc func(lease.Key, string) lease.Trapdoor 43 44 // ReadonlyFSM defines the methods of the lease FSM the store can use 45 // - any writes must go through the hub. 46 type ReadonlyFSM interface { 47 // Leases receives a func for retrieving time, because it needs to be 48 // determined after potential lock-waiting to be accurate. 49 Leases(func() time.Time, ...lease.Key) map[lease.Key]lease.Info 50 GlobalTime() time.Time 51 Pinned() map[lease.Key][]string 52 } 53 54 // StoreConfig holds resources and settings needed to run the Store. 55 type StoreConfig struct { 56 FSM ReadonlyFSM 57 Hub *pubsub.StructuredHub 58 Trapdoor TrapdoorFunc 59 RequestTopic string 60 ResponseTopic func(requestID uint64) string 61 62 Clock clock.Clock 63 ForwardTimeout time.Duration 64 } 65 66 // NewStore returns a core/lease.Store that manages leases in Raft. 67 func NewStore(config StoreConfig) *Store { 68 return &Store{ 69 fsm: config.FSM, 70 hub: config.Hub, 71 config: config, 72 prevTime: config.FSM.GlobalTime(), 73 metrics: newMetricsCollector(), 74 } 75 } 76 77 // Store manages a raft FSM and forwards writes through a pubsub hub. 78 type Store struct { 79 fsm ReadonlyFSM 80 hub *pubsub.StructuredHub 81 requestID uint64 82 config StoreConfig 83 metrics *metricsCollector 84 85 prevTimeMu sync.Mutex 86 prevTime time.Time 87 } 88 89 // Autoexpire is part of lease.Store. 90 func (*Store) Autoexpire() bool { return true } 91 92 // ClaimLease is part of lease.Store. 93 func (s *Store) ClaimLease(key lease.Key, req lease.Request) error { 94 err := s.runOnLeader(&Command{ 95 Version: CommandVersion, 96 Operation: OperationClaim, 97 Namespace: key.Namespace, 98 ModelUUID: key.ModelUUID, 99 Lease: key.Lease, 100 Holder: req.Holder, 101 Duration: req.Duration, 102 }) 103 return errors.Trace(err) 104 } 105 106 // ExtendLease is part of lease.Store. 107 func (s *Store) ExtendLease(key lease.Key, req lease.Request) error { 108 return errors.Trace(s.runOnLeader(&Command{ 109 Version: CommandVersion, 110 Operation: OperationExtend, 111 Namespace: key.Namespace, 112 ModelUUID: key.ModelUUID, 113 Lease: key.Lease, 114 Holder: req.Holder, 115 Duration: req.Duration, 116 })) 117 } 118 119 // ExpireLease is part of lease.Store. 120 func (s *Store) ExpireLease(key lease.Key) error { 121 // It's always an invalid operation - expiration happens 122 // automatically when time is advanced. 123 return lease.ErrInvalid 124 } 125 126 // Leases is part of lease.Store. 127 func (s *Store) Leases(keys ...lease.Key) map[lease.Key]lease.Info { 128 leaseMap := s.fsm.Leases(s.config.Clock.Now, keys...) 129 // Add trapdoors into the information from the FSM. 130 for k, v := range leaseMap { 131 v.Trapdoor = s.config.Trapdoor(k, v.Holder) 132 leaseMap[k] = v 133 } 134 return leaseMap 135 } 136 137 // Refresh is part of lease.Store. 138 func (s *Store) Refresh() error { 139 return nil 140 } 141 142 // PinLease is part of lease.Store. 143 func (s *Store) PinLease(key lease.Key, entity string) error { 144 return errors.Trace(s.pinOp(OperationPin, key, entity)) 145 } 146 147 // UnpinLease is part of lease.Store. 148 func (s *Store) UnpinLease(key lease.Key, entity string) error { 149 return errors.Trace(s.pinOp(OperationUnpin, key, entity)) 150 } 151 152 // Pinned is part of the Store interface. 153 func (s *Store) Pinned() map[lease.Key][]string { 154 return s.fsm.Pinned() 155 } 156 157 func (s *Store) pinOp(operation string, key lease.Key, entity string) error { 158 return errors.Trace(s.runOnLeader(&Command{ 159 Version: CommandVersion, 160 Operation: operation, 161 Namespace: key.Namespace, 162 ModelUUID: key.ModelUUID, 163 Lease: key.Lease, 164 PinEntity: entity, 165 })) 166 } 167 168 // Advance is part of globalclock.Updater. 169 func (s *Store) Advance(duration time.Duration) error { 170 s.prevTimeMu.Lock() 171 defer s.prevTimeMu.Unlock() 172 newTime := s.prevTime.Add(duration) 173 err := s.runOnLeader(&Command{ 174 Version: CommandVersion, 175 Operation: OperationSetTime, 176 OldTime: s.prevTime, 177 NewTime: newTime, 178 }) 179 if globalclock.IsConcurrentUpdate(err) { 180 // Someone else updated before us - get the new time. 181 s.prevTime = s.fsm.GlobalTime() 182 } else if lease.IsTimeout(err) { 183 // Convert this to a globalclock timeout to match the Updater 184 // interface. 185 err = globalclock.ErrTimeout 186 } else if err == nil { 187 s.prevTime = newTime 188 } 189 return errors.Trace(err) 190 } 191 192 func (s *Store) runOnLeader(command *Command) error { 193 bytes, err := command.Marshal() 194 if err != nil { 195 return errors.Trace(err) 196 } 197 requestID := atomic.AddUint64(&s.requestID, 1) 198 responseTopic := s.config.ResponseTopic(requestID) 199 200 responseChan := make(chan ForwardResponse, 1) 201 errChan := make(chan error) 202 unsubscribe, err := s.hub.Subscribe( 203 responseTopic, 204 func(_ string, resp ForwardResponse, err error) { 205 if err != nil { 206 errChan <- err 207 return 208 } 209 responseChan <- resp 210 }, 211 ) 212 if err != nil { 213 return errors.Trace(err) 214 } 215 defer unsubscribe() 216 217 start := time.Now() 218 defer func() { 219 elapsed := time.Now().Sub(start) 220 logger.Tracef("runOnLeader elapsed from publish: %v", elapsed.Round(time.Millisecond)) 221 }() 222 _, err = s.hub.Publish(s.config.RequestTopic, ForwardRequest{ 223 Command: string(bytes), 224 ResponseTopic: responseTopic, 225 }) 226 if err != nil { 227 s.record(command.Operation, "error", start) 228 return errors.Trace(err) 229 } 230 231 select { 232 case <-s.config.Clock.After(s.config.ForwardTimeout): 233 logger.Infof("timeout") 234 s.record(command.Operation, "timeout", start) 235 return lease.ErrTimeout 236 case err := <-errChan: 237 logger.Errorf("%v", err) 238 s.record(command.Operation, "error", start) 239 return errors.Trace(err) 240 case response := <-responseChan: 241 err := RecoverError(response.Error) 242 logger.Tracef("got response, err %v", err) 243 result := "failure" 244 if err == nil { 245 result = "success" 246 } 247 s.record(command.Operation, result, start) 248 return err 249 } 250 } 251 252 func (s *Store) record(operation, result string, start time.Time) { 253 elapsedMS := float64(time.Now().Sub(start)) / float64(time.Millisecond) 254 s.metrics.requests.With(prometheus.Labels{ 255 "operation": operation, 256 "result": result, 257 }).Observe(elapsedMS) 258 } 259 260 // ForwardRequest is a message sent over the hub to the raft forwarder 261 // (only running on the raft leader node). 262 type ForwardRequest struct { 263 Command string `yaml:"command"` 264 ResponseTopic string `yaml:"response-topic"` 265 } 266 267 // ForwardResponse is the response sent back from the raft forwarder. 268 type ForwardResponse struct { 269 Error *ResponseError `yaml:"error"` 270 } 271 272 // ResponseError is used for sending error values back to the lease 273 // store via the hub. 274 type ResponseError struct { 275 Message string `yaml:"message"` 276 Code string `yaml:"code"` 277 } 278 279 // AsResponseError returns a *ResponseError that can be sent back over 280 // the hub in response to a forwarded FSM command. 281 func AsResponseError(err error) *ResponseError { 282 if err == nil { 283 return nil 284 } 285 message := err.Error() 286 var code string 287 switch errors.Cause(err) { 288 case lease.ErrInvalid: 289 code = "invalid" 290 case globalclock.ErrConcurrentUpdate: 291 code = "concurrent-update" 292 default: 293 code = "error" 294 } 295 return &ResponseError{ 296 Message: message, 297 Code: code, 298 } 299 } 300 301 // RecoverError converts a ResponseError back into the specific error 302 // it represents, or into a generic error if it wasn't one of the 303 // singleton errors handled. 304 func RecoverError(resp *ResponseError) error { 305 if resp == nil { 306 return nil 307 } 308 switch resp.Code { 309 case "invalid": 310 return lease.ErrInvalid 311 case "concurrent-update": 312 return globalclock.ErrConcurrentUpdate 313 default: 314 return errors.New(resp.Message) 315 } 316 } 317 318 // Describe is part of prometheus.Collector. 319 func (s *Store) Describe(ch chan<- *prometheus.Desc) { 320 s.metrics.Describe(ch) 321 } 322 323 // Collect is part of prometheus.Collector. 324 func (s *Store) Collect(ch chan<- prometheus.Metric) { 325 s.metrics.Collect(ch) 326 }