github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/core/machinelock/machinelock.go (about) 1 // Copyright 2018 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package machinelock 5 6 import ( 7 "fmt" 8 "runtime/debug" 9 "sort" 10 "sync" 11 "time" 12 13 "github.com/juju/collections/deque" 14 "github.com/juju/errors" 15 "github.com/juju/mutex" 16 "gopkg.in/natefinch/lumberjack.v2" 17 "gopkg.in/yaml.v2" 18 ) 19 20 // Filename represents the name of the logfile that is created in the LOG_DIR. 21 const Filename = "machine-lock.log" 22 23 // Lock is used to give external packages something to refer to. 24 type Lock interface { 25 Acquire(spec Spec) (func(), error) 26 Report(opts ...ReportOption) (string, error) 27 } 28 29 // Clock provides an interface for dealing with clocks. 30 type Clock interface { 31 // After waits for the duration to elapse and then sends the 32 // current time on the returned channel. 33 After(time.Duration) <-chan time.Time 34 35 // Now returns the current clock time. 36 Now() time.Time 37 } 38 39 // Logger defines the logging methods used by the machine lock. 40 type Logger interface { 41 Debugf(string, ...interface{}) 42 Warningf(string, ...interface{}) 43 } 44 45 // Config defines the attributes needed to correctly construct 46 // a machine lock. 47 type Config struct { 48 AgentName string 49 Clock Clock 50 Logger Logger 51 LogFilename string 52 } 53 54 // Validate ensures that all the required config values are set. 55 func (c Config) Validate() error { 56 if c.AgentName == "" { 57 return errors.NotValidf("missing AgentName") 58 } 59 if c.Clock == nil { 60 return errors.NotValidf("missing Clock") 61 } 62 if c.Logger == nil { 63 return errors.NotValidf("missing Logger") 64 } 65 if c.LogFilename == "" { 66 return errors.NotValidf("missing LogFilename") 67 } 68 return nil 69 } 70 71 // New creates a new machine lock. 72 func New(config Config) (*lock, error) { 73 if err := config.Validate(); err != nil { 74 return nil, errors.Trace(err) 75 } 76 lock := &lock{ 77 agent: config.AgentName, 78 clock: config.Clock, 79 logger: config.Logger, 80 logFilename: config.LogFilename, 81 acquire: mutex.Acquire, 82 spec: mutex.Spec{ 83 Name: "machine-lock", 84 Clock: config.Clock, 85 Delay: 250 * time.Millisecond, 86 // Cancel is added in Acquire. 87 }, 88 waiting: make(map[int]*info), 89 history: deque.NewWithMaxLen(1000), 90 } 91 lock.setStartMessage() 92 return lock, nil 93 } 94 95 func (c *lock) setStartMessage() { 96 now := c.clock.Now().Format(timeFormat) 97 // The reason that we don't attempt to write the start message out immediately 98 // is that there may be multiple agents on the machine writing to the same log file. 99 // The way that we control this is to open and close the file while we hold the 100 // machine lock. We don't want to acquire the lock just to write out the agent 101 // has started as this would potentially hold up the agent starting while another 102 // agent holds the lock. However it is very useful to have the agent start time 103 // recorded in the central file, so we save it and write it out the first time. 104 c.startMessage = fmt.Sprintf("%s === agent %s started ===", now, c.agent) 105 } 106 107 // Spec is an argument struct for the `Acquire` method. 108 // It must have a Cancel channel and a Worker name defined. 109 type Spec struct { 110 Cancel <-chan struct{} 111 // The purpose of the NoCancel is to ensure that there isn't 112 // an accidental forgetting of the cancel channel. The primary 113 // use case for this is the reboot worker that doesn't want to 114 // pass in a cancel channel because it really wants to reboot. 115 NoCancel bool 116 Worker string 117 Comment string 118 } 119 120 // Validate ensures that a Cancel channel and a Worker name are defined. 121 func (s Spec) Validate() error { 122 if s.Cancel == nil { 123 if !s.NoCancel { 124 return errors.NotValidf("missing Cancel") 125 } 126 } 127 if s.Worker == "" { 128 return errors.NotValidf("mssing Worker") 129 } 130 return nil 131 } 132 133 // Acquire will attempt to acquire the machine hook execution lock. 134 // The method returns an error if the spec is invalid, or if the Cancel 135 // channel is signalled before the lock is acquired. 136 func (c *lock) Acquire(spec Spec) (func(), error) { 137 if err := spec.Validate(); err != nil { 138 return nil, errors.Trace(err) 139 } 140 current := &info{ 141 worker: spec.Worker, 142 comment: spec.Comment, 143 stack: string(debug.Stack()), 144 requested: c.clock.Now(), 145 } 146 c.mu.Lock() 147 148 id := c.next 149 c.next++ 150 c.waiting[id] = current 151 152 mSpec := c.spec 153 mSpec.Cancel = spec.Cancel 154 155 c.mu.Unlock() 156 c.logger.Debugf("acquire machine lock for %s (%s)", spec.Worker, spec.Comment) 157 releaser, err := c.acquire(mSpec) 158 c.mu.Lock() 159 defer c.mu.Unlock() 160 // Remove from the waiting map. 161 delete(c.waiting, id) 162 163 if err != nil { 164 return nil, errors.Trace(err) 165 } 166 c.logger.Debugf("machine lock acquired for %s (%s)", spec.Worker, spec.Comment) 167 c.holder = current 168 current.acquired = c.clock.Now() 169 return func() { 170 // We need to acquire the mutex before we call the releaser 171 // to ensure that we move the current to the history before 172 // another pending acquisition overwrites the current value. 173 c.mu.Lock() 174 defer c.mu.Unlock() 175 // We write the log file entry before we release the execution 176 // lock to ensure that no other agent is attempting to write to the 177 // log file. 178 current.released = c.clock.Now() 179 c.writeLogEntry() 180 c.logger.Debugf("machine lock released for %s (%s)", spec.Worker, spec.Comment) 181 releaser.Release() 182 c.history.PushFront(current) 183 c.holder = nil 184 }, nil 185 } 186 187 func (c *lock) writeLogEntry() { 188 // At the time this method is called, the holder is still set and the lock's 189 // mutex is held. 190 writer := &lumberjack.Logger{ 191 Filename: c.logFilename, 192 MaxSize: 10, // megabytes 193 MaxBackups: 5, 194 Compress: true, 195 } 196 defer writer.Close() 197 198 if c.startMessage != "" { 199 _, err := fmt.Fprintln(writer, c.startMessage) 200 if err != nil { 201 c.logger.Warningf("unable to write startMessage: %s", err.Error()) 202 } 203 c.startMessage = "" 204 } 205 206 _, err := fmt.Fprintln(writer, simpleInfo(c.agent, c.holder, c.clock.Now())) 207 if err != nil { 208 c.logger.Warningf("unable to release message: %s", err.Error()) 209 } 210 } 211 212 type info struct { 213 // worker is the worker that wants or has the lock. 214 worker string 215 // comment is provided by the worker to say what they are doing. 216 comment string 217 // stack trace for additional debugging 218 stack string 219 220 requested time.Time 221 acquired time.Time 222 released time.Time 223 } 224 225 type lock struct { 226 agent string 227 clock Clock 228 logger Logger 229 logFilename string 230 startMessage string 231 232 acquire func(mutex.Spec) (mutex.Releaser, error) 233 234 spec mutex.Spec 235 236 mu sync.Mutex 237 next int 238 holder *info 239 waiting map[int]*info 240 history *deque.Deque 241 } 242 243 type ReportOption int 244 245 const ( 246 ShowHistory ReportOption = iota 247 ShowStack 248 ShowDetailsYAML 249 ) 250 251 func contains(opts []ReportOption, opt ReportOption) bool { 252 for _, value := range opts { 253 if value == opt { 254 return true 255 } 256 } 257 return false 258 } 259 260 type reportInfo struct { 261 Worker string `yaml:"worker"` 262 Comment string `yaml:"comment,omitempty"` 263 264 Requested string `yaml:"requested,omitempty"` 265 Acquired string `yaml:"acquired,omitempty"` 266 Released string `yaml:"released,omitempty"` 267 268 WaitTime time.Duration `yaml:"wait-time,omitempty"` 269 HoldTime time.Duration `yaml:"hold-time,omitempty"` 270 271 Stack string `yaml:"stack,omitempty"` 272 } 273 274 type report struct { 275 Holder interface{} `yaml:"holder"` 276 Waiting []interface{} `yaml:"waiting,omitempty"` 277 History []interface{} `yaml:"history,omitempty"` 278 } 279 280 func (c *lock) Report(opts ...ReportOption) (string, error) { 281 282 includeStack := contains(opts, ShowStack) 283 detailsYAML := contains(opts, ShowDetailsYAML) 284 285 c.mu.Lock() 286 defer c.mu.Unlock() 287 now := c.clock.Now() 288 289 r := report{ 290 Holder: displayInfo(c.holder, includeStack, detailsYAML, now), 291 } 292 // Show the waiting with oldest first, which will have the smallest 293 // map key. 294 for _, key := range sortedKeys(c.waiting) { 295 r.Waiting = append(r.Waiting, displayInfo(c.waiting[key], includeStack, detailsYAML, now)) 296 } 297 if contains(opts, ShowHistory) { 298 iter := c.history.Iterator() 299 var v *info 300 for iter.Next(&v) { 301 r.History = append(r.History, displayInfo(v, includeStack, detailsYAML, now)) 302 } 303 } 304 305 output := map[string]report{c.agent: r} 306 out, err := yaml.Marshal(output) 307 if err != nil { 308 return "", errors.Trace(err) 309 } 310 return string(out), nil 311 } 312 313 func sortedKeys(m map[int]*info) []int { 314 values := make([]int, 0, len(m)) 315 for key := range m { 316 values = append(values, key) 317 } 318 sort.Ints(values) 319 return values 320 } 321 322 func timeOutput(t time.Time) string { 323 if t.IsZero() { 324 return "" 325 } 326 return t.String() 327 } 328 329 func displayInfo(info *info, includeStack, detailsYAML bool, now time.Time) interface{} { 330 if !detailsYAML { 331 return simpleInfo("", info, now) 332 } 333 if info == nil { 334 return nil 335 } 336 output := reportInfo{ 337 Worker: info.worker, 338 Comment: info.comment, 339 Requested: timeOutput(info.requested), 340 Acquired: timeOutput(info.acquired), 341 Released: timeOutput(info.released), 342 } 343 var other time.Time 344 if info.acquired.IsZero() { 345 other = now 346 } else { 347 if info.released.IsZero() { 348 other = now 349 } else { 350 other = info.released 351 } 352 output.HoldTime = other.Sub(info.acquired).Round(time.Second) 353 // Now set other for the wait time. 354 other = info.acquired 355 } 356 output.WaitTime = other.Sub(info.requested).Round(time.Second) 357 return &output 358 } 359 360 func simpleInfo(agent string, info *info, now time.Time) string { 361 if info == nil { 362 return "none" 363 } 364 msg := info.worker 365 if info.comment != "" { 366 msg += " (" + info.comment + ")" 367 } 368 // We pass in agent when writing to the file, but not for the report. 369 // This allows us to have the agent in the file but keep the first column 370 // aligned for timestamps. 371 if agent != "" { 372 msg = agent + ": " + msg 373 } 374 if info.acquired.IsZero() { 375 waiting := now.Sub(info.requested).Round(time.Second) 376 return fmt.Sprintf("%s, waiting %s", msg, waiting) 377 } 378 if info.released.IsZero() { 379 holding := now.Sub(info.acquired).Round(time.Second) 380 return fmt.Sprintf("%s, holding %s", msg, holding) 381 } 382 ts := info.released.Format(timeFormat) 383 waited := info.acquired.Sub(info.requested).Round(time.Second) 384 held := info.released.Sub(info.acquired).Round(time.Second) 385 return fmt.Sprintf("%s %s, waited %s, held %s", ts, msg, waited, held) 386 } 387 388 const timeFormat = "2006-01-02 15:04:05"