github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/core/machinelock/machinelock.go (about)

     1  // Copyright 2018 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package machinelock
     5  
     6  import (
     7  	"fmt"
     8  	"runtime/debug"
     9  	"sort"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/juju/collections/deque"
    14  	"github.com/juju/errors"
    15  	"github.com/juju/mutex"
    16  	"gopkg.in/natefinch/lumberjack.v2"
    17  	"gopkg.in/yaml.v2"
    18  )
    19  
    20  // Filename represents the name of the logfile that is created in the LOG_DIR.
    21  const Filename = "machine-lock.log"
    22  
    23  // Lock is used to give external packages something to refer to.
    24  type Lock interface {
    25  	Acquire(spec Spec) (func(), error)
    26  	Report(opts ...ReportOption) (string, error)
    27  }
    28  
    29  // Clock provides an interface for dealing with clocks.
    30  type Clock interface {
    31  	// After waits for the duration to elapse and then sends the
    32  	// current time on the returned channel.
    33  	After(time.Duration) <-chan time.Time
    34  
    35  	// Now returns the current clock time.
    36  	Now() time.Time
    37  }
    38  
    39  // Logger defines the logging methods used by the machine lock.
    40  type Logger interface {
    41  	Debugf(string, ...interface{})
    42  	Warningf(string, ...interface{})
    43  }
    44  
    45  // Config defines the attributes needed to correctly construct
    46  // a machine lock.
    47  type Config struct {
    48  	AgentName   string
    49  	Clock       Clock
    50  	Logger      Logger
    51  	LogFilename string
    52  }
    53  
    54  // Validate ensures that all the required config values are set.
    55  func (c Config) Validate() error {
    56  	if c.AgentName == "" {
    57  		return errors.NotValidf("missing AgentName")
    58  	}
    59  	if c.Clock == nil {
    60  		return errors.NotValidf("missing Clock")
    61  	}
    62  	if c.Logger == nil {
    63  		return errors.NotValidf("missing Logger")
    64  	}
    65  	if c.LogFilename == "" {
    66  		return errors.NotValidf("missing LogFilename")
    67  	}
    68  	return nil
    69  }
    70  
    71  // New creates a new machine lock.
    72  func New(config Config) (*lock, error) {
    73  	if err := config.Validate(); err != nil {
    74  		return nil, errors.Trace(err)
    75  	}
    76  	lock := &lock{
    77  		agent:       config.AgentName,
    78  		clock:       config.Clock,
    79  		logger:      config.Logger,
    80  		logFilename: config.LogFilename,
    81  		acquire:     mutex.Acquire,
    82  		spec: mutex.Spec{
    83  			Name:  "machine-lock",
    84  			Clock: config.Clock,
    85  			Delay: 250 * time.Millisecond,
    86  			// Cancel is added in Acquire.
    87  		},
    88  		waiting: make(map[int]*info),
    89  		history: deque.NewWithMaxLen(1000),
    90  	}
    91  	lock.setStartMessage()
    92  	return lock, nil
    93  }
    94  
    95  func (c *lock) setStartMessage() {
    96  	now := c.clock.Now().Format(timeFormat)
    97  	// The reason that we don't attempt to write the start message out immediately
    98  	// is that there may be multiple agents on the machine writing to the same log file.
    99  	// The way that we control this is to open and close the file while we hold the
   100  	// machine lock. We don't want to acquire the lock just to write out the agent
   101  	// has started as this would potentially hold up the agent starting while another
   102  	// agent holds the lock. However it is very useful to have the agent start time
   103  	// recorded in the central file, so we save it and write it out the first time.
   104  	c.startMessage = fmt.Sprintf("%s === agent %s started ===", now, c.agent)
   105  }
   106  
   107  // Spec is an argument struct for the `Acquire` method.
   108  // It must have a Cancel channel and a Worker name defined.
   109  type Spec struct {
   110  	Cancel <-chan struct{}
   111  	// The purpose of the NoCancel is to ensure that there isn't
   112  	// an accidental forgetting of the cancel channel. The primary
   113  	// use case for this is the reboot worker that doesn't want to
   114  	// pass in a cancel channel because it really wants to reboot.
   115  	NoCancel bool
   116  	Worker   string
   117  	Comment  string
   118  }
   119  
   120  // Validate ensures that a Cancel channel and a Worker name are defined.
   121  func (s Spec) Validate() error {
   122  	if s.Cancel == nil {
   123  		if !s.NoCancel {
   124  			return errors.NotValidf("missing Cancel")
   125  		}
   126  	}
   127  	if s.Worker == "" {
   128  		return errors.NotValidf("mssing Worker")
   129  	}
   130  	return nil
   131  }
   132  
   133  // Acquire will attempt to acquire the machine hook execution lock.
   134  // The method returns an error if the spec is invalid, or if the Cancel
   135  // channel is signalled before the lock is acquired.
   136  func (c *lock) Acquire(spec Spec) (func(), error) {
   137  	if err := spec.Validate(); err != nil {
   138  		return nil, errors.Trace(err)
   139  	}
   140  	current := &info{
   141  		worker:    spec.Worker,
   142  		comment:   spec.Comment,
   143  		stack:     string(debug.Stack()),
   144  		requested: c.clock.Now(),
   145  	}
   146  	c.mu.Lock()
   147  
   148  	id := c.next
   149  	c.next++
   150  	c.waiting[id] = current
   151  
   152  	mSpec := c.spec
   153  	mSpec.Cancel = spec.Cancel
   154  
   155  	c.mu.Unlock()
   156  	c.logger.Debugf("acquire machine lock for %s (%s)", spec.Worker, spec.Comment)
   157  	releaser, err := c.acquire(mSpec)
   158  	c.mu.Lock()
   159  	defer c.mu.Unlock()
   160  	// Remove from the waiting map.
   161  	delete(c.waiting, id)
   162  
   163  	if err != nil {
   164  		return nil, errors.Trace(err)
   165  	}
   166  	c.logger.Debugf("machine lock acquired for %s (%s)", spec.Worker, spec.Comment)
   167  	c.holder = current
   168  	current.acquired = c.clock.Now()
   169  	return func() {
   170  		// We need to acquire the mutex before we call the releaser
   171  		// to ensure that we move the current to the history before
   172  		// another pending acquisition overwrites the current value.
   173  		c.mu.Lock()
   174  		defer c.mu.Unlock()
   175  		// We write the log file entry before we release the execution
   176  		// lock to ensure that no other agent is attempting to write to the
   177  		// log file.
   178  		current.released = c.clock.Now()
   179  		c.writeLogEntry()
   180  		c.logger.Debugf("machine lock released for %s (%s)", spec.Worker, spec.Comment)
   181  		releaser.Release()
   182  		c.history.PushFront(current)
   183  		c.holder = nil
   184  	}, nil
   185  }
   186  
   187  func (c *lock) writeLogEntry() {
   188  	// At the time this method is called, the holder is still set and the lock's
   189  	// mutex is held.
   190  	writer := &lumberjack.Logger{
   191  		Filename:   c.logFilename,
   192  		MaxSize:    10, // megabytes
   193  		MaxBackups: 5,
   194  		Compress:   true,
   195  	}
   196  	defer writer.Close()
   197  
   198  	if c.startMessage != "" {
   199  		_, err := fmt.Fprintln(writer, c.startMessage)
   200  		if err != nil {
   201  			c.logger.Warningf("unable to write startMessage: %s", err.Error())
   202  		}
   203  		c.startMessage = ""
   204  	}
   205  
   206  	_, err := fmt.Fprintln(writer, simpleInfo(c.agent, c.holder, c.clock.Now()))
   207  	if err != nil {
   208  		c.logger.Warningf("unable to release message: %s", err.Error())
   209  	}
   210  }
   211  
   212  type info struct {
   213  	// worker is the worker that wants or has the lock.
   214  	worker string
   215  	// comment is provided by the worker to say what they are doing.
   216  	comment string
   217  	// stack trace for additional debugging
   218  	stack string
   219  
   220  	requested time.Time
   221  	acquired  time.Time
   222  	released  time.Time
   223  }
   224  
   225  type lock struct {
   226  	agent        string
   227  	clock        Clock
   228  	logger       Logger
   229  	logFilename  string
   230  	startMessage string
   231  
   232  	acquire func(mutex.Spec) (mutex.Releaser, error)
   233  
   234  	spec mutex.Spec
   235  
   236  	mu      sync.Mutex
   237  	next    int
   238  	holder  *info
   239  	waiting map[int]*info
   240  	history *deque.Deque
   241  }
   242  
   243  type ReportOption int
   244  
   245  const (
   246  	ShowHistory ReportOption = iota
   247  	ShowStack
   248  	ShowDetailsYAML
   249  )
   250  
   251  func contains(opts []ReportOption, opt ReportOption) bool {
   252  	for _, value := range opts {
   253  		if value == opt {
   254  			return true
   255  		}
   256  	}
   257  	return false
   258  }
   259  
   260  type reportInfo struct {
   261  	Worker  string `yaml:"worker"`
   262  	Comment string `yaml:"comment,omitempty"`
   263  
   264  	Requested string `yaml:"requested,omitempty"`
   265  	Acquired  string `yaml:"acquired,omitempty"`
   266  	Released  string `yaml:"released,omitempty"`
   267  
   268  	WaitTime time.Duration `yaml:"wait-time,omitempty"`
   269  	HoldTime time.Duration `yaml:"hold-time,omitempty"`
   270  
   271  	Stack string `yaml:"stack,omitempty"`
   272  }
   273  
   274  type report struct {
   275  	Holder  interface{}   `yaml:"holder"`
   276  	Waiting []interface{} `yaml:"waiting,omitempty"`
   277  	History []interface{} `yaml:"history,omitempty"`
   278  }
   279  
   280  func (c *lock) Report(opts ...ReportOption) (string, error) {
   281  
   282  	includeStack := contains(opts, ShowStack)
   283  	detailsYAML := contains(opts, ShowDetailsYAML)
   284  
   285  	c.mu.Lock()
   286  	defer c.mu.Unlock()
   287  	now := c.clock.Now()
   288  
   289  	r := report{
   290  		Holder: displayInfo(c.holder, includeStack, detailsYAML, now),
   291  	}
   292  	// Show the waiting with oldest first, which will have the smallest
   293  	// map key.
   294  	for _, key := range sortedKeys(c.waiting) {
   295  		r.Waiting = append(r.Waiting, displayInfo(c.waiting[key], includeStack, detailsYAML, now))
   296  	}
   297  	if contains(opts, ShowHistory) {
   298  		iter := c.history.Iterator()
   299  		var v *info
   300  		for iter.Next(&v) {
   301  			r.History = append(r.History, displayInfo(v, includeStack, detailsYAML, now))
   302  		}
   303  	}
   304  
   305  	output := map[string]report{c.agent: r}
   306  	out, err := yaml.Marshal(output)
   307  	if err != nil {
   308  		return "", errors.Trace(err)
   309  	}
   310  	return string(out), nil
   311  }
   312  
   313  func sortedKeys(m map[int]*info) []int {
   314  	values := make([]int, 0, len(m))
   315  	for key := range m {
   316  		values = append(values, key)
   317  	}
   318  	sort.Ints(values)
   319  	return values
   320  }
   321  
   322  func timeOutput(t time.Time) string {
   323  	if t.IsZero() {
   324  		return ""
   325  	}
   326  	return t.String()
   327  }
   328  
   329  func displayInfo(info *info, includeStack, detailsYAML bool, now time.Time) interface{} {
   330  	if !detailsYAML {
   331  		return simpleInfo("", info, now)
   332  	}
   333  	if info == nil {
   334  		return nil
   335  	}
   336  	output := reportInfo{
   337  		Worker:    info.worker,
   338  		Comment:   info.comment,
   339  		Requested: timeOutput(info.requested),
   340  		Acquired:  timeOutput(info.acquired),
   341  		Released:  timeOutput(info.released),
   342  	}
   343  	var other time.Time
   344  	if info.acquired.IsZero() {
   345  		other = now
   346  	} else {
   347  		if info.released.IsZero() {
   348  			other = now
   349  		} else {
   350  			other = info.released
   351  		}
   352  		output.HoldTime = other.Sub(info.acquired).Round(time.Second)
   353  		// Now set other for the wait time.
   354  		other = info.acquired
   355  	}
   356  	output.WaitTime = other.Sub(info.requested).Round(time.Second)
   357  	return &output
   358  }
   359  
   360  func simpleInfo(agent string, info *info, now time.Time) string {
   361  	if info == nil {
   362  		return "none"
   363  	}
   364  	msg := info.worker
   365  	if info.comment != "" {
   366  		msg += " (" + info.comment + ")"
   367  	}
   368  	// We pass in agent when writing to the file, but not for the report.
   369  	// This allows us to have the agent in the file but keep the first column
   370  	// aligned for timestamps.
   371  	if agent != "" {
   372  		msg = agent + ": " + msg
   373  	}
   374  	if info.acquired.IsZero() {
   375  		waiting := now.Sub(info.requested).Round(time.Second)
   376  		return fmt.Sprintf("%s, waiting %s", msg, waiting)
   377  	}
   378  	if info.released.IsZero() {
   379  		holding := now.Sub(info.acquired).Round(time.Second)
   380  		return fmt.Sprintf("%s, holding %s", msg, holding)
   381  	}
   382  	ts := info.released.Format(timeFormat)
   383  	waited := info.acquired.Sub(info.requested).Round(time.Second)
   384  	held := info.released.Sub(info.acquired).Round(time.Second)
   385  	return fmt.Sprintf("%s %s, waited %s, held %s", ts, msg, waited, held)
   386  }
   387  
   388  const timeFormat = "2006-01-02 15:04:05"