github.com/juju/juju@v0.0.0-20240430160146-1752b71fcf00/state/metricsmanager.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package state
     5  
     6  import (
     7  	"time"
     8  
     9  	"github.com/juju/errors"
    10  	"github.com/juju/mgo/v3"
    11  	"github.com/juju/mgo/v3/bson"
    12  	"github.com/juju/mgo/v3/txn"
    13  	jujutxn "github.com/juju/txn/v3"
    14  )
    15  
    16  const (
    17  	defaultGracePeriod                      = 7 * 24 * time.Hour // 1 week in hours
    18  	metricsManagerConsecutiveErrorThreshold = 3
    19  )
    20  
    21  func metricsManagerKey(st *State) string {
    22  	return st.docID("metricsManager")
    23  }
    24  
    25  // MetricsManager stores data about the state of the metrics manager
    26  type MetricsManager struct {
    27  	st     *State
    28  	doc    metricsManagerDoc
    29  	status meterStatusDoc
    30  }
    31  
    32  type metricsManagerDoc struct {
    33  	LastSuccessfulSend time.Time     `bson:"lastsuccessfulsend"`
    34  	ConsecutiveErrors  int           `bson:"consecutiveerrors"`
    35  	GracePeriod        time.Duration `bson:"graceperiod"`
    36  }
    37  
    38  // LastSuccessfulSend returns the time of the last successful send.
    39  func (m *MetricsManager) LastSuccessfulSend() time.Time {
    40  	return m.doc.LastSuccessfulSend
    41  }
    42  
    43  // ConsecutiveErrors returns the number of consecutive failures.
    44  func (m *MetricsManager) ConsecutiveErrors() int {
    45  	return m.doc.ConsecutiveErrors
    46  }
    47  
    48  // GracePeriod returns the current grace period.
    49  func (m *MetricsManager) GracePeriod() time.Duration {
    50  	return m.doc.GracePeriod
    51  }
    52  
    53  // MetricsManager returns an existing metricsmanager, or a new one if non exists.
    54  func (st *State) MetricsManager() (*MetricsManager, error) {
    55  	mm, err := st.getMetricsManager()
    56  	if errors.IsNotFound(err) {
    57  		return st.newMetricsManager()
    58  	} else if err != nil {
    59  		return nil, errors.Trace(err)
    60  	}
    61  	return mm, nil
    62  }
    63  
    64  func (st *State) newMetricsManager() (*MetricsManager, error) {
    65  	buildTxn := func(attempt int) ([]txn.Op, error) {
    66  		if attempt > 1 {
    67  			if _, err := st.getMetricsManager(); err == nil {
    68  				return nil, jujutxn.ErrNoOperations
    69  			}
    70  		}
    71  		id := metricsManagerKey(st)
    72  		mm := &MetricsManager{
    73  			st: st,
    74  			doc: metricsManagerDoc{
    75  				LastSuccessfulSend: time.Time{},
    76  				ConsecutiveErrors:  0,
    77  				GracePeriod:        defaultGracePeriod,
    78  			},
    79  			status: meterStatusDoc{
    80  				Code:      meterString[MeterNotSet],
    81  				ModelUUID: st.ModelUUID(),
    82  			},
    83  		}
    84  		return []txn.Op{{
    85  			C:      metricsManagerC,
    86  			Id:     id,
    87  			Assert: txn.DocMissing,
    88  			Insert: mm.doc,
    89  		}, {
    90  			C:      meterStatusC,
    91  			Id:     id,
    92  			Assert: txn.DocMissing,
    93  			Insert: mm.status,
    94  		}}, nil
    95  	}
    96  	err := st.db().Run(buildTxn)
    97  	if err != nil {
    98  		return nil, onAbort(err, errors.NotFoundf("metrics manager"))
    99  	}
   100  	return st.getMetricsManager()
   101  }
   102  
   103  func (st *State) getMetricsManager() (*MetricsManager, error) {
   104  	coll, closer := st.db().GetCollection(metricsManagerC)
   105  	defer closer()
   106  	var doc metricsManagerDoc
   107  	err := coll.FindId(metricsManagerKey(st)).One(&doc)
   108  	if err == mgo.ErrNotFound {
   109  		return nil, errors.NotFoundf("metrics manager")
   110  	} else if err != nil {
   111  		return nil, errors.Trace(err)
   112  	}
   113  	collS, closerS := st.db().GetCollection(meterStatusC)
   114  	defer closerS()
   115  	status := meterStatusDoc{
   116  		Code:      meterString[MeterNotSet],
   117  		ModelUUID: st.ModelUUID(),
   118  	}
   119  	err = collS.FindId(metricsManagerKey(st)).One(&status)
   120  	if err != nil && err != mgo.ErrNotFound {
   121  		return nil, errors.Trace(err)
   122  	}
   123  	return &MetricsManager{
   124  		st:     st,
   125  		doc:    doc,
   126  		status: status,
   127  	}, nil
   128  }
   129  
   130  func (m *MetricsManager) updateMetricsManager(update bson.M, status *bson.M) error {
   131  	buildTxn := func(attempt int) ([]txn.Op, error) {
   132  		if attempt > 0 {
   133  			if _, err := m.st.getMetricsManager(); errors.IsNotFound(err) {
   134  				return nil, jujutxn.ErrNoOperations
   135  			} else if err != nil {
   136  				return nil, errors.Trace(err)
   137  			}
   138  		}
   139  		ops := []txn.Op{{
   140  			C:      metricsManagerC,
   141  			Id:     metricsManagerKey(m.st),
   142  			Assert: txn.DocExists,
   143  			Update: update,
   144  		}}
   145  		if status != nil {
   146  			ops = append(ops, txn.Op{
   147  				C:      meterStatusC,
   148  				Id:     metricsManagerKey(m.st),
   149  				Assert: txn.DocExists,
   150  				Update: *status,
   151  			})
   152  		}
   153  		return ops, nil
   154  	}
   155  
   156  	if err := m.st.db().Run(buildTxn); err != nil {
   157  		return errors.Trace(err)
   158  	}
   159  	return nil
   160  }
   161  
   162  // SetLastSuccessfulSend sets the last successful send time to the input time.
   163  func (m *MetricsManager) SetLastSuccessfulSend(t time.Time) error {
   164  	var status *bson.M
   165  	if m.status.Code != meterString[MeterGreen] {
   166  		status = &bson.M{
   167  			"$set": bson.M{
   168  				"code": meterString[MeterGreen],
   169  				"info": "",
   170  			},
   171  		}
   172  	}
   173  	err := m.updateMetricsManager(
   174  		bson.M{
   175  			"$set": bson.M{
   176  				"lastsuccessfulsend": t.UTC(),
   177  				"consecutiveerrors":  0,
   178  			},
   179  		},
   180  		status,
   181  	)
   182  	if err != nil {
   183  		return errors.Trace(err)
   184  	}
   185  
   186  	m.doc.LastSuccessfulSend = t.UTC()
   187  	m.doc.ConsecutiveErrors = 0
   188  	return nil
   189  }
   190  
   191  func (m *MetricsManager) SetGracePeriod(t time.Duration) error {
   192  	if t < 0 {
   193  		return errors.New("grace period can't be negative")
   194  	}
   195  	m1 := MetricsManager{
   196  		st:  m.st,
   197  		doc: m.doc,
   198  	}
   199  	m1.doc.GracePeriod = t
   200  	newStatus := m1.MeterStatus()
   201  
   202  	var statusUpdate *bson.M
   203  	if newStatus != m.MeterStatus() {
   204  		statusUpdate = &bson.M{
   205  			"$set": bson.M{
   206  				"code":       meterString[newStatus.Code],
   207  				"info":       newStatus.Info,
   208  				"model-uuid": m.st.ModelUUID(),
   209  			},
   210  		}
   211  	}
   212  
   213  	err := m.updateMetricsManager(
   214  		bson.M{"$set": bson.M{
   215  			"graceperiod": t,
   216  		}},
   217  		statusUpdate,
   218  	)
   219  	if err != nil {
   220  		return errors.Trace(err)
   221  	}
   222  	m.doc.GracePeriod = t
   223  	return nil
   224  }
   225  
   226  // IncrementConsecutiveErrors adds 1 to the consecutive errors count.
   227  func (m *MetricsManager) IncrementConsecutiveErrors() error {
   228  	m1 := MetricsManager{
   229  		st:  m.st,
   230  		doc: m.doc,
   231  	}
   232  	m1.doc.ConsecutiveErrors++
   233  	newStatus := m1.MeterStatus()
   234  
   235  	var statusUpdate *bson.M
   236  	if newStatus != m.MeterStatus() {
   237  		statusUpdate = &bson.M{
   238  			"$set": bson.M{
   239  				"code":       meterString[newStatus.Code],
   240  				"info":       newStatus.Info,
   241  				"model-uuid": m.st.ModelUUID(),
   242  			},
   243  		}
   244  	}
   245  	err := m.updateMetricsManager(
   246  		bson.M{"$inc": bson.M{"consecutiveerrors": 1}},
   247  		statusUpdate,
   248  	)
   249  	if err != nil {
   250  		return errors.Trace(err)
   251  	}
   252  	m.doc.ConsecutiveErrors++
   253  	return nil
   254  }
   255  
   256  func (m *MetricsManager) gracePeriodExceeded() bool {
   257  	now := m.st.clock().Now()
   258  	t := m.LastSuccessfulSend().Add(m.GracePeriod())
   259  	return t.Before(now) || t.Equal(now)
   260  }
   261  
   262  // MeterStatus returns the overall state of the MetricsManager as a meter status summary.
   263  func (m *MetricsManager) MeterStatus() MeterStatus {
   264  	if m.ConsecutiveErrors() < metricsManagerConsecutiveErrorThreshold {
   265  		return MeterStatus{Code: MeterGreen, Info: "ok"}
   266  	}
   267  	if m.gracePeriodExceeded() {
   268  		return MeterStatus{Code: MeterRed, Info: "failed to send metrics, exceeded grace period"}
   269  	}
   270  	return MeterStatus{Code: MeterAmber, Info: "failed to send metrics"}
   271  }
   272  
   273  func (m *MetricsManager) ModelStatus() MeterStatus {
   274  	return MeterStatus{
   275  		Code: MeterStatusFromString(m.status.Code),
   276  		Info: m.status.Info,
   277  	}
   278  }