github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/state_replication.go

package alertmanager

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus/promauto"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/alertmanager/cluster"
	"github.com/prometheus/alertmanager/cluster/clusterpb"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/cortexproject/cortex/pkg/alertmanager/alertspb"
	"github.com/cortexproject/cortex/pkg/alertmanager/alertstore"
)

const (
	defaultSettleReadTimeout = 15 * time.Second
	defaultStoreReadTimeout  = 15 * time.Second

	// Initial sync outcome label values.
	syncFromReplica  = "from-replica"
	syncFromStorage  = "from-storage"
	syncUserNotFound = "user-not-found"
	syncFailed       = "failed"
)

// state represents the Alertmanager silences and notification log internal state.
type state struct {
	services.Service

	userID string
	logger log.Logger
	reg    prometheus.Registerer

	settleReadTimeout time.Duration
	storeReadTimeout  time.Duration

	mtx    sync.Mutex
	states map[string]cluster.State

	replicationFactor int
	replicator        Replicator
	store             alertstore.AlertStore

	partialStateMergesTotal  *prometheus.CounterVec
	partialStateMergesFailed *prometheus.CounterVec
	stateReplicationTotal    *prometheus.CounterVec
	stateReplicationFailed   *prometheus.CounterVec
	fetchReplicaStateTotal   prometheus.Counter
	fetchReplicaStateFailed  prometheus.Counter
	initialSyncTotal         prometheus.Counter
	initialSyncCompleted     *prometheus.CounterVec
	initialSyncDuration      prometheus.Histogram

	msgc chan *clusterpb.Part
}
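// The values held in the states map above only need to satisfy the
// cluster.State contract from the Prometheus Alertmanager (Merge plus
// MarshalBinary). The sketch below is an illustrative, minimal in-memory
// implementation of that contract, useful only for local experiments; the
// type name and append-only behaviour are assumptions made for this example
// and are not part of the production silences/notification-log state.
type appendOnlyState struct {
	mtx  sync.Mutex
	data []byte
}

// Merge appends the incoming bytes. Real implementations (silences, nflog)
// decode the payload and reconcile entries instead of concatenating.
func (a *appendOnlyState) Merge(b []byte) error {
	a.mtx.Lock()
	defer a.mtx.Unlock()
	a.data = append(a.data, b...)
	return nil
}

// MarshalBinary returns a copy of the accumulated state.
func (a *appendOnlyState) MarshalBinary() ([]byte, error) {
	a.mtx.Lock()
	defer a.mtx.Unlock()
	out := make([]byte, len(a.data))
	copy(out, a.data)
	return out, nil
}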
// newReplicatedStates creates a new state struct, which manages state to be replicated between alertmanagers.
func newReplicatedStates(userID string, rf int, re Replicator, st alertstore.AlertStore, l log.Logger, r prometheus.Registerer) *state {

	s := &state{
		logger:            l,
		userID:            userID,
		replicationFactor: rf,
		replicator:        re,
		store:             st,
		states:            make(map[string]cluster.State, 2), // we use two, one for the notifications and one for silences.
		msgc:              make(chan *clusterpb.Part),
		reg:               r,
		settleReadTimeout: defaultSettleReadTimeout,
		storeReadTimeout:  defaultStoreReadTimeout,
		partialStateMergesTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_partial_state_merges_total",
			Help: "Number of times we have received a partial state to merge for a key.",
		}, []string{"key"}),
		partialStateMergesFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_partial_state_merges_failed_total",
			Help: "Number of times we have failed to merge a partial state received for a key.",
		}, []string{"key"}),
		stateReplicationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_replication_total",
			Help: "Number of times we have tried to replicate a state to other alertmanagers.",
		}, []string{"key"}),
		stateReplicationFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_replication_failed_total",
			Help: "Number of times we have failed to replicate a state to other alertmanagers.",
		}, []string{"key"}),
		fetchReplicaStateTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_fetch_replica_state_total",
			Help: "Number of times we have tried to read and merge the full state from another replica.",
		}),
		fetchReplicaStateFailed: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_fetch_replica_state_failed_total",
			Help: "Number of times we have failed to read and merge the full state from another replica.",
		}),
		initialSyncTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_initial_sync_total",
			Help: "Number of times we have tried to sync initial state from peers or remote storage.",
		}),
		initialSyncCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_initial_sync_completed_total",
			Help: "Number of times we have completed syncing initial state for each possible outcome.",
		}, []string{"outcome"}),
		initialSyncDuration: promauto.With(r).NewHistogram(prometheus.HistogramOpts{
			Name:    "alertmanager_state_initial_sync_duration_seconds",
			Help:    "Time spent syncing initial state from peers or remote storage.",
			Buckets: prometheus.ExponentialBuckets(0.008, 4, 7),
		}),
	}
	s.initialSyncCompleted.WithLabelValues(syncFromReplica)
	s.initialSyncCompleted.WithLabelValues(syncFromStorage)
	s.initialSyncCompleted.WithLabelValues(syncUserNotFound)
	s.initialSyncCompleted.WithLabelValues(syncFailed)

	s.Service = services.NewBasicService(s.starting, s.running, nil)

	return s
}

// AddState adds a new state that will be replicated to the other alertmanagers via the Replicator. It returns a
// channel to which the client can broadcast messages of the state to be sent.
func (s *state) AddState(key string, cs cluster.State, _ prometheus.Registerer) cluster.ClusterChannel {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	s.states[key] = cs

	s.partialStateMergesTotal.WithLabelValues(key)
	s.partialStateMergesFailed.WithLabelValues(key)
	s.stateReplicationTotal.WithLabelValues(key)
	s.stateReplicationFailed.WithLabelValues(key)

	return &stateChannel{
		s:   s,
		key: key,
	}
}
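// The sketch below shows how a caller might wire the pieces in this file
// together: construct the replicated state, register a state key, start the
// service (which runs the initial sync in starting), and broadcast an update.
// It is illustrative only; the function name, the "sil" key, and the way the
// dependencies are obtained are assumptions made for this example rather than
// the actual Cortex wiring.
func exampleReplicatedStateUsage(ctx context.Context, repl Replicator, store alertstore.AlertStore, silences cluster.State) error {
	reg := prometheus.NewRegistry()
	s := newReplicatedStates("user-1", 3, repl, store, log.NewNopLogger(), reg)

	// Register the silences state; the returned channel is what the silence
	// component would use to broadcast incremental updates.
	ch := s.AddState("sil", silences, reg)

	// Start the service and wait until the initial state sync has completed.
	if err := services.StartAndAwaitRunning(ctx, s); err != nil {
		return err
	}

	// Incremental updates are replicated to the other alertmanagers of this
	// tenant through Replicator.ReplicateStateForUser.
	ch.Broadcast([]byte("encoded state update"))

	return services.StopAndAwaitTerminated(ctx, s)
}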
// MergePartialState merges a received partial message with an internal state.
func (s *state) MergePartialState(p *clusterpb.Part) error {
	s.partialStateMergesTotal.WithLabelValues(p.Key).Inc()

	s.mtx.Lock()
	defer s.mtx.Unlock()
	st, ok := s.states[p.Key]
	if !ok {
		s.partialStateMergesFailed.WithLabelValues(p.Key).Inc()
		return fmt.Errorf("key not found while merging")
	}

	if err := st.Merge(p.Data); err != nil {
		s.partialStateMergesFailed.WithLabelValues(p.Key).Inc()
		return err
	}

	return nil
}

// Position helps in determining how long we should wait before sending a notification, based on the number of replicas.
func (s *state) Position() int {
	return s.replicator.GetPositionForUser(s.userID)
}

// GetFullState returns the full internal state.
func (s *state) GetFullState() (*clusterpb.FullState, error) {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	all := &clusterpb.FullState{
		Parts: make([]clusterpb.Part, 0, len(s.states)),
	}

	for key, st := range s.states {
		b, err := st.MarshalBinary()
		if err != nil {
			return nil, errors.Wrapf(err, "failed to encode state for key: %v", key)
		}
		all.Parts = append(all.Parts, clusterpb.Part{Key: key, Data: b})
	}

	return all, nil
}
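// MergePartialState is the entry point for partial updates received from peer
// alertmanagers. As a hedged illustration, a transport-layer handler might hand
// incoming parts to this state object roughly as follows; the handler name and
// error handling are assumptions for this sketch, not the actual Cortex gRPC
// handler.
func handleIncomingPart(s *state, p *clusterpb.Part) {
	if err := s.MergePartialState(p); err != nil {
		// A missing key usually means the corresponding state (silences or the
		// notification log) has not been registered via AddState yet.
		level.Warn(s.logger).Log("msg", "failed to merge incoming state part", "user", s.userID, "key", p.Key, "err", err)
	}
}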
// starting waits until the alertmanagers are ready (and sets the appropriate internal state when they are).
// The idea is that we don't want to start working before we get a chance to know most of the notifications and/or silences.
func (s *state) starting(ctx context.Context) error {
	s.initialSyncTotal.Inc()
	timer := prometheus.NewTimer(s.initialSyncDuration)
	defer timer.ObserveDuration()

	level.Info(s.logger).Log("msg", "Waiting for notifications and silences to settle...")

	// If the replication factor is <= 1, there is nowhere to obtain the state from.
	if s.replicationFactor <= 1 {
		level.Info(s.logger).Log("msg", "skipping settling (no replicas)")
		return nil
	}

	// We can check other alertmanager(s) and explicitly ask them to propagate their state to us if available.
	readCtx, cancel := context.WithTimeout(ctx, s.settleReadTimeout)
	defer cancel()

	s.fetchReplicaStateTotal.Inc()
	fullStates, err := s.replicator.ReadFullStateForUser(readCtx, s.userID)
	if err == nil {
		if err = s.mergeFullStates(fullStates); err == nil {
			level.Info(s.logger).Log("msg", "state settled; proceeding")
			s.initialSyncCompleted.WithLabelValues(syncFromReplica).Inc()
			return nil
		}
	}
	s.fetchReplicaStateFailed.Inc()

	level.Info(s.logger).Log("msg", "state not settled; trying to read from storage", "err", err)

	// Attempt to read the state from persistent storage instead.
	storeReadCtx, cancel := context.WithTimeout(ctx, s.storeReadTimeout)
	defer cancel()

	fullState, err := s.store.GetFullState(storeReadCtx, s.userID)
	if errors.Is(err, alertspb.ErrNotFound) {
		level.Info(s.logger).Log("msg", "no state for user in storage; proceeding", "user", s.userID)
		s.initialSyncCompleted.WithLabelValues(syncUserNotFound).Inc()
		return nil
	}
	if err == nil {
		if err = s.mergeFullStates([]*clusterpb.FullState{fullState.State}); err == nil {
			level.Info(s.logger).Log("msg", "state read from storage; proceeding")
			s.initialSyncCompleted.WithLabelValues(syncFromStorage).Inc()
			return nil
		}
	}

	level.Warn(s.logger).Log("msg", "failed to read state from storage; continuing anyway", "err", err)
	s.initialSyncCompleted.WithLabelValues(syncFailed).Inc()

	return nil
}

// WaitReady is needed for the pipeline builder to know when we've settled and the state is up to date.
func (s *state) WaitReady(ctx context.Context) error {
	return s.Service.AwaitRunning(ctx)
}

func (s *state) Ready() bool {
	return s.Service.State() == services.Running
}

// mergeFullStates attempts to merge all full states received from peers during settling.
func (s *state) mergeFullStates(fs []*clusterpb.FullState) error {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	for _, f := range fs {
		for _, p := range f.Parts {
			level.Debug(s.logger).Log("msg", "merging full state", "user", s.userID, "key", p.Key, "bytes", len(p.Data))

			st, ok := s.states[p.Key]
			if !ok {
				level.Error(s.logger).Log("msg", "key not found while merging full state", "user", s.userID, "key", p.Key)
				continue
			}

			if err := st.Merge(p.Data); err != nil {
				return errors.Wrapf(err, "failed to merge part of full state for key: %v", p.Key)
			}
		}
	}

	return nil
}

func (s *state) running(ctx context.Context) error {
	for {
		select {
		case p := <-s.msgc:
			// If the replication factor is <= 1, we don't need to replicate any state anywhere else.
			if s.replicationFactor <= 1 {
				return nil
			}

			s.stateReplicationTotal.WithLabelValues(p.Key).Inc()
			if err := s.replicator.ReplicateStateForUser(ctx, s.userID, p); err != nil {
				s.stateReplicationFailed.WithLabelValues(p.Key).Inc()
				level.Error(s.logger).Log("msg", "failed to replicate state to other alertmanagers", "user", s.userID, "key", p.Key, "err", err)
			}
		case <-ctx.Done():
			return nil
		}
	}
}

func (s *state) broadcast(key string, b []byte) {
	// We should ignore the Merges into the initial state during settling.
	if s.Ready() {
		s.msgc <- &clusterpb.Part{Key: key, Data: b}
	}
}

// stateChannel allows a state publisher to send messages that will be broadcasted to all other alertmanagers that a
// tenant belongs to.
type stateChannel struct {
	s   *state
	key string
}

// Broadcast receives a message to be replicated by the state.
func (c *stateChannel) Broadcast(b []byte) {
	c.s.broadcast(c.key, b)
}
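// The Replicator dependency used throughout this file is defined elsewhere in
// the package. Based solely on the three methods called here
// (GetPositionForUser, ReadFullStateForUser and ReplicateStateForUser), a
// minimal no-op implementation for local experiments might look like the
// sketch below. This is an assumption for illustration only; the real
// interface may carry additional methods or semantics.
type noopReplicator struct{}

// GetPositionForUser always reports position 0, i.e. "first replica", so
// notifications would be sent without any peer wait.
func (noopReplicator) GetPositionForUser(userID string) int { return 0 }

// ReadFullStateForUser returns no peer state; starting() treats the empty
// result as a successful (empty) settle from a replica.
func (noopReplicator) ReadFullStateForUser(ctx context.Context, userID string) ([]*clusterpb.FullState, error) {
	return nil, nil
}

// ReplicateStateForUser drops the update instead of sending it to peers.
func (noopReplicator) ReplicateStateForUser(ctx context.Context, userID string, p *clusterpb.Part) error {
	return nil
}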