github.com/muhammadn/cortex@v1.9.1-0.20220510110439-46bb7000d03d/pkg/alertmanager/state_replication.go

package alertmanager

import (
	"context"
	"fmt"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus/promauto"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/grafana/dskit/services"
	"github.com/pkg/errors"
	"github.com/prometheus/alertmanager/cluster"
	"github.com/prometheus/alertmanager/cluster/clusterpb"
	"github.com/prometheus/client_golang/prometheus"

	"github.com/cortexproject/cortex/pkg/alertmanager/alertspb"
	"github.com/cortexproject/cortex/pkg/alertmanager/alertstore"
)

const (
	defaultSettleReadTimeout = 15 * time.Second
	defaultStoreReadTimeout  = 15 * time.Second

	// Initial sync outcome label values.
	syncFromReplica  = "from-replica"
	syncFromStorage  = "from-storage"
	syncUserNotFound = "user-not-found"
	syncFailed       = "failed"
)

// state represents the Alertmanager silences and notification log internal state.
type state struct {
	services.Service

	userID string
	logger log.Logger
	reg    prometheus.Registerer

	settleReadTimeout time.Duration
	storeReadTimeout  time.Duration

	mtx    sync.Mutex
	states map[string]cluster.State

	replicationFactor int
	replicator        Replicator
	store             alertstore.AlertStore

	partialStateMergesTotal  *prometheus.CounterVec
	partialStateMergesFailed *prometheus.CounterVec
	stateReplicationTotal    *prometheus.CounterVec
	stateReplicationFailed   *prometheus.CounterVec
	fetchReplicaStateTotal   prometheus.Counter
	fetchReplicaStateFailed  prometheus.Counter
	initialSyncTotal         prometheus.Counter
	initialSyncCompleted     *prometheus.CounterVec
	initialSyncDuration      prometheus.Histogram

	msgc chan *clusterpb.Part
}
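// The values held in the states map above only need to satisfy the
// cluster.State contract from the Prometheus Alertmanager (Merge plus
// MarshalBinary). The sketch below is an illustrative, minimal in-memory
// implementation of that contract, useful only for local experiments; the
// type name and append-only behaviour are assumptions made for this example
// and are not part of the production silences/notification-log state.
type appendOnlyState struct {
	mtx  sync.Mutex
	data []byte
}

// Merge appends the incoming bytes. Real implementations (silences, nflog)
// decode the payload and reconcile entries instead of concatenating.
func (a *appendOnlyState) Merge(b []byte) error {
	a.mtx.Lock()
	defer a.mtx.Unlock()
	a.data = append(a.data, b...)
	return nil
}

// MarshalBinary returns a copy of the accumulated state.
func (a *appendOnlyState) MarshalBinary() ([]byte, error) {
	a.mtx.Lock()
	defer a.mtx.Unlock()
	out := make([]byte, len(a.data))
	copy(out, a.data)
	return out, nil
}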
// newReplicatedStates creates a new state struct, which manages state to be replicated between alertmanagers.
func newReplicatedStates(userID string, rf int, re Replicator, st alertstore.AlertStore, l log.Logger, r prometheus.Registerer) *state {

	s := &state{
		logger:            l,
		userID:            userID,
		replicationFactor: rf,
		replicator:        re,
		store:             st,
		states:            make(map[string]cluster.State, 2), // we use two, one for the notifications and one for silences.
		msgc:              make(chan *clusterpb.Part),
		reg:               r,
		settleReadTimeout: defaultSettleReadTimeout,
		storeReadTimeout:  defaultStoreReadTimeout,
		partialStateMergesTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_partial_state_merges_total",
			Help: "Number of times we have received a partial state to merge for a key.",
		}, []string{"key"}),
		partialStateMergesFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_partial_state_merges_failed_total",
			Help: "Number of times we have failed to merge a partial state received for a key.",
		}, []string{"key"}),
		stateReplicationTotal: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_replication_total",
			Help: "Number of times we have tried to replicate a state to other alertmanagers.",
		}, []string{"key"}),
		stateReplicationFailed: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_replication_failed_total",
			Help: "Number of times we have failed to replicate a state to other alertmanagers.",
		}, []string{"key"}),
		fetchReplicaStateTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_fetch_replica_state_total",
			Help: "Number of times we have tried to read and merge the full state from another replica.",
		}),
		fetchReplicaStateFailed: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_fetch_replica_state_failed_total",
			Help: "Number of times we have failed to read and merge the full state from another replica.",
		}),
		initialSyncTotal: promauto.With(r).NewCounter(prometheus.CounterOpts{
			Name: "alertmanager_state_initial_sync_total",
			Help: "Number of times we have tried to sync initial state from peers or remote storage.",
		}),
		initialSyncCompleted: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
			Name: "alertmanager_state_initial_sync_completed_total",
			Help: "Number of times we have completed syncing initial state for each possible outcome.",
		}, []string{"outcome"}),
		initialSyncDuration: promauto.With(r).NewHistogram(prometheus.HistogramOpts{
			Name:    "alertmanager_state_initial_sync_duration_seconds",
			Help:    "Time spent syncing initial state from peers or remote storage.",
			Buckets: prometheus.ExponentialBuckets(0.008, 4, 7),
		}),
	}
	s.initialSyncCompleted.WithLabelValues(syncFromReplica)
	s.initialSyncCompleted.WithLabelValues(syncFromStorage)
	s.initialSyncCompleted.WithLabelValues(syncUserNotFound)
	s.initialSyncCompleted.WithLabelValues(syncFailed)

	s.Service = services.NewBasicService(s.starting, s.running, nil)

	return s
}

// AddState adds a new state that will be replicated to the other alertmanagers via the Replicator. It returns a
// channel to which the client can broadcast messages of the state to be sent.
func (s *state) AddState(key string, cs cluster.State, _ prometheus.Registerer) cluster.ClusterChannel {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	s.states[key] = cs

	s.partialStateMergesTotal.WithLabelValues(key)
	s.partialStateMergesFailed.WithLabelValues(key)
	s.stateReplicationTotal.WithLabelValues(key)
	s.stateReplicationFailed.WithLabelValues(key)

	return &stateChannel{
		s:   s,
		key: key,
	}
}
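// The sketch below shows how a caller might wire the pieces in this file
// together: construct the replicated state, register a state key, start the
// service (which runs the initial sync in starting), and broadcast an update.
// It is illustrative only; the function name, the "sil" key, and the way the
// dependencies are obtained are assumptions made for this example rather than
// the actual Cortex wiring.
func exampleReplicatedStateUsage(ctx context.Context, repl Replicator, store alertstore.AlertStore, silences cluster.State) error {
	reg := prometheus.NewRegistry()
	s := newReplicatedStates("user-1", 3, repl, store, log.NewNopLogger(), reg)

	// Register the silences state; the returned channel is what the silence
	// component would use to broadcast incremental updates.
	ch := s.AddState("sil", silences, reg)

	// Start the service and wait until the initial state sync has completed.
	if err := services.StartAndAwaitRunning(ctx, s); err != nil {
		return err
	}

	// Incremental updates are replicated to the other alertmanagers of this
	// tenant through Replicator.ReplicateStateForUser.
	ch.Broadcast([]byte("encoded state update"))

	return services.StopAndAwaitTerminated(ctx, s)
}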
// MergePartialState merges a received partial message with an internal state.
func (s *state) MergePartialState(p *clusterpb.Part) error {
	s.partialStateMergesTotal.WithLabelValues(p.Key).Inc()

	s.mtx.Lock()
	defer s.mtx.Unlock()
	st, ok := s.states[p.Key]
	if !ok {
		s.partialStateMergesFailed.WithLabelValues(p.Key).Inc()
		return fmt.Errorf("key not found while merging")
	}

	if err := st.Merge(p.Data); err != nil {
		s.partialStateMergesFailed.WithLabelValues(p.Key).Inc()
		return err
	}

	return nil
}

// Position helps in determining how long we should wait before sending a notification, based on the number of replicas.
func (s *state) Position() int {
	return s.replicator.GetPositionForUser(s.userID)
}

// GetFullState returns the full internal state.
func (s *state) GetFullState() (*clusterpb.FullState, error) {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	all := &clusterpb.FullState{
		Parts: make([]clusterpb.Part, 0, len(s.states)),
	}

	for key, st := range s.states {
		b, err := st.MarshalBinary()
		if err != nil {
			return nil, errors.Wrapf(err, "failed to encode state for key: %v", key)
		}
		all.Parts = append(all.Parts, clusterpb.Part{Key: key, Data: b})
	}

	return all, nil
}
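// MergePartialState is the entry point for partial updates received from peer
// alertmanagers. As a hedged illustration, a transport-layer handler might hand
// incoming parts to this state object roughly as follows; the handler name and
// error handling are assumptions for this sketch, not the actual Cortex gRPC
// handler.
func handleIncomingPart(s *state, p *clusterpb.Part) {
	if err := s.MergePartialState(p); err != nil {
		// A missing key usually means the corresponding state (silences or the
		// notification log) has not been registered via AddState yet.
		level.Warn(s.logger).Log("msg", "failed to merge incoming state part", "user", s.userID, "key", p.Key, "err", err)
	}
}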
// starting waits until the alertmanagers are ready (and sets the appropriate internal state when they are).
// The idea is that we don't want to start working before we get a chance to know most of the notifications and/or silences.
func (s *state) starting(ctx context.Context) error {
	s.initialSyncTotal.Inc()
	timer := prometheus.NewTimer(s.initialSyncDuration)
	defer timer.ObserveDuration()

	level.Info(s.logger).Log("msg", "Waiting for notifications and silences to settle...")

	// If the replication factor is <= 1, there is nowhere to obtain the state from.
	if s.replicationFactor <= 1 {
		level.Info(s.logger).Log("msg", "skipping settling (no replicas)")
		return nil
	}

	// We can check other alertmanager(s) and explicitly ask them to propagate their state to us if available.
	readCtx, cancel := context.WithTimeout(ctx, s.settleReadTimeout)
	defer cancel()

	s.fetchReplicaStateTotal.Inc()
	fullStates, err := s.replicator.ReadFullStateForUser(readCtx, s.userID)
	if err == nil {
		if err = s.mergeFullStates(fullStates); err == nil {
			level.Info(s.logger).Log("msg", "state settled; proceeding")
			s.initialSyncCompleted.WithLabelValues(syncFromReplica).Inc()
			return nil
		}
	}
	s.fetchReplicaStateFailed.Inc()

	level.Info(s.logger).Log("msg", "state not settled; trying to read from storage", "err", err)

	// Attempt to read the state from persistent storage instead.
	storeReadCtx, cancel := context.WithTimeout(ctx, s.storeReadTimeout)
	defer cancel()

	fullState, err := s.store.GetFullState(storeReadCtx, s.userID)
	if errors.Is(err, alertspb.ErrNotFound) {
		level.Info(s.logger).Log("msg", "no state for user in storage; proceeding", "user", s.userID)
		s.initialSyncCompleted.WithLabelValues(syncUserNotFound).Inc()
		return nil
	}
	if err == nil {
		if err = s.mergeFullStates([]*clusterpb.FullState{fullState.State}); err == nil {
			level.Info(s.logger).Log("msg", "state read from storage; proceeding")
			s.initialSyncCompleted.WithLabelValues(syncFromStorage).Inc()
			return nil
		}
	}

	level.Warn(s.logger).Log("msg", "failed to read state from storage; continuing anyway", "err", err)
	s.initialSyncCompleted.WithLabelValues(syncFailed).Inc()

	return nil
}

// WaitReady is needed for the pipeline builder to know when we've settled and the state is up to date.
func (s *state) WaitReady(ctx context.Context) error {
	return s.Service.AwaitRunning(ctx)
}

func (s *state) Ready() bool {
	return s.Service.State() == services.Running
}

// mergeFullStates attempts to merge all full states received from peers during settling.
func (s *state) mergeFullStates(fs []*clusterpb.FullState) error {
	s.mtx.Lock()
	defer s.mtx.Unlock()

	for _, f := range fs {
		for _, p := range f.Parts {
			level.Debug(s.logger).Log("msg", "merging full state", "user", s.userID, "key", p.Key, "bytes", len(p.Data))

			st, ok := s.states[p.Key]
			if !ok {
				level.Error(s.logger).Log("msg", "key not found while merging full state", "user", s.userID, "key", p.Key)
				continue
			}

			if err := st.Merge(p.Data); err != nil {
				return errors.Wrapf(err, "failed to merge part of full state for key: %v", p.Key)
			}
		}
	}

	return nil
}

func (s *state) running(ctx context.Context) error {
	for {
		select {
		case p := <-s.msgc:
			// If the replication factor is <= 1, we don't need to replicate any state anywhere else.
			if s.replicationFactor <= 1 {
				return nil
			}

			s.stateReplicationTotal.WithLabelValues(p.Key).Inc()
			if err := s.replicator.ReplicateStateForUser(ctx, s.userID, p); err != nil {
				s.stateReplicationFailed.WithLabelValues(p.Key).Inc()
				level.Error(s.logger).Log("msg", "failed to replicate state to other alertmanagers", "user", s.userID, "key", p.Key, "err", err)
			}
		case <-ctx.Done():
			return nil
		}
	}
}

func (s *state) broadcast(key string, b []byte) {
	// We should ignore the Merges into the initial state during settling.
	if s.Ready() {
		s.msgc <- &clusterpb.Part{Key: key, Data: b}
	}
}

// stateChannel allows a state publisher to send messages that will be broadcasted to all other alertmanagers that a
// tenant belongs to.
type stateChannel struct {
	s   *state
	key string
}

// Broadcast receives a message to be replicated by the state.
func (c *stateChannel) Broadcast(b []byte) {
	c.s.broadcast(c.key, b)
}
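// The Replicator dependency used throughout this file is defined elsewhere in
// the package. Based solely on the three methods called here
// (GetPositionForUser, ReadFullStateForUser and ReplicateStateForUser), a
// minimal no-op implementation for local experiments might look like the
// sketch below. This is an assumption for illustration only; the real
// interface may carry additional methods or semantics.
type noopReplicator struct{}

// GetPositionForUser always reports position 0, i.e. "first replica", so
// notifications would be sent without any peer wait.
func (noopReplicator) GetPositionForUser(userID string) int { return 0 }

// ReadFullStateForUser returns no peer state; starting() treats the empty
// result as a successful (empty) settle from a replica.
func (noopReplicator) ReadFullStateForUser(ctx context.Context, userID string) ([]*clusterpb.FullState, error) {
	return nil, nil
}

// ReplicateStateForUser drops the update instead of sending it to peers.
func (noopReplicator) ReplicateStateForUser(ctx context.Context, userID string, p *clusterpb.Part) error {
	return nil
}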