// Copyright (c) 2022, Oracle and/or its affiliates.
// Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.

package alerts

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"sync/atomic"
	"time"

	"github.com/verrazzano/verrazzano/pkg/httputil"
	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
	"github.com/verrazzano/verrazzano/tools/psr/backend/config"
	"github.com/verrazzano/verrazzano/tools/psr/backend/metrics"
	"github.com/verrazzano/verrazzano/tools/psr/backend/osenv"
	"github.com/verrazzano/verrazzano/tools/psr/backend/pkg/k8sclient"
	psrprom "github.com/verrazzano/verrazzano/tools/psr/backend/pkg/prometheus"
	psrvz "github.com/verrazzano/verrazzano/tools/psr/backend/pkg/verrazzano"
	"github.com/verrazzano/verrazzano/tools/psr/backend/spi"

	"github.com/prometheus/client_golang/prometheus"
	corev1 "k8s.io/api/core/v1"
	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// metricsPrefix is the prefix that is automatically pre-pended to all metrics exported by this worker.
const metricsPrefix = "prom_alert"

// funcNewPsrClient is the constructor used everywhere in this file to obtain a PSR client.
// It is a package-level variable rather than a direct call — presumably so tests can
// substitute a fake; confirm against the package's tests.
var funcNewPsrClient = k8sclient.NewPsrClient

// worker is the alerts worker. It carries the Prometheus descriptors built at construction
// time and embeds a pointer to its metrics, so copies of a worker value share the same counters.
type worker struct {
	metricDescList []prometheus.Desc
	*workerMetrics
}

// Compile-time check that worker satisfies spi.Worker.
var _ spi.Worker = worker{}

// workerMetrics holds the metrics produced by the worker. Metrics must be thread safe.
43 type workerMetrics struct { 44 alertsFiringCount metrics.MetricItem 45 alertsResolvedCount metrics.MetricItem 46 } 47 48 func NewAlertsWorker() (spi.Worker, error) { 49 w := worker{workerMetrics: &workerMetrics{ 50 alertsFiringCount: metrics.MetricItem{ 51 Name: "alerts_firing_received_count", 52 Help: "The total number of alerts received from alertmanager", 53 Type: prometheus.CounterValue, 54 }, 55 alertsResolvedCount: metrics.MetricItem{ 56 Name: "alerts_resolved_received_count", 57 Help: "The total number of alerts received from alertmanager", 58 Type: prometheus.CounterValue, 59 }, 60 }} 61 62 if err := config.PsrEnv.LoadFromEnv(w.GetEnvDescList()); err != nil { 63 return w, err 64 } 65 66 metricsLabels := map[string]string{ 67 config.PsrWorkerTypeMetricsName: config.PsrEnv.GetEnv(config.PsrWorkerType), 68 } 69 70 w.metricDescList = metrics.BuildMetricDescList([]*metrics.MetricItem{ 71 &w.alertsFiringCount, 72 &w.alertsResolvedCount, 73 }, metricsLabels, w.GetWorkerDesc().MetricsPrefix) 74 return w, nil 75 } 76 77 // GetWorkerDesc returns the WorkerDesc for the worker 78 func (w worker) GetWorkerDesc() spi.WorkerDesc { 79 return spi.WorkerDesc{ 80 WorkerType: config.WorkerTypeHTTPGet, 81 Description: "The alerts receiver worker configures alertmanger and receives alerts and writes them to events", 82 MetricsPrefix: metricsPrefix, 83 } 84 } 85 86 func (w worker) GetEnvDescList() []osenv.EnvVarDesc { 87 return []osenv.EnvVarDesc{} 88 } 89 90 func (w worker) GetMetricDescList() []prometheus.Desc { 91 return w.metricDescList 92 } 93 94 func (w worker) GetMetricList() []prometheus.Metric { 95 return []prometheus.Metric{ 96 w.alertsFiringCount.BuildMetric(), 97 w.alertsResolvedCount.BuildMetric(), 98 } 99 } 100 101 func (w worker) WantLoopInfoLogged() bool { 102 return false 103 } 104 105 func (w worker) PreconditionsMet() (bool, error) { 106 return true, nil 107 } 108 109 func (w worker) DoWork(conf config.CommonConfig, log vzlog.VerrazzanoLogger) error { 110 if 
err := updateVZForAlertmanager(log, conf); err != nil { 111 return err 112 } 113 114 http.HandleFunc("/alerts", func(rw http.ResponseWriter, r *http.Request) { 115 c, err := funcNewPsrClient() 116 if err != nil { 117 log.Errorf("error creating client: %v", err) 118 } 119 if r.Body == nil { 120 log.Errorf("Alert webhook POST request contained a nil body") 121 return 122 } 123 bodyRaw, err := io.ReadAll(r.Body) 124 if err != nil { 125 log.Errorf("Unexpected error while reading request body: %v", err) 126 return 127 } 128 r.Body.Close() 129 alertName, err := httputil.ExtractFieldFromResponseBodyOrReturnError(string(bodyRaw), "alerts.0.labels.alertname", "unable to extract alertname from body json") 130 if err != nil { 131 log.Error(err) 132 } 133 alertStatus, err := httputil.ExtractFieldFromResponseBodyOrReturnError(string(bodyRaw), "status", "unable to extract alert status from body json") 134 if err != nil { 135 log.Error(err) 136 } 137 if alertStatus == "firing" { 138 atomic.AddInt64(&w.workerMetrics.alertsFiringCount.Val, 1) 139 } else if alertStatus == "resolved" { 140 atomic.AddInt64(&w.workerMetrics.alertsResolvedCount.Val, 1) 141 } else { 142 log.Errorf("alert received with unknown status: %s", alertStatus) 143 } 144 145 event := corev1.Event{ 146 ObjectMeta: v1.ObjectMeta{ 147 Name: "psr-alert-" + alertName, 148 Namespace: config.PsrEnv.GetEnv(config.PsrWorkerNamespace), 149 }, 150 InvolvedObject: corev1.ObjectReference{ 151 Namespace: config.PsrEnv.GetEnv(config.PsrWorkerNamespace), 152 }, 153 Type: "Warning", 154 } 155 if _, err = controllerutil.CreateOrUpdate(context.TODO(), c.CrtlRuntime, &event, func() error { 156 event.LastTimestamp = v1.Time{Time: time.Now()} 157 event.Message = string(bodyRaw) 158 event.Reason = "Alert " + alertStatus 159 return nil 160 }); err != nil { 161 log.Errorf("error generating alert event: %v", err) 162 } 163 }) 164 select {} 165 } 166 167 func updateVZForAlertmanager(log vzlog.VerrazzanoLogger, conf config.CommonConfig) 
error { 168 if err := createAlertmanagerOverridesCM(log, conf); err != nil { 169 return err 170 } 171 172 c, err := funcNewPsrClient() 173 if err != nil { 174 log.Errorf("error creating client: %v", err) 175 } 176 cr, err := psrvz.GetVerrazzano(c.VzInstall) 177 if err != nil { 178 return err 179 } 180 var m psrprom.AlertmanagerConfigModifier 181 m.ModifyCR(cr) 182 183 return psrvz.UpdateVerrazzano(c.VzInstall, cr) 184 } 185 186 func createAlertmanagerOverridesCM(log vzlog.VerrazzanoLogger, conf config.CommonConfig) error { 187 c, err := funcNewPsrClient() 188 if err != nil { 189 log.Errorf("error creating client: %v", err) 190 } 191 cr, err := psrvz.GetVerrazzano(c.VzInstall) 192 if err != nil { 193 return err 194 } 195 cm := corev1.ConfigMap{ 196 ObjectMeta: v1.ObjectMeta{ 197 Name: psrprom.AlertmanagerCMName, 198 Namespace: cr.Namespace, 199 }, 200 Data: map[string]string{}, 201 } 202 _, err = controllerutil.CreateOrUpdate(context.TODO(), c.CrtlRuntime, &cm, func() error { 203 cm.Data = map[string]string{ 204 psrprom.AlertmanagerCMKey: fmt.Sprintf(`alertmanager: 205 alertmanagerSpec: 206 podMetadata: 207 annotations: 208 sidecar.istio.io/inject: "false" 209 config: 210 receivers: 211 - webhook_configs: 212 - url: http://%s-%s.%s:9090/alerts 213 name: webhook 214 route: 215 group_by: 216 - alertname 217 receiver: webhook 218 routes: 219 - match: 220 alertname: Watchdog 221 receiver: webhook 222 enabled: true 223 `, conf.ReleaseName, conf.WorkerType, conf.Namespace, 224 )} 225 return nil 226 }) 227 return err 228 }