github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/metricwatcher/prometheus.go (about) 1 // Copyright (c) 2020-2024, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package metricwatcher 6 7 import ( 8 "fmt" 9 "os" 10 "path/filepath" 11 "strings" 12 "sync" 13 14 "github.com/choria-io/go-choria/internal/util" 15 ) 16 17 type logger interface { 18 Debugf(format string, args ...any) 19 Infof(format string, args ...any) 20 Errorf(format string, args ...any) 21 } 22 23 var ( 24 metrics map[string]*Metric 25 mu sync.Mutex 26 ) 27 28 func init() { 29 mu.Lock() 30 metrics = make(map[string]*Metric) 31 mu.Unlock() 32 } 33 34 func updatePromState(td string, log logger, machine string, name string, metric *Metric) error { 35 mu.Lock() 36 defer mu.Unlock() 37 38 metric.name = name 39 metric.machine = machine 40 metrics[fmt.Sprintf("%s_%s", machine, name)] = metric 41 42 return savePromState(td, log) 43 } 44 45 func deletePromState(td string, log logger, machine string, name string) error { 46 mu.Lock() 47 defer mu.Unlock() 48 49 delete(metrics, fmt.Sprintf("%s_%s", machine, name)) 50 51 return savePromState(td, log) 52 } 53 54 func promName(name string) string { 55 return strings.Replace(strings.Replace(strings.Replace(strings.ToLower(name), " ", "_", -1), ",", "_", -1), `"`, "_", -1) 56 } 57 58 // lock should be held 59 func savePromState(td string, log logger) error { 60 if td == "" { 61 log.Debugf("Not updating prometheus - text file directory is unset") 62 return nil 63 } 64 65 if !util.FileIsDir(td) { 66 log.Debugf("%q is not a directory", td) 67 return nil 68 } 69 70 type promValue struct { 71 labels string 72 value float64 73 } 74 75 type promMetric struct { 76 values []*promValue 77 } 78 79 // sort by metric name so help is only ever shown per metric name across all machines. 80 // ie. machine1 with a metric name kasa and machine2 with a metric name kasa will both 81 // be the same prom metric but with different labels 82 pmetrics := map[string]*promMetric{} 83 for _, ms := range metrics { 84 ms.seen++ 85 86 // if metrics arent being updated we need to eventually stop logging them, this can happen 87 // when someone renames a watcher in a machine - it should call delete but sometimes its missed 88 if ms.seen > 5 { 89 continue 90 } 91 92 for n, v := range ms.Metrics { 93 mname := fmt.Sprintf("choria_machine_metric_watcher_%s_%s", promName(ms.name), n) 94 _, ok := pmetrics[mname] 95 if !ok { 96 pmetrics[mname] = &promMetric{values: []*promValue{}} 97 } 98 99 var labelArray []string 100 for k, v := range ms.Labels { 101 labelArray = append(labelArray, fmt.Sprintf(`%s="%v"`, promName(k), promName(v))) 102 } 103 104 pmetrics[mname].values = append(pmetrics[mname].values, &promValue{ 105 labels: strings.Join(labelArray, ","), 106 value: v, 107 }) 108 } 109 } 110 111 tfile, err := os.CreateTemp(td, "") 112 if err != nil { 113 return fmt.Errorf("failed to create prometheus metric in %q: %s", td, err) 114 } 115 defer tfile.Close() 116 117 for name, pm := range pmetrics { 118 if len(pm.values) == 0 { 119 continue 120 } 121 122 fmt.Fprintf(tfile, "# HELP %s Choria Metric\n", name) 123 fmt.Fprintf(tfile, "# TYPE %s gauge\n", name) 124 for _, v := range pm.values { 125 fmt.Fprintf(tfile, "%s{%s} %f\n", name, v.labels, v.value) 126 } 127 } 128 129 os.Chmod(tfile.Name(), 0644) 130 return os.Rename(tfile.Name(), filepath.Join(td, "choria_machine_metrics_watcher_status.prom")) 131 }