github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/metricwatcher/prometheus.go (about)

     1  // Copyright (c) 2020-2024, R.I. Pienaar and the Choria Project contributors
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package metricwatcher
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  
    14  	"github.com/choria-io/go-choria/internal/util"
    15  )
    16  
    17  type logger interface {
    18  	Debugf(format string, args ...any)
    19  	Infof(format string, args ...any)
    20  	Errorf(format string, args ...any)
    21  }
    22  
    23  var (
    24  	metrics map[string]*Metric
    25  	mu      sync.Mutex
    26  )
    27  
    28  func init() {
    29  	mu.Lock()
    30  	metrics = make(map[string]*Metric)
    31  	mu.Unlock()
    32  }
    33  
    34  func updatePromState(td string, log logger, machine string, name string, metric *Metric) error {
    35  	mu.Lock()
    36  	defer mu.Unlock()
    37  
    38  	metric.name = name
    39  	metric.machine = machine
    40  	metrics[fmt.Sprintf("%s_%s", machine, name)] = metric
    41  
    42  	return savePromState(td, log)
    43  }
    44  
    45  func deletePromState(td string, log logger, machine string, name string) error {
    46  	mu.Lock()
    47  	defer mu.Unlock()
    48  
    49  	delete(metrics, fmt.Sprintf("%s_%s", machine, name))
    50  
    51  	return savePromState(td, log)
    52  }
    53  
    54  func promName(name string) string {
    55  	return strings.Replace(strings.Replace(strings.Replace(strings.ToLower(name), " ", "_", -1), ",", "_", -1), `"`, "_", -1)
    56  }
    57  
    58  // lock should be held
    59  func savePromState(td string, log logger) error {
    60  	if td == "" {
    61  		log.Debugf("Not updating prometheus - text file directory is unset")
    62  		return nil
    63  	}
    64  
    65  	if !util.FileIsDir(td) {
    66  		log.Debugf("%q is not a directory", td)
    67  		return nil
    68  	}
    69  
    70  	type promValue struct {
    71  		labels string
    72  		value  float64
    73  	}
    74  
    75  	type promMetric struct {
    76  		values []*promValue
    77  	}
    78  
    79  	// sort by metric name so help is only ever shown per metric name across all machines.
    80  	// ie. machine1 with a metric name kasa and machine2 with a metric name kasa will both
    81  	// be the same prom metric but with different labels
    82  	pmetrics := map[string]*promMetric{}
    83  	for _, ms := range metrics {
    84  		ms.seen++
    85  
    86  		// if metrics arent being updated we need to eventually stop logging them, this can happen
    87  		// when someone renames a watcher in a machine - it should call delete but sometimes its missed
    88  		if ms.seen > 5 {
    89  			continue
    90  		}
    91  
    92  		for n, v := range ms.Metrics {
    93  			mname := fmt.Sprintf("choria_machine_metric_watcher_%s_%s", promName(ms.name), n)
    94  			_, ok := pmetrics[mname]
    95  			if !ok {
    96  				pmetrics[mname] = &promMetric{values: []*promValue{}}
    97  			}
    98  
    99  			var labelArray []string
   100  			for k, v := range ms.Labels {
   101  				labelArray = append(labelArray, fmt.Sprintf(`%s="%v"`, promName(k), promName(v)))
   102  			}
   103  
   104  			pmetrics[mname].values = append(pmetrics[mname].values, &promValue{
   105  				labels: strings.Join(labelArray, ","),
   106  				value:  v,
   107  			})
   108  		}
   109  	}
   110  
   111  	tfile, err := os.CreateTemp(td, "")
   112  	if err != nil {
   113  		return fmt.Errorf("failed to create prometheus metric in %q: %s", td, err)
   114  	}
   115  	defer tfile.Close()
   116  
   117  	for name, pm := range pmetrics {
   118  		if len(pm.values) == 0 {
   119  			continue
   120  		}
   121  
   122  		fmt.Fprintf(tfile, "# HELP %s Choria Metric\n", name)
   123  		fmt.Fprintf(tfile, "# TYPE %s gauge\n", name)
   124  		for _, v := range pm.values {
   125  			fmt.Fprintf(tfile, "%s{%s} %f\n", name, v.labels, v.value)
   126  		}
   127  	}
   128  
   129  	os.Chmod(tfile.Name(), 0644)
   130  	return os.Rename(tfile.Name(), filepath.Join(td, "choria_machine_metrics_watcher_status.prom"))
   131  }