github.com/pf-qiu/concourse/v6@v6.7.3-0.20201207032516-1f455d73275f/atc/metric/emitter/prometheus.go (about)

     1  package emitter
     2  
     3  import (
     4  	"fmt"
     5  	"net"
     6  	"net/http"
     7  	"sort"
     8  	"strings"
     9  	"sync"
    10  	"time"
    11  
    12  	"code.cloudfoundry.org/lager"
    13  	"github.com/pf-qiu/concourse/v6/atc/db"
    14  	"github.com/pf-qiu/concourse/v6/atc/metric"
    15  
    16  	"github.com/prometheus/client_golang/prometheus"
    17  	"github.com/prometheus/client_golang/prometheus/promhttp"
    18  )
    19  
// PrometheusEmitter implements metric.Emitter by translating Concourse
// metric events into Prometheus collectors, which are served over HTTP
// by the listener started in PrometheusConfig.NewEmitter.
type PrometheusEmitter struct {
	// scheduling metrics
	jobsScheduled  prometheus.Counter
	jobsScheduling prometheus.Gauge

	buildsStarted prometheus.Counter
	buildsRunning prometheus.Gauge

	concurrentRequestsLimitHit *prometheus.CounterVec
	concurrentRequests         *prometheus.GaugeVec

	tasksWaiting         *prometheus.GaugeVec
	tasksWaitingDuration *prometheus.HistogramVec

	// build outcome metrics, both aggregate counters and
	// per-team/pipeline/job labelled vectors
	buildDurationsVec *prometheus.HistogramVec
	buildsAborted     prometheus.Counter
	buildsErrored     prometheus.Counter
	buildsFailed      prometheus.Counter
	buildsFinished    prometheus.Counter
	buildsFinishedVec *prometheus.CounterVec
	buildsSucceeded   prometheus.Counter

	dbConnections  *prometheus.GaugeVec
	dbQueriesTotal prometheus.Counter

	errorLogs *prometheus.CounterVec

	httpRequestsDuration *prometheus.HistogramVec

	locksHeld *prometheus.GaugeVec

	// lidar (resource checking) metrics
	checksFinished  *prometheus.CounterVec
	checksQueueSize prometheus.Gauge
	checksStarted   prometheus.Counter
	checksEnqueued  prometheus.Counter

	volumesStreamed prometheus.Counter

	// per-worker gauges
	workerContainers        *prometheus.GaugeVec
	workerUnknownContainers *prometheus.GaugeVec
	workerVolumes           *prometheus.GaugeVec
	workerUnknownVolumes    *prometheus.GaugeVec
	workerTasks             *prometheus.GaugeVec
	workersRegistered       *prometheus.GaugeVec

	// Bookkeeping used to delete stale per-worker timeseries:
	// worker name -> serialized label key -> label set (see serializeLabels).
	workerContainersLabels map[string]map[string]prometheus.Labels
	workerVolumesLabels    map[string]map[string]prometheus.Labels
	workerTasksLabels      map[string]map[string]prometheus.Labels
	// workerLastSeen records when each worker last produced any event;
	// periodicMetricGC drops workers not seen for a while.
	workerLastSeen map[string]time.Time
	// mu guards the four maps above.
	mu sync.Mutex
}
    70  
// PrometheusConfig holds the CLI-configurable listen address for the
// Prometheus metrics endpoint. The emitter is considered configured only
// when both fields are set (see IsConfigured).
type PrometheusConfig struct {
	BindIP   string `long:"prometheus-bind-ip" description:"IP to listen on to expose Prometheus metrics."`
	BindPort string `long:"prometheus-bind-port" description:"Port to listen on to expose Prometheus metrics."`
}
    75  
    76  // The most natural data type to hold the labels is a set because each worker can have multiple but
    77  // unique sets of labels. A set in Go is represented by a map[T]struct{}. Unfortunately, we cannot
    78  // put prometheus.Labels inside a map[prometheus.Labels]struct{} because prometheus.Labels are not
    79  // hashable. To work around this, we compute a string from the labels and use this as the keys of
    80  // the map.
    81  func serializeLabels(labels *prometheus.Labels) string {
    82  	var (
    83  		key   string
    84  		names []string
    85  	)
    86  	for _, v := range *labels {
    87  		names = append(names, v)
    88  	}
    89  	sort.Strings(names)
    90  	key = strings.Join(names, "_")
    91  
    92  	return key
    93  }
    94  
// init registers this emitter's configuration with the global metrics
// registry so it can be selected and configured via CLI flags.
func init() {
	metric.Metrics.RegisterEmitter(&PrometheusConfig{})
}
    98  
// Description identifies this emitter in help and diagnostic output.
func (config *PrometheusConfig) Description() string { return "Prometheus" }

// IsConfigured reports whether both the bind IP and port were provided;
// the emitter is only activated when this returns true.
func (config *PrometheusConfig) IsConfigured() bool {
	return config.BindPort != "" && config.BindIP != ""
}
   103  func (config *PrometheusConfig) bind() string {
   104  	return fmt.Sprintf("%s:%s", config.BindIP, config.BindPort)
   105  }
   106  
// NewEmitter constructs and registers every Prometheus collector used by
// Concourse, starts an HTTP listener serving the metrics on the configured
// address, and launches the background goroutine that garbage collects
// timeseries for workers that have stopped reporting.
//
// NOTE(review): prometheus.MustRegister panics on duplicate registration,
// so this must only be called once per process.
func (config *PrometheusConfig) NewEmitter() (metric.Emitter, error) {
	// error log metrics
	errorLogs := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "error",
			Name:      "logs",
			Help:      "Number of error logged",
		}, []string{"message"},
	)
	prometheus.MustRegister(errorLogs)

	// lock metrics
	locksHeld := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: "concourse",
		Subsystem: "locks",
		Name:      "held",
		Help:      "Database locks held",
	}, []string{"type"})
	prometheus.MustRegister(locksHeld)

	// job metrics
	jobsScheduled := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "jobs",
		Name:      "scheduled_total",
		Help:      "Total number of Concourse jobs scheduled.",
	})
	prometheus.MustRegister(jobsScheduled)

	jobsScheduling := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "concourse",
		Subsystem: "jobs",
		Name:      "scheduling",
		Help:      "Number of Concourse jobs currently being scheduled.",
	})
	prometheus.MustRegister(jobsScheduling)

	// build metrics
	buildsStarted := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "started_total",
		Help:      "Total number of Concourse builds started.",
	})
	prometheus.MustRegister(buildsStarted)

	buildsRunning := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "running",
		Help:      "Number of Concourse builds currently running.",
	})
	prometheus.MustRegister(buildsRunning)

	concurrentRequestsLimitHit := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "concurrent_requests",
		Name:      "limit_hit_total",
		Help:      "Total number of requests rejected because the server was already serving too many concurrent requests.",
	}, []string{"action"})
	prometheus.MustRegister(concurrentRequestsLimitHit)

	concurrentRequests := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: "concourse",
		Name:      "concurrent_requests",
		Help:      "Number of concurrent requests being served by endpoints that have a specified limit of concurrent requests.",
	}, []string{"action"})
	prometheus.MustRegister(concurrentRequests)

	tasksWaiting := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Namespace: "concourse",
		Subsystem: "tasks",
		Name:      "waiting",
		Help:      "Number of Concourse tasks currently waiting.",
	}, []string{"teamId", "workerTags", "platform"})
	prometheus.MustRegister(tasksWaiting)

	tasksWaitingDuration := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Namespace: "concourse",
		Subsystem: "tasks",
		Name:      "wait_duration",
		Help:      "Elapsed time waiting for execution",
		Buckets:   []float64{1, 15, 30, 60, 120, 180, 240, 300, 600, 1200},
	}, []string{"teamId", "workerTags", "platform"})
	prometheus.MustRegister(tasksWaitingDuration)

	buildsFinished := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "finished_total",
		Help:      "Total number of Concourse builds finished.",
	})
	prometheus.MustRegister(buildsFinished)

	buildsSucceeded := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "succeeded_total",
		Help:      "Total number of Concourse builds succeeded.",
	})
	prometheus.MustRegister(buildsSucceeded)

	buildsErrored := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "errored_total",
		Help:      "Total number of Concourse builds errored.",
	})
	prometheus.MustRegister(buildsErrored)

	buildsFailed := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "failed_total",
		Help:      "Total number of Concourse builds failed.",
	})
	prometheus.MustRegister(buildsFailed)

	buildsAborted := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "builds",
		Name:      "aborted_total",
		Help:      "Total number of Concourse builds aborted.",
	})
	prometheus.MustRegister(buildsAborted)

	buildsFinishedVec := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "builds",
			Name:      "finished",
			Help:      "Count of builds finished across various dimensions.",
		},
		[]string{"team", "pipeline", "job", "status"},
	)
	prometheus.MustRegister(buildsFinishedVec)

	buildDurationsVec := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "concourse",
			Subsystem: "builds",
			Name:      "duration_seconds",
			Help:      "Build time in seconds",
			Buckets:   []float64{1, 60, 180, 300, 600, 900, 1200, 1800, 2700, 3600, 7200, 18000, 36000},
		},
		[]string{"team", "pipeline", "job"},
	)
	prometheus.MustRegister(buildDurationsVec)

	// worker metrics
	workerContainers := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "containers",
			Help:      "Number of containers per worker",
		},
		[]string{"worker", "platform", "team", "tags"},
	)
	prometheus.MustRegister(workerContainers)

	workerUnknownContainers := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "unknown_containers",
			Help:      "Number of unknown containers found on worker",
		},
		[]string{"worker"},
	)
	prometheus.MustRegister(workerUnknownContainers)

	workerVolumes := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "volumes",
			Help:      "Number of volumes per worker",
		},
		[]string{"worker", "platform", "team", "tags"},
	)
	prometheus.MustRegister(workerVolumes)

	workerUnknownVolumes := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "unknown_volumes",
			Help:      "Number of unknown volumes found on worker",
		},
		[]string{"worker"},
	)
	prometheus.MustRegister(workerUnknownVolumes)

	workerTasks := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "tasks",
			Help:      "Number of active tasks per worker",
		},
		[]string{"worker", "platform"},
	)
	prometheus.MustRegister(workerTasks)

	workersRegistered := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "workers",
			Name:      "registered",
			Help:      "Number of workers per state as seen by the database",
		},
		[]string{"state"},
	)
	prometheus.MustRegister(workersRegistered)

	// http metrics
	httpRequestsDuration := prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Namespace: "concourse",
			Subsystem: "http_responses",
			Name:      "duration_seconds",
			Help:      "Response time in seconds",
		},
		[]string{"method", "route", "status"},
	)
	prometheus.MustRegister(httpRequestsDuration)

	dbQueriesTotal := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: "concourse",
		Subsystem: "db",
		Name:      "queries_total",
		Help:      "Total number of database Concourse database queries",
	})
	prometheus.MustRegister(dbQueriesTotal)

	dbConnections := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "db",
			Name:      "connections",
			Help:      "Current number of concourse database connections",
		},
		[]string{"dbname"},
	)
	prometheus.MustRegister(dbConnections)

	// lidar (resource checking) metrics
	checksFinished := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "lidar",
			Name:      "checks_finished_total",
			Help:      "Total number of checks finished",
		},
		[]string{"status"},
	)
	prometheus.MustRegister(checksFinished)

	checksQueueSize := prometheus.NewGauge(
		prometheus.GaugeOpts{
			Namespace: "concourse",
			Subsystem: "lidar",
			Name:      "check_queue_size",
			Help:      "The size of the checks queue",
		},
	)
	prometheus.MustRegister(checksQueueSize)

	checksStarted := prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "lidar",
			Name:      "checks_started_total",
			Help:      "Total number of checks started",
		},
	)
	prometheus.MustRegister(checksStarted)

	checksEnqueued := prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "lidar",
			Name:      "checks_enqueued_total",
			Help:      "Total number of checks enqueued",
		},
	)
	prometheus.MustRegister(checksEnqueued)

	volumesStreamed := prometheus.NewCounter(
		prometheus.CounterOpts{
			Namespace: "concourse",
			Subsystem: "volumes",
			Name:      "volumes_streamed",
			Help:      "Total number of volumes streamed from one worker to the other",
		},
	)
	prometheus.MustRegister(volumesStreamed)

	listener, err := net.Listen("tcp", config.bind())
	if err != nil {
		return nil, err
	}

	// NOTE(review): the error returned by http.Serve is dropped; if the
	// metrics endpoint dies, the process keeps running without it.
	go http.Serve(listener, promhttp.Handler())

	emitter := &PrometheusEmitter{
		jobsScheduled:  jobsScheduled,
		jobsScheduling: jobsScheduling,

		buildsStarted: buildsStarted,
		buildsRunning: buildsRunning,

		concurrentRequestsLimitHit: concurrentRequestsLimitHit,
		concurrentRequests:         concurrentRequests,

		tasksWaiting:         tasksWaiting,
		tasksWaitingDuration: tasksWaitingDuration,

		buildDurationsVec: buildDurationsVec,
		buildsAborted:     buildsAborted,
		buildsErrored:     buildsErrored,
		buildsFailed:      buildsFailed,
		buildsFinished:    buildsFinished,
		buildsFinishedVec: buildsFinishedVec,
		buildsSucceeded:   buildsSucceeded,

		dbConnections:  dbConnections,
		dbQueriesTotal: dbQueriesTotal,

		errorLogs: errorLogs,

		httpRequestsDuration: httpRequestsDuration,

		locksHeld: locksHeld,

		checksFinished:  checksFinished,
		checksQueueSize: checksQueueSize,
		checksStarted:   checksStarted,
		checksEnqueued:  checksEnqueued,

		workerContainers:        workerContainers,
		workersRegistered:       workersRegistered,
		workerContainersLabels:  map[string]map[string]prometheus.Labels{},
		workerVolumesLabels:     map[string]map[string]prometheus.Labels{},
		workerTasksLabels:       map[string]map[string]prometheus.Labels{},
		workerLastSeen:          map[string]time.Time{},
		workerVolumes:           workerVolumes,
		workerTasks:             workerTasks,
		workerUnknownContainers: workerUnknownContainers,
		workerUnknownVolumes:    workerUnknownVolumes,

		volumesStreamed: volumesStreamed,
	}
	// background GC of timeseries for workers that stopped reporting
	go emitter.periodicMetricGC()

	return emitter, nil
}
   465  
// Emit processes incoming metrics.
// In order to provide idiomatic Prometheus metrics, we'll have to convert the various
// Event types (differentiated by the less-than-ideal string Name field) into different
// Prometheus metrics.
func (emitter *PrometheusEmitter) Emit(logger lager.Logger, event metric.Event) {

	// update last-seen timestamps, used to GC stale per-worker timeseries
	emitter.updateLastSeen(event)

	switch event.Name {
	case "error log":
		emitter.errorLogsMetric(logger, event)
	case "lock held":
		emitter.lock(logger, event)
	case "jobs scheduled":
		emitter.jobsScheduled.Add(event.Value)
	case "jobs scheduling":
		emitter.jobsScheduling.Set(event.Value)
	case "builds started":
		emitter.buildsStarted.Add(event.Value)
	case "builds running":
		emitter.buildsRunning.Set(event.Value)
	case "concurrent requests limit hit":
		emitter.concurrentRequestsLimitHit.WithLabelValues(event.Attributes["action"]).Add(event.Value)
	case "concurrent requests":
		emitter.concurrentRequests.
			WithLabelValues(event.Attributes["action"]).Set(event.Value)
	case "tasks waiting":
		emitter.tasksWaiting.
			WithLabelValues(
				event.Attributes["teamId"],
				event.Attributes["workerTags"],
				event.Attributes["platform"],
			).Set(event.Value)
	case "tasks waiting duration":
		emitter.tasksWaitingDuration.
			WithLabelValues(
				event.Attributes["teamId"],
				event.Attributes["workerTags"],
				event.Attributes["platform"],
			).Observe(event.Value)
	case "build finished":
		emitter.buildFinishedMetrics(logger, event)
	case "worker containers":
		emitter.workerContainersMetric(logger, event)
	case "worker volumes":
		emitter.workerVolumesMetric(logger, event)
	case "worker unknown containers":
		emitter.workerUnknownContainersMetric(logger, event)
	case "worker unknown volumes":
		emitter.workerUnknownVolumesMetric(logger, event)
	case "worker tasks":
		emitter.workerTasksMetric(logger, event)
	case "worker state":
		emitter.workersRegisteredMetric(logger, event)
	case "http response time":
		emitter.httpResponseTimeMetrics(logger, event)
	case "database queries":
		emitter.databaseMetrics(logger, event)
	case "database connections":
		emitter.databaseMetrics(logger, event)
	case "checks finished":
		emitter.checksFinished.WithLabelValues(event.Attributes["status"]).Add(event.Value)
	case "checks started":
		emitter.checksStarted.Add(event.Value)
	case "checks enqueued":
		emitter.checksEnqueued.Add(event.Value)
	case "checks queue size":
		emitter.checksQueueSize.Set(event.Value)
	case "volumes streamed":
		emitter.volumesStreamed.Add(event.Value)
	default:
		// unless we have a specific metric, we do nothing
	}
}
   541  
   542  func (emitter *PrometheusEmitter) lock(logger lager.Logger, event metric.Event) {
   543  	lockType, exists := event.Attributes["type"]
   544  	if !exists {
   545  		logger.Error("failed-to-find-type-in-event", fmt.Errorf("expected type to exist in event.Attributes"))
   546  		return
   547  	}
   548  
   549  	if event.Value == 1 {
   550  		emitter.locksHeld.WithLabelValues(lockType).Inc()
   551  	} else {
   552  		emitter.locksHeld.WithLabelValues(lockType).Dec()
   553  	}
   554  }
   555  
   556  func (emitter *PrometheusEmitter) errorLogsMetric(logger lager.Logger, event metric.Event) {
   557  	message, exists := event.Attributes["message"]
   558  	if !exists {
   559  		logger.Error("failed-to-find-message-in-event",
   560  			fmt.Errorf("expected team_name to exist in event.Attributes"))
   561  		return
   562  	}
   563  
   564  	emitter.errorLogs.WithLabelValues(message).Inc()
   565  }
   566  
   567  func (emitter *PrometheusEmitter) buildFinishedMetrics(logger lager.Logger, event metric.Event) {
   568  	// concourse_builds_finished_total
   569  	emitter.buildsFinished.Inc()
   570  
   571  	// concourse_builds_finished
   572  	team, exists := event.Attributes["team_name"]
   573  	if !exists {
   574  		logger.Error("failed-to-find-team-name-in-event", fmt.Errorf("expected team_name to exist in event.Attributes"))
   575  		return
   576  	}
   577  
   578  	pipeline, exists := event.Attributes["pipeline"]
   579  	if !exists {
   580  		logger.Error("failed-to-find-pipeline-in-event", fmt.Errorf("expected pipeline to exist in event.Attributes"))
   581  		return
   582  	}
   583  
   584  	job, exists := event.Attributes["job"]
   585  	if !exists {
   586  		logger.Error("failed-to-find-job-in-event", fmt.Errorf("expected job to exist in event.Attributes"))
   587  		return
   588  	}
   589  
   590  	buildStatus, exists := event.Attributes["build_status"]
   591  	if !exists {
   592  		logger.Error("failed-to-find-build_status-in-event", fmt.Errorf("expected build_status to exist in event.Attributes"))
   593  		return
   594  	}
   595  	emitter.buildsFinishedVec.WithLabelValues(team, pipeline, job, buildStatus).Inc()
   596  
   597  	// concourse_builds_(aborted|succeeded|failed|errored)_total
   598  	switch buildStatus {
   599  	case string(db.BuildStatusAborted):
   600  		// concourse_builds_aborted_total
   601  		emitter.buildsAborted.Inc()
   602  	case string(db.BuildStatusSucceeded):
   603  		// concourse_builds_succeeded_total
   604  		emitter.buildsSucceeded.Inc()
   605  	case string(db.BuildStatusFailed):
   606  		// concourse_builds_failed_total
   607  		emitter.buildsFailed.Inc()
   608  	case string(db.BuildStatusErrored):
   609  		// concourse_builds_errored_total
   610  		emitter.buildsErrored.Inc()
   611  	}
   612  
   613  	// seconds are the standard prometheus base unit for time
   614  	duration := event.Value / 1000
   615  	emitter.buildDurationsVec.WithLabelValues(team, pipeline, job).Observe(duration)
   616  }
   617  
   618  func (emitter *PrometheusEmitter) workerContainersMetric(logger lager.Logger, event metric.Event) {
   619  	worker, exists := event.Attributes["worker"]
   620  	if !exists {
   621  		logger.Error("failed-to-find-worker-in-event", fmt.Errorf("expected worker to exist in event.Attributes"))
   622  		return
   623  	}
   624  	platform, exists := event.Attributes["platform"]
   625  	if !exists || platform == "" {
   626  		logger.Error("failed-to-find-platform-in-event", fmt.Errorf("expected platform to exist in event.Attributes"))
   627  		return
   628  	}
   629  	team, exists := event.Attributes["team_name"]
   630  	if !exists {
   631  		logger.Error("failed-to-find-team-name-in-event", fmt.Errorf("expected team_name to exist in event.Attributes"))
   632  		return
   633  	}
   634  	tags, _ := event.Attributes["tags"]
   635  
   636  	labels := prometheus.Labels{
   637  		"worker":   worker,
   638  		"platform": platform,
   639  		"team":     team,
   640  		"tags":     tags,
   641  	}
   642  	key := serializeLabels(&labels)
   643  	if emitter.workerContainersLabels[worker] == nil {
   644  		emitter.workerContainersLabels[worker] = make(map[string]prometheus.Labels)
   645  	}
   646  	emitter.workerContainersLabels[worker][key] = labels
   647  	emitter.workerContainers.With(emitter.workerContainersLabels[worker][key]).Set(event.Value)
   648  }
   649  
   650  func (emitter *PrometheusEmitter) workersRegisteredMetric(logger lager.Logger, event metric.Event) {
   651  	state, exists := event.Attributes["state"]
   652  	if !exists {
   653  		logger.Error("failed-to-find-state-in-event", fmt.Errorf("expected state to exist in event.Attributes"))
   654  		return
   655  	}
   656  
   657  	emitter.workersRegistered.WithLabelValues(state).Set(event.Value)
   658  }
   659  
   660  func (emitter *PrometheusEmitter) workerUnknownContainersMetric(logger lager.Logger, event metric.Event) {
   661  	worker, exists := event.Attributes["worker"]
   662  	if !exists {
   663  		logger.Error("failed-to-find-worker-in-event", fmt.Errorf("expected worker to exist in event.Attributes"))
   664  		return
   665  	}
   666  
   667  	labels := prometheus.Labels{
   668  		"worker": worker,
   669  	}
   670  
   671  	key := serializeLabels(&labels)
   672  	if emitter.workerContainersLabels[worker] == nil {
   673  		emitter.workerContainersLabels[worker] = make(map[string]prometheus.Labels)
   674  	}
   675  	emitter.workerContainersLabels[worker][key] = labels
   676  	emitter.workerUnknownContainers.With(emitter.workerContainersLabels[worker][key]).Set(event.Value)
   677  }
   678  
   679  func (emitter *PrometheusEmitter) workerVolumesMetric(logger lager.Logger, event metric.Event) {
   680  	worker, exists := event.Attributes["worker"]
   681  	if !exists {
   682  		logger.Error("failed-to-find-worker-in-event", fmt.Errorf("expected worker to exist in event.Attributes"))
   683  		return
   684  	}
   685  	platform, exists := event.Attributes["platform"]
   686  	if !exists || platform == "" {
   687  		logger.Error("failed-to-find-platform-in-event", fmt.Errorf("expected platform to exist in event.Attributes"))
   688  		return
   689  	}
   690  	team, exists := event.Attributes["team_name"]
   691  	if !exists {
   692  		logger.Error("failed-to-find-team-name-in-event", fmt.Errorf("expected team_name to exist in event.Attributes"))
   693  		return
   694  	}
   695  	tags, _ := event.Attributes["tags"]
   696  
   697  	labels := prometheus.Labels{
   698  		"worker":   worker,
   699  		"platform": platform,
   700  		"team":     team,
   701  		"tags":     tags,
   702  	}
   703  	key := serializeLabels(&labels)
   704  	if emitter.workerVolumesLabels[worker] == nil {
   705  		emitter.workerVolumesLabels[worker] = make(map[string]prometheus.Labels)
   706  	}
   707  	emitter.workerVolumesLabels[worker][key] = labels
   708  	emitter.workerVolumes.With(emitter.workerVolumesLabels[worker][key]).Set(event.Value)
   709  }
   710  
   711  func (emitter *PrometheusEmitter) workerUnknownVolumesMetric(logger lager.Logger, event metric.Event) {
   712  	worker, exists := event.Attributes["worker"]
   713  	if !exists {
   714  		logger.Error("failed-to-find-worker-in-event", fmt.Errorf("expected worker to exist in event.Attributes"))
   715  		return
   716  	}
   717  
   718  	labels := prometheus.Labels{
   719  		"worker": worker,
   720  	}
   721  
   722  	key := serializeLabels(&labels)
   723  	if emitter.workerVolumesLabels[worker] == nil {
   724  		emitter.workerVolumesLabels[worker] = make(map[string]prometheus.Labels)
   725  	}
   726  	emitter.workerVolumesLabels[worker][key] = labels
   727  	emitter.workerUnknownVolumes.With(emitter.workerVolumesLabels[worker][key]).Set(event.Value)
   728  }
   729  
   730  func (emitter *PrometheusEmitter) workerTasksMetric(logger lager.Logger, event metric.Event) {
   731  	worker, exists := event.Attributes["worker"]
   732  	if !exists {
   733  		logger.Error("failed-to-find-worker-in-event", fmt.Errorf("expected worker to exist in event.Attributes"))
   734  		return
   735  	}
   736  	platform, exists := event.Attributes["platform"]
   737  	if !exists || platform == "" {
   738  		logger.Error("failed-to-find-platform-in-event", fmt.Errorf("expected platform to exist in event.Attributes"))
   739  		return
   740  	}
   741  
   742  	labels := prometheus.Labels{
   743  		"worker":   worker,
   744  		"platform": platform,
   745  	}
   746  	key := serializeLabels(&labels)
   747  	if emitter.workerTasksLabels[worker] == nil {
   748  		emitter.workerTasksLabels[worker] = make(map[string]prometheus.Labels)
   749  	}
   750  	emitter.workerTasksLabels[worker][key] = labels
   751  	emitter.workerTasks.With(emitter.workerTasksLabels[worker][key]).Set(event.Value)
   752  }
   753  
   754  func (emitter *PrometheusEmitter) httpResponseTimeMetrics(logger lager.Logger, event metric.Event) {
   755  	route, exists := event.Attributes["route"]
   756  	if !exists {
   757  		logger.Error("failed-to-find-route-in-event", fmt.Errorf("expected method to exist in event.Attributes"))
   758  		return
   759  	}
   760  
   761  	method, exists := event.Attributes["method"]
   762  	if !exists {
   763  		logger.Error("failed-to-find-method-in-event", fmt.Errorf("expected method to exist in event.Attributes"))
   764  		return
   765  	}
   766  
   767  	status, exists := event.Attributes["status"]
   768  	if !exists {
   769  		logger.Error("failed-to-find-status-in-event", fmt.Errorf("expected status to exist in event.Attributes"))
   770  		return
   771  	}
   772  
   773  	emitter.httpRequestsDuration.WithLabelValues(method, route, status).Observe(event.Value / 1000)
   774  }
   775  
   776  func (emitter *PrometheusEmitter) databaseMetrics(logger lager.Logger, event metric.Event) {
   777  	switch event.Name {
   778  	case "database queries":
   779  		emitter.dbQueriesTotal.Add(event.Value)
   780  	case "database connections":
   781  		connectionName, exists := event.Attributes["ConnectionName"]
   782  		if !exists {
   783  			logger.Error("failed-to-connection-name-in-event", fmt.Errorf("expected ConnectionName to exist in event.Attributes"))
   784  			return
   785  		}
   786  		emitter.dbConnections.WithLabelValues(connectionName).Set(event.Value)
   787  	default:
   788  	}
   789  
   790  }
   791  
   792  // updateLastSeen tracks for each worker when it last received a metric event.
   793  func (emitter *PrometheusEmitter) updateLastSeen(event metric.Event) {
   794  	emitter.mu.Lock()
   795  	defer emitter.mu.Unlock()
   796  	if worker, exists := event.Attributes["worker"]; exists {
   797  		emitter.workerLastSeen[worker] = time.Now()
   798  	}
   799  }
   800  
   801  //periodically remove stale metrics for workers
   802  func (emitter *PrometheusEmitter) periodicMetricGC() {
   803  	for {
   804  		emitter.mu.Lock()
   805  		now := time.Now()
   806  		for worker, lastSeen := range emitter.workerLastSeen {
   807  			if now.Sub(lastSeen) > 5*time.Minute {
   808  				DoGarbageCollection(emitter, worker)
   809  				delete(emitter.workerLastSeen, worker)
   810  			}
   811  		}
   812  		emitter.mu.Unlock()
   813  		time.Sleep(60 * time.Second)
   814  	}
   815  }
   816  
   817  // DoGarbageCollection retrieves and deletes stale metrics by their labels.
   818  func DoGarbageCollection(emitter PrometheusGarbageCollectable, worker string) {
   819  	for _, labels := range emitter.WorkerContainersLabels()[worker] {
   820  		emitter.WorkerContainers().Delete(labels)
   821  	}
   822  
   823  	for _, labels := range emitter.WorkerVolumesLabels()[worker] {
   824  		emitter.WorkerVolumes().Delete(labels)
   825  	}
   826  
   827  	for _, labels := range emitter.WorkerTasksLabels()[worker] {
   828  		emitter.WorkerTasks().Delete(labels)
   829  	}
   830  
   831  	delete(emitter.WorkerContainersLabels(), worker)
   832  	delete(emitter.WorkerVolumesLabels(), worker)
   833  	delete(emitter.WorkerTasksLabels(), worker)
   834  }
   835  
//go:generate counterfeiter . PrometheusGarbageCollectable

// PrometheusGarbageCollectable exposes the per-worker gauge vectors and the
// label bookkeeping maps needed by DoGarbageCollection; PrometheusEmitter
// implements it, and the interface exists so GC can be tested with a fake.
type PrometheusGarbageCollectable interface {
	WorkerContainers() *prometheus.GaugeVec
	WorkerVolumes() *prometheus.GaugeVec
	WorkerTasks() *prometheus.GaugeVec

	WorkerContainersLabels() map[string]map[string]prometheus.Labels
	WorkerVolumesLabels() map[string]map[string]prometheus.Labels
	WorkerTasksLabels() map[string]map[string]prometheus.Labels
}
   846  
// WorkerContainers returns the per-worker container-count gauge vector.
func (emitter *PrometheusEmitter) WorkerContainers() *prometheus.GaugeVec {
	return emitter.workerContainers
}

// WorkerVolumes returns the per-worker volume-count gauge vector.
func (emitter *PrometheusEmitter) WorkerVolumes() *prometheus.GaugeVec {
	return emitter.workerVolumes
}

// WorkerTasks returns the per-worker active-task gauge vector.
func (emitter *PrometheusEmitter) WorkerTasks() *prometheus.GaugeVec {
	return emitter.workerTasks
}

// WorkerContainersLabels returns the container label bookkeeping map.
func (emitter *PrometheusEmitter) WorkerContainersLabels() map[string]map[string]prometheus.Labels {
	return emitter.workerContainersLabels
}

// WorkerVolumesLabels returns the volume label bookkeeping map.
func (emitter *PrometheusEmitter) WorkerVolumesLabels() map[string]map[string]prometheus.Labels {
	return emitter.workerVolumesLabels
}

// WorkerTasksLabels returns the task label bookkeeping map.
func (emitter *PrometheusEmitter) WorkerTasksLabels() map[string]map[string]prometheus.Labels {
	return emitter.workerTasksLabels
}