github.com/openshift-online/ocm-sdk-go@v0.1.473/metrics/transport_wrapper.go (about)

     1  /*
     2  Copyright (c) 2021 Red Hat, Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8    http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // This file contains the implementation of a transport wrapper that generates Prometheus metrics.
    18  
    19  package metrics
    20  
    21  import (
    22  	"fmt"
    23  	"net/http"
    24  	"time"
    25  
    26  	"github.com/prometheus/client_golang/prometheus"
    27  )
    28  
    29  // TransportWrapperBuilder contains the data and logic needed to build a new metrics transport
    30  // wrapper that creates HTTP round trippers that generate the following Prometheus metrics:
    31  //
    32  //	<subsystem>_request_count - Number of API requests sent.
    33  //	<subsystem>_request_duration_sum - Total time to send API requests, in seconds.
    34  //	<subsystem>_request_duration_count - Total number of API requests measured.
    35  //	<subsystem>_request_duration_bucket - Number of API requests organized in buckets.
    36  //
    37  // To set the subsystem prefix use the Subsystem method.
    38  //
    39  // The duration buckets metrics contain an `le` label that indicates the upper bound. For example if
    40  // the `le` label is `1` then the value will be the number of requests that were processed in less
    41  // than one second.
    42  //
    43  // The metrics will have the following labels:
    44  //
    45  //	method - Name of the HTTP method, for example GET or POST.
    46  //	path - Request path, for example /api/clusters_mgmt/v1/clusters.
    47  //	code - HTTP response code, for example 200 or 500.
    48  //	apiservice - API service name, for example ocm-clusters-service.
    49  //
    50  // To calculate the average request duration during the last 10 minutes, for example, use a
    51  // Prometheus expression like this:
    52  //
    53  //	rate(api_outbound_request_duration_sum[10m]) / rate(api_outbound_request_duration_count[10m])
    54  //
    55  // In order to reduce the cardinality of the metrics the path label is modified to remove the
    56  // identifiers of the objects. For example, if the original path is .../clusters/123 then it will
    57  // be replaced by .../clusters/-, and the values will be accumulated. The line returned by the
    58  // metrics server will be like this:
    59  //
    60  //	     <subsystem>_request_count{code="200",method="GET",path="/api/clusters_mgmt/v1/clusters/-",
    61  //			apiservice="ocm-clusters-service"} 56
    62  //
    63  // The meaning of that is that there were a total of 56 requests to get specific clusters,
    64  // independently of the specific identifier of the cluster.
    65  //
    66  // The value of the `code` label will be zero when sending the request failed without a response
    67  // code, for example if it wasn't possible to open the connection, or if there was a timeout waiting
    68  // for the response.
    69  //
    70  // Note that setting this attribute is not enough to have metrics published, you also need to
    71  // create and start a metrics server, as described in the documentation of the Prometheus library.
    72  //
    73  // Don't create objects of this type directly; use the NewTransportWrapper function instead.
    74  type TransportWrapperBuilder struct {
    75  	paths      []string
    76  	subsystem  string
    77  	registerer prometheus.Registerer
    78  }
    79  
    80  // TransportWrapper contains the data and logic needed to wrap an HTTP round tripper with another
    81  // one that generates Prometheus metrics.
    82  type TransportWrapper struct {
    83  	paths           pathTree
    84  	requestCount    *prometheus.CounterVec
    85  	requestDuration *prometheus.HistogramVec
    86  }
    87  
    88  // roundTripper is a round tripper that generates Prometheus metrics.
    89  type roundTripper struct {
    90  	owner     *TransportWrapper
    91  	transport http.RoundTripper
    92  }
    93  
    94  // Make sure that we implement the interface:
    95  var _ http.RoundTripper = (*roundTripper)(nil)
    96  
    97  // NewTransportWrapper creates a new builder that can then be used to configure and create a new metrics
    98  // round tripper.
    99  func NewTransportWrapper() *TransportWrapperBuilder {
   100  	return &TransportWrapperBuilder{
   101  		registerer: prometheus.DefaultRegisterer,
   102  	}
   103  }
   104  
   105  // Path adds a path that will be accepted as a value for the `path` label. By default all the paths
   106  // of the API are already added. This is intended for additional pads, for example the path for
   107  // token requests. If those paths aren't explicitly specified here then their metrics will be
   108  // accumulated in the `/-` path.
   109  func (b *TransportWrapperBuilder) Path(value string) *TransportWrapperBuilder {
   110  	b.paths = append(b.paths, value)
   111  	return b
   112  }
   113  
   114  // Subsystem sets the name of the subsystem that will be used by to register the metrics with
   115  // Prometheus. For example, if the value is `api_outbound` then the following metrics will be
   116  // registered:
   117  //
   118  //	api_outbound_request_count - Number of API requests sent.
   119  //	api_outbound_request_duration_sum - Total time to send API requests, in seconds.
   120  //	api_outbound_request_duration_count - Total number of API requests measured.
   121  //	api_outbound_request_duration_bucket - Number of API requests organized in buckets.
   122  //
   123  // This is mandatory.
   124  func (b *TransportWrapperBuilder) Subsystem(value string) *TransportWrapperBuilder {
   125  	b.subsystem = value
   126  	return b
   127  }
   128  
   129  // Registerer sets the Prometheus registerer that will be used to register the metrics. The default
   130  // is to use the default Prometheus registerer and there is usually no need to change that. This is
   131  // intended for unit tests, where it is convenient to have a registerer that doesn't interfere with
   132  // the rest of the system.
   133  func (b *TransportWrapperBuilder) Registerer(value prometheus.Registerer) *TransportWrapperBuilder {
   134  	if value == nil {
   135  		value = prometheus.DefaultRegisterer
   136  	}
   137  	b.registerer = value
   138  	return b
   139  }
   140  
   141  // Build uses the information stored in the builder to create a new transport wrapper.
   142  func (b *TransportWrapperBuilder) Build() (result *TransportWrapper, err error) {
   143  	// Check parameters:
   144  	if b.subsystem == "" {
   145  		err = fmt.Errorf("subsystem is mandatory")
   146  		return
   147  	}
   148  
   149  	// Register the request count metric:
   150  	requestCount := prometheus.NewCounterVec(
   151  		prometheus.CounterOpts{
   152  			Subsystem: b.subsystem,
   153  			Name:      "request_count",
   154  			Help:      "Number of requests sent.",
   155  		},
   156  		requestLabelNames,
   157  	)
   158  	err = b.registerer.Register(requestCount)
   159  	if err != nil {
   160  		registered, ok := err.(prometheus.AlreadyRegisteredError)
   161  		if ok {
   162  			requestCount = registered.ExistingCollector.(*prometheus.CounterVec)
   163  			err = nil //nolint:all
   164  		} else {
   165  			return
   166  		}
   167  	}
   168  
   169  	// Create the path tree:
   170  	paths := pathRoot.copy()
   171  	for _, path := range b.paths {
   172  		paths.add(path)
   173  	}
   174  
   175  	// Register the request duration metric:
   176  	requestDuration := prometheus.NewHistogramVec(
   177  		prometheus.HistogramOpts{
   178  			Subsystem: b.subsystem,
   179  			Name:      "request_duration",
   180  			Help:      "Request duration in seconds.",
   181  			Buckets: []float64{
   182  				0.1,
   183  				1.0,
   184  				10.0,
   185  				30.0,
   186  			},
   187  		},
   188  		requestLabelNames,
   189  	)
   190  	err = b.registerer.Register(requestDuration)
   191  	if err != nil {
   192  		registered, ok := err.(prometheus.AlreadyRegisteredError)
   193  		if ok {
   194  			requestDuration = registered.ExistingCollector.(*prometheus.HistogramVec)
   195  			err = nil
   196  		} else {
   197  			return
   198  		}
   199  	}
   200  
   201  	// Create and populate the object:
   202  	result = &TransportWrapper{
   203  		paths:           paths,
   204  		requestCount:    requestCount,
   205  		requestDuration: requestDuration,
   206  	}
   207  
   208  	return
   209  }
   210  
   211  // Wrap creates a new round tripper that wraps the given one and generates the Prometheus metrics.
   212  func (w *TransportWrapper) Wrap(transport http.RoundTripper) http.RoundTripper {
   213  	return &roundTripper{
   214  		owner:     w,
   215  		transport: transport,
   216  	}
   217  }
   218  
   219  // RoundTrip is the implementation of the round tripper interface.
   220  func (t *roundTripper) RoundTrip(request *http.Request) (response *http.Response, err error) {
   221  	// Measure the time that it takes to send the request and receive the response:
   222  	start := time.Now()
   223  	response, err = t.transport.RoundTrip(request)
   224  	elapsed := time.Since(start)
   225  
   226  	// Update the metrics:
   227  	path := request.URL.Path
   228  	method := request.Method
   229  	var code int
   230  	if response != nil {
   231  		code = response.StatusCode
   232  	}
   233  	labels := prometheus.Labels{
   234  		serviceLabelName: serviceLabel(path),
   235  		methodLabelName:  methodLabel(method),
   236  		pathLabelName:    pathLabel(t.owner.paths, path),
   237  		codeLabelName:    codeLabel(code),
   238  	}
   239  	t.owner.requestCount.With(labels).Inc()
   240  	t.owner.requestDuration.With(labels).Observe(elapsed.Seconds())
   241  
   242  	return
   243  }