sigs.k8s.io/cluster-api-provider-aws@v1.5.5/pkg/cloud/metrics/metrics.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package metrics
    18  
    19  import (
    20  	"net/url"
    21  	"strconv"
    22  	"strings"
    23  	"time"
    24  
    25  	"github.com/aws/aws-sdk-go/aws"
    26  	"github.com/aws/aws-sdk-go/aws/request"
    27  	"github.com/prometheus/client_golang/prometheus"
    28  	"sigs.k8s.io/controller-runtime/pkg/metrics"
    29  
    30  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/awserrors"
    31  )
    32  
    33  const (
    34  	metricAWSSubsystem       = "aws"
    35  	metricRequestCountKey    = "api_requests_total"
    36  	metricRequestDurationKey = "api_request_duration_seconds"
    37  	metricAPICallRetries     = "api_call_retries"
    38  	metricServiceLabel       = "service"
    39  	metricRegionLabel        = "region"
    40  	metricOperationLabel     = "operation"
    41  	metricControllerLabel    = "controller"
    42  	metricStatusCodeLabel    = "status_code"
    43  	metricErrorCodeLabel     = "error_code"
    44  )
    45  
    46  var (
    47  	awsRequestCount = prometheus.NewCounterVec(prometheus.CounterOpts{
    48  		Subsystem: metricAWSSubsystem,
    49  		Name:      metricRequestCountKey,
    50  		Help:      "Total number of AWS requests",
    51  	}, []string{metricControllerLabel, metricServiceLabel, metricRegionLabel, metricOperationLabel, metricStatusCodeLabel, metricErrorCodeLabel})
    52  	awsRequestDurationSeconds = prometheus.NewHistogramVec(prometheus.HistogramOpts{
    53  		Subsystem: metricAWSSubsystem,
    54  		Name:      metricRequestDurationKey,
    55  		Help:      "Latency of HTTP requests to AWS",
    56  	}, []string{metricControllerLabel, metricServiceLabel, metricRegionLabel, metricOperationLabel})
    57  	awsCallRetries = prometheus.NewHistogramVec(prometheus.HistogramOpts{
    58  		Subsystem: metricAWSSubsystem,
    59  		Name:      metricAPICallRetries,
    60  		Help:      "Number of retries made against an AWS API",
    61  		Buckets:   []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
    62  	}, []string{metricControllerLabel, metricServiceLabel, metricRegionLabel, metricOperationLabel})
    63  )
    64  
    65  func init() {
    66  	metrics.Registry.MustRegister(awsRequestCount)
    67  	metrics.Registry.MustRegister(awsRequestDurationSeconds)
    68  	metrics.Registry.MustRegister(awsCallRetries)
    69  }
    70  
    71  // CaptureRequestMetrics will monitor and capture request metrics.
    72  func CaptureRequestMetrics(controller string) func(r *request.Request) {
    73  	return func(r *request.Request) {
    74  		duration := time.Since(r.AttemptTime)
    75  		operation := r.Operation.Name
    76  		region := aws.StringValue(r.Config.Region)
    77  		service := endpointToService(r.ClientInfo.Endpoint)
    78  		statusCode := "0"
    79  		errorCode := ""
    80  		if r.HTTPResponse != nil {
    81  			statusCode = strconv.Itoa(r.HTTPResponse.StatusCode)
    82  		}
    83  		if r.Error != nil {
    84  			var ok bool
    85  			if errorCode, ok = awserrors.Code(r.Error); !ok {
    86  				errorCode = "internal"
    87  			}
    88  		}
    89  		awsRequestCount.WithLabelValues(controller, service, region, operation, statusCode, errorCode).Inc()
    90  		awsRequestDurationSeconds.WithLabelValues(controller, service, region, operation).Observe(duration.Seconds())
    91  		awsCallRetries.WithLabelValues(controller, service, region, operation).Observe(float64(r.RetryCount))
    92  	}
    93  }
    94  
    95  func endpointToService(endpoint string) string {
    96  	endpointURL, err := url.Parse(endpoint)
    97  	// If possible extract the service name, else return entire endpoint address
    98  	if err == nil {
    99  		host := endpointURL.Host
   100  		components := strings.Split(host, ".")
   101  		if len(components) > 0 {
   102  			return components[0]
   103  		}
   104  	}
   105  	return endpoint
   106  }