github.com/galamsiva2020/kubernetes-heapster-monitoring@v0.0.0-20210823134957-3c1baa7c1e70/metrics/sinks/stackdriver/stackdriver.go

     1  // Copyright 2015 Google Inc. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package stackdriver
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"math/rand"
    22  	"net/url"
    23  	"strconv"
    24  	"strings"
    25  	"time"
    26  
    27  	gce "cloud.google.com/go/compute/metadata"
    28  	sd_api "cloud.google.com/go/monitoring/apiv3"
    29  	"github.com/golang/glog"
    30  	google_proto "github.com/golang/protobuf/ptypes/timestamp"
    31  	"github.com/prometheus/client_golang/prometheus"
    32  	"google.golang.org/genproto/googleapis/api/metric"
    33  	"google.golang.org/genproto/googleapis/api/monitoredres"
    34  	monitoringpb "google.golang.org/genproto/googleapis/monitoring/v3"
    35  	grpc_codes "google.golang.org/grpc/codes"
    36  	grpc_status "google.golang.org/grpc/status"
    37  	gce_util "k8s.io/heapster/common/gce"
    38  	"k8s.io/heapster/metrics/core"
    39  )
    40  
    41  const (
    42  	maxTimeseriesPerRequest = 200
    43  	// 2 seconds allowed on the Stackdriver side, plus 1 extra second for networking overhead
    44  	sdRequestLatencySec = 3
    45  )
    46  
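        // StackdriverSink exports Heapster metrics to the Stackdriver Monitoring API,
        // using the legacy "gke_container" resource model, the new "k8s_*" resource
        // models, or both, depending on configuration.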
    47  type StackdriverSink struct {
    48  	project               string
    49  	clusterName           string
    50  	clusterLocation       string
    51  	heapsterZone          string
    52  	stackdriverClient     *sd_api.MetricClient
    53  	minInterval           time.Duration
    54  	lastExportTime        time.Time
    55  	batchExportTimeoutSec int
    56  	initialDelaySec       int
    57  	useOldResourceModel   bool
    58  	useNewResourceModel   bool
    59  }
    60  
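        // metricMetadata describes a Stackdriver metric: its kind, value type and
        // fully qualified metric type name.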
    61  type metricMetadata struct {
    62  	MetricKind metric.MetricDescriptor_MetricKind
    63  	ValueType  metric.MetricDescriptor_ValueType
    64  	Name       string
    65  }
    66  
    67  var (
    68  	// Sink performance metrics
    69  
    70  	requestsSent = prometheus.NewCounterVec(
    71  		prometheus.CounterOpts{
    72  			Namespace: "heapster",
    73  			Subsystem: "stackdriver",
    74  			Name:      "requests_count",
    75  			Help:      "Number of requests with return codes",
    76  		},
    77  		[]string{"code"},
    78  	)
    79  
    80  	timeseriesSent = prometheus.NewCounterVec(
    81  		prometheus.CounterOpts{
    82  			Namespace: "heapster",
    83  			Subsystem: "stackdriver",
    84  			Name:      "timeseries_count",
    85  			Help:      "Number of Timeseries sent with return codes",
    86  		},
    87  		[]string{"code"},
    88  	)
    89  	requestLatency = prometheus.NewSummary(
    90  		prometheus.SummaryOpts{
    91  			Namespace: "heapster",
    92  			Subsystem: "stackdriver",
    93  			Name:      "request_latency_milliseconds",
    94  			Help:      "Latency of requests to Stackdriver Monitoring API.",
    95  		},
    96  	)
    97  )
    98  
    99  func (sink *StackdriverSink) Name() string {
   100  	return "Stackdriver Sink"
   101  }
   102  
   103  func (sink *StackdriverSink) Stop() {
   104  }
   105  
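        // processMetrics translates the values of a metric set into Stackdriver time
        // series, once for each enabled resource model.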
   106  func (sink *StackdriverSink) processMetrics(metricValues map[string]core.MetricValue,
   107  	timestamp time.Time, labels map[string]string, collectionStartTime time.Time, entityCreateTime time.Time) []*monitoringpb.TimeSeries {
   108  	var timeSeries []*monitoringpb.TimeSeries
   109  	if sink.useOldResourceModel {
   110  		for name, value := range metricValues {
   111  			if ts := sink.LegacyTranslateMetric(timestamp, labels, name, value, collectionStartTime); ts != nil {
   112  				timeSeries = append(timeSeries, ts)
   113  			}
   114  		}
   115  	}
   116  	if sink.useNewResourceModel {
   117  		for name, value := range metricValues {
   118  			if ts := sink.TranslateMetric(timestamp, labels, name, value, collectionStartTime, entityCreateTime); ts != nil {
   119  				timeSeries = append(timeSeries, ts)
   120  			}
   121  		}
   122  	}
   123  	return timeSeries
   124  }
   125  
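        // ExportData translates a data batch into CreateTimeSeries requests, packing at
        // most maxTimeseriesPerRequest time series per request, and sends them to
        // Stackdriver asynchronously. Batches arriving earlier than minInterval after
        // the previous export are skipped.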
   126  func (sink *StackdriverSink) ExportData(dataBatch *core.DataBatch) {
   127  	// Make sure we don't export metrics too often.
   128  	if dataBatch.Timestamp.Before(sink.lastExportTime.Add(sink.minInterval)) {
   129  		glog.V(2).Infof("Skipping batch from %s because %s has not elapsed since the last export at %s", dataBatch.Timestamp, sink.minInterval, sink.lastExportTime)
   130  		return
   131  	}
   132  	sink.lastExportTime = dataBatch.Timestamp
   133  
   134  	requests := []*monitoringpb.CreateTimeSeriesRequest{}
   135  	req := getReq(sink.project)
   136  	for key, metricSet := range dataBatch.MetricSets {
   137  		switch metricSet.Labels["type"] {
   138  		case core.MetricSetTypeNode, core.MetricSetTypePod, core.MetricSetTypePodContainer, core.MetricSetTypeSystemContainer:
   139  		default:
   140  			continue
   141  		}
   142  
   143  		if metricSet.CollectionStartTime.IsZero() {
   144  			glog.V(2).Infof("Skipping metric set %s because its collection start time is zero", key)
   145  			continue
   146  		}
   147  
   148  		// Hack for the legacy resource type "gke_container", which represents three Kubernetes
   149  		// resources: container, pod and node. For pods the container name is empty, for nodes it
   150  		// is set to the artificial value "machine", and otherwise it holds the actual container
   151  		// name. With the new resource types, container_name is ignored for resources other than "k8s_container".
   152  		if sink.useOldResourceModel && metricSet.Labels["type"] == core.MetricSetTypeNode {
   153  			metricSet.Labels[core.LabelContainerName.Key] = "machine"
   154  		}
   155  
   156  		derivedMetrics := sink.computeDerivedMetrics(metricSet)
   157  
   158  		derivedTimeseries := sink.processMetrics(derivedMetrics.MetricValues, dataBatch.Timestamp, metricSet.Labels, metricSet.CollectionStartTime, metricSet.EntityCreateTime)
   159  		timeseries := sink.processMetrics(metricSet.MetricValues, dataBatch.Timestamp, metricSet.Labels, metricSet.CollectionStartTime, metricSet.EntityCreateTime)
   160  
   161  		timeseries = append(timeseries, derivedTimeseries...)
   162  
   163  		for _, ts := range timeseries {
   164  			req.TimeSeries = append(req.TimeSeries, ts)
   165  			if len(req.TimeSeries) >= maxTimeseriesPerRequest {
   166  				requests = append(requests, req)
   167  				req = getReq(sink.project)
   168  			}
   169  		}
   170  
   171  		for _, metric := range metricSet.LabeledMetrics {
   172  			if sink.useOldResourceModel {
   173  				if point := sink.LegacyTranslateLabeledMetric(dataBatch.Timestamp, metricSet.Labels, metric, metricSet.CollectionStartTime); point != nil {
   174  					req.TimeSeries = append(req.TimeSeries, point)
   175  				}
   176  
   177  				if len(req.TimeSeries) >= maxTimeseriesPerRequest {
   178  					requests = append(requests, req)
   179  					req = getReq(sink.project)
   180  				}
   181  			}
   182  			if sink.useNewResourceModel {
   183  				point := sink.TranslateLabeledMetric(dataBatch.Timestamp, metricSet.Labels, metric, metricSet.CollectionStartTime)
   184  				if point != nil {
   185  					req.TimeSeries = append(req.TimeSeries, point)
   186  				}
   187  
   188  				if len(req.TimeSeries) >= maxTimeseriesPerRequest {
   189  					requests = append(requests, req)
   190  					req = getReq(sink.project)
   191  				}
   192  			}
   193  		}
   194  	}
   195  
   196  	if len(req.TimeSeries) > 0 {
   197  		requests = append(requests, req)
   198  	}
   199  
   200  	go sink.sendRequests(requests)
   201  }
   202  
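        // sendRequests distributes the requests over a pool of worker goroutines and
        // waits until either all workers have finished or the batch export timeout
        // expires, whichever comes first.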
   203  func (sink *StackdriverSink) sendRequests(requests []*monitoringpb.CreateTimeSeriesRequest) {
   204  	// Each worker can handle at least batchExportTimeoutSec/sdRequestLatencySec requests within the export timeout.
   205  	// Add 5 extra workers as a safety margin.
   206  	workers := 5 + len(requests)/(sink.batchExportTimeoutSec/sdRequestLatencySec)
   207  	requestQueue := make(chan *monitoringpb.CreateTimeSeriesRequest)
   208  	completedQueue := make(chan bool)
   209  
   210  	// Launch goroutines responsible for sending requests
   211  	for i := 0; i < workers; i++ {
   212  		go sink.requestSender(requestQueue, completedQueue)
   213  	}
   214  
   215  	timeout := time.Duration(sink.batchExportTimeoutSec) * time.Second
   216  	timeoutSending := time.After(timeout)
   217  	timeoutCompleted := time.After(timeout)
   218  
   219  forloop:
   220  	for i, r := range requests {
   221  		select {
   222  		case requestQueue <- r:
   223  			// yet another request added to queue
   224  		case <-timeoutSending:
   225  			glog.Warningf("Timeout while exporting metrics to Stackdriver. Dropping %d out of %d requests.", len(requests)-i, len(requests))
   226  			// TODO(piosz): consider cancelling requests in flight
   227  			// Report dropped requests in metrics.
   228  			for _, req := range requests[i:] {
   229  				requestsSent.WithLabelValues(grpc_codes.DeadlineExceeded.String()).Inc()
   230  				timeseriesSent.
   231  					WithLabelValues(grpc_codes.DeadlineExceeded.String()).
   232  					Add(float64(len(req.TimeSeries)))
   233  			}
   234  			break forloop
   235  		}
   236  	}
   237  
   238  	// Close the channel so that exporting goroutines exit once the queue is drained.
   239  	close(requestQueue)
   240  
   241  	workersCompleted := 0
   242  	for {
   243  		select {
   244  		case <-completedQueue:
   245  			workersCompleted++
   246  			if workersCompleted == workers {
   247  				glog.V(4).Infof("All %d workers successfully finished sending requests to SD.", workersCompleted)
   248  				return
   249  			}
   250  		case <-timeoutCompleted:
   251  			glog.Warningf("Only %d out of %d workers successfully finished sending requests to SD. Some metrics might be lost.", workersCompleted, workers)
   252  			return
   253  		}
   254  	}
   255  }
   256  
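        // requestSender is a worker goroutine: after a random initial delay that spreads
        // the load, it sends requests from the queue until the queue is closed, then
        // signals completion.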
   257  func (sink *StackdriverSink) requestSender(reqQueue chan *monitoringpb.CreateTimeSeriesRequest, completedQueue chan bool) {
   258  	defer func() {
   259  		completedQueue <- true
   260  	}()
   261  	time.Sleep(time.Duration(rand.Intn(1000*sink.initialDelaySec)) * time.Millisecond)
   262  	for req := range reqQueue {
   263  		sink.sendOneRequest(req)
   264  	}
   265  }
   266  
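        // marshalRequestAndLog marshals the request to JSON and hands it to the supplied
        // printer; marshalling errors are logged instead.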
   267  func marshalRequestAndLog(printer func([]byte), req *monitoringpb.CreateTimeSeriesRequest) {
   268  	reqJson, errJson := json.Marshal(req)
   269  	if errJson != nil {
   270  		glog.Errorf("Couldn't marshal Stackdriver request %v", errJson)
   271  	} else {
   272  		printer(reqJson)
   273  	}
   274  }
   275  
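        // sendOneRequest sends a single CreateTimeSeries request to Stackdriver and
        // records the outcome in the sink's Prometheus metrics.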
   276  func (sink *StackdriverSink) sendOneRequest(req *monitoringpb.CreateTimeSeriesRequest) {
   277  	startTime := time.Now()
   278  	err := sink.stackdriverClient.CreateTimeSeries(context.Background(), req)
   279  
   280  	var responseCode grpc_codes.Code
   281  	if err != nil {
   282  		glog.Warningf("Error while sending request to Stackdriver %v", err)
   283  		// Convert the request to JSON and log it, but only at verbosity level 2 or higher.
   284  		if glog.V(2) {
   285  			marshalRequestAndLog(func(reqJson []byte) {
   286  				glog.V(2).Infof("The request was: %s", reqJson)
   287  			}, req)
   288  		}
   289  		if status, ok := grpc_status.FromError(err); ok {
   290  			responseCode = status.Code()
   291  		} else {
   292  			responseCode = grpc_codes.Unknown
   293  		}
   294  	} else {
   295  		// Convert the request to JSON and log it, but only at verbosity level 10 or higher.
   296  		if glog.V(10) {
   297  			marshalRequestAndLog(func(reqJson []byte) {
   298  				glog.V(10).Infof("Stackdriver request sent: %s", reqJson)
   299  			}, req)
   300  		}
   301  		responseCode = grpc_codes.OK
   302  	}
   303  
   304  	requestsSent.WithLabelValues(responseCode.String()).Inc()
   305  	timeseriesSent.
   306  		WithLabelValues(responseCode.String()).
   307  		Add(float64(len(req.TimeSeries)))
   308  	requestLatency.Observe(time.Since(startTime).Seconds() / time.Millisecond.Seconds())
   309  }
   310  
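        // CreateStackdriverSink builds a Stackdriver sink from the sink URI options
        // (use_old_resources, use_new_resources, min_interval_sec,
        // batch_export_timeout_sec, initial_delay_sec, cluster_name, cluster_location,
        // zone). When running on GCE, the project ID, zone, cluster name and cluster
        // location are auto-detected from the GCE metadata server; otherwise the project
        // ID is read from the environment and the zone must be passed explicitly.
        // A hypothetical invocation might look like:
        //
        //   --sink=stackdriver:?cluster_name=my-cluster&use_old_resources=false&use_new_resources=true&min_interval_sec=60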
   311  func CreateStackdriverSink(uri *url.URL) (core.DataSink, error) {
   312  	if len(uri.Scheme) > 0 {
   313  		return nil, fmt.Errorf("Scheme should not be set for Stackdriver sink")
   314  	}
   315  	if len(uri.Host) > 0 {
   316  		return nil, fmt.Errorf("Host should not be set for Stackdriver sink")
   317  	}
   318  
   319  	opts := uri.Query()
   320  
   321  	useOldResourceModel := true
   322  	if err := parseBoolFlag(opts, "use_old_resources", &useOldResourceModel); err != nil {
   323  		return nil, err
   324  	}
   325  	useNewResourceModel := false
   326  	if err := parseBoolFlag(opts, "use_new_resources", &useNewResourceModel); err != nil {
   327  		return nil, err
   328  	}
   329  
   330  	minInterval := time.Nanosecond
   331  	if len(opts["min_interval_sec"]) >= 1 {
   332  		if interval, err := strconv.Atoi(opts["min_interval_sec"][0]); err != nil {
   333  			return nil, fmt.Errorf("Min interval should be an integer, found: %v", opts["min_interval_sec"][0])
   334  		} else {
   335  			minInterval = time.Duration(interval) * time.Second
   336  		}
   337  	}
   338  
   339  	batchExportTimeoutSec := 60
   340  	var err error
   341  	if len(opts["batch_export_timeout_sec"]) >= 1 {
   342  		if batchExportTimeoutSec, err = strconv.Atoi(opts["batch_export_timeout_sec"][0]); err != nil {
   343  			return nil, fmt.Errorf("Batch export timeout should be an integer, found: %v", opts["batch_export_timeout_sec"][0])
   344  		}
   345  	}
   346  
   347  	initialDelaySec := sdRequestLatencySec
   348  	if len(opts["initial_delay_sec"]) >= 1 {
   349  		if initialDelaySec, err = strconv.Atoi(opts["initial_delay_sec"][0]); err != nil {
   350  			return nil, fmt.Errorf("Initial delay should be an integer, found: %v", opts["initial_delay_sec"][0])
   351  		}
   352  	}
   353  
   354  	var projectId, heapsterZone string
   355  	// Cluster name and location are required when useNewResourceModel is true.
   356  	var clusterName, clusterLocation string
   357  
   358  	if len(opts["cluster_name"]) >= 1 {
   359  		clusterName = opts["cluster_name"][0]
   360  	}
   361  
   362  	if len(opts["cluster_location"]) >= 1 {
   363  		clusterLocation = opts["cluster_location"][0]
   364  	}
   365  
   366  	if gce.OnGCE() {
   367  		// Detect project ID
   368  		projectId, err = gce.ProjectID()
   369  		if err != nil {
   370  			return nil, err
   371  		}
   372  
   373  		// Detect zone for old resource model
   374  		heapsterZone, err = gce.Zone()
   375  		if err != nil {
   376  			glog.Warningf("Zone could not be discovered using the GCE Metadata Server: %s", err)
   377  
   378  			if useOldResourceModel {
   379  				return nil, err
   380  			}
   381  		}
   382  
   383  		if useNewResourceModel {
   384  			if clusterName == "" {
   385  				glog.Info("An empty cluster name has been provided, checking the GCE Metadata Server to try to auto-detect.")
   386  
   387  				clusterName, err = gce.InstanceAttributeValue("cluster-name")
   388  				if err == nil {
   389  					glog.Infof("Discovered '%s' as the cluster name from the GCE Metadata Server.", clusterName)
   390  				} else {
   391  					glog.Warningf("Cluster name could not be discovered using the GCE Metadata Server: %s", err)
   392  				}
   393  			}
   394  
   395  			if clusterLocation == "" {
   396  				glog.Info("An empty cluster location has been provided, checking the GCE Metadata Server to try to auto-detect.")
   397  
   398  				clusterLocation, err = gce.InstanceAttributeValue("cluster-location")
   399  				if err == nil {
   400  					glog.Infof("Discovered '%s' as the cluster location from the GCE Metadata Server.", clusterLocation)
   401  				} else {
   402  					glog.Warningf("Cluster location could not be discovered using the GCE Metadata Server: %s", err)
   403  				}
   404  			}
   405  		}
   406  	} else {
   407  		// Detect project ID from the environment
   408  		projectId, err = gce_util.GetProjectId()
   409  		if err != nil {
   410  			return nil, err
   411  		}
   412  
   413  		if len(opts["zone"]) < 1 {
        			return nil, fmt.Errorf("Zone must be set with the zone option when not running on GCE")
        		}
        		heapsterZone = opts["zone"][0]
   414  	}
   415  
   416  	if useNewResourceModel {
   417  		if clusterName == "" {
   418  			glog.Warning("Cluster name required but not provided, using empty cluster name.")
   419  		}
   420  
   421  		if clusterLocation == "" {
   422  			glog.Warning("Cluster location required with new resource model but not provided. Falling back to the zone where Heapster runs.")
   423  			clusterLocation = heapsterZone
   424  		}
   425  	}
   426  
   427  	// Create Metric Client
   428  	stackdriverClient, err := sd_api.NewMetricClient(context.Background())
   429  	if err != nil {
   430  		return nil, err
   431  	}
   432  
   433  	sink := &StackdriverSink{
   434  		project:               projectId,
   435  		clusterName:           clusterName,
   436  		clusterLocation:       clusterLocation,
   437  		heapsterZone:          heapsterZone,
   438  		stackdriverClient:     stackdriverClient,
   439  		minInterval:           minInterval,
   440  		batchExportTimeoutSec: batchExportTimeoutSec,
   441  		initialDelaySec:       initialDelaySec,
   442  		useOldResourceModel:   useOldResourceModel,
   443  		useNewResourceModel:   useNewResourceModel,
   444  	}
   445  
   446  	// Register sink metrics
   447  	prometheus.MustRegister(requestsSent)
   448  	prometheus.MustRegister(timeseriesSent)
   449  	prometheus.MustRegister(requestLatency)
   450  
   451  	glog.Infof("Created Stackdriver sink")
   452  
   453  	return sink, nil
   454  }
   455  
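        // parseBoolFlag overwrites targetValue with the parsed boolean when the named
        // option is present and leaves it unchanged otherwise.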
   456  func parseBoolFlag(opts map[string][]string, name string, targetValue *bool) error {
   457  	if len(opts[name]) >= 1 {
   458  		var err error
   459  		*targetValue, err = strconv.ParseBool(opts[name][0])
   460  		if err != nil {
   461  			return fmt.Errorf("%s = %s is not a correct boolean value", name, opts[name][0])
   462  		}
   463  	}
   464  	return nil
   465  }
   466  
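        // computeDerivedMetrics derives metrics that are not reported directly:
        // evictable memory ("memory/bytes_used", usage minus working set) and minor
        // page faults (total page faults minus major page faults).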
   467  func (sink *StackdriverSink) computeDerivedMetrics(metricSet *core.MetricSet) *core.MetricSet {
   468  	newMetricSet := &core.MetricSet{MetricValues: map[string]core.MetricValue{}}
   469  	usage, usageOK := metricSet.MetricValues[core.MetricMemoryUsage.MetricDescriptor.Name]
   470  	workingSet, workingSetOK := metricSet.MetricValues[core.MetricMemoryWorkingSet.MetricDescriptor.Name]
   471  
   472  	if usageOK && workingSetOK {
   473  		newMetricSet.MetricValues["memory/bytes_used"] = core.MetricValue{
   474  			IntValue: usage.IntValue - workingSet.IntValue,
   475  		}
   476  	}
   477  
   478  	memoryFaults, memoryFaultsOK := metricSet.MetricValues[core.MetricMemoryPageFaults.MetricDescriptor.Name]
   479  	majorMemoryFaults, majorMemoryFaultsOK := metricSet.MetricValues[core.MetricMemoryMajorPageFaults.MetricDescriptor.Name]
   480  	if memoryFaultsOK && majorMemoryFaultsOK {
   481  		newMetricSet.MetricValues["memory/minor_page_faults"] = core.MetricValue{
   482  			IntValue: memoryFaults.IntValue - majorMemoryFaults.IntValue,
   483  		}
   484  	}
   485  
   486  	return newMetricSet
   487  }
   488  
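        // LegacyTranslateLabeledMetric translates a labeled metric into a time series
        // for the legacy "gke_container" resource model, or returns nil if the metric
        // is not exported.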
   489  func (sink *StackdriverSink) LegacyTranslateLabeledMetric(timestamp time.Time, labels map[string]string, metric core.LabeledMetric, collectionStartTime time.Time) *monitoringpb.TimeSeries {
   490  	resourceLabels := sink.legacyGetResourceLabels(labels)
   491  	switch metric.Name {
   492  	case core.MetricFilesystemUsage.MetricDescriptor.Name:
   493  		point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   494  		ts := legacyCreateTimeSeries(resourceLabels, legacyDiskBytesUsedMD, point)
   495  		ts.Metric.Labels = map[string]string{
   496  			"device_name": metric.Labels[core.LabelResourceID.Key],
   497  		}
   498  		return ts
   499  	case core.MetricFilesystemLimit.MetricDescriptor.Name:
   500  		point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   501  		ts := legacyCreateTimeSeries(resourceLabels, legacyDiskBytesTotalMD, point)
   502  		ts.Metric.Labels = map[string]string{
   503  			"device_name": metric.Labels[core.LabelResourceID.Key],
   504  		}
   505  		return ts
   506  	case core.MetricAcceleratorMemoryTotal.MetricDescriptor.Name:
   507  		point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   508  		ts := legacyCreateTimeSeries(resourceLabels, legacyAcceleratorMemoryTotalMD, point)
   509  		ts.Metric.Labels = map[string]string{
   510  			core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   511  			core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   512  			core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   513  		}
   514  		return ts
   515  	case core.MetricAcceleratorMemoryUsed.MetricDescriptor.Name:
   516  		point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   517  		ts := legacyCreateTimeSeries(resourceLabels, legacyAcceleratorMemoryUsedMD, point)
   518  		ts.Metric.Labels = map[string]string{
   519  			core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   520  			core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   521  			core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   522  		}
   523  		return ts
   524  	case core.MetricAcceleratorDutyCycle.MetricDescriptor.Name:
   525  		point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   526  		ts := legacyCreateTimeSeries(resourceLabels, legacyAcceleratorDutyCycleMD, point)
   527  		ts.Metric.Labels = map[string]string{
   528  			core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   529  			core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   530  			core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   531  		}
   532  		return ts
   533  	}
   534  	return nil
   535  }
   536  
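        // LegacyTranslateMetric translates a plain metric value into a time series for
        // the legacy "gke_container" resource model, or returns nil if the metric is
        // not exported.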
   537  func (sink *StackdriverSink) LegacyTranslateMetric(timestamp time.Time, labels map[string]string, name string, value core.MetricValue, collectionStartTime time.Time) *monitoringpb.TimeSeries {
   538  	resourceLabels := sink.legacyGetResourceLabels(labels)
   539  	if !collectionStartTime.Before(timestamp) {
   540  		glog.V(4).Infof("Error translating metric %v for pod %v: batch timestamp %v is not later than collection start time %v", name, labels["pod_name"], timestamp, collectionStartTime)
   541  		return nil
   542  	}
   543  	switch name {
   544  	case core.MetricUptime.MetricDescriptor.Name:
   545  		doubleValue := float64(value.IntValue) / float64(time.Second/time.Millisecond)
   546  		point := sink.doublePoint(timestamp, collectionStartTime, doubleValue)
   547  		return legacyCreateTimeSeries(resourceLabels, legacyUptimeMD, point)
   548  	case core.MetricCpuLimit.MetricDescriptor.Name:
   549  		// converting from millicores to cores
   550  		point := sink.doublePoint(timestamp, timestamp, float64(value.IntValue)/1000)
   551  		return legacyCreateTimeSeries(resourceLabels, legacyCPUReservedCoresMD, point)
   552  	case core.MetricCpuUsage.MetricDescriptor.Name:
   553  		point := sink.doublePoint(timestamp, collectionStartTime, float64(value.IntValue)/float64(time.Second/time.Nanosecond))
   554  		return legacyCreateTimeSeries(resourceLabels, legacyCPUUsageTimeMD, point)
   555  	case core.MetricNetworkRx.MetricDescriptor.Name:
   556  		point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   557  		return legacyCreateTimeSeries(resourceLabels, legacyNetworkRxMD, point)
   558  	case core.MetricNetworkTx.MetricDescriptor.Name:
   559  		point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   560  		return legacyCreateTimeSeries(resourceLabels, legacyNetworkTxMD, point)
   561  	case core.MetricMemoryLimit.MetricDescriptor.Name:
   562  		// omit nodes, using memory/node_allocatable instead
   563  		if labels["type"] == core.MetricSetTypeNode {
   564  			return nil
   565  		}
   566  		point := sink.intPoint(timestamp, timestamp, value.IntValue)
   567  		return legacyCreateTimeSeries(resourceLabels, legacyMemoryLimitMD, point)
   568  	case core.MetricNodeMemoryAllocatable.MetricDescriptor.Name:
   569  		point := sink.intPoint(timestamp, timestamp, value.IntValue)
   570  		return legacyCreateTimeSeries(resourceLabels, legacyMemoryLimitMD, point)
   571  	case core.MetricMemoryMajorPageFaults.MetricDescriptor.Name:
   572  		point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   573  		ts := legacyCreateTimeSeries(resourceLabels, legacyMemoryPageFaultsMD, point)
   574  		ts.Metric.Labels = map[string]string{
   575  			"fault_type": "major",
   576  		}
   577  		return ts
   578  	case "memory/bytes_used":
   579  		point := sink.intPoint(timestamp, timestamp, value.IntValue)
   580  		ts := legacyCreateTimeSeries(resourceLabels, legacyMemoryBytesUsedMD, point)
   581  		ts.Metric.Labels = map[string]string{
   582  			"memory_type": "evictable",
   583  		}
   584  		return ts
   585  	case "nvidia.com/gpu/request":
   586  		point := sink.intPoint(timestamp, timestamp, value.IntValue)
   587  		ts := legacyCreateTimeSeries(resourceLabels, legacyAcceleratorRequestMD, point)
   588  		ts.Metric.Labels = map[string]string{
   589  			"resource_name": "nvidia.com/gpu",
   590  		}
   591  		return ts
   592  	case core.MetricMemoryWorkingSet.MetricDescriptor.Name:
   593  		point := sink.intPoint(timestamp, timestamp, value.IntValue)
   594  		ts := legacyCreateTimeSeries(resourceLabels, legacyMemoryBytesUsedMD, point)
   595  		ts.Metric.Labels = map[string]string{
   596  			"memory_type": "non-evictable",
   597  		}
   598  		return ts
   599  	case "memory/minor_page_faults":
   600  		point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   601  		ts := legacyCreateTimeSeries(resourceLabels, legacyMemoryPageFaultsMD, point)
   602  		ts.Metric.Labels = map[string]string{
   603  			"fault_type": "minor",
   604  		}
   605  		return ts
   606  	}
   607  	return nil
   608  }
   609  
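        // TranslateLabeledMetric translates a labeled metric into a time series for the
        // new resource model ("k8s_pod" or "k8s_container"), or returns nil if the
        // metric is not exported.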
   610  func (sink *StackdriverSink) TranslateLabeledMetric(timestamp time.Time, labels map[string]string, metric core.LabeledMetric, collectionStartTime time.Time) *monitoringpb.TimeSeries {
   611  	switch labels["type"] {
   612  	case core.MetricSetTypePod:
   613  		podLabels := sink.getPodResourceLabels(labels)
   614  		switch metric.Name {
   615  		case core.MetricFilesystemUsage.MetricDescriptor.Name:
   616  			point := sink.intPoint(timestamp, timestamp, metric.MetricValue.IntValue)
   617  			ts := createTimeSeries("k8s_pod", podLabels, volumeUsedBytesMD, point)
   618  			ts.Metric.Labels = map[string]string{
   619  				core.LabelVolumeName.Key: strings.TrimPrefix(metric.Labels[core.LabelResourceID.Key], "Volume:"),
   620  			}
   621  			return ts
   622  		case core.MetricFilesystemLimit.MetricDescriptor.Name:
   623  			point := sink.intPoint(timestamp, timestamp, metric.MetricValue.IntValue)
   624  			ts := createTimeSeries("k8s_pod", podLabels, volumeTotalBytesMD, point)
   625  			ts.Metric.Labels = map[string]string{
   626  				core.LabelVolumeName.Key: strings.TrimPrefix(metric.Labels[core.LabelResourceID.Key], "Volume:"),
   627  			}
   628  			return ts
   629  		}
   630  	case core.MetricSetTypePodContainer:
   631  		containerLabels := sink.getContainerResourceLabels(labels)
   632  		switch metric.Name {
   633  		case core.MetricAcceleratorMemoryTotal.MetricDescriptor.Name:
   634  			point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   635  			ts := createTimeSeries("k8s_container", containerLabels, acceleratorMemoryTotalMD, point)
   636  			ts.Metric.Labels = map[string]string{
   637  				core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   638  				core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   639  				core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   640  			}
   641  			return ts
   642  		case core.MetricAcceleratorMemoryUsed.MetricDescriptor.Name:
   643  			point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   644  			ts := createTimeSeries("k8s_container", containerLabels, acceleratorMemoryUsedMD, point)
   645  			ts.Metric.Labels = map[string]string{
   646  				core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   647  				core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   648  				core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   649  			}
   650  			return ts
   651  		case core.MetricAcceleratorDutyCycle.MetricDescriptor.Name:
   652  			point := sink.intPoint(timestamp, timestamp, metric.IntValue)
   653  			ts := createTimeSeries("k8s_container", containerLabels, acceleratorDutyCycleMD, point)
   654  			ts.Metric.Labels = map[string]string{
   655  				core.LabelAcceleratorMake.Key:  metric.Labels[core.LabelAcceleratorMake.Key],
   656  				core.LabelAcceleratorModel.Key: metric.Labels[core.LabelAcceleratorModel.Key],
   657  				core.LabelAcceleratorID.Key:    metric.Labels[core.LabelAcceleratorID.Key],
   658  			}
   659  			return ts
   660  		}
   661  	}
   662  	return nil
   663  }
   664  
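        // TranslateMetric translates a plain metric value into a time series for the
        // new resource model ("k8s_container", "k8s_pod" or "k8s_node"), or returns nil
        // if the metric is not exported.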
   665  func (sink *StackdriverSink) TranslateMetric(timestamp time.Time, labels map[string]string, name string, value core.MetricValue, collectionStartTime time.Time, entityCreateTime time.Time) *monitoringpb.TimeSeries {
   666  	if !collectionStartTime.Before(timestamp) {
   667  		glog.V(4).Infof("Error translating metric %v for pod %v: batch timestamp %v is not later than collection start time %v", name, labels["pod_name"], timestamp, collectionStartTime)
   668  		return nil
   669  	}
   670  	switch labels["type"] {
   671  	case core.MetricSetTypePodContainer:
   672  		containerLabels := sink.getContainerResourceLabels(labels)
   673  		switch name {
   674  		case core.MetricUptime.MetricDescriptor.Name:
   675  			doubleValue := float64(value.IntValue) / float64(time.Second/time.Millisecond)
   676  			point := sink.doublePoint(timestamp, timestamp, doubleValue)
   677  			return createTimeSeries("k8s_container", containerLabels, containerUptimeMD, point)
   678  		case core.MetricCpuLimit.MetricDescriptor.Name:
   679  			point := sink.doublePoint(timestamp, timestamp, float64(value.IntValue)/1000)
   680  			return createTimeSeries("k8s_container", containerLabels, cpuLimitCoresMD, point)
   681  		case core.MetricCpuRequest.MetricDescriptor.Name:
   682  			point := sink.doublePoint(timestamp, timestamp, float64(value.IntValue)/1000)
   683  			return createTimeSeries("k8s_container", containerLabels, cpuRequestedCoresMD, point)
   684  		case core.MetricCpuUsage.MetricDescriptor.Name:
   685  			point := sink.doublePoint(timestamp, collectionStartTime, float64(value.IntValue)/float64(time.Second/time.Nanosecond))
   686  			return createTimeSeries("k8s_container", containerLabels, cpuContainerCoreUsageTimeMD, point)
   687  		case core.MetricMemoryLimit.MetricDescriptor.Name:
   688  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   689  			return createTimeSeries("k8s_container", containerLabels, memoryLimitBytesMD, point)
   690  		case "memory/bytes_used":
   691  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   692  			ts := createTimeSeries("k8s_container", containerLabels, memoryContainerUsedBytesMD, point)
   693  			ts.Metric.Labels = map[string]string{
   694  				"memory_type": "evictable",
   695  			}
   696  			return ts
   697  		case "nvidia.com/gpu/request":
   698  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   699  			ts := createTimeSeries("k8s_container", containerLabels, acceleratorRequestedMD, point)
   700  			ts.Metric.Labels = map[string]string{
   701  				"resource_name": "nvidia.com/gpu",
   702  			}
   703  			return ts
   704  		case core.MetricMemoryWorkingSet.MetricDescriptor.Name:
   705  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   706  			ts := createTimeSeries("k8s_container", containerLabels, memoryContainerUsedBytesMD, point)
   707  			ts.Metric.Labels = map[string]string{
   708  				"memory_type": "non-evictable",
   709  			}
   710  			return ts
   711  		case core.MetricMemoryRequest.MetricDescriptor.Name:
   712  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   713  			return createTimeSeries("k8s_container", containerLabels, memoryRequestedBytesMD, point)
   714  		case core.MetricEphemeralStorageLimit.MetricDescriptor.Name:
   715  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   716  			return createTimeSeries("k8s_container", containerLabels, ephemeralstorageLimitBytesMD, point)
   717  		case core.MetricEphemeralStorageRequest.MetricDescriptor.Name:
   718  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   719  			return createTimeSeries("k8s_container", containerLabels, ephemeralstorageRequestedBytesMD, point)
   720  		case core.MetricEphemeralStorageUsage.MetricDescriptor.Name:
   721  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   722  			return createTimeSeries("k8s_container", containerLabels, ephemeralstorageContainerUsedBytesMD, point)
   723  
   724  		case core.MetricRestartCount.MetricDescriptor.Name:
   725  			if entityCreateTime.IsZero() {
   726  				glog.V(2).Infof("Skipping restart_count metric for container %s because entity create time is zero", core.PodContainerKey(containerLabels["namespace_name"], containerLabels["pod_name"], containerLabels["container_name"]))
   727  				return nil
   728  			}
   729  			point := sink.intPoint(timestamp, entityCreateTime, value.IntValue)
   730  			return createTimeSeries("k8s_container", containerLabels, restartCountMD, point)
   731  		}
   732  	case core.MetricSetTypePod:
   733  		podLabels := sink.getPodResourceLabels(labels)
   734  		switch name {
   735  		case core.MetricNetworkRx.MetricDescriptor.Name:
   736  			point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   737  			return createTimeSeries("k8s_pod", podLabels, networkPodReceivedBytesMD, point)
   738  		case core.MetricNetworkTx.MetricDescriptor.Name:
   739  			point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   740  			return createTimeSeries("k8s_pod", podLabels, networkPodSentBytesMD, point)
   741  		}
   742  	case core.MetricSetTypeNode:
   743  		nodeLabels := sink.getNodeResourceLabels(labels)
   744  		switch name {
   745  		case core.MetricNodeCpuCapacity.MetricDescriptor.Name:
   746  			point := sink.doublePoint(timestamp, timestamp, float64(value.FloatValue)/1000)
   747  			return createTimeSeries("k8s_node", nodeLabels, cpuTotalCoresMD, point)
   748  		case core.MetricNodeCpuAllocatable.MetricDescriptor.Name:
   749  			point := sink.doublePoint(timestamp, timestamp, float64(value.FloatValue)/1000)
   750  			return createTimeSeries("k8s_node", nodeLabels, cpuAllocatableCoresMD, point)
   751  		case core.MetricCpuUsage.MetricDescriptor.Name:
   752  			point := sink.doublePoint(timestamp, collectionStartTime, float64(value.IntValue)/float64(time.Second/time.Nanosecond))
   753  			return createTimeSeries("k8s_node", nodeLabels, cpuNodeCoreUsageTimeMD, point)
   754  		case core.MetricNodeMemoryCapacity.MetricDescriptor.Name:
   755  			point := sink.intPoint(timestamp, timestamp, int64(value.FloatValue))
   756  			return createTimeSeries("k8s_node", nodeLabels, memoryTotalBytesMD, point)
   757  		case core.MetricNodeMemoryAllocatable.MetricDescriptor.Name:
   758  			point := sink.intPoint(timestamp, timestamp, int64(value.FloatValue))
   759  			return createTimeSeries("k8s_node", nodeLabels, memoryAllocatableBytesMD, point)
   760  		case "memory/bytes_used":
   761  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   762  			ts := createTimeSeries("k8s_node", nodeLabels, memoryNodeUsedBytesMD, point)
   763  			ts.Metric.Labels = map[string]string{
   764  				"memory_type": "evictable",
   765  			}
   766  			return ts
   767  		case core.MetricMemoryWorkingSet.MetricDescriptor.Name:
   768  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   769  			ts := createTimeSeries("k8s_node", nodeLabels, memoryNodeUsedBytesMD, point)
   770  			ts.Metric.Labels = map[string]string{
   771  				"memory_type": "non-evictable",
   772  			}
   773  			return ts
   774  		case core.MetricNetworkRx.MetricDescriptor.Name:
   775  			point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   776  			return createTimeSeries("k8s_node", nodeLabels, networkNodeReceivedBytesMD, point)
   777  		case core.MetricNetworkTx.MetricDescriptor.Name:
   778  			point := sink.intPoint(timestamp, collectionStartTime, value.IntValue)
   779  			return createTimeSeries("k8s_node", nodeLabels, networkNodeSentBytesMD, point)
   780  		case core.MetricNodeEphemeralStorageCapacity.MetricDescriptor.Name:
   781  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   782  			return createTimeSeries("k8s_node", nodeLabels, ephemeralstorageTotalBytesMD, point)
   783  
   784  		}
   785  	case core.MetricSetTypeSystemContainer:
   786  		nodeLabels := sink.getNodeResourceLabels(labels)
   787  		switch name {
   788  		case "memory/bytes_used":
   789  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   790  			ts := createTimeSeries("k8s_node", nodeLabels, memoryNodeDaemonUsedBytesMD, point)
   791  			ts.Metric.Labels = map[string]string{
   792  				"component":   labels[core.LabelContainerName.Key],
   793  				"memory_type": "evictable",
   794  			}
   795  			return ts
   796  		case core.MetricMemoryWorkingSet.MetricDescriptor.Name:
   797  			point := sink.intPoint(timestamp, timestamp, value.IntValue)
   798  			ts := createTimeSeries("k8s_node", nodeLabels, memoryNodeDaemonUsedBytesMD, point)
   799  			ts.Metric.Labels = map[string]string{
   800  				"component":   labels[core.LabelContainerName.Key],
   801  				"memory_type": "non-evictable",
   802  			}
   803  			return ts
   804  		case core.MetricCpuUsage.MetricDescriptor.Name:
   805  			point := sink.doublePoint(timestamp, collectionStartTime, float64(value.IntValue)/float64(time.Second/time.Nanosecond))
   806  			ts := createTimeSeries("k8s_node", nodeLabels, cpuNodeDaemonCoreUsageTimeMD, point)
   807  			ts.Metric.Labels = map[string]string{
   808  				"component": labels[core.LabelContainerName.Key],
   809  			}
   810  			return ts
   811  		}
   812  	}
   813  	return nil
   814  }
   815  
   816  func (sink *StackdriverSink) legacyGetResourceLabels(labels map[string]string) map[string]string {
   817  	return map[string]string{
   818  		"project_id":     sink.project,
   819  		"cluster_name":   sink.clusterName,
   820  		"zone":           sink.heapsterZone,
   821  		"instance_id":    labels[core.LabelHostID.Key],
   822  		"namespace_id":   labels[core.LabelPodNamespaceUID.Key],
   823  		"pod_id":         labels[core.LabelPodId.Key],
   824  		"container_name": labels[core.LabelContainerName.Key],
   825  	}
   826  }
   827  
   828  func (sink *StackdriverSink) getContainerResourceLabels(labels map[string]string) map[string]string {
   829  	return map[string]string{
   830  		"project_id":     sink.project,
   831  		"location":       sink.clusterLocation,
   832  		"cluster_name":   sink.clusterName,
   833  		"namespace_name": labels[core.LabelNamespaceName.Key],
   834  		"pod_name":       labels[core.LabelPodName.Key],
   835  		"container_name": labels[core.LabelContainerName.Key],
   836  	}
   837  }
   838  
   839  func (sink *StackdriverSink) getPodResourceLabels(labels map[string]string) map[string]string {
   840  	return map[string]string{
   841  		"project_id":     sink.project,
   842  		"location":       sink.clusterLocation,
   843  		"cluster_name":   sink.clusterName,
   844  		"namespace_name": labels[core.LabelNamespaceName.Key],
   845  		"pod_name":       labels[core.LabelPodName.Key],
   846  	}
   847  }
   848  
   849  func (sink *StackdriverSink) getNodeResourceLabels(labels map[string]string) map[string]string {
   850  	return map[string]string{
   851  		"project_id":   sink.project,
   852  		"location":     sink.clusterLocation,
   853  		"cluster_name": sink.clusterName,
   854  		"node_name":    labels[core.LabelNodename.Key],
   855  	}
   856  }
   857  
   858  func legacyCreateTimeSeries(resourceLabels map[string]string, metadata *metricMetadata, point *monitoringpb.Point) *monitoringpb.TimeSeries {
   859  	return createTimeSeries("gke_container", resourceLabels, metadata, point)
   860  }
   861  
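        // createTimeSeries builds a single-point time series for the given monitored
        // resource type, resource labels and metric metadata.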
   862  func createTimeSeries(resource string, resourceLabels map[string]string, metadata *metricMetadata, point *monitoringpb.Point) *monitoringpb.TimeSeries {
   863  	return &monitoringpb.TimeSeries{
   864  		Metric: &metric.Metric{
   865  			Type: metadata.Name,
   866  		},
   867  		MetricKind: metadata.MetricKind,
   868  		ValueType:  metadata.ValueType,
   869  		Resource: &monitoredres.MonitoredResource{
   870  			Labels: resourceLabels,
   871  			Type:   resource,
   872  		},
   873  		Points: []*monitoringpb.Point{point},
   874  	}
   875  }
   876  
   877  func (sink *StackdriverSink) doublePoint(endTime time.Time, startTime time.Time, value float64) *monitoringpb.Point {
   878  	return &monitoringpb.Point{
   879  		Interval: &monitoringpb.TimeInterval{
   880  			EndTime:   &google_proto.Timestamp{Seconds: endTime.Unix(), Nanos: int32(endTime.Nanosecond())},
   881  			StartTime: &google_proto.Timestamp{Seconds: startTime.Unix(), Nanos: int32(startTime.Nanosecond())},
   882  		},
   883  		Value: &monitoringpb.TypedValue{
   884  			Value: &monitoringpb.TypedValue_DoubleValue{
   885  				DoubleValue: value,
   886  			},
   887  		},
   888  	}
   890  }
   891  
   892  func (sink *StackdriverSink) intPoint(endTime time.Time, startTime time.Time, value int64) *monitoringpb.Point {
   893  	return &monitoringpb.Point{
   894  		Interval: &monitoringpb.TimeInterval{
   895  			EndTime:   &google_proto.Timestamp{Seconds: endTime.Unix(), Nanos: int32(endTime.Nanosecond())},
   896  			StartTime: &google_proto.Timestamp{Seconds: startTime.Unix(), Nanos: int32(startTime.Nanosecond())},
   897  		},
   898  		Value: &monitoringpb.TypedValue{
   899  			Value: &monitoringpb.TypedValue_Int64Value{
   900  				Int64Value: value,
   901  			},
   902  		},
   903  	}
   904  }
   905  
   906  func fullProjectName(name string) string {
   907  	return fmt.Sprintf("projects/%s", name)
   908  }
   909  
   910  func getReq(project string) *monitoringpb.CreateTimeSeriesRequest {
   911  	return &monitoringpb.CreateTimeSeriesRequest{
   912  		TimeSeries: nil,
   913  		Name:       fullProjectName(project),
   914  	}
   915  }