vitess.io/vitess@v0.16.2/go/stats/opentsdb/opentsdb.go (about)

     1  /*
     2  Copyright 2019 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package opentsdb adds support for pushing stats to opentsdb.
    18  package opentsdb
    19  
import (
	"bytes"
	"encoding/json"
	"expvar"
	"fmt"
	"io"
	"math"
	"net/http"
	"sort"
	"strings"
	"time"
	"unicode"

	"github.com/spf13/pflag"

	"vitess.io/vitess/go/stats"
	"vitess.io/vitess/go/vt/servenv"
)
    35  
// openTsdbURI is the address data points are POSTed to. It is bound to the
// opentsdb_uri flag by registerFlags; while it is empty, InitWithoutServenv
// returns without registering the backend.
var openTsdbURI string
    37  
    38  func registerFlags(fs *pflag.FlagSet) {
    39  	fs.StringVar(&openTsdbURI, "opentsdb_uri", openTsdbURI, "URI of opentsdb /api/put method")
    40  }
    41  
    42  func init() {
    43  	servenv.OnParseFor("vtctld", registerFlags)
    44  	servenv.OnParseFor("vtgate", registerFlags)
    45  	servenv.OnParseFor("vttablet", registerFlags)
    46  }
    47  
// dataPoint represents a single OpenTSDB data point.
//
// It is serialized with encoding/json, so the struct tags below define the
// wire field names ("metric", "timestamp", "value", "tags"). Value carries
// both integer and floating point stats as a float64, and Tags maps tag name
// to tag value for this one point.
type dataPoint struct {
	// Example: sys.cpu.nice
	Metric string `json:"metric"`
	// Seconds or milliseconds since unix epoch.
	Timestamp float64           `json:"timestamp"`
	Value     float64           `json:"value"`
	Tags      map[string]string `json:"tags"`
}
    57  
    58  // sendDataPoints pushes a list of data points to openTSDB.
    59  // All other code in this file is just to support getting this function called
    60  // with all stats represented as data points.
    61  func sendDataPoints(data []dataPoint) error {
    62  	json, err := json.Marshal(data)
    63  	if err != nil {
    64  		return err
    65  	}
    66  
    67  	resp, err := http.Post(openTsdbURI, "application/json", bytes.NewReader(json))
    68  	if err != nil {
    69  		return err
    70  	}
    71  	resp.Body.Close()
    72  	return nil
    73  }
    74  
// openTSDBBackend implements stats.PushBackend. It holds the settings that
// apply to every data point of a push: an optional metric name prefix and a
// set of tags merged into each point's own tags.
type openTSDBBackend struct {
	// The prefix is the name of the binary (vtgate, vttablet, etc.) and will be
	// prepended to all the stats reported.
	prefix string
	// Tags that should be included with every data point. If there's a tag name
	// collision between the common tags and a single data point's tags, the data
	// point tag will override the common tag.
	commonTags map[string]string
}
    85  
// dataCollector tracks state for a single pass of stats reporting / data collection.
//
// settings points at the backend whose prefix and common tags are applied to
// each point. timestamp is the Unix time in seconds taken once when the pass
// starts, so every point of the pass shares it. dataPoints accumulates the
// points produced during the pass.
type dataCollector struct {
	settings   *openTSDBBackend
	timestamp  int64
	dataPoints []dataPoint
}
    92  
    93  // Init attempts to create a singleton openTSDBBackend and register it as a PushBackend.
    94  // If it fails to create one, this is a noop. The prefix argument is an optional string
    95  // to prepend to the name of every data point reported.
    96  func Init(prefix string) {
    97  	// Needs to happen in servenv.OnRun() instead of init because it requires flag parsing and logging
    98  	servenv.OnRun(func() {
    99  		InitWithoutServenv(prefix)
   100  	})
   101  }
   102  
   103  // InitWithoutServenv initializes the opentsdb without servenv
   104  func InitWithoutServenv(prefix string) {
   105  	if openTsdbURI == "" {
   106  		return
   107  	}
   108  
   109  	backend := &openTSDBBackend{
   110  		prefix:     prefix,
   111  		commonTags: stats.ParseCommonTags(stats.CommonTags),
   112  	}
   113  
   114  	stats.RegisterPushBackend("opentsdb", backend)
   115  
   116  	http.HandleFunc("/debug/opentsdb", func(w http.ResponseWriter, r *http.Request) {
   117  		w.Header().Set("Content-Type", "application/json; charset=utf-8")
   118  		dataPoints := (*backend).getDataPoints()
   119  		sort.Sort(byMetric(dataPoints))
   120  
   121  		if b, err := json.MarshalIndent(dataPoints, "", "  "); err != nil {
   122  			w.Write([]byte(err.Error()))
   123  		} else {
   124  			w.Write(b)
   125  		}
   126  	})
   127  }
   128  
   129  // PushAll pushes all stats to OpenTSDB
   130  func (backend *openTSDBBackend) PushAll() error {
   131  	return sendDataPoints(backend.getDataPoints())
   132  }
   133  
   134  // getDataPoints fetches all stats in an opentsdb-compatible format.
   135  // This is separated from PushAll() so it can be reused for the /debug/opentsdb handler.
   136  func (backend *openTSDBBackend) getDataPoints() []dataPoint {
   137  	dataCollector := &dataCollector{
   138  		settings:  backend,
   139  		timestamp: time.Now().Unix(),
   140  	}
   141  
   142  	expvar.Do(func(kv expvar.KeyValue) {
   143  		dataCollector.addExpVar(kv)
   144  	})
   145  
   146  	return dataCollector.dataPoints
   147  }
   148  
   149  // combineMetricName joins parts of a hierarchical name with a "."
   150  func combineMetricName(parts ...string) string {
   151  	return strings.Join(parts, ".")
   152  }
   153  
   154  func (dc *dataCollector) addInt(metric string, val int64, tags map[string]string) {
   155  	dc.addFloat(metric, float64(val), tags)
   156  }
   157  
   158  func (dc *dataCollector) addFloat(metric string, val float64, tags map[string]string) {
   159  	var fullMetric string
   160  	if len(dc.settings.prefix) > 0 {
   161  		fullMetric = combineMetricName(dc.settings.prefix, metric)
   162  	} else {
   163  		fullMetric = metric
   164  	}
   165  
   166  	// Restrict metric and tag name/values to legal characters:
   167  	// http://opentsdb.net/docs/build/html/user_guide/writing.html#metrics-and-tags
   168  	//
   169  	// Also make everything lowercase, since opentsdb is case sensitive and lowercase
   170  	// simplifies the convention.
   171  	sanitize := func(text string) string {
   172  		var b bytes.Buffer
   173  		for _, r := range text {
   174  			if unicode.IsDigit(r) || unicode.IsLetter(r) || r == '-' || r == '_' || r == '/' || r == '.' {
   175  				b.WriteRune(r)
   176  			} else {
   177  				// For characters that would cause errors, write underscore instead
   178  				b.WriteRune('_')
   179  			}
   180  		}
   181  		return strings.ToLower(b.String())
   182  	}
   183  
   184  	fullTags := make(map[string]string)
   185  	for k, v := range dc.settings.commonTags {
   186  		fullTags[sanitize(k)] = sanitize(v)
   187  	}
   188  	for k, v := range tags {
   189  		fullTags[sanitize(k)] = sanitize(v)
   190  	}
   191  
   192  	dp := dataPoint{
   193  		Metric:    sanitize(fullMetric),
   194  		Value:     val,
   195  		Timestamp: float64(dc.timestamp),
   196  		Tags:      fullTags,
   197  	}
   198  	dc.dataPoints = append(dc.dataPoints, dp)
   199  }
   200  
   201  // addExpVar adds all the data points associated with a particular expvar to the list of
   202  // opentsdb data points. How an expvar is translated depends on its type.
   203  //
   204  // Well-known metric types like histograms and integers are directly converted (saving labels
   205  // as tags).
   206  //
   207  // Generic unrecognized expvars are serialized to json and their int/float values are exported.
   208  // Strings and lists in expvars are not exported.
   209  func (dc *dataCollector) addExpVar(kv expvar.KeyValue) {
   210  	k := kv.Key
   211  	switch v := kv.Value.(type) {
   212  	case stats.FloatFunc:
   213  		dc.addFloat(k, v(), nil)
   214  	case *stats.Counter:
   215  		dc.addInt(k, v.Get(), nil)
   216  	case *stats.CounterFunc:
   217  		dc.addInt(k, v.F(), nil)
   218  	case *stats.Gauge:
   219  		dc.addInt(k, v.Get(), nil)
   220  	case *stats.GaugeFloat64:
   221  		dc.addFloat(k, v.Get(), nil)
   222  	case *stats.GaugeFunc:
   223  		dc.addInt(k, v.F(), nil)
   224  	case *stats.CounterDuration:
   225  		dc.addInt(k, int64(v.Get()), nil)
   226  	case *stats.CounterDurationFunc:
   227  		dc.addInt(k, int64(v.F()), nil)
   228  	case *stats.MultiTimings:
   229  		dc.addTimings(v.Labels(), &v.Timings, k)
   230  	case *stats.Timings:
   231  		dc.addTimings([]string{v.Label()}, v, k)
   232  	case *stats.Histogram:
   233  		dc.addHistogram(v, 1, k, make(map[string]string))
   234  	case *stats.CountersWithSingleLabel:
   235  		for labelVal, val := range v.Counts() {
   236  			dc.addInt(k, val, makeLabel(v.Label(), labelVal))
   237  		}
   238  	case *stats.CountersWithMultiLabels:
   239  		for labelVals, val := range v.Counts() {
   240  			dc.addInt(k, val, makeLabels(v.Labels(), labelVals))
   241  		}
   242  	case *stats.CountersFuncWithMultiLabels:
   243  		for labelVals, val := range v.Counts() {
   244  			dc.addInt(k, val, makeLabels(v.Labels(), labelVals))
   245  		}
   246  	case *stats.GaugesWithMultiLabels:
   247  		for labelVals, val := range v.Counts() {
   248  			dc.addInt(k, val, makeLabels(v.Labels(), labelVals))
   249  		}
   250  	case *stats.GaugesFuncWithMultiLabels:
   251  		for labelVals, val := range v.Counts() {
   252  			dc.addInt(k, val, makeLabels(v.Labels(), labelVals))
   253  		}
   254  	case *stats.GaugesWithSingleLabel:
   255  		for labelVal, val := range v.Counts() {
   256  			dc.addInt(k, val, makeLabel(v.Label(), labelVal))
   257  		}
   258  	default:
   259  		// Deal with generic expvars by converting them to JSON and pulling out
   260  		// all the floats. Strings and lists will not be exported to opentsdb.
   261  		var obj map[string]any
   262  		if err := json.Unmarshal([]byte(v.String()), &obj); err != nil {
   263  			return
   264  		}
   265  
   266  		// Recursive helper function.
   267  		dc.addUnrecognizedExpvars(combineMetricName("expvar", k), obj)
   268  	}
   269  }
   270  
   271  // makeLabel builds a tag list with a single label + value.
   272  func makeLabel(labelName string, labelVal string) map[string]string {
   273  	return map[string]string{labelName: labelVal}
   274  }
   275  
   276  // makeLabels takes the vitess stat representation of label values ("."-separated list) and breaks it
   277  // apart into a map of label name -> label value.
   278  func makeLabels(labelNames []string, labelValsCombined string) map[string]string {
   279  	tags := make(map[string]string)
   280  	labelVals := strings.Split(labelValsCombined, ".")
   281  	for i, v := range labelVals {
   282  		tags[labelNames[i]] = v
   283  	}
   284  	return tags
   285  }
   286  
   287  // addUnrecognizedExpvars recurses into a json object to pull out float64 variables to report.
   288  func (dc *dataCollector) addUnrecognizedExpvars(prefix string, obj map[string]any) {
   289  	for k, v := range obj {
   290  		prefix := combineMetricName(prefix, k)
   291  		switch v := v.(type) {
   292  		case map[string]any:
   293  			dc.addUnrecognizedExpvars(prefix, v)
   294  		case float64:
   295  			dc.addFloat(prefix, v, nil)
   296  		}
   297  	}
   298  }
   299  
   300  // addTimings converts a vitess Timings stat to something opentsdb can deal with.
   301  func (dc *dataCollector) addTimings(labels []string, timings *stats.Timings, prefix string) {
   302  	histograms := timings.Histograms()
   303  	for labelValsCombined, histogram := range histograms {
   304  		// If you prefer millisecond timings over nanoseconds you can pass 1000000 here instead of 1.
   305  		dc.addHistogram(histogram, 1, prefix, makeLabels(labels, labelValsCombined))
   306  	}
   307  }
   308  
   309  func (dc *dataCollector) addHistogram(histogram *stats.Histogram, divideBy int64, prefix string, tags map[string]string) {
   310  	// TODO: OpenTSDB 2.3 doesn't have histogram support, although it's forthcoming.
   311  	// For simplicity we report each bucket as a different metric.
   312  	//
   313  	// An alternative approach if you don't mind changing the code is to add a hook to Histogram creation that
   314  	// associates each histogram with a shadow type that can track percentiles (like Timer from rcrowley/go-metrics).
   315  
   316  	labels := histogram.Labels()
   317  	buckets := histogram.Buckets()
   318  	for i := range labels {
   319  		dc.addInt(
   320  			combineMetricName(prefix, labels[i]),
   321  			buckets[i],
   322  			tags,
   323  		)
   324  	}
   325  
   326  	dc.addInt(
   327  		combineMetricName(prefix, histogram.CountLabel()),
   328  		(*histogram).Count(),
   329  		tags,
   330  	)
   331  	dc.addInt(
   332  		combineMetricName(prefix, histogram.TotalLabel()),
   333  		(*histogram).Total()/divideBy,
   334  		tags,
   335  	)
   336  }
   337  
   338  // byMetric implements sort.Interface for []dataPoint based on the metric key
   339  // and then tag values (prioritized in tag name order). Having a consistent sort order
   340  // is convenient when refreshing /debug/opentsdb or for encoding and comparing JSON directly
   341  // in the tests.
   342  type byMetric []dataPoint
   343  
   344  func (m byMetric) Len() int      { return len(m) }
   345  func (m byMetric) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
   346  func (m byMetric) Less(i, j int) bool {
   347  	if m[i].Metric < m[j].Metric {
   348  		return true
   349  	}
   350  
   351  	if m[i].Metric > m[j].Metric {
   352  		return false
   353  	}
   354  
   355  	// Metric names are the same. We can use tag values to figure out the sort order.
   356  	// The deciding tag will be the lexicographically earliest tag name where tag values differ.
   357  	decidingTagName := ""
   358  	result := false
   359  	for tagName, iVal := range m[i].Tags {
   360  		jVal, ok := m[j].Tags[tagName]
   361  		if !ok {
   362  			// We'll arbitrarily declare that if i has any tag name that j doesn't then it sorts earlier.
   363  			// This shouldn't happen in practice, though, if metric code is correct...
   364  			return true
   365  		}
   366  
   367  		if iVal != jVal && (tagName < decidingTagName || decidingTagName == "") {
   368  			decidingTagName = tagName
   369  			result = iVal < jVal
   370  		}
   371  	}
   372  	return result
   373  }