github.com/apache/beam/sdks/v2@v2.48.2/go/test/load/util.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one or more
     2  // contributor license agreements.  See the NOTICE file distributed with
     3  // this work for additional information regarding copyright ownership.
     4  // The ASF licenses this file to You under the Apache License, Version 2.0
     5  // (the "License"); you may not use this file except in compliance with
     6  // the License.  You may obtain a copy of the License at
     7  //
     8  //    http://www.apache.org/licenses/LICENSE-2.0
     9  //
    10  // Unless required by applicable law or agreed to in writing, software
    11  // distributed under the License is distributed on an "AS IS" BASIS,
    12  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  // See the License for the specific language governing permissions and
    14  // limitations under the License.
    15  
    16  package load
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"flag"
    22  	"fmt"
    23  	"io"
    24  	"log"
    25  	"net/http"
    26  	"os"
    27  	"strings"
    28  	"time"
    29  
    30  	"github.com/apache/beam/sdks/v2/go/pkg/beam"
    31  	"github.com/apache/beam/sdks/v2/go/pkg/beam/core/metrics"
    32  	"github.com/apache/beam/sdks/v2/go/pkg/beam/register"
    33  )
    34  
// Identifiers of the distribution metric that carries runtime timestamps.
// They are used both to create the metric (see the runtime variable) and to
// pick it out of the pipeline's query results in toLoadTestResults.
const (
	// runtimeMetricNamespace is the namespace of the runtime distribution metric.
	runtimeMetricNamespace = "RuntimeMonitor"
	// runtimeMetricName is the name of the runtime distribution metric. It is
	// also used, optionally prefixed with the InfluxDB namespace, as the
	// metric label of published results (see newLoadTestResult).
	runtimeMetricName = "runtime"
)
    39  
// Command-line flags configuring where load test metrics are published, plus
// the distribution metric updated by RuntimeMonitor.
var (
	// influxMeasurement is the InfluxDB measurement that data points are
	// written to. Publishing is skipped when it is empty (see validate).
	influxMeasurement = flag.String(
		"influx_measurement",
		"",
		`An InfluxDB measurement where metrics should be published to.
		If empty, no metrics will be send to InfluxDB.`)
	// influxDatabase is the InfluxDB database name. Publishing is skipped
	// when it is empty (see validate).
	influxDatabase = flag.String(
		"influx_db_name",
		"",
		"InfluxDB database name. If empty, no metrics will be send to InfluxDB.")
	// influxHost is the base URL of the InfluxDB server; "/write" is appended
	// to it when publishing.
	influxHost = flag.String(
		"influx_hostname",
		"http://localhost:8086",
		"Hostname and port to connect to InfluxDB. Defaults to http://localhost:8086.")
	// influxNamespace, when non-empty, is prepended to the metric name of
	// every published data point (see newLoadTestResult).
	influxNamespace = flag.String(
		"influx_namespace",
		"",
		`A namespace to be used when constructing InfluxDB's data points.
		Used to make some points different from others within the same measurement.`)
	// runtime is the distribution metric that RuntimeMonitor updates with
	// nanosecond timestamps at bundle start and finish.
	runtime = beam.NewDistribution(runtimeMetricNamespace, runtimeMetricName)
)
    61  
// init registers the RuntimeMonitor DoFn and the ([]byte, []byte) emitter
// type it uses with the Beam register package.
func init() {
	// RuntimeMonitor.ProcessElement takes three parameters (key, value, emit)
	// and returns nothing, hence DoFn3x0.
	register.DoFn3x0[[]byte, []byte, func([]byte, []byte)]((*RuntimeMonitor)(nil))
	register.Emitter2[[]byte, []byte]()
}
    66  
// RuntimeMonitor is a DoFn to record processing time in the pipeline.
//
// It uses a distribution metric which is updated every time a new bundle
// starts or finishes. The processing time can be extracted by calculating
// the difference of the maximum and the minimum value of the distribution
// metric (see extractRuntimeValue). Elements pass through unchanged.
type RuntimeMonitor struct{}

// StartBundle updates the runtime distribution metric with the current time
// in nanoseconds since the Unix epoch. The emit parameter is not used.
func (fn *RuntimeMonitor) StartBundle(ctx context.Context, emit func([]byte, []byte)) {
	runtime.Update(ctx, time.Now().UnixNano())
}

// FinishBundle updates the runtime distribution metric with the current time
// in nanoseconds since the Unix epoch. The emit parameter is not used.
func (fn *RuntimeMonitor) FinishBundle(ctx context.Context, emit func([]byte, []byte)) {
	runtime.Update(ctx, time.Now().UnixNano())
}

// ProcessElement emits unmodified input elements: each (key, value) pair is
// forwarded to emit exactly as received.
func (fn *RuntimeMonitor) ProcessElement(key, value []byte, emit func([]byte, []byte)) {
	emit(key, value)
}
    89  
// influxDBOptions holds the connection settings used when publishing
// metrics to InfluxDB. Instances are built by newInfluxDBOptions.
type influxDBOptions struct {
	// measurement is the InfluxDB measurement to write to
	// (--influx_measurement).
	measurement string
	// dbName is the target database (--influx_db_name).
	dbName string
	// hostname is the base URL of the server (--influx_hostname).
	hostname string
	// user is read from the INFLUXDB_USER environment variable.
	user string
	// password is read from the INFLUXDB_USER_PASSWORD environment variable.
	password string
}
    97  
    98  func newInfluxDBOptions() *influxDBOptions {
    99  	return &influxDBOptions{
   100  		measurement: *influxMeasurement,
   101  		dbName:      *influxDatabase,
   102  		hostname:    *influxHost,
   103  		user:        os.Getenv("INFLUXDB_USER"),
   104  		password:    os.Getenv("INFLUXDB_USER_PASSWORD")}
   105  }
   106  
   107  func (options influxDBOptions) validate() bool {
   108  	return options.measurement != "" && options.dbName != ""
   109  }
   110  
   111  func (options influxDBOptions) httpAuthEnabled() bool {
   112  	return options.user != "" && options.password != ""
   113  }
   114  
// loadTestResult represents a single data record that has: a timestamp,
// a type of a metric, and a value.
type loadTestResult struct {
	// timestamp is the creation time of the record in seconds since the Unix
	// epoch (set from time.Now().Unix() in newLoadTestResult).
	timestamp int64
	// metric is the metric label written as the "metric" tag of the data
	// point (see buildPayload).
	metric string
	// value is the measured value; for runtime records it is a duration in
	// seconds (see extractRuntimeValue).
	value float64
}
   122  
   123  func newLoadTestResult(value float64) loadTestResult {
   124  	metric := ""
   125  	if *influxNamespace == "" {
   126  		metric = runtimeMetricName
   127  	} else {
   128  		metric = fmt.Sprintf("%v_%v", *influxNamespace, runtimeMetricName)
   129  	}
   130  	return loadTestResult{timestamp: time.Now().Unix(), metric: metric, value: value}
   131  }
   132  
   133  // PublishMetrics calculates the runtime and sends the result to InfluxDB database.
   134  func PublishMetrics(results metrics.QueryResults) {
   135  	options := newInfluxDBOptions()
   136  	ress := toLoadTestResults(results)
   137  	for _, res := range ress {
   138  		log.Printf("%s %v", res.metric, time.Duration(float64(time.Second)*res.value))
   139  	}
   140  	if len(ress) == 0 {
   141  		log.Print("No metrics returned.")
   142  		return
   143  	}
   144  	if options.validate() {
   145  		publishMetricstoInfluxDB(options, ress)
   146  	} else {
   147  		log.Print("Missing InfluxDB options. Metrics will not be published to InfluxDB")
   148  	}
   149  }
   150  
   151  func toLoadTestResults(results metrics.QueryResults) []loadTestResult {
   152  	res := make([]loadTestResult, 0)
   153  	matched := make([]metrics.DistributionResult, 0)
   154  
   155  	for _, dist := range results.Distributions() {
   156  		if dist.Key.Namespace == runtimeMetricNamespace &&
   157  			dist.Key.Name == runtimeMetricName {
   158  			matched = append(matched, dist)
   159  		}
   160  	}
   161  
   162  	if len(matched) > 0 {
   163  		res = append(res, newLoadTestResult(extractRuntimeValue(matched)))
   164  	}
   165  	return res
   166  }
   167  
   168  // extractRuntimeValue returns a difference between the maximum of maximum
   169  // values and the minimum of minimum values in seconds.
   170  func extractRuntimeValue(dists []metrics.DistributionResult) float64 {
   171  	min := dists[0].Result().Min
   172  	max := min
   173  
   174  	for _, dist := range dists {
   175  		res := dist.Result()
   176  		if min > res.Min {
   177  			min = res.Min
   178  		}
   179  		if max < res.Max {
   180  			max = res.Max
   181  		}
   182  	}
   183  	return float64(max-min) / float64(time.Second)
   184  }
   185  
   186  func publishMetricstoInfluxDB(options *influxDBOptions, results []loadTestResult) {
   187  	url := fmt.Sprintf("%v/write", options.hostname)
   188  	payload := buildPayload(options, results)
   189  
   190  	request, err := http.NewRequest("POST", url, strings.NewReader(payload))
   191  	if err != nil {
   192  		log.Print(err)
   193  		return
   194  	}
   195  
   196  	query := request.URL.Query()
   197  	query.Add("db", options.dbName)
   198  	query.Add("precision", "s")
   199  	request.URL.RawQuery = query.Encode()
   200  
   201  	if options.httpAuthEnabled() {
   202  		request.SetBasicAuth(options.user, options.password)
   203  	}
   204  
   205  	client := &http.Client{}
   206  	resp, err := client.Do(request)
   207  	if err != nil {
   208  		log.Print(err)
   209  		return
   210  	}
   211  	defer resp.Body.Close()
   212  
   213  	body, err := io.ReadAll(resp.Body)
   214  	if err != nil {
   215  		log.Print(err)
   216  		return
   217  	}
   218  
   219  	if resp.StatusCode != 204 {
   220  		jsonData := make(map[string]string)
   221  		json.Unmarshal(body, &jsonData)
   222  		log.Printf("Failed to publish metrics to InfluxDB. Received status code %v "+
   223  			"with an error message: %v", resp.StatusCode, jsonData["error"])
   224  	}
   225  }
   226  
   227  func buildPayload(options *influxDBOptions, results []loadTestResult) string {
   228  	points := make([]string, len(results))
   229  	for i, result := range results {
   230  		points[i] = fmt.Sprintf("%v,metric=%v value=%f %d", options.measurement,
   231  			result.metric, result.value, result.timestamp)
   232  	}
   233  	return strings.Join(points, "\n")
   234  }