google.golang.org/grpc@v1.72.2/interop/soak_tests.go (about)

     1  /*
     2  *
     3  * Copyright 2014 gRPC authors.
     4  *
     5  * Licensed under the Apache License, Version 2.0 (the "License");
     6  * you may not use this file except in compliance with the License.
     7  * You may obtain a copy of the License at
     8  *
     9  *     http://www.apache.org/licenses/LICENSE-2.0
    10  *
    11  * Unless required by applicable law or agreed to in writing, software
    12  * distributed under the License is distributed on an "AS IS" BASIS,
    13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  * See the License for the specific language governing permissions and
    15  * limitations under the License.
    16  *
    17   */
    18  
    19  package interop
    20  
    21  import (
    22  	"bytes"
    23  	"context"
    24  	"fmt"
    25  	"os"
    26  	"sync"
    27  	"time"
    28  
    29  	"google.golang.org/grpc"
    30  	"google.golang.org/grpc/benchmark/stats"
    31  	"google.golang.org/grpc/peer"
    32  
    33  	testgrpc "google.golang.org/grpc/interop/grpc_testing"
    34  	testpb "google.golang.org/grpc/interop/grpc_testing"
    35  )
    36  
// SoakWorkerResults stores the aggregated results for a specific worker during the soak test.
type SoakWorkerResults struct {
	// iterationsSucceeded counts iterations whose RPC returned a valid reply
	// within the per-iteration latency limit.
	iterationsSucceeded int
	// Failures counts iterations whose RPC failed, returned an unexpected
	// payload, or exceeded the per-iteration latency limit.
	Failures int
	// Latencies records the latency, in milliseconds, of each successful
	// iteration. It is created by the worker when nil.
	Latencies *stats.Histogram
}
    43  
// SoakIterationConfig holds the parameters required for a single soak
// iteration, i.e. one unary call issued by doOneSoakIteration.
type SoakIterationConfig struct {
	RequestSize  int                        // The size of the request payload in bytes.
	ResponseSize int                        // The expected size of the response payload in bytes.
	Client       testgrpc.TestServiceClient // The gRPC client to make the call.
	CallOptions  []grpc.CallOption          // Call options for the RPC.
}
    51  
// SoakTestConfig holds the configuration for the entire soak test.
type SoakTestConfig struct {
	// RequestSize is the request payload size, in bytes, used for every iteration.
	RequestSize int
	// ResponseSize is the response payload size, in bytes, requested and
	// expected for every iteration.
	ResponseSize int
	// PerIterationMaxAcceptableLatency is the latency above which an
	// otherwise successful iteration is counted as a failure.
	PerIterationMaxAcceptableLatency time.Duration
	// MinTimeBetweenRPCs is the minimum time between the start of two
	// consecutive iterations of the same worker.
	MinTimeBetweenRPCs time.Duration
	// OverallTimeout is the elapsed time, measured from the start of the
	// test, after which workers stop starting new iterations.
	OverallTimeout time.Duration
	// ServerAddr is the server address reported in the log output.
	ServerAddr string
	// NumWorkers is the number of concurrent workers; Iterations is divided
	// evenly among them.
	NumWorkers int
	// Iterations is the total number of iterations across all workers.
	Iterations int
	// MaxFailures is the number of failed iterations tolerated before the
	// test as a whole is declared failed.
	MaxFailures int
	// ChannelForTest is called once per iteration and returns the channel to
	// issue the RPC on together with a function that releases it.
	ChannelForTest func() (*grpc.ClientConn, func())
}
    65  
    66  func doOneSoakIteration(ctx context.Context, config SoakIterationConfig) (latency time.Duration, err error) {
    67  	start := time.Now()
    68  	// Do a large-unary RPC.
    69  	// Create the request payload.
    70  	pl := ClientNewPayload(testpb.PayloadType_COMPRESSABLE, config.RequestSize)
    71  	req := &testpb.SimpleRequest{
    72  		ResponseType: testpb.PayloadType_COMPRESSABLE,
    73  		ResponseSize: int32(config.ResponseSize),
    74  		Payload:      pl,
    75  	}
    76  	// Perform the GRPC call.
    77  	var reply *testpb.SimpleResponse
    78  	reply, err = config.Client.UnaryCall(ctx, req, config.CallOptions...)
    79  	if err != nil {
    80  		err = fmt.Errorf("/TestService/UnaryCall RPC failed: %s", err)
    81  		return 0, err
    82  	}
    83  	// Validate response.
    84  	t := reply.GetPayload().GetType()
    85  	s := len(reply.GetPayload().GetBody())
    86  	if t != testpb.PayloadType_COMPRESSABLE || s != config.ResponseSize {
    87  		err = fmt.Errorf("got the reply with type %d len %d; want %d, %d", t, s, testpb.PayloadType_COMPRESSABLE, config.ResponseSize)
    88  		return 0, err
    89  	}
    90  	// Calculate latency and return result.
    91  	latency = time.Since(start)
    92  	return latency, nil
    93  }
    94  
    95  func executeSoakTestInWorker(ctx context.Context, config SoakTestConfig, startTime time.Time, workerID int, soakWorkerResults *SoakWorkerResults) {
    96  	timeoutDuration := config.OverallTimeout
    97  	soakIterationsPerWorker := config.Iterations / config.NumWorkers
    98  	if soakWorkerResults.Latencies == nil {
    99  		soakWorkerResults.Latencies = stats.NewHistogram(stats.HistogramOptions{
   100  			NumBuckets:     20,
   101  			GrowthFactor:   1,
   102  			BaseBucketSize: 1,
   103  			MinValue:       0,
   104  		})
   105  	}
   106  
   107  	for i := 0; i < soakIterationsPerWorker; i++ {
   108  		if ctx.Err() != nil {
   109  			return
   110  		}
   111  		if time.Since(startTime) >= timeoutDuration {
   112  			fmt.Printf("Test exceeded overall timeout of %v, stopping...\n", config.OverallTimeout)
   113  			return
   114  		}
   115  		earliestNextStart := time.After(config.MinTimeBetweenRPCs)
   116  		currentChannel, cleanup := config.ChannelForTest()
   117  		defer cleanup()
   118  		client := testgrpc.NewTestServiceClient(currentChannel)
   119  		var p peer.Peer
   120  		iterationConfig := SoakIterationConfig{
   121  			RequestSize:  config.RequestSize,
   122  			ResponseSize: config.ResponseSize,
   123  			Client:       client,
   124  			CallOptions:  []grpc.CallOption{grpc.Peer(&p)},
   125  		}
   126  		latency, err := doOneSoakIteration(ctx, iterationConfig)
   127  		if err != nil {
   128  			fmt.Fprintf(os.Stderr, "Worker %d: soak iteration: %d elapsed_ms: %d peer: %v server_uri: %s failed: %s\n", workerID, i, 0, p.Addr, config.ServerAddr, err)
   129  			soakWorkerResults.Failures++
   130  			<-earliestNextStart
   131  			continue
   132  		}
   133  		if latency > config.PerIterationMaxAcceptableLatency {
   134  			fmt.Fprintf(os.Stderr, "Worker %d: soak iteration: %d elapsed_ms: %d peer: %v server_uri: %s exceeds max acceptable latency: %d\n", workerID, i, latency.Milliseconds(), p.Addr, config.ServerAddr, config.PerIterationMaxAcceptableLatency.Milliseconds())
   135  			soakWorkerResults.Failures++
   136  			<-earliestNextStart
   137  			continue
   138  		}
   139  		// Success: log the details of the iteration.
   140  		soakWorkerResults.Latencies.Add(latency.Milliseconds())
   141  		soakWorkerResults.iterationsSucceeded++
   142  		fmt.Fprintf(os.Stderr, "Worker %d: soak iteration: %d elapsed_ms: %d peer: %v server_uri: %s succeeded\n", workerID, i, latency.Milliseconds(), p.Addr, config.ServerAddr)
   143  		<-earliestNextStart
   144  	}
   145  }
   146  
   147  // DoSoakTest runs large unary RPCs in a loop for a configurable number of times, with configurable failure thresholds.
   148  // If resetChannel is false, then each RPC will be performed on tc. Otherwise, each RPC will be performed on a new
   149  // stub that is created with the provided server address and dial options.
   150  // TODO(mohanli-ml): Create SoakTestOptions as a parameter for this method.
   151  func DoSoakTest(ctx context.Context, soakConfig SoakTestConfig) {
   152  	if soakConfig.Iterations%soakConfig.NumWorkers != 0 {
   153  		fmt.Fprintf(os.Stderr, "soakIterations must be evenly divisible by soakNumWThreads\n")
   154  	}
   155  	startTime := time.Now()
   156  	var wg sync.WaitGroup
   157  	soakWorkerResults := make([]SoakWorkerResults, soakConfig.NumWorkers)
   158  	for i := 0; i < soakConfig.NumWorkers; i++ {
   159  		wg.Add(1)
   160  		go func(workerID int) {
   161  			defer wg.Done()
   162  			executeSoakTestInWorker(ctx, soakConfig, startTime, workerID, &soakWorkerResults[workerID])
   163  		}(i)
   164  	}
   165  	// Wait for all goroutines to complete.
   166  	wg.Wait()
   167  
   168  	// Handle results.
   169  	totalSuccesses := 0
   170  	totalFailures := 0
   171  	latencies := stats.NewHistogram(stats.HistogramOptions{
   172  		NumBuckets:     20,
   173  		GrowthFactor:   1,
   174  		BaseBucketSize: 1,
   175  		MinValue:       0,
   176  	})
   177  	for _, worker := range soakWorkerResults {
   178  		totalSuccesses += worker.iterationsSucceeded
   179  		totalFailures += worker.Failures
   180  		if worker.Latencies != nil {
   181  			// Add latencies from the worker's Histogram to the main latencies.
   182  			latencies.Merge(worker.Latencies)
   183  		}
   184  	}
   185  	var b bytes.Buffer
   186  	totalIterations := totalSuccesses + totalFailures
   187  	latencies.Print(&b)
   188  	fmt.Fprintf(os.Stderr,
   189  		"(server_uri: %s) soak test successes: %d / %d iterations. Total failures: %d. Latencies in milliseconds: %s\n",
   190  		soakConfig.ServerAddr, totalSuccesses, soakConfig.Iterations, totalFailures, b.String())
   191  
   192  	if totalIterations != soakConfig.Iterations {
   193  		logger.Fatalf("Soak test consumed all %v of time and quit early, ran %d out of %d iterations.\n", soakConfig.OverallTimeout, totalIterations, soakConfig.Iterations)
   194  	}
   195  
   196  	if totalFailures > soakConfig.MaxFailures {
   197  		logger.Fatalf("Soak test total failures: %d exceeded max failures threshold: %d\n", totalFailures, soakConfig.MaxFailures)
   198  	}
   199  	if soakConfig.ChannelForTest != nil {
   200  		_, cleanup := soakConfig.ChannelForTest()
   201  		defer cleanup()
   202  	}
   203  }