github.com/network-quality/goresponsiveness@v0.0.0-20240129151524-343954285090/rpm/rpm.go (about)

     1  /*
     2   * This file is part of Go Responsiveness.
     3   *
     4   * Go Responsiveness is free software: you can redistribute it and/or modify it under
     5   * the terms of the GNU General Public License as published by the Free Software Foundation,
     6   * either version 2 of the License, or (at your option) any later version.
     7   * Go Responsiveness is distributed in the hope that it will be useful, but WITHOUT ANY
     8   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
     9   * PARTICULAR PURPOSE. See the GNU General Public License for more details.
    10   *
    11   * You should have received a copy of the GNU General Public License along
    12   * with Go Responsiveness. If not, see <https://www.gnu.org/licenses/>.
    13   */
    14  
    15  package rpm
    16  
    17  import (
    18  	"context"
    19  	"crypto/tls"
    20  	"fmt"
    21  	"io"
    22  	"net/http"
    23  	"os"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/network-quality/goresponsiveness/constants"
    28  	"github.com/network-quality/goresponsiveness/debug"
    29  	"github.com/network-quality/goresponsiveness/extendedstats"
    30  	"github.com/network-quality/goresponsiveness/lgc"
    31  	"github.com/network-quality/goresponsiveness/probe"
    32  	"github.com/network-quality/goresponsiveness/series"
    33  	"github.com/network-quality/goresponsiveness/utilities"
    34  )
    35  
    36  func addFlows(
    37  	ctx context.Context,
    38  	toAdd uint64,
    39  	lgcc *lgc.LoadGeneratingConnectionCollection,
    40  	lgcGenerator func() lgc.LoadGeneratingConnection,
    41  	debugging debug.DebugLevel,
    42  ) uint64 {
    43  	lgcc.Lock.Lock()
    44  	defer lgcc.Lock.Unlock()
    45  	for i := uint64(0); i < toAdd; i++ {
    46  		// First, generate the connection.
    47  		newConnection := lgcGenerator()
    48  		lgcc.Append(newConnection)
    49  		if debug.IsDebug(debugging) {
    50  			fmt.Printf("Added a new %s load-generating connection.\n", newConnection.Direction())
    51  		}
    52  		// Second, try to start the connection.
    53  		if !newConnection.Start(ctx, debugging) {
    54  			// If there was an error, we'll make sure that the caller knows it.
    55  			fmt.Printf(
    56  				"Error starting lgc with id %d!\n", newConnection.ClientId(),
    57  			)
    58  			return i
    59  		}
    60  	}
    61  	return toAdd
    62  }
    63  
// GranularThroughputDataPoint records the instantaneous throughput observed
// on a single load-generating connection at one sampling instant. The struct
// tags (Description/Formatter/FormatterArgument) drive the report formatting
// performed elsewhere in the project.
type GranularThroughputDataPoint struct {
	Time       time.Time     `Description:"Time of the generation of the data point." Formatter:"Format" FormatterArgument:"01-02-2006-15-04-05.000"`
	Throughput float64       `Description:"Instantaneous throughput (B/s)."`
	ConnID     uint32        `Description:"Position of connection (ID)."`
	TCPRtt     time.Duration `Description:"The underlying connection's RTT at probe time."               Formatter:"Seconds"`
	TCPCwnd    uint32        `Description:"The underlying connection's congestion window at probe time."`
	Direction  string        `Description:"Direction of Throughput."`
}
    72  
// ThroughputDataPoint is one aggregate throughput sample across all parallel
// load-generating connections, together with the per-connection granular
// samples it was computed from. Tags drive the report formatting performed
// elsewhere in the project.
type ThroughputDataPoint struct {
	Time                         time.Time                     `Description:"Time of the generation of the data point." Formatter:"Format" FormatterArgument:"01-02-2006-15-04-05.000"`
	Throughput                   float64                       `Description:"Instantaneous throughput (B/s)."`
	ActiveConnections            int                           `Description:"Number of active parallel connections."`
	Connections                  int                           `Description:"Number of parallel connections."`
	GranularThroughputDataPoints []GranularThroughputDataPoint `Description:"[OMIT]"`
}
    80  
// SelfDataCollectionResult bundles the outcome of a self-probing data
// collection run: the measured rate, the load-generating connections used,
// the probe data points gathered, and a continuation for deferred logging.
type SelfDataCollectionResult struct {
	RateBps             float64
	LGCs                []lgc.LoadGeneratingConnection
	ProbeDataPoints     []probe.ProbeDataPoint
	LoggingContinuation func()
}
    87  
// ResponsivenessProbeResult pairs the data points from one probe round:
// Foreign is the probe sent over a fresh connection, Self is the probe sent
// over an existing load-generating connection. Either pointer may be nil if
// the corresponding probe was not performed.
type ResponsivenessProbeResult struct {
	Foreign *probe.ProbeDataPoint
	Self    *probe.ProbeDataPoint
}
    92  
    93  func ResponsivenessProber[BucketType utilities.Number](
    94  	proberCtx context.Context,
    95  	networkActivityCtx context.Context,
    96  	foreignProbeConfigurationGenerator func() probe.ProbeConfiguration,
    97  	selfProbeConfigurationGenerator func() probe.ProbeConfiguration,
    98  	selfProbeConnectionCollection *lgc.LoadGeneratingConnectionCollection,
    99  	bucketGenerator *series.NumericBucketGenerator[BucketType],
   100  	probeDirection lgc.LgcDirection,
   101  	probeInterval time.Duration,
   102  	keyLogger io.Writer,
   103  	captureExtendedStats bool,
   104  	debugging *debug.DebugWithPrefix,
   105  ) (dataPoints chan series.SeriesMessage[ResponsivenessProbeResult, BucketType]) {
   106  	if debug.IsDebug(debugging.Level) {
   107  		fmt.Printf(
   108  			"(%s) Starting to collect responsiveness information at an interval of %v!\n",
   109  			debugging.Prefix,
   110  			probeInterval,
   111  		)
   112  	}
   113  
   114  	// Make a channel to send back all the generated data points
   115  	// when we are probing.
   116  	dataPoints = make(chan series.SeriesMessage[ResponsivenessProbeResult, BucketType])
   117  
   118  	go func() {
   119  		wg := sync.WaitGroup{}
   120  		probeCount := uint(0)
   121  
   122  		dataPointsLock := sync.Mutex{}
   123  
   124  		// As long as our context says that we can continue to probe!
   125  		for proberCtx.Err() == nil {
   126  			time.Sleep(probeInterval)
   127  
   128  			// We may have slept for a very long time. So, let's check to see if we are
   129  			// still active, just for fun!
   130  			if proberCtx.Err() != nil {
   131  				break
   132  			}
   133  
   134  			wg.Add(1)
   135  			go func() {
   136  				defer wg.Done()
   137  				probeCount++
   138  				probeCount := probeCount
   139  
   140  				foreignProbeConfiguration := foreignProbeConfigurationGenerator()
   141  				selfProbeConfiguration := selfProbeConfigurationGenerator()
   142  
   143  				if debug.IsDebug(debugging.Level) {
   144  					fmt.Printf(
   145  						"(%s) About to send round %d of probes!\n",
   146  						debugging.Prefix,
   147  						probeCount,
   148  					)
   149  				}
   150  
   151  				dataPointsLock.Lock()
   152  				currentBucketId := bucketGenerator.Generate()
   153  				if dataPoints != nil {
   154  					dataPoints <- series.SeriesMessage[ResponsivenessProbeResult, BucketType]{
   155  						Type: series.SeriesMessageReserve, Bucket: currentBucketId,
   156  						Measure: utilities.None[ResponsivenessProbeResult](),
   157  					}
   158  				}
   159  				dataPointsLock.Unlock()
   160  
   161  				// The presence of a custom TLSClientConfig in a *generic* `transport`
   162  				// means that go will default to HTTP/1.1 and cowardly avoid HTTP/2:
   163  				// https://github.com/golang/go/blob/7ca6902c171b336d98adbb103d701a013229c806/src/net/http/transport.go#L278
   164  				// Also, it would appear that the API's choice of HTTP vs HTTP2 can
   165  				// depend on whether the url contains
   166  				// https:// or http://:
   167  				// https://github.com/golang/go/blob/7ca6902c171b336d98adbb103d701a013229c806/src/net/http/transport.go#L74
   168  				transport := &http.Transport{}
   169  				transport.TLSClientConfig = &tls.Config{}
   170  				transport.Proxy = http.ProxyFromEnvironment
   171  
   172  				if !utilities.IsInterfaceNil(keyLogger) {
   173  					if debug.IsDebug(debugging.Level) {
   174  						fmt.Printf(
   175  							"Using an SSL Key Logger for a foreign probe.\n",
   176  						)
   177  					}
   178  
   179  					transport.TLSClientConfig.KeyLogWriter = keyLogger
   180  				}
   181  
   182  				transport.TLSClientConfig.InsecureSkipVerify =
   183  					foreignProbeConfiguration.InsecureSkipVerify
   184  
   185  				utilities.OverrideHostTransport(transport,
   186  					foreignProbeConfiguration.ConnectToAddr)
   187  
   188  				foreignProbeClient := &http.Client{Transport: transport}
   189  
   190  				// Start Foreign Connection Prober
   191  				foreignProbeDataPoint, err := probe.Probe(
   192  					networkActivityCtx,
   193  					foreignProbeClient,
   194  					foreignProbeConfiguration.URL,
   195  					foreignProbeConfiguration.Host,
   196  					probe.Foreign,
   197  					probeCount,
   198  					foreignProbeConfiguration.CongestionControl,
   199  					captureExtendedStats,
   200  					debugging,
   201  				)
   202  				if err != nil {
   203  					return
   204  				}
   205  
   206  				var selfProbeConnection *lgc.LoadGeneratingConnection = nil
   207  				if selfProbeConnectionCollection != nil {
   208  					func() {
   209  						selfProbeConnectionCollection.Lock.Lock()
   210  						defer selfProbeConnectionCollection.Lock.Unlock()
   211  						selfProbeConnection, err = selfProbeConnectionCollection.GetRandom()
   212  						if err != nil {
   213  							if debug.IsWarn(debugging.Level) {
   214  								fmt.Printf(
   215  									"(%s) Failed to get a random %s load-generating connection on which to send a probe: %v.\n",
   216  									debugging.Prefix,
   217  									probeDirection,
   218  									err,
   219  								)
   220  							}
   221  							return
   222  						}
   223  					}()
   224  				}
   225  				if selfProbeConnectionCollection != nil && selfProbeConnection == nil {
   226  					return
   227  				}
   228  
   229  				var selfProbeDataPoint *probe.ProbeDataPoint = nil
   230  				if selfProbeConnection != nil {
   231  					// TODO: Make the following sanity check more than just a check.
   232  					// We only want to start a SelfUp probe on a connection that is
   233  					// in the RUNNING state.
   234  					if (*selfProbeConnection).Status() != lgc.LGC_STATUS_RUNNING {
   235  						if debug.IsWarn(debugging.Level) {
   236  							fmt.Printf(
   237  								"(%s) The selected random %s load-generating connection on which to send a probe was not running.\n",
   238  								debugging.Prefix,
   239  								probeDirection,
   240  							)
   241  						}
   242  						return
   243  					}
   244  
   245  					if debug.IsDebug(debugging.Level) {
   246  						fmt.Printf(
   247  							"(%s) Selected %s load-generating connection with ID %d to send a self probe with Id %d.\n",
   248  							debugging.Prefix,
   249  							probeDirection,
   250  							(*selfProbeConnection).ClientId(),
   251  							probeCount,
   252  						)
   253  					}
   254  					selfProbeDataPoint, err = probe.Probe(
   255  						proberCtx,
   256  						(*selfProbeConnection).Client(),
   257  						selfProbeConfiguration.URL,
   258  						selfProbeConfiguration.Host,
   259  						utilities.Conditional(probeDirection == lgc.LGC_DOWN, probe.SelfDown, probe.SelfUp),
   260  						probeCount,
   261  						selfProbeConfiguration.CongestionControl,
   262  						captureExtendedStats,
   263  						debugging,
   264  					)
   265  					if err != nil {
   266  						// We may see an error here because the prober context was cancelled
   267  						// and requests were attempting to be sent. This situation is not an
   268  						// error (per se) so we will not log it as such.
   269  
   270  						if proberCtx.Err() != nil {
   271  							if debug.IsDebug(debugging.Level) {
   272  								fmt.Printf(
   273  									"(%s) Failed to send a probe (id: %v) because the prober context was cancelled.\n",
   274  									debugging.Prefix,
   275  									probeCount,
   276  								)
   277  							}
   278  							return
   279  						}
   280  						fmt.Printf(
   281  							"(%s) There was an error sending a self probe with Id %d: %v\n",
   282  							debugging.Prefix,
   283  							probeCount,
   284  							err,
   285  						)
   286  						return
   287  					}
   288  				} else {
   289  					if debug.IsDebug(debugging.Level) {
   290  						fmt.Printf(
   291  							"(%s) Did not send a self probe at id %d of probes!\n",
   292  							debugging.Prefix,
   293  							probeCount,
   294  						)
   295  					}
   296  				}
   297  				if debug.IsDebug(debugging.Level) {
   298  					fmt.Printf(
   299  						"(%s) About to report results for round %d of probes!\n",
   300  						debugging.Prefix,
   301  						probeCount,
   302  					)
   303  				}
   304  				dataPointsLock.Lock()
   305  				defer dataPointsLock.Unlock()
   306  				// Now we have our (maybe) four data points (three in the foreign probe data point and [maybe] one in the self probe data point)
   307  				if dataPoints != nil {
   308  					measurement := ResponsivenessProbeResult{
   309  						Foreign: foreignProbeDataPoint, Self: selfProbeDataPoint,
   310  					}
   311  
   312  					dataPoints <- series.SeriesMessage[ResponsivenessProbeResult, BucketType]{
   313  						Type: series.SeriesMessageMeasure, Bucket: currentBucketId,
   314  						Measure: utilities.Some[ResponsivenessProbeResult](measurement),
   315  					}
   316  				}
   317  			}()
   318  		}
   319  		if debug.IsDebug(debugging.Level) {
   320  			fmt.Printf(
   321  				"(%s) Probe driver is going to start waiting for its probes to finish.\n",
   322  				debugging.Prefix,
   323  			)
   324  		}
   325  		utilities.OrTimeout(func() { wg.Wait() }, 2*time.Second)
   326  		if debug.IsDebug(debugging.Level) {
   327  			fmt.Printf(
   328  				"(%s) Probe driver is done waiting for its probes to finish.\n",
   329  				debugging.Prefix,
   330  			)
   331  		}
   332  		dataPointsLock.Lock()
   333  		close(dataPoints)
   334  		dataPoints = nil
   335  		dataPointsLock.Unlock()
   336  	}()
   337  	return
   338  }
   339  
// LoadGenerator drives the throughput side of the measurement: it creates
// load-generating connections (LGCs), samples their aggregate instantaneous
// throughput once per interval (id), reports every sample as a pair of
// Reserve/Measure series messages on the returned channel, and additively
// grows the number of parallel connections each interval until mnp is
// reached. The loop ends when throughputCtx is cancelled or when every LGC
// has become invalid (taken to mean the network/server went away).
//
// NOTE(review): the returned channel is never closed by this function, and
// sends on it are not guarded by a context check — confirm the receiver
// drains it for the lifetime of throughputCtx.
func LoadGenerator[BucketType utilities.Number](
	throughputCtx context.Context, // Stop our activity when we no longer need any throughput
	networkActivityCtx context.Context, // Create all network connections in this context.
	rampupInterval time.Duration,
	lgcGenerator func() lgc.LoadGeneratingConnection, // Use this to generate a new load-generating connection.
	loadGeneratingConnectionsCollection *lgc.LoadGeneratingConnectionCollection,
	bucketGenerator *series.NumericBucketGenerator[BucketType],
	mnp int, // maximum number of parallel transport-layer connections.
	id time.Duration, // the interval to wait to test for stability (it doubles as the time between adding LGCs).
	captureExtendedStats bool, // do we want to attempt to gather TCP information on these connections?
	debugging *debug.DebugWithPrefix, // How can we forget debugging?
) (seriesCommunicationChannel chan series.SeriesMessage[ThroughputDataPoint, BucketType]) { // Send back all the instantaneous throughputs that we generate.
	seriesCommunicationChannel = make(chan series.SeriesMessage[ThroughputDataPoint, BucketType])

	go func() {
		// Running count of every flow this generator has created (reported
		// in the final debug message).
		flowsCreated := uint64(0)

		// Seed the test with the initial batch of load-generating connections.
		flowsCreated += addFlows(
			networkActivityCtx,
			constants.StartingNumberOfLoadGeneratingConnections,
			loadGeneratingConnectionsCollection,
			lgcGenerator,
			debugging.Level,
		)

		// The first sample happens after the ramp-up period; subsequent
		// samples are id apart.
		nextSampleStartTime := time.Now().Add(rampupInterval)

		for currentIntervalId := uint64(0); true; currentIntervalId++ {

			// If the throughputCtx is canceled, then that means our work here is done ...
			if throughputCtx.Err() != nil {
				break
			}

			now := time.Now()
			// At each 1-second interval
			if nextSampleStartTime.Sub(now) > 0 {
				if debug.IsDebug(debugging.Level) {
					fmt.Printf(
						"%v: Sleeping until %v\n",
						debugging,
						nextSampleStartTime,
					)
				}
				time.Sleep(nextSampleStartTime.Sub(now))
			} else {
				fmt.Fprintf(os.Stderr, "Warning: Missed a %v deadline.\n", id.Milliseconds())
			}
			nextSampleStartTime = time.Now().Add(id)

			// Waiting is the hardest part -- that was a long time asleep
			// and we may have been cancelled during that time!
			if throughputCtx.Err() != nil {
				break
			}

			// Compute "instantaneous aggregate" goodput which is the number of
			// bytes transferred within the last second.
			var instantaneousThroughputTotal float64 = 0
			var instantaneousThroughputDataPoints uint = 0
			granularThroughputDatapoints := make([]GranularThroughputDataPoint, 0)
			now = time.Now() // Used to align granular throughput data
			allInvalid := true
			// NOTE(review): the LGCs slice header/length is read here without
			// holding the collection lock, while addFlows appends under the
			// lock — confirm this cannot race with a concurrent append.
			for i := range *loadGeneratingConnectionsCollection.LGCs {
				loadGeneratingConnectionsCollection.Lock.Lock()
				connectionState := (*loadGeneratingConnectionsCollection.LGCs)[i].Status()
				loadGeneratingConnectionsCollection.Lock.Unlock()
				switch connectionState {
				default:
					{
						// An unknown status is a programming error; crash
						// loudly after echoing the message to stderr.
						error := fmt.Sprintf(
							"%v: Load-generating connection with id %d is in an unrecognizable state.\n",
							debugging,
							(*loadGeneratingConnectionsCollection.LGCs)[i].ClientId())
						fmt.Fprintf(os.Stderr, "%s", error)
						panic(error)
					}
				case lgc.LGC_STATUS_ERROR,
					lgc.LGC_STATUS_DONE:
					{
						if debug.IsDebug(debugging.Level) {
							fmt.Printf(
								"%v: Load-generating connection with id %d is invalid or complete ... skipping.\n",
								debugging,
								(*loadGeneratingConnectionsCollection.LGCs)[i].ClientId(),
							)
						}
						// TODO: Do we add null connection to throughput? and how do we define it? Throughput -1 or 0?
						granularThroughputDatapoints = append(
							granularThroughputDatapoints,
							GranularThroughputDataPoint{now, 0, uint32(i), 0, 0, ""},
						)
					}
				case lgc.LGC_STATUS_NOT_STARTED:
					{
						// Not an error: the connection just hasn't come up yet.
						if debug.IsDebug(debugging.Level) {
							fmt.Printf(
								"%v: Load-generating connection with id %d has not finished starting; "+
									"it will not contribute throughput during this interval.\n",
								debugging,
								(*loadGeneratingConnectionsCollection.LGCs)[i].ClientId())
						}
					}
				case lgc.LGC_STATUS_RUNNING:
					{
						allInvalid = false
						currentTransferred, currentInterval :=
							(*loadGeneratingConnectionsCollection.LGCs)[i].TransferredInInterval()
						// normalize to a second-long interval!
						instantaneousConnectionThroughput := float64(
							currentTransferred,
						) / float64(
							currentInterval.Seconds(),
						)
						instantaneousThroughputTotal += instantaneousConnectionThroughput
						instantaneousThroughputDataPoints++

						// Optionally attach TCP-level RTT/cwnd to the granular
						// data point (zero values when unavailable).
						tcpRtt := time.Duration(0 * time.Second)
						tcpCwnd := uint32(0)
						if captureExtendedStats && extendedstats.ExtendedStatsAvailable() {
							if stats := (*loadGeneratingConnectionsCollection.LGCs)[i].Stats(); stats != nil {
								tcpInfo, err := extendedstats.GetTCPInfo(stats.ConnInfo.Conn)
								if err == nil {
									tcpRtt = time.Duration(tcpInfo.Rtt) * time.Microsecond
									tcpCwnd = tcpInfo.Snd_cwnd
								} else {
									fmt.Printf("Warning: Could not fetch the extended stats for a probe: %v\n", err)
								}
							}
						}
						granularThroughputDatapoints = append(
							granularThroughputDatapoints,
							GranularThroughputDataPoint{
								now,
								instantaneousConnectionThroughput,
								uint32(i),
								tcpRtt,
								tcpCwnd,
								"",
							},
						)
					}
				}
			}

			// For some reason, all the lgcs are invalid. This likely means that
			// the network/server went away.
			if allInvalid {
				if debug.IsDebug(debugging.Level) {
					fmt.Printf(
						"%v: All lgcs were invalid. Assuming that network/server went away.\n",
						debugging,
					)
				}
				break
			}

			// We have generated a throughput calculation -- let's send it back to the coordinator
			throughputDataPoint := ThroughputDataPoint{
				time.Now(),
				instantaneousThroughputTotal,
				int(instantaneousThroughputDataPoints),
				len(*loadGeneratingConnectionsCollection.LGCs),
				granularThroughputDatapoints,
			}

			currentBucketId := bucketGenerator.Generate()

			// Reserve the bucket, then deliver the measurement for it.
			seriesCommunicationChannel <- series.SeriesMessage[ThroughputDataPoint, BucketType]{
				Type: series.SeriesMessageReserve, Bucket: currentBucketId,
			}
			seriesCommunicationChannel <- series.SeriesMessage[ThroughputDataPoint, BucketType]{
				Type: series.SeriesMessageMeasure, Bucket: currentBucketId,
				Measure: utilities.Some[ThroughputDataPoint](throughputDataPoint),
			}

			loadGeneratingConnectionsCollection.Lock.Lock()
			currentParallelConnectionCount, err :=
				loadGeneratingConnectionsCollection.Len()
			loadGeneratingConnectionsCollection.Lock.Unlock()

			// A failed Len() is only warned about; the count then stays at
			// its zero value, which permits adding more flows below.
			if err != nil {
				if debug.IsWarn(debugging.Level) {
					fmt.Printf(
						"%v: Failed to get a count of the number of parallel load-generating connections: %v.\n",
						debugging,
						err,
					)
				}
			}
			if currentParallelConnectionCount < mnp {
				// Just add another constants.AdditiveNumberOfLoadGeneratingConnections flows -- that's our only job now!
				flowsCreated += addFlows(
					networkActivityCtx,
					constants.AdditiveNumberOfLoadGeneratingConnections,
					loadGeneratingConnectionsCollection,
					lgcGenerator,
					debugging.Level,
				)
			} else if debug.IsWarn(debugging.Level) {
				fmt.Printf(
					"%v: Maximum number of parallel transport-layer connections reached (%d). Not adding another.\n",
					debugging,
					mnp,
				)
			}
		}

		if debug.IsDebug(debugging.Level) {
			fmt.Printf(
				"(%s) Stopping a load generator after creating %d flows.\n",
				debugging.Prefix, flowsCreated)
		}
	}()
	return
}