github.com/network-quality/goresponsiveness@v0.0.0-20240129151524-343954285090/rpm/rpm.go (about) 1 /* 2 * This file is part of Go Responsiveness. 3 * 4 * Go Responsiveness is free software: you can redistribute it and/or modify it under 5 * the terms of the GNU General Public License as published by the Free Software Foundation, 6 * either version 2 of the License, or (at your option) any later version. 7 * Go Responsiveness is distributed in the hope that it will be useful, but WITHOUT ANY 8 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A 9 * PARTICULAR PURPOSE. See the GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License along 12 * with Go Responsiveness. If not, see <https://www.gnu.org/licenses/>. 13 */ 14 15 package rpm 16 17 import ( 18 "context" 19 "crypto/tls" 20 "fmt" 21 "io" 22 "net/http" 23 "os" 24 "sync" 25 "time" 26 27 "github.com/network-quality/goresponsiveness/constants" 28 "github.com/network-quality/goresponsiveness/debug" 29 "github.com/network-quality/goresponsiveness/extendedstats" 30 "github.com/network-quality/goresponsiveness/lgc" 31 "github.com/network-quality/goresponsiveness/probe" 32 "github.com/network-quality/goresponsiveness/series" 33 "github.com/network-quality/goresponsiveness/utilities" 34 ) 35 36 func addFlows( 37 ctx context.Context, 38 toAdd uint64, 39 lgcc *lgc.LoadGeneratingConnectionCollection, 40 lgcGenerator func() lgc.LoadGeneratingConnection, 41 debugging debug.DebugLevel, 42 ) uint64 { 43 lgcc.Lock.Lock() 44 defer lgcc.Lock.Unlock() 45 for i := uint64(0); i < toAdd; i++ { 46 // First, generate the connection. 47 newConnection := lgcGenerator() 48 lgcc.Append(newConnection) 49 if debug.IsDebug(debugging) { 50 fmt.Printf("Added a new %s load-generating connection.\n", newConnection.Direction()) 51 } 52 // Second, try to start the connection. 53 if !newConnection.Start(ctx, debugging) { 54 // If there was an error, we'll make sure that the caller knows it. 55 fmt.Printf( 56 "Error starting lgc with id %d!\n", newConnection.ClientId(), 57 ) 58 return i 59 } 60 } 61 return toAdd 62 } 63 64 type GranularThroughputDataPoint struct { 65 Time time.Time `Description:"Time of the generation of the data point." Formatter:"Format" FormatterArgument:"01-02-2006-15-04-05.000"` 66 Throughput float64 `Description:"Instantaneous throughput (B/s)."` 67 ConnID uint32 `Description:"Position of connection (ID)."` 68 TCPRtt time.Duration `Description:"The underlying connection's RTT at probe time." Formatter:"Seconds"` 69 TCPCwnd uint32 `Description:"The underlying connection's congestion window at probe time."` 70 Direction string `Description:"Direction of Throughput."` 71 } 72 73 type ThroughputDataPoint struct { 74 Time time.Time `Description:"Time of the generation of the data point." Formatter:"Format" FormatterArgument:"01-02-2006-15-04-05.000"` 75 Throughput float64 `Description:"Instantaneous throughput (B/s)."` 76 ActiveConnections int `Description:"Number of active parallel connections."` 77 Connections int `Description:"Number of parallel connections."` 78 GranularThroughputDataPoints []GranularThroughputDataPoint `Description:"[OMIT]"` 79 } 80 81 type SelfDataCollectionResult struct { 82 RateBps float64 83 LGCs []lgc.LoadGeneratingConnection 84 ProbeDataPoints []probe.ProbeDataPoint 85 LoggingContinuation func() 86 } 87 88 type ResponsivenessProbeResult struct { 89 Foreign *probe.ProbeDataPoint 90 Self *probe.ProbeDataPoint 91 } 92 93 func ResponsivenessProber[BucketType utilities.Number]( 94 proberCtx context.Context, 95 networkActivityCtx context.Context, 96 foreignProbeConfigurationGenerator func() probe.ProbeConfiguration, 97 selfProbeConfigurationGenerator func() probe.ProbeConfiguration, 98 selfProbeConnectionCollection *lgc.LoadGeneratingConnectionCollection, 99 bucketGenerator *series.NumericBucketGenerator[BucketType], 100 probeDirection lgc.LgcDirection, 101 probeInterval time.Duration, 102 keyLogger io.Writer, 103 captureExtendedStats bool, 104 debugging *debug.DebugWithPrefix, 105 ) (dataPoints chan series.SeriesMessage[ResponsivenessProbeResult, BucketType]) { 106 if debug.IsDebug(debugging.Level) { 107 fmt.Printf( 108 "(%s) Starting to collect responsiveness information at an interval of %v!\n", 109 debugging.Prefix, 110 probeInterval, 111 ) 112 } 113 114 // Make a channel to send back all the generated data points 115 // when we are probing. 116 dataPoints = make(chan series.SeriesMessage[ResponsivenessProbeResult, BucketType]) 117 118 go func() { 119 wg := sync.WaitGroup{} 120 probeCount := uint(0) 121 122 dataPointsLock := sync.Mutex{} 123 124 // As long as our context says that we can continue to probe! 125 for proberCtx.Err() == nil { 126 time.Sleep(probeInterval) 127 128 // We may have slept for a very long time. So, let's check to see if we are 129 // still active, just for fun! 130 if proberCtx.Err() != nil { 131 break 132 } 133 134 wg.Add(1) 135 go func() { 136 defer wg.Done() 137 probeCount++ 138 probeCount := probeCount 139 140 foreignProbeConfiguration := foreignProbeConfigurationGenerator() 141 selfProbeConfiguration := selfProbeConfigurationGenerator() 142 143 if debug.IsDebug(debugging.Level) { 144 fmt.Printf( 145 "(%s) About to send round %d of probes!\n", 146 debugging.Prefix, 147 probeCount, 148 ) 149 } 150 151 dataPointsLock.Lock() 152 currentBucketId := bucketGenerator.Generate() 153 if dataPoints != nil { 154 dataPoints <- series.SeriesMessage[ResponsivenessProbeResult, BucketType]{ 155 Type: series.SeriesMessageReserve, Bucket: currentBucketId, 156 Measure: utilities.None[ResponsivenessProbeResult](), 157 } 158 } 159 dataPointsLock.Unlock() 160 161 // The presence of a custom TLSClientConfig in a *generic* `transport` 162 // means that go will default to HTTP/1.1 and cowardly avoid HTTP/2: 163 // https://github.com/golang/go/blob/7ca6902c171b336d98adbb103d701a013229c806/src/net/http/transport.go#L278 164 // Also, it would appear that the API's choice of HTTP vs HTTP2 can 165 // depend on whether the url contains 166 // https:// or http://: 167 // https://github.com/golang/go/blob/7ca6902c171b336d98adbb103d701a013229c806/src/net/http/transport.go#L74 168 transport := &http.Transport{} 169 transport.TLSClientConfig = &tls.Config{} 170 transport.Proxy = http.ProxyFromEnvironment 171 172 if !utilities.IsInterfaceNil(keyLogger) { 173 if debug.IsDebug(debugging.Level) { 174 fmt.Printf( 175 "Using an SSL Key Logger for a foreign probe.\n", 176 ) 177 } 178 179 transport.TLSClientConfig.KeyLogWriter = keyLogger 180 } 181 182 transport.TLSClientConfig.InsecureSkipVerify = 183 foreignProbeConfiguration.InsecureSkipVerify 184 185 utilities.OverrideHostTransport(transport, 186 foreignProbeConfiguration.ConnectToAddr) 187 188 foreignProbeClient := &http.Client{Transport: transport} 189 190 // Start Foreign Connection Prober 191 foreignProbeDataPoint, err := probe.Probe( 192 networkActivityCtx, 193 foreignProbeClient, 194 foreignProbeConfiguration.URL, 195 foreignProbeConfiguration.Host, 196 probe.Foreign, 197 probeCount, 198 foreignProbeConfiguration.CongestionControl, 199 captureExtendedStats, 200 debugging, 201 ) 202 if err != nil { 203 return 204 } 205 206 var selfProbeConnection *lgc.LoadGeneratingConnection = nil 207 if selfProbeConnectionCollection != nil { 208 func() { 209 selfProbeConnectionCollection.Lock.Lock() 210 defer selfProbeConnectionCollection.Lock.Unlock() 211 selfProbeConnection, err = selfProbeConnectionCollection.GetRandom() 212 if err != nil { 213 if debug.IsWarn(debugging.Level) { 214 fmt.Printf( 215 "(%s) Failed to get a random %s load-generating connection on which to send a probe: %v.\n", 216 debugging.Prefix, 217 probeDirection, 218 err, 219 ) 220 } 221 return 222 } 223 }() 224 } 225 if selfProbeConnectionCollection != nil && selfProbeConnection == nil { 226 return 227 } 228 229 var selfProbeDataPoint *probe.ProbeDataPoint = nil 230 if selfProbeConnection != nil { 231 // TODO: Make the following sanity check more than just a check. 232 // We only want to start a SelfUp probe on a connection that is 233 // in the RUNNING state. 234 if (*selfProbeConnection).Status() != lgc.LGC_STATUS_RUNNING { 235 if debug.IsWarn(debugging.Level) { 236 fmt.Printf( 237 "(%s) The selected random %s load-generating connection on which to send a probe was not running.\n", 238 debugging.Prefix, 239 probeDirection, 240 ) 241 } 242 return 243 } 244 245 if debug.IsDebug(debugging.Level) { 246 fmt.Printf( 247 "(%s) Selected %s load-generating connection with ID %d to send a self probe with Id %d.\n", 248 debugging.Prefix, 249 probeDirection, 250 (*selfProbeConnection).ClientId(), 251 probeCount, 252 ) 253 } 254 selfProbeDataPoint, err = probe.Probe( 255 proberCtx, 256 (*selfProbeConnection).Client(), 257 selfProbeConfiguration.URL, 258 selfProbeConfiguration.Host, 259 utilities.Conditional(probeDirection == lgc.LGC_DOWN, probe.SelfDown, probe.SelfUp), 260 probeCount, 261 selfProbeConfiguration.CongestionControl, 262 captureExtendedStats, 263 debugging, 264 ) 265 if err != nil { 266 // We may see an error here because the prober context was cancelled 267 // and requests were attempting to be sent. This situation is not an 268 // error (per se) so we will not log it as such. 269 270 if proberCtx.Err() != nil { 271 if debug.IsDebug(debugging.Level) { 272 fmt.Printf( 273 "(%s) Failed to send a probe (id: %v) because the prober context was cancelled.\n", 274 debugging.Prefix, 275 probeCount, 276 ) 277 } 278 return 279 } 280 fmt.Printf( 281 "(%s) There was an error sending a self probe with Id %d: %v\n", 282 debugging.Prefix, 283 probeCount, 284 err, 285 ) 286 return 287 } 288 } else { 289 if debug.IsDebug(debugging.Level) { 290 fmt.Printf( 291 "(%s) Did not send a self probe at id %d of probes!\n", 292 debugging.Prefix, 293 probeCount, 294 ) 295 } 296 } 297 if debug.IsDebug(debugging.Level) { 298 fmt.Printf( 299 "(%s) About to report results for round %d of probes!\n", 300 debugging.Prefix, 301 probeCount, 302 ) 303 } 304 dataPointsLock.Lock() 305 defer dataPointsLock.Unlock() 306 // Now we have our (maybe) four data points (three in the foreign probe data point and [maybe] one in the self probe data point) 307 if dataPoints != nil { 308 measurement := ResponsivenessProbeResult{ 309 Foreign: foreignProbeDataPoint, Self: selfProbeDataPoint, 310 } 311 312 dataPoints <- series.SeriesMessage[ResponsivenessProbeResult, BucketType]{ 313 Type: series.SeriesMessageMeasure, Bucket: currentBucketId, 314 Measure: utilities.Some[ResponsivenessProbeResult](measurement), 315 } 316 } 317 }() 318 } 319 if debug.IsDebug(debugging.Level) { 320 fmt.Printf( 321 "(%s) Probe driver is going to start waiting for its probes to finish.\n", 322 debugging.Prefix, 323 ) 324 } 325 utilities.OrTimeout(func() { wg.Wait() }, 2*time.Second) 326 if debug.IsDebug(debugging.Level) { 327 fmt.Printf( 328 "(%s) Probe driver is done waiting for its probes to finish.\n", 329 debugging.Prefix, 330 ) 331 } 332 dataPointsLock.Lock() 333 close(dataPoints) 334 dataPoints = nil 335 dataPointsLock.Unlock() 336 }() 337 return 338 } 339 340 func LoadGenerator[BucketType utilities.Number]( 341 throughputCtx context.Context, // Stop our activity when we no longer need any throughput 342 networkActivityCtx context.Context, // Create all network connections in this context. 343 rampupInterval time.Duration, 344 lgcGenerator func() lgc.LoadGeneratingConnection, // Use this to generate a new load-generating connection. 345 loadGeneratingConnectionsCollection *lgc.LoadGeneratingConnectionCollection, 346 bucketGenerator *series.NumericBucketGenerator[BucketType], 347 mnp int, 348 id time.Duration, // the interval to wait to test for stability (it doubles as the time between adding LGCs). 349 captureExtendedStats bool, // do we want to attempt to gather TCP information on these connections? 350 debugging *debug.DebugWithPrefix, // How can we forget debugging? 351 ) (seriesCommunicationChannel chan series.SeriesMessage[ThroughputDataPoint, BucketType]) { // Send back all the instantaneous throughputs that we generate. 352 seriesCommunicationChannel = make(chan series.SeriesMessage[ThroughputDataPoint, BucketType]) 353 354 go func() { 355 flowsCreated := uint64(0) 356 357 flowsCreated += addFlows( 358 networkActivityCtx, 359 constants.StartingNumberOfLoadGeneratingConnections, 360 loadGeneratingConnectionsCollection, 361 lgcGenerator, 362 debugging.Level, 363 ) 364 365 nextSampleStartTime := time.Now().Add(rampupInterval) 366 367 for currentIntervalId := uint64(0); true; currentIntervalId++ { 368 369 // If the throughputCtx is canceled, then that means our work here is done ... 370 if throughputCtx.Err() != nil { 371 break 372 } 373 374 now := time.Now() 375 // At each 1-second interval 376 if nextSampleStartTime.Sub(now) > 0 { 377 if debug.IsDebug(debugging.Level) { 378 fmt.Printf( 379 "%v: Sleeping until %v\n", 380 debugging, 381 nextSampleStartTime, 382 ) 383 } 384 time.Sleep(nextSampleStartTime.Sub(now)) 385 } else { 386 fmt.Fprintf(os.Stderr, "Warning: Missed a %v deadline.\n", id.Milliseconds()) 387 } 388 nextSampleStartTime = time.Now().Add(id) 389 390 // Waiting is the hardest part -- that was a long time asleep 391 // and we may have been cancelled during that time! 392 if throughputCtx.Err() != nil { 393 break 394 } 395 396 // Compute "instantaneous aggregate" goodput which is the number of 397 // bytes transferred within the last second. 398 var instantaneousThroughputTotal float64 = 0 399 var instantaneousThroughputDataPoints uint = 0 400 granularThroughputDatapoints := make([]GranularThroughputDataPoint, 0) 401 now = time.Now() // Used to align granular throughput data 402 allInvalid := true 403 for i := range *loadGeneratingConnectionsCollection.LGCs { 404 loadGeneratingConnectionsCollection.Lock.Lock() 405 connectionState := (*loadGeneratingConnectionsCollection.LGCs)[i].Status() 406 loadGeneratingConnectionsCollection.Lock.Unlock() 407 switch connectionState { 408 default: 409 { 410 error := fmt.Sprintf( 411 "%v: Load-generating connection with id %d is in an unrecognizable state.\n", 412 debugging, 413 (*loadGeneratingConnectionsCollection.LGCs)[i].ClientId()) 414 fmt.Fprintf(os.Stderr, "%s", error) 415 panic(error) 416 } 417 case lgc.LGC_STATUS_ERROR, 418 lgc.LGC_STATUS_DONE: 419 { 420 if debug.IsDebug(debugging.Level) { 421 fmt.Printf( 422 "%v: Load-generating connection with id %d is invalid or complete ... skipping.\n", 423 debugging, 424 (*loadGeneratingConnectionsCollection.LGCs)[i].ClientId(), 425 ) 426 } 427 // TODO: Do we add null connection to throughput? and how do we define it? Throughput -1 or 0? 428 granularThroughputDatapoints = append( 429 granularThroughputDatapoints, 430 GranularThroughputDataPoint{now, 0, uint32(i), 0, 0, ""}, 431 ) 432 } 433 case lgc.LGC_STATUS_NOT_STARTED: 434 { 435 if debug.IsDebug(debugging.Level) { 436 fmt.Printf( 437 "%v: Load-generating connection with id %d has not finished starting; "+ 438 "it will not contribute throughput during this interval.\n", 439 debugging, 440 (*loadGeneratingConnectionsCollection.LGCs)[i].ClientId()) 441 } 442 } 443 case lgc.LGC_STATUS_RUNNING: 444 { 445 allInvalid = false 446 currentTransferred, currentInterval := 447 (*loadGeneratingConnectionsCollection.LGCs)[i].TransferredInInterval() 448 // normalize to a second-long interval! 449 instantaneousConnectionThroughput := float64( 450 currentTransferred, 451 ) / float64( 452 currentInterval.Seconds(), 453 ) 454 instantaneousThroughputTotal += instantaneousConnectionThroughput 455 instantaneousThroughputDataPoints++ 456 457 tcpRtt := time.Duration(0 * time.Second) 458 tcpCwnd := uint32(0) 459 if captureExtendedStats && extendedstats.ExtendedStatsAvailable() { 460 if stats := (*loadGeneratingConnectionsCollection.LGCs)[i].Stats(); stats != nil { 461 tcpInfo, err := extendedstats.GetTCPInfo(stats.ConnInfo.Conn) 462 if err == nil { 463 tcpRtt = time.Duration(tcpInfo.Rtt) * time.Microsecond 464 tcpCwnd = tcpInfo.Snd_cwnd 465 } else { 466 fmt.Printf("Warning: Could not fetch the extended stats for a probe: %v\n", err) 467 } 468 } 469 } 470 granularThroughputDatapoints = append( 471 granularThroughputDatapoints, 472 GranularThroughputDataPoint{ 473 now, 474 instantaneousConnectionThroughput, 475 uint32(i), 476 tcpRtt, 477 tcpCwnd, 478 "", 479 }, 480 ) 481 } 482 } 483 } 484 485 // For some reason, all the lgcs are invalid. This likely means that 486 // the network/server went away. 487 if allInvalid { 488 if debug.IsDebug(debugging.Level) { 489 fmt.Printf( 490 "%v: All lgcs were invalid. Assuming that network/server went away.\n", 491 debugging, 492 ) 493 } 494 break 495 } 496 497 // We have generated a throughput calculation -- let's send it back to the coordinator 498 throughputDataPoint := ThroughputDataPoint{ 499 time.Now(), 500 instantaneousThroughputTotal, 501 int(instantaneousThroughputDataPoints), 502 len(*loadGeneratingConnectionsCollection.LGCs), 503 granularThroughputDatapoints, 504 } 505 506 currentBucketId := bucketGenerator.Generate() 507 508 seriesCommunicationChannel <- series.SeriesMessage[ThroughputDataPoint, BucketType]{ 509 Type: series.SeriesMessageReserve, Bucket: currentBucketId, 510 } 511 seriesCommunicationChannel <- series.SeriesMessage[ThroughputDataPoint, BucketType]{ 512 Type: series.SeriesMessageMeasure, Bucket: currentBucketId, 513 Measure: utilities.Some[ThroughputDataPoint](throughputDataPoint), 514 } 515 516 loadGeneratingConnectionsCollection.Lock.Lock() 517 currentParallelConnectionCount, err := 518 loadGeneratingConnectionsCollection.Len() 519 loadGeneratingConnectionsCollection.Lock.Unlock() 520 521 if err != nil { 522 if debug.IsWarn(debugging.Level) { 523 fmt.Printf( 524 "%v: Failed to get a count of the number of parallel load-generating connections: %v.\n", 525 debugging, 526 err, 527 ) 528 } 529 } 530 if currentParallelConnectionCount < mnp { 531 // Just add another constants.AdditiveNumberOfLoadGeneratingConnections flows -- that's our only job now! 532 flowsCreated += addFlows( 533 networkActivityCtx, 534 constants.AdditiveNumberOfLoadGeneratingConnections, 535 loadGeneratingConnectionsCollection, 536 lgcGenerator, 537 debugging.Level, 538 ) 539 } else if debug.IsWarn(debugging.Level) { 540 fmt.Printf( 541 "%v: Maximum number of parallel transport-layer connections reached (%d). Not adding another.\n", 542 debugging, 543 mnp, 544 ) 545 } 546 } 547 548 if debug.IsDebug(debugging.Level) { 549 fmt.Printf( 550 "(%s) Stopping a load generator after creating %d flows.\n", 551 debugging.Prefix, flowsCreated) 552 } 553 }() 554 return 555 }