go.uber.org/yarpc@v1.72.1/yarpctest/stress.go

// Copyright (c) 2022 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package yarpctest

import (
	"context"
	"math/rand"
	"strconv"
	"time"

	"go.uber.org/yarpc/api/peer"
	"go.uber.org/yarpc/api/transport"
	"go.uber.org/yarpc/peer/hostport"
)

// ListStressTest describes the parameters of a stress test for a peer list implementation.
type ListStressTest struct {
	Workers  int
	Duration time.Duration
	Timeout  time.Duration
	// Latency is the minimum latency of an individual call.
	// Higher latencies drive up concurrency per worker.
	Latency time.Duration
	// LowStress disables membership and connection churn, measuring baseline
	// peer selection performance without interference.
	LowStress bool
	// New constructs the peer list under test around the given transport.
	New func(peer.Transport) peer.ChooserList
}

// Logger is the interface needed by reports to log results.
// A *testing.T is an example of such a logger.
type Logger interface {
	Logf(format string, args ...interface{})
}

// Log writes the parameters for a stress test.
func (t ListStressTest) Log(logger Logger) {
	logger.Logf("choosers: %d\n", t.Workers)
	logger.Logf("duration: %s\n", t.Duration)
	logger.Logf("timeout: %s\n", t.Timeout)
}
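
// A typical stress test constructs a ListStressTest around a concrete list
// implementation and runs it from a test, logging both the parameters and the
// resulting report. A minimal sketch, where newList stands in for any
// peer.ChooserList constructor (round-robin, hashring, and so on):
//
//	func TestListStress(t *testing.T) {
//		test := yarpctest.ListStressTest{
//			Workers:  8,
//			Duration: time.Second,
//			Timeout:  100 * time.Millisecond,
//			New: func(trans peer.Transport) peer.ChooserList {
//				return newList(trans)
//			},
//		}
//		test.Log(t)
//		report := test.Run(t)
//		report.Log(t)
//	}
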
// Run runs a stress test on a peer list.
//
// The stress test creates a fake transport and a vector of fake peers.
// The test concurrently chooses peers from the list with some number of
// workers while simultaneously adding and removing peers from the peer list
// and simulating connection and disconnection with those peers.
func (t ListStressTest) Run(logger Logger) *ListStressTestReport {
	transport := NewFakeTransport()
	list := t.New(transport)
	report := newStressReport(0)

	s := stressor{
		stop:      make(chan struct{}),
		reports:   make(chan *ListStressTestReport),
		timeout:   t.Timeout,
		latency:   t.Latency,
		transport: transport,
		list:      list,
		logger:    logger,
	}

	if err := s.list.Start(); err != nil {
		s.logger.Logf("list start error: %s\n", err.Error())
	}

	var stressors int
	if t.LowStress {
		// Connect every peer and add all of them to the list up front;
		// no churn goroutines run in this mode.
		for i := uint(0); i < numIds; i++ {
			s.transport.SimulateConnect(bitIds[i])
		}
		err := s.list.Update(peer.ListUpdates{
			Additions: idsForBits(allIdsMask),
		})
		if err != nil {
			s.logger.Logf("list update error: %s\n", err.Error())
			report.Errors++
		}
		report.Updates++
	} else {
		go s.stressTransport(s.reports)
		go s.stressList(s.reports)
		stressors = 2
	}
	for i := 0; i < t.Workers; i++ {
		go s.stressChooser(i)
	}

	time.Sleep(t.Duration)

	close(s.stop)

	for i := 0; i < t.Workers+stressors; i++ {
		report.merge(<-s.reports)
	}

	if err := s.list.Stop(); err != nil {
		s.logger.Logf("list stop error: %s\n", err.Error())
	}

	return report
}

// ListStressTestReport catalogs the results of a peer list stress test.
//
// Each worker keeps track of its own statistics, then sends them through a
// channel to the test runner.
// This gives each worker independent memory for its log reports and reduces
// the need for synchronization across threads, which could interfere with
// the test.
// The reports get merged into a final report.
type ListStressTestReport struct {
	Workers int
	Errors  int
	Choices int
	Updates int
	Min     time.Duration
	Max     time.Duration
	Total   time.Duration
}

func newStressReport(numWorkers int) *ListStressTestReport {
	return &ListStressTestReport{
		Workers: numWorkers,
		// Min starts at a sentinel high enough that the first real sample
		// will replace it.
		Min: 1000 * time.Second,
	}
}

// Log writes the vital statistics for a stress test.
func (r *ListStressTestReport) Log(logger Logger) {
	logger.Logf("choices: %d\n", r.Choices)
	logger.Logf("updates: %d\n", r.Updates)
	logger.Logf("errors: %d\n", r.Errors)
	logger.Logf("min: %s\n", r.Min)
	if r.Choices != 0 {
		logger.Logf("mean: %s\n", r.Total/time.Duration(r.Choices))
	}
	logger.Logf("max: %s\n", r.Max)
}

// add tracks the latency for a choice of a particular peer.
// The idIndex refers to the peer that was selected.
// In a future version of this test, we can use this id index to show which
// peers were favored by a peer list’s strategy over time.
func (r *ListStressTestReport) add(idIndex int, dur time.Duration) {
	r.Choices++
	r.Min = min(r.Min, dur)
	r.Max = max(r.Max, dur)
	r.Total += dur
}

// merge merges test reports from independent workers.
func (r *ListStressTestReport) merge(s *ListStressTestReport) {
	r.Workers += s.Workers
	r.Errors += s.Errors
	r.Choices += s.Choices
	r.Updates += s.Updates
	r.Min = min(r.Min, s.Min)
	r.Max = max(r.Max, s.Max)
	r.Total += s.Total
}

// stressor tracks the parameters and state for a single stress test worker.
type stressor struct {
	// stop is closed to signal all workers to stop.
	stop chan struct{}
	// reports is the channel to which each worker's final report must be
	// sent to signal that the worker goroutine is done and to transfer
	// ownership of the report memory to the test for merging.
	reports   chan *ListStressTestReport
	timeout   time.Duration
	latency   time.Duration
	transport *FakeTransport
	list      peer.ChooserList
	logger    Logger
}
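
// The reports channel doubles as a completion signal: each worker has
// exclusive ownership of its report until it sends it, so no mutex is
// needed. A minimal sketch of the same fan-in pattern (hypothetical worker
// body, using this file's own report types):
//
//	reports := make(chan *ListStressTestReport)
//	for i := 0; i < workers; i++ {
//		go func() {
//			r := newStressReport(1)
//			// ... record choices, errors, and latencies on r ...
//			reports <- r // hand the report to the merger
//		}()
//	}
//	total := newStressReport(0)
//	for i := 0; i < workers; i++ {
//		total.merge(<-reports)
//	}
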
// stressTransport randomly connects and disconnects each of the 63 known peers.
// These peers may or may not be retained by the peer list at the time the
// connection status changes.
func (s *stressor) stressTransport(reports chan<- *ListStressTestReport) {
	report := newStressReport(0)
	rng := rand.NewSource(0)

	_ = s.transport.Start()
	defer func() {
		_ = s.transport.Stop()
	}()

	// Until we receive a signal to stop...
Loop:
	for {
		select {
		case <-s.stop:
			break Loop
		default:
		}

		// Construct a random bit vector, where each bit signifies whether
		// the peer for that index should be connected or disconnected.
		bits := rng.Int63()
		// A consequence of this is that we may send connected notifications
		// to peers that are already connected, and so on.
		// These are valid cases to exercise in a stress test, even if they
		// are not desirable behaviors of a real transport.
		for i := uint(0); i < numIds; i++ {
			bit := (1 << i) & bits
			if bit != 0 {
				s.transport.SimulateConnect(bitIds[i])
			} else {
				s.transport.SimulateDisconnect(bitIds[i])
			}
		}
	}

	reports <- report
}

// stressList sends membership changes to the peer list, using a random
// subset of all 63 peers each round.
// Each round tends to include half of the peers, removing about a quarter of
// the peers from the previous round and adding about a quarter anew.
// As above, we track which peers the list has using a bit vector, so we can
// use bitwise operations for set differences (&^); see the worked example
// after this function.
// All of the identifiers are interned up front to avoid allocations, which
// lets us send peer list updates very quickly.
func (s *stressor) stressList(reports chan<- *ListStressTestReport) {
	report := newStressReport(0)
	rng := rand.NewSource(1)
	var oldBits int64

	// Until we are asked to stop...
Loop:
	for {
		select {
		case <-s.stop:
			break Loop
		default:
		}

		// Construct peer list updates by giving every peer a 50/50 chance of
		// being included in each round.
		// Use set-difference bitwise operations to construct the lists of
		// identifiers to add and remove from the current and previous bit
		// vectors.
		newBits := rng.Int63()
		additions := idsForBits(newBits &^ oldBits)
		removals := idsForBits(oldBits &^ newBits)
		err := s.list.Update(peer.ListUpdates{
			Additions: additions,
			Removals:  removals,
		})
		if err != nil {
			s.logger.Logf("list update error: %s\n", err.Error())
			report.Errors++
			break Loop
		}
		report.Updates++
		oldBits = newBits
	}

	// Clean up: remove every peer that remained after the last round.
	err := s.list.Update(peer.ListUpdates{
		Removals: idsForBits(oldBits),
	})
	if err != nil {
		s.logger.Logf("final list update error: %s\n", err.Error())
		report.Errors++
	}

	reports <- report
}
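
// A worked example of the set-difference arithmetic above, shrunk to four
// peers instead of 63: with oldBits = 0b0110 and newBits = 0b1100,
//
//	newBits &^ oldBits == 0b1000 // peer 3 is added
//	oldBits &^ newBits == 0b0010 // peer 1 is removed
//
// and peer 2, whose bit is set in both vectors, is left alone, so each round
// sends only the membership delta to the list.
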
// stressChooser rapidly chooses peers from the list until asked to stop,
// recording the latency of every choice. The worker index seeds the random
// number generator.
func (s *stressor) stressChooser(i int) {
	rng := rand.NewSource(int64(i))
	report := newStressReport(1)

	// Until we are asked to stop...
Loop:
	for {
		// We check for the stop signal before choosing instead of after
		// because the continue statement in the error case bypasses the end
		// of the loop to return here and could cause a deadlock if the other
		// stressors exit first.
		select {
		case <-s.stop:
			break Loop
		default:
		}

		// Request a peer from the peer list.
		// We use a random pre-allocated shard key to exercise the hashring
		// in particular, but this is harmless for all other choosers.
		shardKey := shardKeys[rng.Int63()&shardKeysMask]
		ctx, cancel := context.WithTimeout(context.Background(), s.timeout)
		start := time.Now()
		p, onFinish, err := s.list.Choose(ctx, &transport.Request{ShardKey: shardKey})
		stop := time.Now()
		if err != nil {
			cancel()
			s.logger.Logf("choose error: %s\n", err.Error())
			report.Errors++
			continue
		}
		// This is a good point for a future version to inject varying load
		// based on the identifier of the peer that was selected, to show how
		// each list behaves in the face of variations in speed of individual
		// instances.
		if s.latency > 0 {
			time.Sleep(s.latency)
		}
		onFinish(nil)
		cancel()

		// Report the latency and identifier of the selected peer.
		id := p.Identifier()
		index := idIndexes[id]
		report.add(index, stop.Sub(start))
	}

	s.reports <- report
}

// Accessories hereafter.

const (
	// We use a 64-bit vector for peer identifiers, but only get to use 63
	// bits since the Go random number generator only offers 63 bits of
	// entropy.
	numIds     = 63
	allIdsMask = 1<<numIds - 1
	// We will use 256 unique shard keys.
	shardKeysWidth = 8
	numShardKeys   = 1 << shardKeysWidth
	shardKeysMask  = numShardKeys - 1
)

// Pre-allocated vectors for identifiers and shard keys.
var (
	// Each identifier is a string: the name of its own index.
	bitIds [numIds]peer.Identifier
	// Reverse lookup from identifier name to index.
	idIndexes map[string]int
	shardKeys [numShardKeys]string
)

func init() {
	idIndexes = make(map[string]int, numIds)
	for i := 0; i < numIds; i++ {
		name := strconv.Itoa(i)
		bitIds[i] = hostport.PeerIdentifier(name)
		idIndexes[name] = i
	}
	for i := 0; i < numShardKeys; i++ {
		shardKeys[i] = strconv.Itoa(i)
	}
}

// idsForBits expands a bit vector into the corresponding pre-allocated
// identifiers.
func idsForBits(bits int64) []peer.Identifier {
	var ids []peer.Identifier
	for i := uint(0); i < numIds; i++ {
		if (1<<i)&bits != 0 {
			ids = append(ids, bitIds[i])
		}
	}
	return ids
}

func min(a, b time.Duration) time.Duration {
	if a < b {
		return a
	}
	return b
}

func max(a, b time.Duration) time.Duration {
	if a > b {
		return a
	}
	return b
}
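
// A worked example of the accessories above: numIds = 63 makes
// allIdsMask = 0x7fffffffffffffff, exactly the range of rand's Int63, so a
// single random draw can toggle every peer. Likewise shardKeysMask = 0xff,
// so the low eight bits of a draw index one of the 256 interned shard keys
// without allocating in the hot loop:
//
//	ids := idsForBits(0b101)                    // identifiers "0" and "2"
//	key := shardKeys[rng.Int63()&shardKeysMask] // one of "0" through "255"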