github.com/outbrain/consul@v1.4.5/agent/cache/watch_test.go

package cache

import (
	"context"
	"fmt"
	"sync/atomic"
	"testing"
	"time"

	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"
)
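
// The tests below exercise Cache.Notify, which delivers UpdateEvents on a
// caller-supplied channel. As an editorial sketch of the consumer side (not
// part of the original test suite; the function name is hypothetical), a
// watcher typically drains the channel in a select loop and uses
// CorrelationID to tell apart multiple watches registered on the same
// channel:
func exampleDrainUpdates(ctx context.Context, ch <-chan UpdateEvent) {
	for {
		select {
		case <-ctx.Done():
			// Canceling the context passed to Notify ends the watch; as the
			// comments in TestCacheNotify note, the internal goroutines exit
			// once any in-flight blocking query returns.
			return
		case u := <-ch:
			if u.Err != nil {
				// Errors arrive as events too (see the backoff tests below)
				// rather than terminating the watch.
				continue
			}
			switch u.CorrelationID {
			case "test":
				_ = u.Result // handle updates for the first watch
			case "test2":
				_ = u.Result // handle updates for the second watch
			}
		}
	}
}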

// Test that a type registered with a periodic refresh can be watched.
func TestCacheNotify(t *testing.T) {
	t.Parallel()

	typ := TestType(t)
	defer typ.AssertExpectations(t)
	c := TestCache(t)
	c.RegisterType("t", typ, &RegisterOptions{
		Refresh: false,
	})

	// Setup triggers to control when "updates" should be delivered
	trigger := make([]chan time.Time, 4)
	for i := range trigger {
		trigger[i] = make(chan time.Time)
	}

	// Configure the type
	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once().Run(func(args mock.Arguments) {
		// Assert the right request type - all real Fetch implementations do this
		// so it keeps us honest that Watch doesn't require type mangling which
		// will break in real life (hint: it did on the first attempt)
		_, ok := args.Get(1).(*MockRequest)
		require.True(t, ok)
	})
	typ.Static(FetchResult{Value: 12, Index: 5}, nil).Once().WaitUntil(trigger[0])
	typ.Static(FetchResult{Value: 12, Index: 5}, nil).Once().WaitUntil(trigger[1])
	typ.Static(FetchResult{Value: 42, Index: 7}, nil).Once().WaitUntil(trigger[2])
	// It's timing-dependent whether the blocking loop manages to make another
	// call before we cancel, so don't require it. We need a higher index here
	// because if the index is the same then the cache Get will not return until
	// the full 10 min timeout expires. This causes the last fetch to return
	// after cancellation as if it had timed out.
	typ.Static(FetchResult{Value: 42, Index: 8}, nil).WaitUntil(trigger[3])

	require := require.New(t)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ch := make(chan UpdateEvent)

	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test", ch)
	require.NoError(err)

	// Should receive the first result pretty soon
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        1,
		Meta:          ResultMeta{Hit: false, Index: 4},
		Err:           nil,
	})

	// There should be no more updates delivered yet
	require.Len(ch, 0)

	// Trigger blocking query to return a "change"
	close(trigger[0])

	// Should receive the next result pretty soon
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        12,
		// Note these are never cache "hits" because blocking will wait until
		// there is a new value, at which point it's not considered a hit.
		Meta: ResultMeta{Hit: false, Index: 5},
		Err:  nil,
	})

	// Register a second observer using the same chan and request. Note that
	// this is testing a few things implicitly:
	//  - that multiple watchers on the same cache entity are de-duped in their
	//    requests to the "backend"
	//  - that multiple watchers can distinguish their results using correlationID
	err = c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test2", ch)
	require.NoError(err)

	// Should get the test2 notify immediately, and it should be a cache hit
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test2",
		Result:        12,
		Meta:          ResultMeta{Hit: true, Index: 5},
		Err:           nil,
	})

	// We could wait for a full timeout but we can't directly observe it, so
	// simulate the behavior by triggering a response with the same value and
	// index as the last one.
	close(trigger[1])

	// We should NOT be notified about that. Note this is timing-dependent, but
	// it's only a sanity check; if we somehow _do_ get the change delivered
	// later than 10ms, the next value assertion will fail anyway.
	time.Sleep(10 * time.Millisecond)
	require.Len(ch, 0)

	// Trigger final update
	close(trigger[2])

	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        42,
		Meta:          ResultMeta{Hit: false, Index: 7},
		Err:           nil,
	}, UpdateEvent{
		CorrelationID: "test2",
		Result:        42,
		Meta:          ResultMeta{Hit: false, Index: 7},
		Err:           nil,
	})

	// Sanity check that closing the chan before the context is canceled
	// doesn't panic
	//close(ch)

	// Close context
	cancel()

	// It's likely but not certain that at least one of the watchers was blocked
	// on the next cache Get, so trigger that to time out so we can observe the
	// watch goroutines being cleaned up. This is necessary since currently we
	// have no way to interrupt a blocking query. In practice it's fine to know
	// that after 10 mins max the blocking query will return and the resources
	// will be cleaned.
	close(trigger[3])

	// I want to test that canceling the context cleans up goroutines (which it
	// does from manual verification with a debugger etc.). I had a check based
	// on a similar approach to https://golang.org/src/net/http/main_test.go#L60
	// but it was just too flaky because it relies on the timing of the error
	// backoff timer goroutines and similar, so I've given up for now as I have
	// more important things to get working.
}
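
// TestCacheNotifyPolling drives the same Notify interface with a type that
// does not support blocking queries (TestTypeNonBlocking). Without an index
// to long-poll on, the cache has to re-fetch periodically; the timings
// asserted below imply a cadence tied to the request's MaxAge (100ms here,
// plus jitter): updates arrive no sooner than ~50ms and no later than ~110ms
// after the previous one. (Editorial summary of the behavior the test
// observes.)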

func TestCacheNotifyPolling(t *testing.T) {
	t.Parallel()

	typ := TestTypeNonBlocking(t)
	defer typ.AssertExpectations(t)
	c := TestCache(t)
	c.RegisterType("t", typ, &RegisterOptions{
		Refresh: false,
	})

	// Configure the type
	typ.Static(FetchResult{Value: 1, Index: 1}, nil).Once().Run(func(args mock.Arguments) {
		// Assert the right request type - all real Fetch implementations do this
		// so it keeps us honest that Watch doesn't require type mangling which
		// will break in real life (hint: it did on the first attempt)
		_, ok := args.Get(1).(*MockRequest)
		require.True(t, ok)
	})
	typ.Static(FetchResult{Value: 12, Index: 1}, nil).Once()
	typ.Static(FetchResult{Value: 42, Index: 1}, nil).Once()

	require := require.New(t)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ch := make(chan UpdateEvent)

	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test", ch)
	require.NoError(err)

	// Should receive the first result pretty soon
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        1,
		Meta:          ResultMeta{Hit: false, Index: 1},
		Err:           nil,
	})

	// There should be no more updates delivered yet
	require.Len(ch, 0)

	// Make sure the updates do not come too quickly
	select {
	case <-time.After(50 * time.Millisecond):
	case <-ch:
		require.Fail("Received update too early")
	}

	// Make sure we get the update without waiting too long
	select {
	case <-time.After(100 * time.Millisecond):
		require.Fail("Didn't receive the notification")
	case result := <-ch:
		require.Equal(result.Result, 12)
		require.Equal(result.CorrelationID, "test")
		require.Equal(result.Meta.Hit, false)
		require.Equal(result.Meta.Index, uint64(1))
		// This is a pretty conservative check; the age should be even lower
		// because, without a second notifier, each returned value was fetched
		// just then rather than served from the cache.
		require.True(result.Meta.Age < 50*time.Millisecond)
		require.NoError(result.Err)
	}

	require.Len(ch, 0)

	// Register a second observer using the same chan and request. Note that
	// this is testing a few things implicitly:
	//  - that multiple watchers on the same cache entity are de-duped in their
	//    requests to the "backend"
	//  - that multiple watchers can distinguish their results using correlationID
	err = c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test2", ch)
	require.NoError(err)

	// Should get the test2 notify immediately, and it should be a cache hit
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test2",
		Result:        12,
		Meta:          ResultMeta{Hit: true, Index: 1},
		Err:           nil,
	})

	require.Len(ch, 0)

	// Wait for the next batch of responses
	events := make([]UpdateEvent, 0)
	// 110 is needed to allow for the jitter
	timeout := time.After(110 * time.Millisecond)

	for i := 0; i < 2; i++ {
		select {
		case <-timeout:
			require.Fail("UpdateEvent not received in time")
		case eve := <-ch:
			events = append(events, eve)
		}
	}

	require.Equal(events[0].Result, 42)
	require.Equal(events[0].Meta.Hit, false)
	require.Equal(events[0].Meta.Index, uint64(1))
	require.True(events[0].Meta.Age < 50*time.Millisecond)
	require.NoError(events[0].Err)
	require.Equal(events[1].Result, 42)
	// Sometimes this would be a hit and other times not, depending on when the
	// various getWithIndex calls fired. If both run concurrently it will not be
	// a cache hit, but the request gets single-flighted and both get notified
	// at the same time.
	// require.Equal(events[1].Meta.Hit, true)
	require.Equal(events[1].Meta.Index, uint64(1))
	require.True(events[1].Meta.Age < 100*time.Millisecond)
	require.NoError(events[1].Err)
}
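
// Both backoff tests below share a pattern: drain events from the watch
// channel for a fixed window and count how many arrived. As an editorial
// sketch (the helper name countEventsFor is hypothetical, not part of this
// package), that pattern could be factored out like so; note the real tests
// additionally assert that each received event carries an error:
func countEventsFor(ch <-chan UpdateEvent, window time.Duration) int {
	n := 0
	timeoutC := time.After(window)
	for {
		select {
		case <-timeoutC:
			// The window elapsed; report how many events were seen.
			return n
		case <-ch:
			n++
		}
	}
}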

// Test that a refresh performs a backoff.
func TestCacheWatch_ErrorBackoff(t *testing.T) {
	t.Parallel()

	typ := TestType(t)
	defer typ.AssertExpectations(t)
	c := TestCache(t)
	c.RegisterType("t", typ, &RegisterOptions{
		Refresh: false,
	})

	// Configure the type
	var retries uint32
	fetchErr := fmt.Errorf("test fetch error")
	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once()
	typ.Static(FetchResult{Value: nil, Index: 5}, fetchErr).Run(func(args mock.Arguments) {
		atomic.AddUint32(&retries, 1)
	})

	require := require.New(t)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ch := make(chan UpdateEvent)

	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello"}), "test", ch)
	require.NoError(err)

	// Should receive the first result pretty soon
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        1,
		Meta:          ResultMeta{Hit: false, Index: 4},
		Err:           nil,
	})

	numErrors := 0
	// Loop for a little while and count how many errors we see reported. If
	// this was running as fast as it could go we'd expect this to be huge. We
	// have to be a little careful here because the watch chan ch doesn't have a
	// large buffer, so we could be artificially slowing down the loop without
	// the backoff actually taking effect. We can validate that by ensuring this
	// test reliably fails without the backoff code.
	timeoutC := time.After(500 * time.Millisecond)
OUT:
	for {
		select {
		case <-timeoutC:
			break OUT
		case u := <-ch:
			numErrors++
			require.Error(u.Err)
		}
	}
	// Must be fewer than 10 failures in that time
	require.True(numErrors < 10, fmt.Sprintf("numErrors: %d", numErrors))

	// Check the number of RPCs as a sanity check too
	actual := atomic.LoadUint32(&retries)
	require.True(actual < 10, fmt.Sprintf("actual: %d", actual))
}
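
// The non-blocking variant below follows the same structure but registers the
// request with MaxAge, which is why its observation window is wider (600ms vs
// 500ms): the first retry cannot happen until MaxAge (100ms) plus jitter has
// elapsed. In both tests the retries counter is updated from the mock's Run
// callback, which fires on the cache's internal fetch goroutine, hence the
// sync/atomic accesses. (Editorial note.)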

// Test that a non-blocking (polling) watch performs a backoff on errors.
func TestCacheWatch_ErrorBackoffNonBlocking(t *testing.T) {
	t.Parallel()

	typ := TestTypeNonBlocking(t)
	defer typ.AssertExpectations(t)
	c := TestCache(t)
	c.RegisterType("t", typ, &RegisterOptions{
		Refresh: false,
	})

	// Configure the type
	var retries uint32
	fetchErr := fmt.Errorf("test fetch error")
	typ.Static(FetchResult{Value: 1, Index: 4}, nil).Once()
	typ.Static(FetchResult{Value: nil, Index: 5}, fetchErr).Run(func(args mock.Arguments) {
		atomic.AddUint32(&retries, 1)
	})

	require := require.New(t)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	ch := make(chan UpdateEvent)

	err := c.Notify(ctx, "t", TestRequest(t, RequestInfo{Key: "hello", MaxAge: 100 * time.Millisecond}), "test", ch)
	require.NoError(err)

	// Should receive the first result pretty soon
	TestCacheNotifyChResult(t, ch, UpdateEvent{
		CorrelationID: "test",
		Result:        1,
		Meta:          ResultMeta{Hit: false, Index: 4},
		Err:           nil,
	})

	numErrors := 0
	// Loop for a little while and count how many errors we see reported. If
	// this was running as fast as it could go we'd expect this to be huge. We
	// have to be a little careful here because the watch chan ch doesn't have a
	// large buffer, so we could be artificially slowing down the loop without
	// the backoff actually taking effect. We can validate that by ensuring this
	// test reliably fails without the backoff code.
	//
	// 100 + 500 milliseconds: 100 because the first retry will not happen
	// until 100ms + jitter have elapsed.
	timeoutC := time.After(600 * time.Millisecond)
OUT:
	for {
		select {
		case <-timeoutC:
			break OUT
		case u := <-ch:
			numErrors++
			require.Error(u.Err)
		}
	}
	// Must be fewer than 10 failures in that time
	require.True(numErrors < 10, fmt.Sprintf("numErrors: %d", numErrors))

	// Check the number of RPCs as a sanity check too
	actual := atomic.LoadUint32(&retries)
	require.True(actual < 10, fmt.Sprintf("actual: %d", actual))
}