go.etcd.io/etcd@v3.3.27+incompatible/clientv3/integration/server_shutdown_test.go

// Copyright 2017 The etcd Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package integration

import (
	"bytes"
	"context"
	"strings"
	"testing"
	"time"

	"github.com/coreos/etcd/clientv3"
	"github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
	"github.com/coreos/etcd/integration"
	"github.com/coreos/etcd/pkg/testutil"

	"google.golang.org/grpc"
	"google.golang.org/grpc/codes"
	"google.golang.org/grpc/status"
)

// TestBalancerUnderServerShutdownWatch expects that the watch client
// switches its endpoints when the member of the pinned endpoint fails.
func TestBalancerUnderServerShutdownWatch(t *testing.T) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
		Size:               3,
		SkipCreatingClient: true,
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}

	lead := clus.WaitLeader(t)

	// pin eps[lead]
	watchCli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[lead]}})
	if err != nil {
		t.Fatal(err)
	}
	defer watchCli.Close()

	// wait for eps[lead] to be pinned
	mustWaitPinReady(t, watchCli)

	// add all eps to the list, so that when the originally pinned one fails
	// the client can switch to other available eps
	watchCli.SetEndpoints(eps...)

	key, val := "foo", "bar"
	wch := watchCli.Watch(context.Background(), key, clientv3.WithCreatedNotify())
	select {
	case <-wch:
	case <-time.After(integration.RequestWaitTimeout):
		t.Fatal("took too long to create watch")
	}

	donec := make(chan struct{})
	go func() {
		defer close(donec)

		// switch to other endpoints when eps[lead] is shut down
		select {
		case ev := <-wch:
			if werr := ev.Err(); werr != nil {
				t.Error(werr)
			}
			if len(ev.Events) != 1 {
				t.Errorf("expected one event, got %+v", ev)
			}
			if !bytes.Equal(ev.Events[0].Kv.Value, []byte(val)) {
				t.Errorf("expected %q, got %+v", val, ev.Events[0].Kv)
			}
		case <-time.After(7 * time.Second):
			t.Error("took too long to receive events")
		}
	}()

	// shut down eps[lead]
	clus.Members[lead].Terminate(t)

	// writes to eps[(lead+1)%3]
	putCli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{eps[(lead+1)%3]},
		DialOptions: []grpc.DialOption{grpc.WithBlock()},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer putCli.Close()
	for {
		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
		_, err = putCli.Put(ctx, key, val)
		cancel()
		if err == nil {
			break
		}
		// retry on timeouts that are expected while a new leader is elected
		if isClientTimeout(err) || isServerCtxTimeout(err) || err == rpctypes.ErrTimeout || err == rpctypes.ErrTimeoutDueToLeaderFail {
			continue
		}
		t.Fatal(err)
	}

	select {
	case <-donec:
	case <-time.After(5 * time.Second): // enough time for balancer switch
		t.Fatal("took too long to receive events")
	}
}
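
// The tests in this file rely on mustWaitPinReady, a helper defined elsewhere
// in this package. As a rough sketch (an assumption, not the verbatim
// implementation), it issues a short Get so the test blocks until the
// balancer has pinned a reachable endpoint:
//
//	func mustWaitPinReady(t *testing.T, cli *clientv3.Client) {
//		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
//		_, err := cli.Get(ctx, "foo") // any key works; only connectivity matters
//		cancel()
//		if err != nil {
//			t.Fatal(err)
//		}
//	}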

func TestBalancerUnderServerShutdownPut(t *testing.T) {
	testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Put(ctx, "foo", "bar")
		return err
	})
}

func TestBalancerUnderServerShutdownDelete(t *testing.T) {
	testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Delete(ctx, "foo")
		return err
	})
}

func TestBalancerUnderServerShutdownTxn(t *testing.T) {
	testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Txn(ctx).
			If(clientv3.Compare(clientv3.Version("foo"), "=", 0)).
			Then(clientv3.OpPut("foo", "bar")).
			Else(clientv3.OpPut("foo", "baz")).Commit()
		return err
	})
}

// testBalancerUnderServerShutdownMutable expects that when the member of
// the pinned endpoint is shut down, the balancer switches its endpoints
// and all subsequent put/delete/txn requests succeed with new endpoints.
func testBalancerUnderServerShutdownMutable(t *testing.T, op func(*clientv3.Client, context.Context) error) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
		Size:               3,
		SkipCreatingClient: true,
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}

	// pin eps[0]
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{eps[0]},
		DialOptions: []grpc.DialOption{grpc.WithBlock()},
	})
	if err != nil {
		t.Fatal(err)
	}
	defer cli.Close()

	// wait for eps[0] to be pinned
	mustWaitPinReady(t, cli)

	// add all eps to the list, so that when the originally pinned one fails
	// the client can switch to other available eps
	cli.SetEndpoints(eps...)

	// shut down eps[0]
	clus.Members[0].Terminate(t)

	// the balancer switches to other endpoints once eps[0] is explicitly
	// shut down, so the following request should succeed
	// TODO: remove this sleep (expose client connection state?)
	time.Sleep(time.Second)

	cctx, ccancel := context.WithTimeout(context.Background(), time.Second)
	err = op(cli, cctx)
	ccancel()
	if err != nil {
		t.Fatal(err)
	}
}
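
// Any other mutable operation can be exercised through the same harness; for
// example, a hypothetical lease test (illustrative only, not part of the
// original suite) could look like:
//
//	func TestBalancerUnderServerShutdownLeaseGrant(t *testing.T) {
//		testBalancerUnderServerShutdownMutable(t, func(cli *clientv3.Client, ctx context.Context) error {
//			_, err := cli.Grant(ctx, 5)
//			return err
//		})
//	}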

func TestBalancerUnderServerShutdownGetLinearizable(t *testing.T) {
	testBalancerUnderServerShutdownImmutable(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Get(ctx, "foo")
		return err
	}, 7*time.Second) // give enough time for leader election, balancer switch
}

func TestBalancerUnderServerShutdownGetSerializable(t *testing.T) {
	testBalancerUnderServerShutdownImmutable(t, func(cli *clientv3.Client, ctx context.Context) error {
		_, err := cli.Get(ctx, "foo", clientv3.WithSerializable())
		return err
	}, 2*time.Second)
}

// testBalancerUnderServerShutdownImmutable expects that when the member of
// the pinned endpoint is shut down, the balancer switches its endpoints
// and all subsequent range requests succeed with new endpoints.
func testBalancerUnderServerShutdownImmutable(t *testing.T, op func(*clientv3.Client, context.Context) error, timeout time.Duration) {
	defer testutil.AfterTest(t)

	clus := integration.NewClusterV3(t, &integration.ClusterConfig{
		Size:               3,
		SkipCreatingClient: true,
	})
	defer clus.Terminate(t)

	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr(), clus.Members[2].GRPCAddr()}

	// pin eps[0]
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[0]}})
	if err != nil {
		t.Fatalf("failed to create client: %v", err)
	}
	defer cli.Close()

	// wait for eps[0] to be pinned
	mustWaitPinReady(t, cli)

	// add all eps to the list, so that when the originally pinned one fails
	// the client can switch to other available eps
	cli.SetEndpoints(eps...)

	// shut down eps[0]
	clus.Members[0].Terminate(t)

	// the balancer switches to other endpoints once eps[0] is explicitly
	// shut down, so the following request should succeed
	cctx, ccancel := context.WithTimeout(context.Background(), timeout)
	err = op(cli, cctx)
	ccancel()
	if err != nil {
		t.Errorf("failed to finish range request in time %v (timeout %v)", err, timeout)
	}
}

func TestBalancerUnderServerStopInflightLinearizableGetOnRestart(t *testing.T) {
	tt := []pinTestOpt{
		{pinLeader: true, stopPinFirst: true},
		{pinLeader: true, stopPinFirst: false},
		{pinLeader: false, stopPinFirst: true},
		{pinLeader: false, stopPinFirst: false},
	}
	for i := range tt {
		testBalancerUnderServerStopInflightRangeOnRestart(t, true, tt[i])
	}
}

func TestBalancerUnderServerStopInflightSerializableGetOnRestart(t *testing.T) {
	tt := []pinTestOpt{
		{pinLeader: true, stopPinFirst: true},
		{pinLeader: true, stopPinFirst: false},
		{pinLeader: false, stopPinFirst: true},
		{pinLeader: false, stopPinFirst: false},
	}
	for i := range tt {
		testBalancerUnderServerStopInflightRangeOnRestart(t, false, tt[i])
	}
}

// pinTestOpt covers the four combinations of which member is pinned
// (leader or follower) and whether the pinned member is stopped first.
type pinTestOpt struct {
	pinLeader    bool
	stopPinFirst bool
}

// testBalancerUnderServerStopInflightRangeOnRestart expects that an
// in-flight range request reconnects on server restart.
func testBalancerUnderServerStopInflightRangeOnRestart(t *testing.T, linearizable bool, opt pinTestOpt) {
	defer testutil.AfterTest(t)

	cfg := &integration.ClusterConfig{
		Size:               2,
		SkipCreatingClient: true,
	}
	if linearizable {
		cfg.Size = 3
	}

	clus := integration.NewClusterV3(t, cfg)
	defer clus.Terminate(t)
	eps := []string{clus.Members[0].GRPCAddr(), clus.Members[1].GRPCAddr()}
	if linearizable {
		eps = append(eps, clus.Members[2].GRPCAddr())
	}

	lead := clus.WaitLeader(t)

	target := lead
	if !opt.pinLeader {
		target = (target + 1) % 2
	}

	// pin eps[target]
	cli, err := clientv3.New(clientv3.Config{Endpoints: []string{eps[target]}})
	if err != nil {
		t.Fatalf("failed to create client: %v", err)
	}
	defer cli.Close()

	// wait for eps[target] to be pinned
	mustWaitPinReady(t, cli)

	// add all eps to the list, so that when the originally pinned one fails
	// the client can switch to other available eps
	cli.SetEndpoints(eps...)

	if opt.stopPinFirst {
		clus.Members[target].Stop(t)
		// give some time for the balancer to switch before stopping the other
		time.Sleep(time.Second)
		clus.Members[(target+1)%2].Stop(t)
	} else {
		clus.Members[(target+1)%2].Stop(t)
		// the balancer cannot pin the other member since it is already stopped
		clus.Members[target].Stop(t)
	}

	// Three seconds is the minimum interval between an endpoint being marked
	// unhealthy and being removed from the unhealthy list, so it can take
	// more than five seconds to unpin and repin an endpoint.
	// TODO: decrease the timeout once the balancer switch is rewritten.
	clientTimeout := 7 * time.Second

	var gops []clientv3.OpOption
	if !linearizable {
		gops = append(gops, clientv3.WithSerializable())
	}

	donec, readyc := make(chan struct{}), make(chan struct{}, 1)
	go func() {
		defer close(donec)
		ctx, cancel := context.WithTimeout(context.TODO(), clientTimeout)
		readyc <- struct{}{}

		// TODO: The new grpc load balancer will not pin to an endpoint
		// as intended by this test, but it will round-robin to another
		// member within two attempts.
		// Remove this retry loop once the new grpc load balancer provides retry.
		for i := 0; i < 2; i++ {
			_, err = cli.Get(ctx, "abc", gops...)
			if err == nil {
				break
			}
		}
		cancel()
		if err != nil {
			t.Errorf("unexpected error: %v", err)
		}
	}()

	<-readyc
	clus.Members[target].Restart(t)

	select {
	case <-time.After(clientTimeout + integration.RequestWaitTimeout):
		t.Fatalf("timed out waiting for Get [linearizable: %v, opt: %+v]", linearizable, opt)
	case <-donec:
	}
}

// isServerCtxTimeout returns true when the error is a gRPC DeadlineExceeded
// status carrying a context error from the server side: e.g. due to clock
// drift between client and server, the server-side context can time out
// first while the original client-side context has not timed out yet.
func isServerCtxTimeout(err error) bool {
	if err == nil {
		return false
	}
	ev, ok := status.FromError(err)
	if !ok {
		return false
	}
	code := ev.Code()
	return code == codes.DeadlineExceeded && strings.Contains(err.Error(), "context deadline exceeded")
}
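
// For illustration (an assumed error shape, not captured etcd output), a
// server-side timeout typically reaches the client as
//
//	status.Error(codes.DeadlineExceeded, "context deadline exceeded")
//
// which isServerCtxTimeout reports as true, while a deadline that fires on
// the client side surfaces as context.DeadlineExceeded and is matched by
// isClientTimeout below.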

// isClientTimeout returns true on a client-side dial or request timeout.
// In grpc v1.11.3+, dial timeouts can error out with transport.ErrConnClosing;
// previously, dial timeouts would always error out with context.DeadlineExceeded.
func isClientTimeout(err error) bool {
	if err == nil {
		return false
	}
	if err == context.DeadlineExceeded {
		return true
	}
	ev, ok := status.FromError(err)
	if !ok {
		return false
	}
	code := ev.Code()
	return code == codes.DeadlineExceeded
}

func isCanceled(err error) bool {
	if err == nil {
		return false
	}
	if err == context.Canceled {
		return true
	}
	ev, ok := status.FromError(err)
	if !ok {
		return false
	}
	code := ev.Code()
	return code == codes.Canceled
}

func isUnavailable(err error) bool {
	if err == nil {
		return false
	}
	ev, ok := status.FromError(err)
	if !ok {
		return false
	}
	code := ev.Code()
	return code == codes.Unavailable
}
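
// demoErrorClassification is an illustrative sketch (not part of the original
// file) showing how the helpers above classify hand-built errors. The error
// messages are assumptions for demonstration, not captured etcd output.
func demoErrorClassification() {
	// gRPC status errors carry a code that status.FromError can extract.
	_ = isCanceled(status.Error(codes.Canceled, "context canceled"))           // true
	_ = isUnavailable(status.Error(codes.Unavailable, "transport is closing")) // true

	// Raw context errors are not gRPC status errors (status.FromError
	// returns ok==false), so the helpers special-case them.
	_ = isCanceled(context.Canceled)              // true
	_ = isClientTimeout(context.DeadlineExceeded) // true
}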