github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/integration/cluster.go (about) 1 package integration 2 3 import ( 4 "context" 5 "crypto/tls" 6 "fmt" 7 "math/rand" 8 "net" 9 "sync" 10 "time" 11 12 "google.golang.org/grpc" 13 "google.golang.org/grpc/credentials" 14 15 "github.com/docker/swarmkit/api" 16 "github.com/docker/swarmkit/ca" 17 "github.com/docker/swarmkit/identity" 18 "github.com/docker/swarmkit/log" 19 "github.com/docker/swarmkit/manager/encryption" 20 "github.com/docker/swarmkit/node" 21 "github.com/docker/swarmkit/testutils" 22 "github.com/sirupsen/logrus" 23 ) 24 25 const opsTimeout = 64 * time.Second 26 27 // Cluster is representation of cluster - connected nodes. 28 type testCluster struct { 29 ctx context.Context 30 cancel context.CancelFunc 31 api *dummyAPI 32 nodes map[string]*testNode 33 nodesOrder map[string]int 34 errs chan error 35 wg sync.WaitGroup 36 counter int 37 fips bool 38 } 39 40 var testnameKey struct{} 41 42 // Stop makes best effort to stop all nodes and close connections to them. 43 func (c *testCluster) Stop() error { 44 c.cancel() 45 for _, n := range c.nodes { 46 if err := n.Stop(); err != nil { 47 return err 48 } 49 } 50 c.wg.Wait() 51 close(c.errs) 52 for err := range c.errs { 53 if err != nil { 54 return err 55 } 56 } 57 return nil 58 } 59 60 // RandomManager chooses random manager from cluster. 61 func (c *testCluster) RandomManager() *testNode { 62 var managers []*testNode 63 for _, n := range c.nodes { 64 if n.IsManager() { 65 managers = append(managers, n) 66 } 67 } 68 idx := rand.Intn(len(managers)) 69 return managers[idx] 70 } 71 72 // AddManager adds a node with the Manager role. The node will function as both 73 // an agent and a manager. If lateBind is set, the manager is started before a 74 // remote API port is bound. If rootCA is set, the manager is bootstrapped using 75 // said root CA. These settings only apply to the first manager. 76 func (c *testCluster) AddManager(lateBind bool, rootCA *ca.RootCA) error { 77 // first node 78 var n *testNode 79 if len(c.nodes) == 0 { 80 node, err := newTestNode("", "", lateBind, c.fips) 81 if err != nil { 82 return err 83 } 84 // generate TLS certs for this manager for bootstrapping, else the node will generate its own CA 85 if rootCA != nil { 86 if err := generateCerts(node.stateDir, rootCA, identity.NewID(), ca.ManagerRole, identity.NewID(), true); err != nil { 87 return err 88 } 89 } 90 n = node 91 } else { 92 lateBind = false 93 joinAddr, err := c.RandomManager().node.RemoteAPIAddr() 94 if err != nil { 95 return err 96 } 97 clusterInfo, err := c.GetClusterInfo() 98 if err != nil { 99 return err 100 } 101 node, err := newTestNode(joinAddr, clusterInfo.RootCA.JoinTokens.Manager, false, c.fips) 102 if err != nil { 103 return err 104 } 105 n = node 106 } 107 108 if err := c.AddNode(n); err != nil { 109 return err 110 } 111 112 if lateBind { 113 // Verify that the control API works 114 if _, err := c.GetClusterInfo(); err != nil { 115 return err 116 } 117 return n.node.BindRemote(context.Background(), "127.0.0.1:0", "") 118 } 119 120 return nil 121 } 122 123 // AddAgent adds node with Agent role(doesn't participate in raft cluster). 124 func (c *testCluster) AddAgent() error { 125 // first node 126 if len(c.nodes) == 0 { 127 return fmt.Errorf("there is no manager nodes") 128 } 129 joinAddr, err := c.RandomManager().node.RemoteAPIAddr() 130 if err != nil { 131 return err 132 } 133 clusterInfo, err := c.GetClusterInfo() 134 if err != nil { 135 return err 136 } 137 node, err := newTestNode(joinAddr, clusterInfo.RootCA.JoinTokens.Worker, false, c.fips) 138 if err != nil { 139 return err 140 } 141 return c.AddNode(node) 142 } 143 144 // AddNode adds a new node to the cluster 145 func (c *testCluster) AddNode(n *testNode) error { 146 c.counter++ 147 if err := c.runNode(n, c.counter); err != nil { 148 c.counter-- 149 return err 150 } 151 c.nodes[n.node.NodeID()] = n 152 c.nodesOrder[n.node.NodeID()] = c.counter 153 return nil 154 } 155 156 func (c *testCluster) runNode(n *testNode, nodeOrder int) error { 157 ctx := log.WithLogger(c.ctx, log.L.WithFields( 158 logrus.Fields{ 159 "testnode": nodeOrder, 160 "testname": c.ctx.Value(testnameKey), 161 }, 162 )) 163 164 errCtx, cancel := context.WithCancel(context.Background()) 165 done := make(chan error) 166 defer cancel() 167 defer close(done) 168 169 c.wg.Add(2) 170 go func() { 171 c.errs <- n.node.Start(ctx) 172 c.wg.Done() 173 }() 174 go func(n *node.Node) { 175 err := n.Err(errCtx) 176 select { 177 case <-errCtx.Done(): 178 default: 179 done <- err 180 } 181 c.wg.Done() 182 }(n.node) 183 184 select { 185 case <-n.node.Ready(): 186 case err := <-done: 187 return err 188 case <-time.After(opsTimeout): 189 return fmt.Errorf("node did not ready in time") 190 } 191 192 return nil 193 } 194 195 // CreateService creates dummy service. 196 func (c *testCluster) CreateService(name string, instances int) (string, error) { 197 spec := &api.ServiceSpec{ 198 Annotations: api.Annotations{Name: name}, 199 Mode: &api.ServiceSpec_Replicated{ 200 Replicated: &api.ReplicatedService{ 201 Replicas: uint64(instances), 202 }, 203 }, 204 Task: api.TaskSpec{ 205 Runtime: &api.TaskSpec_Container{ 206 Container: &api.ContainerSpec{Image: "alpine", Command: []string{"sh"}}, 207 }, 208 }, 209 } 210 211 resp, err := c.api.CreateService(context.Background(), &api.CreateServiceRequest{Spec: spec}) 212 if err != nil { 213 return "", err 214 } 215 return resp.Service.ID, nil 216 } 217 218 // Leader returns TestNode for cluster leader. 219 func (c *testCluster) Leader() (*testNode, error) { 220 resp, err := c.api.ListNodes(context.Background(), &api.ListNodesRequest{ 221 Filters: &api.ListNodesRequest_Filters{ 222 Roles: []api.NodeRole{api.NodeRoleManager}, 223 }, 224 }) 225 if err != nil { 226 return nil, err 227 } 228 for _, n := range resp.Nodes { 229 if n.ManagerStatus.Leader { 230 tn, ok := c.nodes[n.ID] 231 if !ok { 232 return nil, fmt.Errorf("leader id is %s, but it isn't found in test cluster object", n.ID) 233 } 234 return tn, nil 235 } 236 } 237 return nil, fmt.Errorf("cluster leader is not found in api response") 238 } 239 240 // RemoveNode removes node entirely. It tries to demote managers. 241 func (c *testCluster) RemoveNode(id string, graceful bool) error { 242 node, ok := c.nodes[id] 243 if !ok { 244 return fmt.Errorf("remove node: node %s not found", id) 245 } 246 // demote before removal 247 if node.IsManager() { 248 if err := c.SetNodeRole(id, api.NodeRoleWorker); err != nil { 249 return fmt.Errorf("demote manager: %v", err) 250 } 251 252 } 253 if err := node.Stop(); err != nil { 254 return err 255 } 256 delete(c.nodes, id) 257 if graceful { 258 if err := testutils.PollFuncWithTimeout(nil, func() error { 259 resp, err := c.api.GetNode(context.Background(), &api.GetNodeRequest{NodeID: id}) 260 if err != nil { 261 return fmt.Errorf("get node: %v", err) 262 } 263 if resp.Node.Status.State != api.NodeStatus_DOWN { 264 return fmt.Errorf("node %s is still not down", id) 265 } 266 return nil 267 }, opsTimeout); err != nil { 268 return err 269 } 270 } 271 if _, err := c.api.RemoveNode(context.Background(), &api.RemoveNodeRequest{NodeID: id, Force: !graceful}); err != nil { 272 return fmt.Errorf("remove node: %v", err) 273 } 274 return nil 275 } 276 277 // SetNodeRole sets role for node through control api. 278 func (c *testCluster) SetNodeRole(id string, role api.NodeRole) error { 279 node, ok := c.nodes[id] 280 if !ok { 281 return fmt.Errorf("set node role: node %s not found", id) 282 } 283 if node.IsManager() && role == api.NodeRoleManager { 284 return fmt.Errorf("node is already manager") 285 } 286 if !node.IsManager() && role == api.NodeRoleWorker { 287 return fmt.Errorf("node is already worker") 288 } 289 290 var initialTimeout time.Duration 291 // version might change between get and update, so retry 292 for i := 0; i < 5; i++ { 293 time.Sleep(initialTimeout) 294 initialTimeout += 500 * time.Millisecond 295 resp, err := c.api.GetNode(context.Background(), &api.GetNodeRequest{NodeID: id}) 296 if err != nil { 297 return err 298 } 299 spec := resp.Node.Spec.Copy() 300 spec.DesiredRole = role 301 if _, err := c.api.UpdateNode(context.Background(), &api.UpdateNodeRequest{ 302 NodeID: id, 303 Spec: spec, 304 NodeVersion: &resp.Node.Meta.Version, 305 }); err != nil { 306 // there possible problems on calling update node because redirecting 307 // node or leader might want to shut down 308 if testutils.ErrorDesc(err) == "update out of sequence" { 309 continue 310 } 311 return err 312 } 313 if role == api.NodeRoleManager { 314 // wait to become manager 315 return testutils.PollFuncWithTimeout(nil, func() error { 316 if !node.IsManager() { 317 return fmt.Errorf("node is still not a manager") 318 } 319 return nil 320 }, opsTimeout) 321 } 322 // wait to become worker 323 return testutils.PollFuncWithTimeout(nil, func() error { 324 if node.IsManager() { 325 return fmt.Errorf("node is still not a worker") 326 } 327 return nil 328 }, opsTimeout) 329 } 330 return fmt.Errorf("set role %s for node %s, got sequence error 5 times", role, id) 331 } 332 333 // Starts a node from a stopped state 334 func (c *testCluster) StartNode(id string) error { 335 n, ok := c.nodes[id] 336 if !ok { 337 return fmt.Errorf("set node role: node %s not found", id) 338 } 339 if err := c.runNode(n, c.nodesOrder[id]); err != nil { 340 return err 341 } 342 if n.node.NodeID() != id { 343 return fmt.Errorf("restarted node does not have have the same ID") 344 } 345 return nil 346 } 347 348 func (c *testCluster) GetClusterInfo() (*api.Cluster, error) { 349 clusterInfo, err := c.api.ListClusters(context.Background(), &api.ListClustersRequest{}) 350 if err != nil { 351 return nil, err 352 } 353 if len(clusterInfo.Clusters) != 1 { 354 return nil, fmt.Errorf("number of clusters in storage: %d; expected 1", len(clusterInfo.Clusters)) 355 } 356 return clusterInfo.Clusters[0], nil 357 } 358 359 func (c *testCluster) RotateRootCA(cert, key []byte) error { 360 // poll in case something else changes the cluster before we can update it 361 return testutils.PollFuncWithTimeout(nil, func() error { 362 clusterInfo, err := c.GetClusterInfo() 363 if err != nil { 364 return err 365 } 366 newSpec := clusterInfo.Spec.Copy() 367 newSpec.CAConfig.SigningCACert = cert 368 newSpec.CAConfig.SigningCAKey = key 369 _, err = c.api.UpdateCluster(context.Background(), &api.UpdateClusterRequest{ 370 ClusterID: clusterInfo.ID, 371 Spec: newSpec, 372 ClusterVersion: &clusterInfo.Meta.Version, 373 }) 374 return err 375 }, opsTimeout) 376 } 377 378 func (c *testCluster) RotateUnlockKey() error { 379 // poll in case something else changes the cluster before we can update it 380 return testutils.PollFuncWithTimeout(nil, func() error { 381 clusterInfo, err := c.GetClusterInfo() 382 if err != nil { 383 return err 384 } 385 _, err = c.api.UpdateCluster(context.Background(), &api.UpdateClusterRequest{ 386 ClusterID: clusterInfo.ID, 387 Spec: &clusterInfo.Spec, 388 ClusterVersion: &clusterInfo.Meta.Version, 389 Rotation: api.KeyRotation{ 390 ManagerUnlockKey: true, 391 }, 392 }) 393 return err 394 }, opsTimeout) 395 } 396 397 func (c *testCluster) AutolockManagers(autolock bool) error { 398 // poll in case something else changes the cluster before we can update it 399 return testutils.PollFuncWithTimeout(nil, func() error { 400 clusterInfo, err := c.GetClusterInfo() 401 if err != nil { 402 return err 403 } 404 newSpec := clusterInfo.Spec.Copy() 405 newSpec.EncryptionConfig.AutoLockManagers = autolock 406 _, err = c.api.UpdateCluster(context.Background(), &api.UpdateClusterRequest{ 407 ClusterID: clusterInfo.ID, 408 Spec: newSpec, 409 ClusterVersion: &clusterInfo.Meta.Version, 410 }) 411 return err 412 }, opsTimeout) 413 } 414 415 func (c *testCluster) GetUnlockKey() (string, error) { 416 opts := []grpc.DialOption{} 417 insecureCreds := credentials.NewTLS(&tls.Config{InsecureSkipVerify: true}) 418 opts = append(opts, grpc.WithTransportCredentials(insecureCreds)) 419 opts = append(opts, grpc.WithDialer( 420 func(addr string, timeout time.Duration) (net.Conn, error) { 421 return net.DialTimeout("unix", addr, timeout) 422 })) 423 conn, err := grpc.Dial(c.RandomManager().config.ListenControlAPI, opts...) 424 if err != nil { 425 return "", err 426 } 427 428 resp, err := api.NewCAClient(conn).GetUnlockKey(context.Background(), &api.GetUnlockKeyRequest{}) 429 if err != nil { 430 return "", err 431 } 432 433 return encryption.HumanReadableKey(resp.UnlockKey), nil 434 }