github.com/rita33cool1/iot-system-gateway@v0.0.0-20200911033302-e65bde238cc5/docker-engine/daemon/cluster/swarm.go

package cluster // import "github.com/docker/docker/daemon/cluster"

import (
	"fmt"
	"net"
	"strings"
	"time"

	apitypes "github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/filters"
	types "github.com/docker/docker/api/types/swarm"
	"github.com/docker/docker/daemon/cluster/convert"
	"github.com/docker/docker/errdefs"
	"github.com/docker/docker/opts"
	"github.com/docker/docker/pkg/signal"
	swarmapi "github.com/docker/swarmkit/api"
	"github.com/docker/swarmkit/manager/encryption"
	swarmnode "github.com/docker/swarmkit/node"
	"github.com/pkg/errors"
	"github.com/sirupsen/logrus"
	"golang.org/x/net/context"
)

// Init initializes new cluster from user provided request.
func (c *Cluster) Init(req types.InitRequest) (string, error) {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	if c.nr != nil {
		if req.ForceNewCluster {

			// Take c.mu temporarily to wait for presently running
			// API handlers to finish before shutting down the node.
			c.mu.Lock()
			if !c.nr.nodeState.IsManager() {
				c.mu.Unlock()
				return "", errSwarmNotManager
			}
			c.mu.Unlock()

			if err := c.nr.Stop(); err != nil {
				return "", err
			}
		} else {
			return "", errSwarmExists
		}
	}

	if err := validateAndSanitizeInitRequest(&req); err != nil {
		return "", errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return "", err
	}

	advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
	if err != nil {
		return "", err
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return "", err
	}

	localAddr := listenHost

	// If the local address is undetermined, the advertise address
	// will be used as local address, if it belongs to this system.
	// If the advertise address is not local, then we try to find
	// a system address to use as local address. If this fails,
	// we give up and ask the user to pass the listen address.
	if net.ParseIP(localAddr).IsUnspecified() {
		advertiseIP := net.ParseIP(advertiseHost)

		found := false
		for _, systemIP := range listSystemIPs() {
			if systemIP.Equal(advertiseIP) {
				localAddr = advertiseIP.String()
				found = true
				break
			}
		}

		if !found {
			ip, err := c.resolveSystemAddr()
			if err != nil {
				logrus.Warnf("Could not find a local address: %v", err)
				return "", errMustSpecifyListenAddr
			}
			localAddr = ip.String()
		}
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		forceNewCluster: req.ForceNewCluster,
		autolock:        req.AutoLockManagers,
		LocalAddr:       localAddr,
		ListenAddr:      net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr:   net.JoinHostPort(advertiseHost, advertisePort),
		DataPathAddr:    dataPathAddr,
		availability:    req.Availability,
	})
	if err != nil {
		return "", err
	}
	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		c.mu.Lock()
		c.nr = nil
		c.mu.Unlock()
		if !req.ForceNewCluster { // if failure on first attempt don't keep state
			if err := clearPersistentState(c.root); err != nil {
				return "", err
			}
		}
		return "", err
	}
	state := nr.State()
	if state.swarmNode == nil { // should never happen but protect from panic
		return "", errors.New("invalid cluster state for spec initialization")
	}
	if err := initClusterSpec(state.swarmNode, req.Spec); err != nil {
		return "", err
	}
	return state.NodeID(), nil
}

// Join makes current Cluster part of an existing swarm cluster.
func (c *Cluster) Join(req types.JoinRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()
	c.mu.Lock()
	if c.nr != nil {
		c.mu.Unlock()
		return errors.WithStack(errSwarmExists)
	}
	c.mu.Unlock()

	if err := validateAndSanitizeJoinRequest(&req); err != nil {
		return errdefs.InvalidParameter(err)
	}

	listenHost, listenPort, err := resolveListenAddr(req.ListenAddr)
	if err != nil {
		return err
	}

	var advertiseAddr string
	if req.AdvertiseAddr != "" {
		advertiseHost, advertisePort, err := c.resolveAdvertiseAddr(req.AdvertiseAddr, listenPort)
		// For joining, we don't need to provide an advertise address,
		// since the remote side can detect it.
		if err == nil {
			advertiseAddr = net.JoinHostPort(advertiseHost, advertisePort)
		}
	}

	dataPathAddr, err := resolveDataPathAddr(req.DataPathAddr)
	if err != nil {
		return err
	}

	nr, err := c.newNodeRunner(nodeStartConfig{
		RemoteAddr:    req.RemoteAddrs[0],
		ListenAddr:    net.JoinHostPort(listenHost, listenPort),
		AdvertiseAddr: advertiseAddr,
		DataPathAddr:  dataPathAddr,
		joinAddr:      req.RemoteAddrs[0],
		joinToken:     req.JoinToken,
		availability:  req.Availability,
	})
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	select {
	case <-time.After(swarmConnectTimeout):
		return errSwarmJoinTimeoutReached
	case err := <-nr.Ready():
		if err != nil {
			c.mu.Lock()
			c.nr = nil
			c.mu.Unlock()
			if err := clearPersistentState(c.root); err != nil {
				return err
			}
		}
		return err
	}
}
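
// exampleSwarmRequests is an illustrative sketch added for this listing; it is
// not part of the upstream file. It shows the minimal request shapes that Init
// and Join above consume. The addresses and the join token are placeholder
// assumptions, not values produced by this package.
func exampleSwarmRequests() (types.InitRequest, types.JoinRequest) {
	initReq := types.InitRequest{
		// Listen on all interfaces; Init resolves the advertise address
		// against the system addresses before starting the node.
		ListenAddr:       "0.0.0.0:2377",
		AdvertiseAddr:    "192.0.2.10:2377",
		ForceNewCluster:  false,
		AutoLockManagers: false,
		Spec:             types.Spec{}, // merged with swarmkit defaults by initClusterSpec
	}
	joinReq := types.JoinRequest{
		ListenAddr:  "0.0.0.0:2377",
		RemoteAddrs: []string{"192.0.2.10:2377"}, // the first entry is used as the join address
		JoinToken:   "SWMTKN-<placeholder>",
	}
	return initReq, joinReq
}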

// Inspect retrieves the configuration properties of a managed swarm cluster.
func (c *Cluster) Inspect() (types.Swarm, error) {
	var swarm types.Swarm
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		s, err := c.inspect(ctx, state)
		if err != nil {
			return err
		}
		swarm = s
		return nil
	}); err != nil {
		return types.Swarm{}, err
	}
	return swarm, nil
}

func (c *Cluster) inspect(ctx context.Context, state nodeState) (types.Swarm, error) {
	s, err := getSwarm(ctx, state.controlClient)
	if err != nil {
		return types.Swarm{}, err
	}
	return convert.SwarmFromGRPC(*s), nil
}

// Update updates configuration of a managed swarm cluster.
func (c *Cluster) Update(version uint64, spec types.Spec, flags types.UpdateFlags) error {
	return c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		swarm, err := getSwarm(ctx, state.controlClient)
		if err != nil {
			return err
		}

		// Validate spec name.
		if spec.Annotations.Name == "" {
			spec.Annotations.Name = "default"
		} else if spec.Annotations.Name != "default" {
			return errdefs.InvalidParameter(errors.New(`swarm spec must be named "default"`))
		}

		// In Update, the client is expected to provide the complete spec of the
		// swarm, including Name and Labels. If a field is left as 0 or nil, the
		// corresponding swarmkit default value is used.
		clusterSpec, err := convert.SwarmSpecToGRPC(spec)
		if err != nil {
			return errdefs.InvalidParameter(err)
		}

		_, err = state.controlClient.UpdateCluster(
			ctx,
			&swarmapi.UpdateClusterRequest{
				ClusterID: swarm.ID,
				Spec:      &clusterSpec,
				ClusterVersion: &swarmapi.Version{
					Index: version,
				},
				Rotation: swarmapi.KeyRotation{
					WorkerJoinToken:  flags.RotateWorkerToken,
					ManagerJoinToken: flags.RotateManagerToken,
					ManagerUnlockKey: flags.RotateManagerUnlockKey,
				},
			},
		)
		return err
	})
}

// GetUnlockKey returns the unlock key for the swarm.
func (c *Cluster) GetUnlockKey() (string, error) {
	var resp *swarmapi.GetUnlockKeyResponse
	if err := c.lockedManagerAction(func(ctx context.Context, state nodeState) error {
		client := swarmapi.NewCAClient(state.grpcConn)

		r, err := client.GetUnlockKey(ctx, &swarmapi.GetUnlockKeyRequest{})
		if err != nil {
			return err
		}
		resp = r
		return nil
	}); err != nil {
		return "", err
	}
	if len(resp.UnlockKey) == 0 {
		// no key
		return "", nil
	}
	return encryption.HumanReadableKey(resp.UnlockKey), nil
}
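
// exampleRotateJoinTokens is an illustrative sketch added for this listing; it
// is not part of the upstream file. It shows how Update above is typically
// driven: the caller re-submits the complete current spec (obtained from
// Inspect together with the version index) and sets the rotation flags.
func exampleRotateJoinTokens(c *Cluster) error {
	current, err := c.Inspect()
	if err != nil {
		return err
	}
	// The version index must match the cluster's current version; zero or nil
	// fields in the spec fall back to swarmkit defaults.
	return c.Update(current.Version.Index, current.Spec, types.UpdateFlags{
		RotateWorkerToken:  true,
		RotateManagerToken: true,
	})
}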

// UnlockSwarm provides a key to decrypt data that is encrypted at rest.
func (c *Cluster) UnlockSwarm(req types.UnlockRequest) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.RLock()
	state := c.currentNodeState()

	if !state.IsActiveManager() {
		// When the manager is not active, return the error unless the
		// swarm is locked.
		if err := c.errNoManager(state); err != errSwarmLocked {
			c.mu.RUnlock()
			return err
		}
	} else {
		// When the manager is active, the swarm is not locked; return an error.
		c.mu.RUnlock()
		return notLockedError{}
	}

	// We only reach this point when the swarm is locked.
	nr := c.nr
	c.mu.RUnlock()

	key, err := encryption.ParseHumanReadableKey(req.UnlockKey)
	if err != nil {
		return errdefs.InvalidParameter(err)
	}

	config := nr.config
	config.lockKey = key
	if err := nr.Stop(); err != nil {
		return err
	}
	nr, err = c.newNodeRunner(config)
	if err != nil {
		return err
	}

	c.mu.Lock()
	c.nr = nr
	c.mu.Unlock()

	if err := <-nr.Ready(); err != nil {
		if errors.Cause(err) == errSwarmLocked {
			return invalidUnlockKey{}
		}
		return errors.Errorf("swarm component could not be started: %v", err)
	}
	return nil
}

// Leave shuts down Cluster and removes current state.
func (c *Cluster) Leave(force bool) error {
	c.controlMutex.Lock()
	defer c.controlMutex.Unlock()

	c.mu.Lock()
	nr := c.nr
	if nr == nil {
		c.mu.Unlock()
		return errors.WithStack(errNoSwarm)
	}

	state := c.currentNodeState()

	c.mu.Unlock()

	if errors.Cause(state.err) == errSwarmLocked && !force {
		// Leaving a locked swarm without --force is not allowed.
		return errors.WithStack(notAvailableError("Swarm is encrypted and locked. Please unlock it first or use `--force` to ignore this message."))
	}

	if state.IsManager() && !force {
		msg := "You are attempting to leave the swarm on a node that is participating as a manager. "
		if state.IsActiveManager() {
			active, reachable, unreachable, err := managerStats(state.controlClient, state.NodeID())
			if err == nil {
				if active && removingManagerCausesLossOfQuorum(reachable, unreachable) {
					if isLastManager(reachable, unreachable) {
						msg += "Removing the last manager erases all current state of the swarm. Use `--force` to ignore this message. "
						return errors.WithStack(notAvailableError(msg))
					}
					msg += fmt.Sprintf("Removing this node leaves %v managers out of %v. Without a Raft quorum your swarm will be inaccessible. ", reachable-1, reachable+unreachable)
				}
			}
		} else {
			msg += "Doing so may lose the consensus of your cluster. "
		}

		msg += "The only way to restore a swarm that has lost consensus is to reinitialize it with `--force-new-cluster`. Use `--force` to suppress this message."
		return errors.WithStack(notAvailableError(msg))
	}
	// release readers in here
	if err := nr.Stop(); err != nil {
		logrus.Errorf("failed to shut down cluster node: %v", err)
		signal.DumpStacks("")
		return err
	}

	c.mu.Lock()
	c.nr = nil
	c.mu.Unlock()

	if nodeID := state.NodeID(); nodeID != "" {
		nodeContainers, err := c.listContainerForNode(nodeID)
		if err != nil {
			return err
		}
		for _, id := range nodeContainers {
			if err := c.config.Backend.ContainerRm(id, &apitypes.ContainerRmConfig{ForceRemove: true}); err != nil {
				logrus.Errorf("error removing %v: %v", id, err)
			}
		}
	}

	// todo: cleanup optional?
	if err := clearPersistentState(c.root); err != nil {
		return err
	}
	c.config.Backend.DaemonLeavesCluster()
	return nil
}
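
// exampleUnlock is an illustrative sketch added for this listing; it is not
// part of the upstream file. It shows the round trip between the unlock key
// returned by GetUnlockKey and the UnlockSwarm call above; the key would
// normally be supplied by the operator.
func exampleUnlock(c *Cluster, humanReadableKey string) error {
	// ParseHumanReadableKey rejects malformed keys up front; UnlockSwarm
	// performs the same parse again before restarting the node runner.
	if _, err := encryption.ParseHumanReadableKey(humanReadableKey); err != nil {
		return errdefs.InvalidParameter(err)
	}
	return c.UnlockSwarm(types.UnlockRequest{UnlockKey: humanReadableKey})
}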

// Info returns information about the current cluster state.
func (c *Cluster) Info() types.Info {
	info := types.Info{
		NodeAddr: c.GetAdvertiseAddress(),
	}
	c.mu.RLock()
	defer c.mu.RUnlock()

	state := c.currentNodeState()
	info.LocalNodeState = state.status
	if state.err != nil {
		info.Error = state.err.Error()
	}

	ctx, cancel := c.getRequestContext()
	defer cancel()

	if state.IsActiveManager() {
		info.ControlAvailable = true
		swarm, err := c.inspect(ctx, state)
		if err != nil {
			info.Error = err.Error()
		}

		info.Cluster = &swarm.ClusterInfo

		if r, err := state.controlClient.ListNodes(ctx, &swarmapi.ListNodesRequest{}); err != nil {
			info.Error = err.Error()
		} else {
			info.Nodes = len(r.Nodes)
			for _, n := range r.Nodes {
				if n.ManagerStatus != nil {
					info.Managers = info.Managers + 1
				}
			}
		}
	}

	if state.swarmNode != nil {
		for _, r := range state.swarmNode.Remotes() {
			info.RemoteManagers = append(info.RemoteManagers, types.Peer{NodeID: r.NodeID, Addr: r.Addr})
		}
		info.NodeID = state.swarmNode.NodeID()
	}

	return info
}

func validateAndSanitizeInitRequest(req *types.InitRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}

	if req.Spec.Annotations.Name == "" {
		req.Spec.Annotations.Name = "default"
	} else if req.Spec.Annotations.Name != "default" {
		return errors.New(`swarm spec must be named "default"`)
	}

	return nil
}

func validateAndSanitizeJoinRequest(req *types.JoinRequest) error {
	var err error
	req.ListenAddr, err = validateAddr(req.ListenAddr)
	if err != nil {
		return fmt.Errorf("invalid ListenAddr %q: %v", req.ListenAddr, err)
	}
	if len(req.RemoteAddrs) == 0 {
		return errors.New("at least 1 RemoteAddr is required to join")
	}
	for i := range req.RemoteAddrs {
		req.RemoteAddrs[i], err = validateAddr(req.RemoteAddrs[i])
		if err != nil {
			return fmt.Errorf("invalid remoteAddr %q: %v", req.RemoteAddrs[i], err)
		}
	}
	return nil
}

func validateAddr(addr string) (string, error) {
	if addr == "" {
		return addr, errors.New("invalid empty address")
	}
	newaddr, err := opts.ParseTCPAddr(addr, defaultAddr)
	if err != nil {
		// The address could not be parsed as a TCP address; return it
		// unchanged and let the later address-resolution steps handle it.
		return addr, nil
	}
	return strings.TrimPrefix(newaddr, "tcp://"), nil
}
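
// exampleValidateAddr is an illustrative sketch added for this listing; it is
// not part of the upstream file. It records the observable behaviour of
// validateAddr above: a bare host gets the package default port appended via
// opts.ParseTCPAddr, the "tcp://" prefix is stripped, and input that cannot be
// parsed as a TCP address is returned unchanged with a nil error.
func exampleValidateAddr() {
	if addr, err := validateAddr("192.0.2.10"); err == nil {
		logrus.Debugf("normalized address: %s", addr) // host with the default swarm port appended
	}
}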

func initClusterSpec(node *swarmnode.Node, spec types.Spec) error {
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	for conn := range node.ListenControlSocket(ctx) {
		if ctx.Err() != nil {
			return ctx.Err()
		}
		if conn != nil {
			client := swarmapi.NewControlClient(conn)
			var cluster *swarmapi.Cluster
			for i := 0; ; i++ {
				lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
				if err != nil {
					return fmt.Errorf("error on listing clusters: %v", err)
				}
				if len(lcr.Clusters) == 0 {
					if i < 10 {
						time.Sleep(200 * time.Millisecond)
						continue
					}
					return errors.New("empty list of clusters was returned")
				}
				cluster = lcr.Clusters[0]
				break
			}
			// In init, we take the initial default values from swarmkit and merge
			// every non-nil, non-zero value from spec into the GRPC spec, leaving
			// the defaults alone otherwise.
			// Note that this is different from Update(), where the user is expected
			// to specify the complete spec of the cluster (they already know the
			// existing one and which fields to update).
			clusterSpec, err := convert.MergeSwarmSpecToGRPC(spec, cluster.Spec)
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			_, err = client.UpdateCluster(ctx, &swarmapi.UpdateClusterRequest{
				ClusterID:      cluster.ID,
				ClusterVersion: &cluster.Meta.Version,
				Spec:           &clusterSpec,
			})
			if err != nil {
				return fmt.Errorf("error updating cluster settings: %v", err)
			}
			return nil
		}
	}
	return ctx.Err()
}

func (c *Cluster) listContainerForNode(nodeID string) ([]string, error) {
	var ids []string
	filters := filters.NewArgs()
	filters.Add("label", fmt.Sprintf("com.docker.swarm.node.id=%s", nodeID))
	containers, err := c.config.Backend.Containers(&apitypes.ContainerListOptions{
		Filters: filters,
	})
	if err != nil {
		return []string{}, err
	}
	for _, c := range containers {
		ids = append(ids, c.ID)
	}
	return ids, nil
}
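
// waitForFirstCluster is an illustrative sketch added for this listing; it is
// not part of the upstream file. It restates the bounded polling loop used by
// initClusterSpec above (up to ten retries, 200ms apart) as a standalone
// helper, so the retry policy is visible in one place.
func waitForFirstCluster(ctx context.Context, client swarmapi.ControlClient) (*swarmapi.Cluster, error) {
	for i := 0; ; i++ {
		lcr, err := client.ListClusters(ctx, &swarmapi.ListClustersRequest{})
		if err != nil {
			return nil, fmt.Errorf("error on listing clusters: %v", err)
		}
		if len(lcr.Clusters) > 0 {
			return lcr.Clusters[0], nil
		}
		if i >= 10 {
			return nil, errors.New("empty list of clusters was returned")
		}
		time.Sleep(200 * time.Millisecond)
	}
}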