github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/manager/manager_test.go (about) 1 package manager 2 3 import ( 4 "bytes" 5 "context" 6 "crypto/tls" 7 "encoding/pem" 8 "errors" 9 "fmt" 10 "io/ioutil" 11 "os" 12 "path/filepath" 13 "testing" 14 "time" 15 16 "google.golang.org/grpc" 17 "google.golang.org/grpc/credentials" 18 19 "github.com/docker/swarmkit/api" 20 "github.com/docker/swarmkit/ca" 21 "github.com/docker/swarmkit/ca/keyutils" 22 cautils "github.com/docker/swarmkit/ca/testutils" 23 "github.com/docker/swarmkit/manager/dispatcher" 24 "github.com/docker/swarmkit/manager/encryption" 25 "github.com/docker/swarmkit/manager/state/raft/storage" 26 "github.com/docker/swarmkit/manager/state/store" 27 "github.com/docker/swarmkit/testutils" 28 "github.com/stretchr/testify/require" 29 ) 30 31 func TestManager(t *testing.T) { 32 temp, err := ioutil.TempFile("", "test-socket") 33 require.NoError(t, err) 34 require.NoError(t, temp.Close()) 35 require.NoError(t, os.Remove(temp.Name())) 36 37 defer os.RemoveAll(temp.Name()) 38 39 stateDir, err := ioutil.TempDir("", "test-raft") 40 require.NoError(t, err) 41 defer os.RemoveAll(stateDir) 42 43 tc := cautils.NewTestCA(t, func(p ca.CertPaths) *ca.KeyReadWriter { 44 return ca.NewKeyReadWriter(p, []byte("kek"), nil) 45 }) 46 defer tc.Stop() 47 48 agentSecurityConfig, err := tc.NewNodeConfig(ca.WorkerRole) 49 require.NoError(t, err) 50 agentDiffOrgSecurityConfig, err := tc.NewNodeConfigOrg(ca.WorkerRole, "another-org") 51 require.NoError(t, err) 52 managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole) 53 require.NoError(t, err) 54 55 m, err := New(&Config{ 56 RemoteAPI: &RemoteAddrs{ListenAddr: "127.0.0.1:0"}, 57 ControlAPI: temp.Name(), 58 StateDir: stateDir, 59 SecurityConfig: managerSecurityConfig, 60 AutoLockManagers: true, 61 UnlockKey: []byte("kek"), 62 RootCAPaths: tc.Paths.RootCA, 63 }) 64 require.NoError(t, err) 65 require.NotNil(t, m) 66 67 tcpAddr := m.Addr() 68 69 done := make(chan error) 70 defer close(done) 71 go func() { 72 done <- m.Run(tc.Context) 73 }() 74 75 opts := []grpc.DialOption{ 76 grpc.WithTimeout(10 * time.Second), 77 grpc.WithTransportCredentials(agentSecurityConfig.ClientTLSCreds), 78 } 79 80 conn, err := grpc.Dial(tcpAddr, opts...) 81 require.NoError(t, err) 82 defer func() { 83 require.NoError(t, conn.Close()) 84 }() 85 86 // We have to send a dummy request to verify if the connection is actually up. 87 client := api.NewDispatcherClient(conn) 88 require.NoError(t, testutils.PollFuncWithTimeout(nil, func() error { 89 _, err = client.Heartbeat(tc.Context, &api.HeartbeatRequest{}) 90 if dispatcher.ErrNodeNotRegistered.Error() != testutils.ErrorDesc(err) { 91 return err 92 } 93 _, err = client.Session(tc.Context, &api.SessionRequest{}) 94 return err 95 }, 1*time.Second)) 96 97 // Try to have a client in a different org access this manager 98 opts = []grpc.DialOption{ 99 grpc.WithTimeout(10 * time.Second), 100 grpc.WithTransportCredentials(agentDiffOrgSecurityConfig.ClientTLSCreds), 101 } 102 103 conn2, err := grpc.Dial(tcpAddr, opts...) 104 require.NoError(t, err) 105 defer func() { 106 require.NoError(t, conn2.Close()) 107 }() 108 109 client = api.NewDispatcherClient(conn2) 110 _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) 111 require.Contains(t, testutils.ErrorDesc(err), "Permission denied: unauthorized peer role: rpc error: code = PermissionDenied desc = Permission denied: remote certificate not part of organization") 112 113 // Verify that requests to the various GRPC services running on TCP 114 // are rejected if they don't have certs. 115 opts = []grpc.DialOption{ 116 grpc.WithTimeout(10 * time.Second), 117 grpc.WithTransportCredentials(credentials.NewTLS(&tls.Config{InsecureSkipVerify: true})), 118 } 119 120 noCertConn, err := grpc.Dial(tcpAddr, opts...) 121 require.NoError(t, err) 122 defer func() { 123 require.NoError(t, noCertConn.Close()) 124 }() 125 126 client = api.NewDispatcherClient(noCertConn) 127 _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) 128 require.EqualError(t, err, "rpc error: code = PermissionDenied desc = Permission denied: unauthorized peer role: rpc error: code = PermissionDenied desc = no client certificates in request") 129 130 controlClient := api.NewControlClient(noCertConn) 131 _, err = controlClient.ListNodes(context.Background(), &api.ListNodesRequest{}) 132 require.EqualError(t, err, "rpc error: code = PermissionDenied desc = Permission denied: unauthorized peer role: rpc error: code = PermissionDenied desc = no client certificates in request") 133 134 raftClient := api.NewRaftMembershipClient(noCertConn) 135 _, err = raftClient.Join(context.Background(), &api.JoinRequest{}) 136 require.EqualError(t, err, "rpc error: code = PermissionDenied desc = Permission denied: unauthorized peer role: rpc error: code = PermissionDenied desc = no client certificates in request") 137 138 opts = []grpc.DialOption{ 139 grpc.WithTimeout(10 * time.Second), 140 grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds), 141 } 142 143 controlConn, err := grpc.Dial(tcpAddr, opts...) 144 require.NoError(t, err) 145 defer func() { 146 require.NoError(t, controlConn.Close()) 147 }() 148 149 // check that the kek is added to the config 150 var cluster api.Cluster 151 require.NoError(t, testutils.PollFunc(nil, func() error { 152 var ( 153 err error 154 clusters []*api.Cluster 155 ) 156 m.raftNode.MemoryStore().View(func(tx store.ReadTx) { 157 clusters, err = store.FindClusters(tx, store.All) 158 }) 159 if err != nil { 160 return err 161 } 162 if len(clusters) != 1 { 163 return errors.New("wrong number of clusters") 164 } 165 cluster = *clusters[0] 166 return nil 167 168 })) 169 require.NotNil(t, cluster) 170 require.Len(t, cluster.UnlockKeys, 1) 171 require.Equal(t, &api.EncryptionKey{ 172 Subsystem: ca.ManagerRole, 173 Key: []byte("kek"), 174 }, cluster.UnlockKeys[0]) 175 176 // Test removal of the agent node 177 agentID := agentSecurityConfig.ClientTLSCreds.NodeID() 178 require.NoError(t, m.raftNode.MemoryStore().Update(func(tx store.Tx) error { 179 return store.CreateNode(tx, 180 &api.Node{ 181 ID: agentID, 182 Certificate: api.Certificate{ 183 Role: api.NodeRoleWorker, 184 CN: agentID, 185 }, 186 }, 187 ) 188 })) 189 controlClient = api.NewControlClient(controlConn) 190 _, err = controlClient.CreateNetwork(context.Background(), &api.CreateNetworkRequest{ 191 Spec: &api.NetworkSpec{ 192 Annotations: api.Annotations{ 193 Name: "test-network-bad-driver", 194 }, 195 DriverConfig: &api.Driver{ 196 Name: "invalid-must-never-exist", 197 }, 198 }, 199 }) 200 require.Error(t, err) 201 202 _, err = controlClient.RemoveNode(context.Background(), 203 &api.RemoveNodeRequest{ 204 NodeID: agentID, 205 Force: true, 206 }, 207 ) 208 require.NoError(t, err) 209 210 client = api.NewDispatcherClient(conn) 211 _, err = client.Heartbeat(context.Background(), &api.HeartbeatRequest{}) 212 require.Contains(t, testutils.ErrorDesc(err), "removed from swarm") 213 214 m.Stop(tc.Context, false) 215 216 // After stopping we should MAY receive an error from ListenAndServe if 217 // all this happened before WaitForLeader completed, so don't check the 218 // error. 219 <-done 220 } 221 222 // Tests locking and unlocking the manager and key rotations 223 func TestManagerLockUnlock(t *testing.T) { 224 temp, err := ioutil.TempFile("", "test-manager-lock") 225 require.NoError(t, err) 226 require.NoError(t, temp.Close()) 227 require.NoError(t, os.Remove(temp.Name())) 228 229 defer os.RemoveAll(temp.Name()) 230 231 stateDir, err := ioutil.TempDir("", "test-raft") 232 require.NoError(t, err) 233 defer os.RemoveAll(stateDir) 234 235 tc := cautils.NewTestCA(t) 236 defer tc.Stop() 237 238 managerSecurityConfig, err := tc.NewNodeConfig(ca.ManagerRole) 239 require.NoError(t, err) 240 241 _, _, err = managerSecurityConfig.KeyReader().Read() 242 require.NoError(t, err) 243 244 m, err := New(&Config{ 245 RemoteAPI: &RemoteAddrs{ListenAddr: "127.0.0.1:0"}, 246 ControlAPI: temp.Name(), 247 StateDir: stateDir, 248 SecurityConfig: managerSecurityConfig, 249 RootCAPaths: tc.Paths.RootCA, 250 // start off without any encryption 251 }) 252 require.NoError(t, err) 253 require.NotNil(t, m) 254 255 done := make(chan error) 256 defer close(done) 257 go func() { 258 done <- m.Run(tc.Context) 259 }() 260 261 opts := []grpc.DialOption{ 262 grpc.WithTimeout(10 * time.Second), 263 grpc.WithTransportCredentials(managerSecurityConfig.ClientTLSCreds), 264 } 265 266 conn, err := grpc.Dial(m.Addr(), opts...) 267 require.NoError(t, err) 268 defer func() { 269 require.NoError(t, conn.Close()) 270 }() 271 272 // check that there is no kek currently - we are using the API because this 273 // lets us wait until the manager is up and listening, as well 274 var cluster *api.Cluster 275 client := api.NewControlClient(conn) 276 277 require.NoError(t, testutils.PollFuncWithTimeout(nil, func() error { 278 resp, err := client.ListClusters(tc.Context, &api.ListClustersRequest{}) 279 if err != nil { 280 return err 281 } 282 if len(resp.Clusters) == 0 { 283 return fmt.Errorf("no clusters yet") 284 } 285 cluster = resp.Clusters[0] 286 return nil 287 }, 1*time.Second)) 288 289 require.Nil(t, cluster.UnlockKeys) 290 291 // tls key is unencrypted, but there is a DEK 292 unencryptedKey, err := ioutil.ReadFile(tc.Paths.Node.Key) 293 require.NoError(t, err) 294 keyBlock, _ := pem.Decode(unencryptedKey) 295 require.NotNil(t, keyBlock) 296 require.False(t, keyutils.IsEncryptedPEMBlock(keyBlock)) 297 require.Len(t, keyBlock.Headers, 2) 298 currentDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil, false) 299 require.NoError(t, err) 300 require.NotEmpty(t, currentDEK) 301 302 // update the lock key - this may fail due to update out of sequence errors, so try again 303 for { 304 getResp, err := client.GetCluster(tc.Context, &api.GetClusterRequest{ClusterID: cluster.ID}) 305 require.NoError(t, err) 306 cluster = getResp.Cluster 307 308 spec := cluster.Spec.Copy() 309 spec.EncryptionConfig.AutoLockManagers = true 310 updateResp, err := client.UpdateCluster(tc.Context, &api.UpdateClusterRequest{ 311 ClusterID: cluster.ID, 312 ClusterVersion: &cluster.Meta.Version, 313 Spec: spec, 314 }) 315 if testutils.ErrorDesc(err) == "update out of sequence" { 316 continue 317 } 318 // if there is any other type of error, this should fail 319 if err == nil { 320 cluster = updateResp.Cluster 321 } 322 break 323 } 324 require.NoError(t, err) 325 326 caConn := api.NewCAClient(conn) 327 unlockKeyResp, err := caConn.GetUnlockKey(tc.Context, &api.GetUnlockKeyRequest{}) 328 require.NoError(t, err) 329 330 // this should update the TLS key, rotate the DEK, and finish snapshotting 331 var encryptedKey []byte 332 require.NoError(t, testutils.PollFuncWithTimeout(nil, func() error { 333 encryptedKey, err = ioutil.ReadFile(tc.Paths.Node.Key) 334 require.NoError(t, err) // this should never error due to atomic writes 335 336 if bytes.Equal(unencryptedKey, encryptedKey) { 337 return fmt.Errorf("TLS key should have been re-encrypted at least") 338 } 339 340 keyBlock, _ = pem.Decode(encryptedKey) 341 require.NotNil(t, keyBlock) // this should never error due to atomic writes 342 343 if !keyutils.IsEncryptedPEMBlock(keyBlock) { 344 return fmt.Errorf("Key not encrypted") 345 } 346 347 // we don't check that the TLS key has been rotated, because that may take 348 // a little bit, and is best effort only 349 currentDEKString, ok := keyBlock.Headers[pemHeaderRaftDEK] 350 require.True(t, ok) // there should never NOT be a current header 351 nowCurrentDEK, err := decodePEMHeaderValue(currentDEKString, unlockKeyResp.UnlockKey, false) 352 require.NoError(t, err) // it should always be encrypted 353 if bytes.Equal(currentDEK, nowCurrentDEK) { 354 return fmt.Errorf("snapshot has not been finished yet") 355 } 356 357 currentDEK = nowCurrentDEK 358 return nil 359 }, 1*time.Second)) 360 361 _, ok := keyBlock.Headers[pemHeaderRaftPendingDEK] 362 require.False(t, ok) // once the snapshot is done, the pending DEK should have been deleted 363 364 _, ok = keyBlock.Headers[pemHeaderRaftDEKNeedsRotation] 365 require.False(t, ok) 366 367 // verify that the snapshot is readable with the new DEK 368 encrypter, decrypter := encryption.Defaults(currentDEK, false) 369 // we can't use the raftLogger, because the WALs are still locked while the raft node is up. And once we remove 370 // the manager, they'll be deleted. 371 snapshot, err := storage.NewSnapFactory(encrypter, decrypter).New(filepath.Join(stateDir, "raft", "snap-v3-encrypted")).Load() 372 require.NoError(t, err) 373 require.NotNil(t, snapshot) 374 375 // update the lock key to nil 376 for i := 0; i < 3; i++ { 377 getResp, err := client.GetCluster(tc.Context, &api.GetClusterRequest{ClusterID: cluster.ID}) 378 require.NoError(t, err) 379 cluster = getResp.Cluster 380 381 spec := cluster.Spec.Copy() 382 spec.EncryptionConfig.AutoLockManagers = false 383 _, err = client.UpdateCluster(tc.Context, &api.UpdateClusterRequest{ 384 ClusterID: cluster.ID, 385 ClusterVersion: &cluster.Meta.Version, 386 Spec: spec, 387 }) 388 if testutils.ErrorDesc(err) == "update out of sequence" { 389 continue 390 } 391 require.NoError(t, err) 392 } 393 394 // this should update the TLS key 395 var unlockedKey []byte 396 require.NoError(t, testutils.PollFuncWithTimeout(nil, func() error { 397 unlockedKey, err = ioutil.ReadFile(tc.Paths.Node.Key) 398 if err != nil { 399 return err 400 } 401 402 if bytes.Equal(unlockedKey, encryptedKey) { 403 return fmt.Errorf("TLS key should have been rotated") 404 } 405 406 // Previously, we did not check that the TLS key got rotated after going from 407 // unlocked -> locked, because it might take a while for the snapshot to be done, 408 // and the rotation happens on a best effort basis. However, that *could* 409 // have happened, in which case the encrypted key may have changed, so we have 410 // to poll to make sure that the key is eventually decrypted, rather than 411 // just waiting for it to look different. 412 413 // the new key should not be encrypted, and the DEK should also be unencrypted 414 keyBlock, _ = pem.Decode(unlockedKey) 415 if keyBlock == nil { 416 return fmt.Errorf("keyblock is nil") 417 } 418 if keyutils.IsEncryptedPEMBlock(keyBlock) { 419 return fmt.Errorf("key is still encrypted") 420 } 421 return nil 422 }, 1*time.Second)) 423 424 // the new key should not be encrypted, and the DEK should also be unencrypted 425 // but not rotated 426 keyBlock, _ = pem.Decode(unlockedKey) 427 require.NotNil(t, keyBlock) 428 require.False(t, keyutils.IsEncryptedPEMBlock(keyBlock)) 429 430 unencryptedDEK, err := decodePEMHeaderValue(keyBlock.Headers[pemHeaderRaftDEK], nil, false) 431 require.NoError(t, err) 432 require.NotNil(t, unencryptedDEK) 433 require.Equal(t, currentDEK, unencryptedDEK) 434 435 m.Stop(tc.Context, false) 436 437 // After stopping we should MAY receive an error from ListenAndServe if 438 // all this happened before WaitForLeader completed, so don't check the 439 // error. 440 <-done 441 }