github.com/hashicorp/vault/sdk@v0.11.0/helper/testcluster/docker/environment.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package docker 5 6 import ( 7 "bufio" 8 "bytes" 9 "context" 10 "crypto/ecdsa" 11 "crypto/elliptic" 12 "crypto/rand" 13 "crypto/tls" 14 "crypto/x509" 15 "crypto/x509/pkix" 16 "encoding/hex" 17 "encoding/json" 18 "encoding/pem" 19 "errors" 20 "fmt" 21 "io" 22 "io/ioutil" 23 "math/big" 24 mathrand "math/rand" 25 "net" 26 "net/http" 27 "os" 28 "path/filepath" 29 "strconv" 30 "strings" 31 "sync" 32 "testing" 33 "time" 34 35 "github.com/docker/docker/api/types" 36 "github.com/docker/docker/api/types/container" 37 "github.com/docker/docker/api/types/volume" 38 docker "github.com/docker/docker/client" 39 "github.com/hashicorp/go-cleanhttp" 40 log "github.com/hashicorp/go-hclog" 41 "github.com/hashicorp/go-multierror" 42 "github.com/hashicorp/vault/api" 43 dockhelper "github.com/hashicorp/vault/sdk/helper/docker" 44 "github.com/hashicorp/vault/sdk/helper/logging" 45 "github.com/hashicorp/vault/sdk/helper/strutil" 46 "github.com/hashicorp/vault/sdk/helper/testcluster" 47 uberAtomic "go.uber.org/atomic" 48 "golang.org/x/net/http2" 49 ) 50 51 var ( 52 _ testcluster.VaultCluster = &DockerCluster{} 53 _ testcluster.VaultClusterNode = &DockerClusterNode{} 54 ) 55 56 const MaxClusterNameLength = 52 57 58 // DockerCluster is used to managing the lifecycle of the test Vault cluster 59 type DockerCluster struct { 60 ClusterName string 61 62 ClusterNodes []*DockerClusterNode 63 64 // Certificate fields 65 *testcluster.CA 66 RootCAs *x509.CertPool 67 68 barrierKeys [][]byte 69 recoveryKeys [][]byte 70 tmpDir string 71 72 // rootToken is the initial root token created when the Vault cluster is 73 // created. 74 rootToken string 75 DockerAPI *docker.Client 76 ID string 77 Logger log.Logger 78 builtTags map[string]struct{} 79 80 storage testcluster.ClusterStorage 81 } 82 83 func (dc *DockerCluster) NamedLogger(s string) log.Logger { 84 return dc.Logger.Named(s) 85 } 86 87 func (dc *DockerCluster) ClusterID() string { 88 return dc.ID 89 } 90 91 func (dc *DockerCluster) Nodes() []testcluster.VaultClusterNode { 92 ret := make([]testcluster.VaultClusterNode, len(dc.ClusterNodes)) 93 for i := range dc.ClusterNodes { 94 ret[i] = dc.ClusterNodes[i] 95 } 96 return ret 97 } 98 99 func (dc *DockerCluster) GetBarrierKeys() [][]byte { 100 return dc.barrierKeys 101 } 102 103 func testKeyCopy(key []byte) []byte { 104 result := make([]byte, len(key)) 105 copy(result, key) 106 return result 107 } 108 109 func (dc *DockerCluster) GetRecoveryKeys() [][]byte { 110 ret := make([][]byte, len(dc.recoveryKeys)) 111 for i, k := range dc.recoveryKeys { 112 ret[i] = testKeyCopy(k) 113 } 114 return ret 115 } 116 117 func (dc *DockerCluster) GetBarrierOrRecoveryKeys() [][]byte { 118 return dc.GetBarrierKeys() 119 } 120 121 func (dc *DockerCluster) SetBarrierKeys(keys [][]byte) { 122 dc.barrierKeys = make([][]byte, len(keys)) 123 for i, k := range keys { 124 dc.barrierKeys[i] = testKeyCopy(k) 125 } 126 } 127 128 func (dc *DockerCluster) SetRecoveryKeys(keys [][]byte) { 129 dc.recoveryKeys = make([][]byte, len(keys)) 130 for i, k := range keys { 131 dc.recoveryKeys[i] = testKeyCopy(k) 132 } 133 } 134 135 func (dc *DockerCluster) GetCACertPEMFile() string { 136 return dc.CACertPEMFile 137 } 138 139 func (dc *DockerCluster) Cleanup() { 140 dc.cleanup() 141 } 142 143 func (dc *DockerCluster) cleanup() error { 144 var result *multierror.Error 145 for _, node := range dc.ClusterNodes { 146 if err := node.cleanup(); err != nil { 147 result = multierror.Append(result, err) 148 } 149 } 150 151 return result.ErrorOrNil() 152 } 153 154 // GetRootToken returns the root token of the cluster, if set 155 func (dc *DockerCluster) GetRootToken() string { 156 return dc.rootToken 157 } 158 159 func (dc *DockerCluster) SetRootToken(s string) { 160 dc.Logger.Trace("cluster root token changed", "helpful_env", fmt.Sprintf("VAULT_TOKEN=%s VAULT_CACERT=/vault/config/ca.pem", s)) 161 dc.rootToken = s 162 } 163 164 func (n *DockerClusterNode) Name() string { 165 return n.Cluster.ClusterName + "-" + n.NodeID 166 } 167 168 func (dc *DockerCluster) setupNode0(ctx context.Context) error { 169 client := dc.ClusterNodes[0].client 170 171 var resp *api.InitResponse 172 var err error 173 for ctx.Err() == nil { 174 resp, err = client.Sys().Init(&api.InitRequest{ 175 SecretShares: 3, 176 SecretThreshold: 3, 177 }) 178 if err == nil && resp != nil { 179 break 180 } 181 time.Sleep(500 * time.Millisecond) 182 } 183 if err != nil { 184 return err 185 } 186 if resp == nil { 187 return fmt.Errorf("nil response to init request") 188 } 189 190 for _, k := range resp.Keys { 191 raw, err := hex.DecodeString(k) 192 if err != nil { 193 return err 194 } 195 dc.barrierKeys = append(dc.barrierKeys, raw) 196 } 197 198 for _, k := range resp.RecoveryKeys { 199 raw, err := hex.DecodeString(k) 200 if err != nil { 201 return err 202 } 203 dc.recoveryKeys = append(dc.recoveryKeys, raw) 204 } 205 206 dc.rootToken = resp.RootToken 207 client.SetToken(dc.rootToken) 208 dc.ClusterNodes[0].client = client 209 210 err = testcluster.UnsealNode(ctx, dc, 0) 211 if err != nil { 212 return err 213 } 214 215 err = ensureLeaderMatches(ctx, client, func(leader *api.LeaderResponse) error { 216 if !leader.IsSelf { 217 return fmt.Errorf("node %d leader=%v, expected=%v", 0, leader.IsSelf, true) 218 } 219 220 return nil 221 }) 222 223 status, err := client.Sys().SealStatusWithContext(ctx) 224 if err != nil { 225 return err 226 } 227 dc.ID = status.ClusterID 228 return err 229 } 230 231 func (dc *DockerCluster) clusterReady(ctx context.Context) error { 232 for i, node := range dc.ClusterNodes { 233 expectLeader := i == 0 234 err := ensureLeaderMatches(ctx, node.client, func(leader *api.LeaderResponse) error { 235 if expectLeader != leader.IsSelf { 236 return fmt.Errorf("node %d leader=%v, expected=%v", i, leader.IsSelf, expectLeader) 237 } 238 239 return nil 240 }) 241 if err != nil { 242 return err 243 } 244 } 245 246 return nil 247 } 248 249 func (dc *DockerCluster) setupCA(opts *DockerClusterOptions) error { 250 var err error 251 var ca testcluster.CA 252 253 if opts != nil && opts.CAKey != nil { 254 ca.CAKey = opts.CAKey 255 } else { 256 ca.CAKey, err = ecdsa.GenerateKey(elliptic.P256(), rand.Reader) 257 if err != nil { 258 return err 259 } 260 } 261 262 var caBytes []byte 263 if opts != nil && len(opts.CACert) > 0 { 264 caBytes = opts.CACert 265 } else { 266 serialNumber := mathrand.New(mathrand.NewSource(time.Now().UnixNano())).Int63() 267 CACertTemplate := &x509.Certificate{ 268 Subject: pkix.Name{ 269 CommonName: "localhost", 270 }, 271 KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, 272 SerialNumber: big.NewInt(serialNumber), 273 NotBefore: time.Now().Add(-30 * time.Second), 274 NotAfter: time.Now().Add(262980 * time.Hour), 275 BasicConstraintsValid: true, 276 IsCA: true, 277 } 278 caBytes, err = x509.CreateCertificate(rand.Reader, CACertTemplate, CACertTemplate, ca.CAKey.Public(), ca.CAKey) 279 if err != nil { 280 return err 281 } 282 } 283 CACert, err := x509.ParseCertificate(caBytes) 284 if err != nil { 285 return err 286 } 287 ca.CACert = CACert 288 ca.CACertBytes = caBytes 289 290 CACertPEMBlock := &pem.Block{ 291 Type: "CERTIFICATE", 292 Bytes: caBytes, 293 } 294 ca.CACertPEM = pem.EncodeToMemory(CACertPEMBlock) 295 296 ca.CACertPEMFile = filepath.Join(dc.tmpDir, "ca", "ca.pem") 297 err = os.WriteFile(ca.CACertPEMFile, ca.CACertPEM, 0o755) 298 if err != nil { 299 return err 300 } 301 302 marshaledCAKey, err := x509.MarshalECPrivateKey(ca.CAKey) 303 if err != nil { 304 return err 305 } 306 CAKeyPEMBlock := &pem.Block{ 307 Type: "EC PRIVATE KEY", 308 Bytes: marshaledCAKey, 309 } 310 ca.CAKeyPEM = pem.EncodeToMemory(CAKeyPEMBlock) 311 312 dc.CA = &ca 313 314 return nil 315 } 316 317 func (n *DockerClusterNode) setupCert(ip string) error { 318 var err error 319 320 n.ServerKey, err = ecdsa.GenerateKey(elliptic.P256(), rand.Reader) 321 if err != nil { 322 return err 323 } 324 325 serialNumber := mathrand.New(mathrand.NewSource(time.Now().UnixNano())).Int63() 326 certTemplate := &x509.Certificate{ 327 Subject: pkix.Name{ 328 CommonName: n.Name(), 329 }, 330 DNSNames: []string{"localhost", n.Name()}, 331 IPAddresses: []net.IP{net.IPv6loopback, net.ParseIP("127.0.0.1"), net.ParseIP(ip)}, 332 ExtKeyUsage: []x509.ExtKeyUsage{ 333 x509.ExtKeyUsageServerAuth, 334 x509.ExtKeyUsageClientAuth, 335 }, 336 KeyUsage: x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment | x509.KeyUsageKeyAgreement, 337 SerialNumber: big.NewInt(serialNumber), 338 NotBefore: time.Now().Add(-30 * time.Second), 339 NotAfter: time.Now().Add(262980 * time.Hour), 340 } 341 n.ServerCertBytes, err = x509.CreateCertificate(rand.Reader, certTemplate, n.Cluster.CACert, n.ServerKey.Public(), n.Cluster.CAKey) 342 if err != nil { 343 return err 344 } 345 n.ServerCert, err = x509.ParseCertificate(n.ServerCertBytes) 346 if err != nil { 347 return err 348 } 349 n.ServerCertPEM = pem.EncodeToMemory(&pem.Block{ 350 Type: "CERTIFICATE", 351 Bytes: n.ServerCertBytes, 352 }) 353 354 marshaledKey, err := x509.MarshalECPrivateKey(n.ServerKey) 355 if err != nil { 356 return err 357 } 358 n.ServerKeyPEM = pem.EncodeToMemory(&pem.Block{ 359 Type: "EC PRIVATE KEY", 360 Bytes: marshaledKey, 361 }) 362 363 n.ServerCertPEMFile = filepath.Join(n.WorkDir, "cert.pem") 364 err = os.WriteFile(n.ServerCertPEMFile, n.ServerCertPEM, 0o755) 365 if err != nil { 366 return err 367 } 368 369 n.ServerKeyPEMFile = filepath.Join(n.WorkDir, "key.pem") 370 err = os.WriteFile(n.ServerKeyPEMFile, n.ServerKeyPEM, 0o755) 371 if err != nil { 372 return err 373 } 374 375 tlsCert, err := tls.X509KeyPair(n.ServerCertPEM, n.ServerKeyPEM) 376 if err != nil { 377 return err 378 } 379 380 certGetter := NewCertificateGetter(n.ServerCertPEMFile, n.ServerKeyPEMFile, "") 381 if err := certGetter.Reload(); err != nil { 382 return err 383 } 384 tlsConfig := &tls.Config{ 385 Certificates: []tls.Certificate{tlsCert}, 386 RootCAs: n.Cluster.RootCAs, 387 ClientCAs: n.Cluster.RootCAs, 388 ClientAuth: tls.RequestClientCert, 389 NextProtos: []string{"h2", "http/1.1"}, 390 GetCertificate: certGetter.GetCertificate, 391 } 392 393 n.tlsConfig = tlsConfig 394 395 err = os.WriteFile(filepath.Join(n.WorkDir, "ca.pem"), n.Cluster.CACertPEM, 0o755) 396 if err != nil { 397 return err 398 } 399 return nil 400 } 401 402 func NewTestDockerCluster(t *testing.T, opts *DockerClusterOptions) *DockerCluster { 403 if opts == nil { 404 opts = &DockerClusterOptions{} 405 } 406 if opts.ClusterName == "" { 407 opts.ClusterName = strings.ReplaceAll(t.Name(), "/", "-") 408 } 409 if opts.Logger == nil { 410 opts.Logger = logging.NewVaultLogger(log.Trace).Named(t.Name()) 411 } 412 if opts.NetworkName == "" { 413 opts.NetworkName = os.Getenv("TEST_DOCKER_NETWORK_NAME") 414 } 415 416 ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) 417 t.Cleanup(cancel) 418 419 dc, err := NewDockerCluster(ctx, opts) 420 if err != nil { 421 t.Fatal(err) 422 } 423 dc.Logger.Trace("cluster started", "helpful_env", fmt.Sprintf("VAULT_TOKEN=%s VAULT_CACERT=/vault/config/ca.pem", dc.GetRootToken())) 424 return dc 425 } 426 427 func NewDockerCluster(ctx context.Context, opts *DockerClusterOptions) (*DockerCluster, error) { 428 api, err := dockhelper.NewDockerAPI() 429 if err != nil { 430 return nil, err 431 } 432 433 if opts == nil { 434 opts = &DockerClusterOptions{} 435 } 436 if opts.Logger == nil { 437 opts.Logger = log.NewNullLogger() 438 } 439 if opts.VaultLicense == "" { 440 opts.VaultLicense = os.Getenv(testcluster.EnvVaultLicenseCI) 441 } 442 443 dc := &DockerCluster{ 444 DockerAPI: api, 445 ClusterName: opts.ClusterName, 446 Logger: opts.Logger, 447 builtTags: map[string]struct{}{}, 448 CA: opts.CA, 449 storage: opts.Storage, 450 } 451 452 if err := dc.setupDockerCluster(ctx, opts); err != nil { 453 dc.Cleanup() 454 return nil, err 455 } 456 457 return dc, nil 458 } 459 460 // DockerClusterNode represents a single instance of Vault in a cluster 461 type DockerClusterNode struct { 462 NodeID string 463 HostPort string 464 client *api.Client 465 ServerCert *x509.Certificate 466 ServerCertBytes []byte 467 ServerCertPEM []byte 468 ServerCertPEMFile string 469 ServerKey *ecdsa.PrivateKey 470 ServerKeyPEM []byte 471 ServerKeyPEMFile string 472 tlsConfig *tls.Config 473 WorkDir string 474 Cluster *DockerCluster 475 Container *types.ContainerJSON 476 DockerAPI *docker.Client 477 runner *dockhelper.Runner 478 Logger log.Logger 479 cleanupContainer func() 480 RealAPIAddr string 481 ContainerNetworkName string 482 ContainerIPAddress string 483 ImageRepo string 484 ImageTag string 485 DataVolumeName string 486 cleanupVolume func() 487 AllClients []*api.Client 488 } 489 490 func (n *DockerClusterNode) TLSConfig() *tls.Config { 491 return n.tlsConfig.Clone() 492 } 493 494 func (n *DockerClusterNode) APIClient() *api.Client { 495 // We clone to ensure that whenever this method is called, the caller gets 496 // back a pristine client, without e.g. any namespace or token changes that 497 // might pollute a shared client. We clone the config instead of the 498 // client because (1) Client.clone propagates the replicationStateStore and 499 // the httpClient pointers, (2) it doesn't copy the tlsConfig at all, and 500 // (3) if clone returns an error, it doesn't feel as appropriate to panic 501 // below. Who knows why clone might return an error? 502 cfg := n.client.CloneConfig() 503 client, err := api.NewClient(cfg) 504 if err != nil { 505 // It seems fine to panic here, since this should be the same input 506 // we provided to NewClient when we were setup, and we didn't panic then. 507 // Better not to completely ignore the error though, suppose there's a 508 // bug in CloneConfig? 509 panic(fmt.Sprintf("NewClient error on cloned config: %v", err)) 510 } 511 client.SetToken(n.Cluster.rootToken) 512 return client 513 } 514 515 func (n *DockerClusterNode) APIClientN(listenerNumber int) (*api.Client, error) { 516 // We clone to ensure that whenever this method is called, the caller gets 517 // back a pristine client, without e.g. any namespace or token changes that 518 // might pollute a shared client. We clone the config instead of the 519 // client because (1) Client.clone propagates the replicationStateStore and 520 // the httpClient pointers, (2) it doesn't copy the tlsConfig at all, and 521 // (3) if clone returns an error, it doesn't feel as appropriate to panic 522 // below. Who knows why clone might return an error? 523 if listenerNumber >= len(n.AllClients) { 524 return nil, fmt.Errorf("invalid listener number %d", listenerNumber) 525 } 526 cfg := n.AllClients[listenerNumber].CloneConfig() 527 client, err := api.NewClient(cfg) 528 if err != nil { 529 // It seems fine to panic here, since this should be the same input 530 // we provided to NewClient when we were setup, and we didn't panic then. 531 // Better not to completely ignore the error though, suppose there's a 532 // bug in CloneConfig? 533 panic(fmt.Sprintf("NewClient error on cloned config: %v", err)) 534 } 535 client.SetToken(n.Cluster.rootToken) 536 return client, nil 537 } 538 539 // NewAPIClient creates and configures a Vault API client to communicate with 540 // the running Vault Cluster for this DockerClusterNode 541 func (n *DockerClusterNode) apiConfig() (*api.Config, error) { 542 transport := cleanhttp.DefaultPooledTransport() 543 transport.TLSClientConfig = n.TLSConfig() 544 if err := http2.ConfigureTransport(transport); err != nil { 545 return nil, err 546 } 547 client := &http.Client{ 548 Transport: transport, 549 CheckRedirect: func(*http.Request, []*http.Request) error { 550 // This can of course be overridden per-test by using its own client 551 return fmt.Errorf("redirects not allowed in these tests") 552 }, 553 } 554 config := api.DefaultConfig() 555 if config.Error != nil { 556 return nil, config.Error 557 } 558 config.Address = fmt.Sprintf("https://%s", n.HostPort) 559 config.HttpClient = client 560 config.MaxRetries = 0 561 return config, nil 562 } 563 564 func (n *DockerClusterNode) newAPIClient() (*api.Client, error) { 565 config, err := n.apiConfig() 566 if err != nil { 567 return nil, err 568 } 569 client, err := api.NewClient(config) 570 if err != nil { 571 return nil, err 572 } 573 client.SetToken(n.Cluster.GetRootToken()) 574 return client, nil 575 } 576 577 func (n *DockerClusterNode) newAPIClientForAddress(address string) (*api.Client, error) { 578 config, err := n.apiConfig() 579 if err != nil { 580 return nil, err 581 } 582 config.Address = fmt.Sprintf("https://%s", address) 583 client, err := api.NewClient(config) 584 if err != nil { 585 return nil, err 586 } 587 client.SetToken(n.Cluster.GetRootToken()) 588 return client, nil 589 } 590 591 // Cleanup kills the container of the node and deletes its data volume 592 func (n *DockerClusterNode) Cleanup() { 593 n.cleanup() 594 } 595 596 // Stop kills the container of the node 597 func (n *DockerClusterNode) Stop() { 598 n.cleanupContainer() 599 } 600 601 func (n *DockerClusterNode) cleanup() error { 602 if n.Container == nil || n.Container.ID == "" { 603 return nil 604 } 605 n.cleanupContainer() 606 n.cleanupVolume() 607 return nil 608 } 609 610 func (n *DockerClusterNode) createDefaultListenerConfig() map[string]interface{} { 611 return map[string]interface{}{"tcp": map[string]interface{}{ 612 "address": fmt.Sprintf("%s:%d", "0.0.0.0", 8200), 613 "tls_cert_file": "/vault/config/cert.pem", 614 "tls_key_file": "/vault/config/key.pem", 615 "telemetry": map[string]interface{}{ 616 "unauthenticated_metrics_access": true, 617 }, 618 }} 619 } 620 621 func (n *DockerClusterNode) Start(ctx context.Context, opts *DockerClusterOptions) error { 622 if n.DataVolumeName == "" { 623 vol, err := n.DockerAPI.VolumeCreate(ctx, volume.CreateOptions{}) 624 if err != nil { 625 return err 626 } 627 n.DataVolumeName = vol.Name 628 n.cleanupVolume = func() { 629 _ = n.DockerAPI.VolumeRemove(ctx, vol.Name, false) 630 } 631 } 632 vaultCfg := map[string]interface{}{} 633 var listenerConfig []map[string]interface{} 634 listenerConfig = append(listenerConfig, n.createDefaultListenerConfig()) 635 ports := []string{"8200/tcp", "8201/tcp"} 636 637 if opts.VaultNodeConfig != nil && opts.VaultNodeConfig.AdditionalListeners != nil { 638 for _, config := range opts.VaultNodeConfig.AdditionalListeners { 639 cfg := n.createDefaultListenerConfig() 640 listener := cfg["tcp"].(map[string]interface{}) 641 listener["address"] = fmt.Sprintf("%s:%d", "0.0.0.0", config.Port) 642 listener["chroot_namespace"] = config.ChrootNamespace 643 listenerConfig = append(listenerConfig, cfg) 644 portStr := fmt.Sprintf("%d/tcp", config.Port) 645 if strutil.StrListContains(ports, portStr) { 646 return fmt.Errorf("duplicate port %d specified", config.Port) 647 } 648 ports = append(ports, portStr) 649 } 650 } 651 vaultCfg["listener"] = listenerConfig 652 vaultCfg["telemetry"] = map[string]interface{}{ 653 "disable_hostname": true, 654 } 655 656 // Setup storage. Default is raft. 657 storageType := "raft" 658 storageOpts := map[string]interface{}{ 659 // TODO add options from vnc 660 "path": "/vault/file", 661 "node_id": n.NodeID, 662 } 663 664 if opts.Storage != nil { 665 storageType = opts.Storage.Type() 666 storageOpts = opts.Storage.Opts() 667 } 668 669 if opts != nil && opts.VaultNodeConfig != nil { 670 for k, v := range opts.VaultNodeConfig.StorageOptions { 671 if _, ok := storageOpts[k].(string); !ok { 672 storageOpts[k] = v 673 } 674 } 675 } 676 vaultCfg["storage"] = map[string]interface{}{ 677 storageType: storageOpts, 678 } 679 680 //// disable_mlock is required for working in the Docker environment with 681 //// custom plugins 682 vaultCfg["disable_mlock"] = true 683 vaultCfg["api_addr"] = `https://{{- GetAllInterfaces | exclude "flags" "loopback" | attr "address" -}}:8200` 684 vaultCfg["cluster_addr"] = `https://{{- GetAllInterfaces | exclude "flags" "loopback" | attr "address" -}}:8201` 685 686 vaultCfg["administrative_namespace_path"] = opts.AdministrativeNamespacePath 687 688 systemJSON, err := json.Marshal(vaultCfg) 689 if err != nil { 690 return err 691 } 692 err = os.WriteFile(filepath.Join(n.WorkDir, "system.json"), systemJSON, 0o644) 693 if err != nil { 694 return err 695 } 696 697 if opts.VaultNodeConfig != nil { 698 localCfg := *opts.VaultNodeConfig 699 if opts.VaultNodeConfig.LicensePath != "" { 700 b, err := os.ReadFile(opts.VaultNodeConfig.LicensePath) 701 if err != nil || len(b) == 0 { 702 return fmt.Errorf("unable to read LicensePath at %q: %w", opts.VaultNodeConfig.LicensePath, err) 703 } 704 localCfg.LicensePath = "/vault/config/license" 705 dest := filepath.Join(n.WorkDir, "license") 706 err = os.WriteFile(dest, b, 0o644) 707 if err != nil { 708 return fmt.Errorf("error writing license to %q: %w", dest, err) 709 } 710 711 } 712 userJSON, err := json.Marshal(localCfg) 713 if err != nil { 714 return err 715 } 716 err = os.WriteFile(filepath.Join(n.WorkDir, "user.json"), userJSON, 0o644) 717 if err != nil { 718 return err 719 } 720 } 721 722 // Create a temporary cert so vault will start up 723 err = n.setupCert("127.0.0.1") 724 if err != nil { 725 return err 726 } 727 728 caDir := filepath.Join(n.Cluster.tmpDir, "ca") 729 730 // setup plugin bin copy if needed 731 copyFromTo := map[string]string{ 732 n.WorkDir: "/vault/config", 733 caDir: "/usr/local/share/ca-certificates/", 734 } 735 736 var wg sync.WaitGroup 737 wg.Add(1) 738 var seenLogs uberAtomic.Bool 739 logConsumer := func(s string) { 740 if seenLogs.CAS(false, true) { 741 wg.Done() 742 } 743 n.Logger.Trace(s) 744 } 745 logStdout := &LogConsumerWriter{logConsumer} 746 logStderr := &LogConsumerWriter{func(s string) { 747 if seenLogs.CAS(false, true) { 748 wg.Done() 749 } 750 testcluster.JSONLogNoTimestamp(n.Logger, s) 751 }} 752 753 r, err := dockhelper.NewServiceRunner(dockhelper.RunOptions{ 754 ImageRepo: n.ImageRepo, 755 ImageTag: n.ImageTag, 756 // We don't need to run update-ca-certificates in the container, because 757 // we're providing the CA in the raft join call, and otherwise Vault 758 // servers don't talk to one another on the API port. 759 Cmd: append([]string{"server"}, opts.Args...), 760 Env: []string{ 761 // For now we're using disable_mlock, because this is for testing 762 // anyway, and because it prevents us using external plugins. 763 "SKIP_SETCAP=true", 764 "VAULT_LOG_FORMAT=json", 765 "VAULT_LICENSE=" + opts.VaultLicense, 766 }, 767 Ports: ports, 768 ContainerName: n.Name(), 769 NetworkName: opts.NetworkName, 770 CopyFromTo: copyFromTo, 771 LogConsumer: logConsumer, 772 LogStdout: logStdout, 773 LogStderr: logStderr, 774 PreDelete: true, 775 DoNotAutoRemove: true, 776 PostStart: func(containerID string, realIP string) error { 777 err := n.setupCert(realIP) 778 if err != nil { 779 return err 780 } 781 782 // If we signal Vault before it installs its sighup handler, it'll die. 783 wg.Wait() 784 n.Logger.Trace("running poststart", "containerID", containerID, "IP", realIP) 785 return n.runner.RefreshFiles(ctx, containerID) 786 }, 787 Capabilities: []string{"NET_ADMIN"}, 788 OmitLogTimestamps: true, 789 VolumeNameToMountPoint: map[string]string{ 790 n.DataVolumeName: "/vault/file", 791 }, 792 }) 793 if err != nil { 794 return err 795 } 796 n.runner = r 797 798 probe := opts.StartProbe 799 if probe == nil { 800 probe = func(c *api.Client) error { 801 _, err = c.Sys().SealStatus() 802 return err 803 } 804 } 805 svc, _, err := r.StartNewService(ctx, false, false, func(ctx context.Context, host string, port int) (dockhelper.ServiceConfig, error) { 806 config, err := n.apiConfig() 807 if err != nil { 808 return nil, err 809 } 810 config.Address = fmt.Sprintf("https://%s:%d", host, port) 811 client, err := api.NewClient(config) 812 if err != nil { 813 return nil, err 814 } 815 err = probe(client) 816 if err != nil { 817 return nil, err 818 } 819 820 return dockhelper.NewServiceHostPort(host, port), nil 821 }) 822 if err != nil { 823 return err 824 } 825 826 n.HostPort = svc.Config.Address() 827 n.Container = svc.Container 828 netName := opts.NetworkName 829 if netName == "" { 830 if len(svc.Container.NetworkSettings.Networks) > 1 { 831 return fmt.Errorf("Set d.RunOptions.NetworkName instead for container with multiple networks: %v", svc.Container.NetworkSettings.Networks) 832 } 833 for netName = range svc.Container.NetworkSettings.Networks { 834 // Networks above is a map; we just need to find the first and 835 // only key of this map (network name). The range handles this 836 // for us, but we need a loop construction in order to use range. 837 } 838 } 839 n.ContainerNetworkName = netName 840 n.ContainerIPAddress = svc.Container.NetworkSettings.Networks[netName].IPAddress 841 n.RealAPIAddr = "https://" + n.ContainerIPAddress + ":8200" 842 n.cleanupContainer = svc.Cleanup 843 844 client, err := n.newAPIClient() 845 if err != nil { 846 return err 847 } 848 client.SetToken(n.Cluster.rootToken) 849 n.client = client 850 851 n.AllClients = append(n.AllClients, client) 852 853 for _, addr := range svc.StartResult.Addrs[2:] { 854 // The second element of this list of addresses is the cluster address 855 // We do not want to create a client for the cluster address mapping 856 client, err := n.newAPIClientForAddress(addr) 857 if err != nil { 858 return err 859 } 860 client.SetToken(n.Cluster.rootToken) 861 n.AllClients = append(n.AllClients, client) 862 } 863 return nil 864 } 865 866 func (n *DockerClusterNode) Pause(ctx context.Context) error { 867 return n.DockerAPI.ContainerPause(ctx, n.Container.ID) 868 } 869 870 func (n *DockerClusterNode) Restart(ctx context.Context) error { 871 timeout := 5 872 err := n.DockerAPI.ContainerRestart(ctx, n.Container.ID, container.StopOptions{Timeout: &timeout}) 873 if err != nil { 874 return err 875 } 876 877 resp, err := n.DockerAPI.ContainerInspect(ctx, n.Container.ID) 878 if err != nil { 879 return fmt.Errorf("error inspecting container after restart: %s", err) 880 } 881 882 var port int 883 if len(resp.NetworkSettings.Ports) > 0 { 884 for key, binding := range resp.NetworkSettings.Ports { 885 if len(binding) < 1 { 886 continue 887 } 888 889 if key == "8200/tcp" { 890 port, err = strconv.Atoi(binding[0].HostPort) 891 } 892 } 893 } 894 895 if port == 0 { 896 return fmt.Errorf("failed to find container port after restart") 897 } 898 899 hostPieces := strings.Split(n.HostPort, ":") 900 if len(hostPieces) < 2 { 901 return errors.New("could not parse node hostname") 902 } 903 904 n.HostPort = fmt.Sprintf("%s:%d", hostPieces[0], port) 905 906 client, err := n.newAPIClient() 907 if err != nil { 908 return err 909 } 910 client.SetToken(n.Cluster.rootToken) 911 n.client = client 912 913 return nil 914 } 915 916 func (n *DockerClusterNode) AddNetworkDelay(ctx context.Context, delay time.Duration, targetIP string) error { 917 ip := net.ParseIP(targetIP) 918 if ip == nil { 919 return fmt.Errorf("targetIP %q is not an IP address", targetIP) 920 } 921 // Let's attempt to get a unique handle for the filter rule; we'll assume that 922 // every targetIP has a unique last octet, which is true currently for how 923 // we're doing docker networking. 924 lastOctet := ip.To4()[3] 925 926 stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{ 927 "/bin/sh", 928 "-xec", strings.Join([]string{ 929 fmt.Sprintf("echo isolating node %s", targetIP), 930 "apk add iproute2", 931 // If we're running this script a second time on the same node, 932 // the add dev will fail; since we only want to run the netem 933 // command once, we'll do so in the case where the add dev doesn't fail. 934 "tc qdisc add dev eth0 root handle 1: prio && " + 935 fmt.Sprintf("tc qdisc add dev eth0 parent 1:1 handle 2: netem delay %dms", delay/time.Millisecond), 936 // Here we create a u32 filter as per https://man7.org/linux/man-pages/man8/tc-u32.8.html 937 // Its parent is 1:0 (which I guess is the root?) 938 // Its handle must be unique, so we base it on targetIP 939 fmt.Sprintf("tc filter add dev eth0 parent 1:0 protocol ip pref 55 handle ::%x u32 match ip dst %s flowid 2:1", lastOctet, targetIP), 940 }, "; "), 941 }) 942 if err != nil { 943 return err 944 } 945 946 n.Logger.Trace(string(stdout)) 947 n.Logger.Trace(string(stderr)) 948 if exitCode != 0 { 949 return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode) 950 } 951 return nil 952 } 953 954 // PartitionFromCluster will cause the node to be disconnected at the network 955 // level from the rest of the docker cluster. It does so in a way that the node 956 // will not see TCP RSTs and all packets it sends will be "black holed". It 957 // attempts to keep packets to and from the host intact which allows docker 958 // daemon to continue streaming logs and any test code to continue making 959 // requests from the host to the partitioned node. 960 func (n *DockerClusterNode) PartitionFromCluster(ctx context.Context) error { 961 stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{ 962 "/bin/sh", 963 "-xec", strings.Join([]string{ 964 fmt.Sprintf("echo partitioning container from network"), 965 "apk add iproute2", 966 // Get the gateway address for the bridge so we can allow host to 967 // container traffic still. 968 "GW=$(ip r | grep default | grep eth0 | cut -f 3 -d' ')", 969 // First delete the rules in case this is called twice otherwise we'll add 970 // multiple copies and only remove one in Unpartition (yay iptables). 971 // Ignore the error if it didn't exist. 972 "iptables -D INPUT -i eth0 ! -s \"$GW\" -j DROP | true", 973 "iptables -D OUTPUT -o eth0 ! -d \"$GW\" -j DROP | true", 974 // Add rules to drop all packets in and out of the docker network 975 // connection. 976 "iptables -I INPUT -i eth0 ! -s \"$GW\" -j DROP", 977 "iptables -I OUTPUT -o eth0 ! -d \"$GW\" -j DROP", 978 }, "; "), 979 }) 980 if err != nil { 981 return err 982 } 983 984 n.Logger.Trace(string(stdout)) 985 n.Logger.Trace(string(stderr)) 986 if exitCode != 0 { 987 return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode) 988 } 989 return nil 990 } 991 992 // UnpartitionFromCluster reverses a previous call to PartitionFromCluster and 993 // restores full connectivity. Currently assumes the default "bridge" network. 994 func (n *DockerClusterNode) UnpartitionFromCluster(ctx context.Context) error { 995 stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{ 996 "/bin/sh", 997 "-xec", strings.Join([]string{ 998 fmt.Sprintf("echo un-partitioning container from network"), 999 // Get the gateway address for the bridge so we can allow host to 1000 // container traffic still. 1001 "GW=$(ip r | grep default | grep eth0 | cut -f 3 -d' ')", 1002 // Remove the rules, ignore if they are not present or iptables wasn't 1003 // installed yet (i.e. no-one called PartitionFromCluster yet). 1004 "iptables -D INPUT -i eth0 ! -s \"$GW\" -j DROP | true", 1005 "iptables -D OUTPUT -o eth0 ! -d \"$GW\" -j DROP | true", 1006 }, "; "), 1007 }) 1008 if err != nil { 1009 return err 1010 } 1011 1012 n.Logger.Trace(string(stdout)) 1013 n.Logger.Trace(string(stderr)) 1014 if exitCode != 0 { 1015 return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode) 1016 } 1017 return nil 1018 } 1019 1020 type LogConsumerWriter struct { 1021 consumer func(string) 1022 } 1023 1024 func (l LogConsumerWriter) Write(p []byte) (n int, err error) { 1025 // TODO this assumes that we're never passed partial log lines, which 1026 // seems a safe assumption for now based on how docker looks to implement 1027 // logging, but might change in the future. 1028 scanner := bufio.NewScanner(bytes.NewReader(p)) 1029 scanner.Buffer(make([]byte, 64*1024), bufio.MaxScanTokenSize) 1030 for scanner.Scan() { 1031 l.consumer(scanner.Text()) 1032 } 1033 return len(p), nil 1034 } 1035 1036 // DockerClusterOptions has options for setting up the docker cluster 1037 type DockerClusterOptions struct { 1038 testcluster.ClusterOptions 1039 CAKey *ecdsa.PrivateKey 1040 NetworkName string 1041 ImageRepo string 1042 ImageTag string 1043 CA *testcluster.CA 1044 VaultBinary string 1045 Args []string 1046 StartProbe func(*api.Client) error 1047 Storage testcluster.ClusterStorage 1048 } 1049 1050 func ensureLeaderMatches(ctx context.Context, client *api.Client, ready func(response *api.LeaderResponse) error) error { 1051 var leader *api.LeaderResponse 1052 var err error 1053 for ctx.Err() == nil { 1054 leader, err = client.Sys().Leader() 1055 switch { 1056 case err != nil: 1057 case leader == nil: 1058 err = fmt.Errorf("nil response to leader check") 1059 default: 1060 err = ready(leader) 1061 if err == nil { 1062 return nil 1063 } 1064 } 1065 time.Sleep(500 * time.Millisecond) 1066 } 1067 return fmt.Errorf("error checking leader: %v", err) 1068 } 1069 1070 const DefaultNumCores = 3 1071 1072 // creates a managed docker container running Vault 1073 func (dc *DockerCluster) setupDockerCluster(ctx context.Context, opts *DockerClusterOptions) error { 1074 if opts.TmpDir != "" { 1075 if _, err := os.Stat(opts.TmpDir); os.IsNotExist(err) { 1076 if err := os.MkdirAll(opts.TmpDir, 0o700); err != nil { 1077 return err 1078 } 1079 } 1080 dc.tmpDir = opts.TmpDir 1081 } else { 1082 tempDir, err := ioutil.TempDir("", "vault-test-cluster-") 1083 if err != nil { 1084 return err 1085 } 1086 dc.tmpDir = tempDir 1087 } 1088 caDir := filepath.Join(dc.tmpDir, "ca") 1089 if err := os.MkdirAll(caDir, 0o755); err != nil { 1090 return err 1091 } 1092 1093 var numCores int 1094 if opts.NumCores == 0 { 1095 numCores = DefaultNumCores 1096 } else { 1097 numCores = opts.NumCores 1098 } 1099 1100 if dc.CA == nil { 1101 if err := dc.setupCA(opts); err != nil { 1102 return err 1103 } 1104 } 1105 dc.RootCAs = x509.NewCertPool() 1106 dc.RootCAs.AddCert(dc.CA.CACert) 1107 1108 if dc.storage != nil { 1109 if err := dc.storage.Start(ctx, &opts.ClusterOptions); err != nil { 1110 return err 1111 } 1112 } 1113 1114 for i := 0; i < numCores; i++ { 1115 if err := dc.addNode(ctx, opts); err != nil { 1116 return err 1117 } 1118 if opts.SkipInit { 1119 continue 1120 } 1121 if i == 0 { 1122 if err := dc.setupNode0(ctx); err != nil { 1123 return err 1124 } 1125 } else { 1126 if err := dc.joinNode(ctx, i, 0); err != nil { 1127 return err 1128 } 1129 } 1130 } 1131 1132 return nil 1133 } 1134 1135 func (dc *DockerCluster) AddNode(ctx context.Context, opts *DockerClusterOptions) error { 1136 leaderIdx, err := testcluster.LeaderNode(ctx, dc) 1137 if err != nil { 1138 return err 1139 } 1140 if err := dc.addNode(ctx, opts); err != nil { 1141 return err 1142 } 1143 1144 return dc.joinNode(ctx, len(dc.ClusterNodes)-1, leaderIdx) 1145 } 1146 1147 func (dc *DockerCluster) addNode(ctx context.Context, opts *DockerClusterOptions) error { 1148 tag, err := dc.setupImage(ctx, opts) 1149 if err != nil { 1150 return err 1151 } 1152 i := len(dc.ClusterNodes) 1153 nodeID := fmt.Sprintf("core-%d", i) 1154 node := &DockerClusterNode{ 1155 DockerAPI: dc.DockerAPI, 1156 NodeID: nodeID, 1157 Cluster: dc, 1158 WorkDir: filepath.Join(dc.tmpDir, nodeID), 1159 Logger: dc.Logger.Named(nodeID), 1160 ImageRepo: opts.ImageRepo, 1161 ImageTag: tag, 1162 } 1163 dc.ClusterNodes = append(dc.ClusterNodes, node) 1164 if err := os.MkdirAll(node.WorkDir, 0o755); err != nil { 1165 return err 1166 } 1167 if err := node.Start(ctx, opts); err != nil { 1168 return err 1169 } 1170 return nil 1171 } 1172 1173 func (dc *DockerCluster) joinNode(ctx context.Context, nodeIdx int, leaderIdx int) error { 1174 if dc.storage != nil && dc.storage.Type() != "raft" { 1175 // Storage is not raft so nothing to do but unseal. 1176 return testcluster.UnsealNode(ctx, dc, nodeIdx) 1177 } 1178 1179 leader := dc.ClusterNodes[leaderIdx] 1180 1181 if nodeIdx >= len(dc.ClusterNodes) { 1182 return fmt.Errorf("invalid node %d", nodeIdx) 1183 } 1184 node := dc.ClusterNodes[nodeIdx] 1185 client := node.APIClient() 1186 1187 var resp *api.RaftJoinResponse 1188 resp, err := client.Sys().RaftJoinWithContext(ctx, &api.RaftJoinRequest{ 1189 // When running locally on a bridge network, the containers must use their 1190 // actual (private) IP to talk to one another. Our code must instead use 1191 // the portmapped address since we're not on their network in that case. 1192 LeaderAPIAddr: leader.RealAPIAddr, 1193 LeaderCACert: string(dc.CACertPEM), 1194 LeaderClientCert: string(node.ServerCertPEM), 1195 LeaderClientKey: string(node.ServerKeyPEM), 1196 }) 1197 if resp == nil || !resp.Joined { 1198 return fmt.Errorf("nil or negative response from raft join request: %v", resp) 1199 } 1200 if err != nil { 1201 return fmt.Errorf("failed to join cluster: %w", err) 1202 } 1203 1204 return testcluster.UnsealNode(ctx, dc, nodeIdx) 1205 } 1206 1207 func (dc *DockerCluster) setupImage(ctx context.Context, opts *DockerClusterOptions) (string, error) { 1208 if opts == nil { 1209 opts = &DockerClusterOptions{} 1210 } 1211 sourceTag := opts.ImageTag 1212 if sourceTag == "" { 1213 sourceTag = "latest" 1214 } 1215 1216 if opts.VaultBinary == "" { 1217 return sourceTag, nil 1218 } 1219 1220 suffix := "testing" 1221 if sha := os.Getenv("COMMIT_SHA"); sha != "" { 1222 suffix = sha 1223 } 1224 tag := sourceTag + "-" + suffix 1225 if _, ok := dc.builtTags[tag]; ok { 1226 return tag, nil 1227 } 1228 1229 f, err := os.Open(opts.VaultBinary) 1230 if err != nil { 1231 return "", err 1232 } 1233 data, err := io.ReadAll(f) 1234 if err != nil { 1235 return "", err 1236 } 1237 bCtx := dockhelper.NewBuildContext() 1238 bCtx["vault"] = &dockhelper.FileContents{ 1239 Data: data, 1240 Mode: 0o755, 1241 } 1242 1243 containerFile := fmt.Sprintf(` 1244 FROM %s:%s 1245 COPY vault /bin/vault 1246 `, opts.ImageRepo, sourceTag) 1247 1248 _, err = dockhelper.BuildImage(ctx, dc.DockerAPI, containerFile, bCtx, 1249 dockhelper.BuildRemove(true), dockhelper.BuildForceRemove(true), 1250 dockhelper.BuildPullParent(true), 1251 dockhelper.BuildTags([]string{opts.ImageRepo + ":" + tag})) 1252 if err != nil { 1253 return "", err 1254 } 1255 dc.builtTags[tag] = struct{}{} 1256 return tag, nil 1257 } 1258 1259 func (dc *DockerCluster) GetActiveClusterNode() *DockerClusterNode { 1260 ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) 1261 defer cancel() 1262 1263 node, err := testcluster.WaitForActiveNode(ctx, dc) 1264 if err != nil { 1265 panic(fmt.Sprintf("no cluster node became active in timeout window: %v", err)) 1266 } 1267 1268 return dc.ClusterNodes[node] 1269 } 1270 1271 /* Notes on testing the non-bridge network case: 1272 - you need the test itself to be running in a container so that it can use 1273 the network; create the network using 1274 docker network create testvault 1275 - this means that you need to mount the docker socket in that test container, 1276 but on macos there's stuff that prevents that from working; to hack that, 1277 on the host run 1278 sudo ln -s "$HOME/Library/Containers/com.docker.docker/Data/docker.raw.sock" /var/run/docker.sock.raw 1279 - run the test container like 1280 docker run --rm -it --network testvault \ 1281 -v /var/run/docker.sock.raw:/var/run/docker.sock \ 1282 -v $(pwd):/home/circleci/go/src/github.com/hashicorp/vault/ \ 1283 -w /home/circleci/go/src/github.com/hashicorp/vault/ \ 1284 "docker.mirror.hashicorp.services/cimg/go:1.19.2" /bin/bash 1285 - in the container you may need to chown/chmod /var/run/docker.sock; use `docker ps` 1286 to test if it's working 1287 1288 */