github.com/hashicorp/vault/sdk@v0.13.0/helper/testcluster/docker/environment.go

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package docker

import (
	"bufio"
	"bytes"
	"context"
	"crypto/ecdsa"
	"crypto/elliptic"
	"crypto/rand"
	"crypto/tls"
	"crypto/x509"
	"crypto/x509/pkix"
	"encoding/hex"
	"encoding/json"
	"encoding/pem"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"math/big"
	mathrand "math/rand"
	"net"
	"net/http"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/docker/docker/api/types"
	"github.com/docker/docker/api/types/container"
	"github.com/docker/docker/api/types/volume"
	docker "github.com/docker/docker/client"
	"github.com/hashicorp/go-cleanhttp"
	log "github.com/hashicorp/go-hclog"
	"github.com/hashicorp/go-multierror"
	"github.com/hashicorp/vault/api"
	dockhelper "github.com/hashicorp/vault/sdk/helper/docker"
	"github.com/hashicorp/vault/sdk/helper/logging"
	"github.com/hashicorp/vault/sdk/helper/strutil"
	"github.com/hashicorp/vault/sdk/helper/testcluster"
	uberAtomic "go.uber.org/atomic"
	"golang.org/x/net/http2"
)

var (
	_ testcluster.VaultCluster     = &DockerCluster{}
	_ testcluster.VaultClusterNode = &DockerClusterNode{}
)

const MaxClusterNameLength = 52

// DockerCluster is used to manage the lifecycle of the test Vault cluster
type DockerCluster struct {
	ClusterName string

	ClusterNodes []*DockerClusterNode

	// Certificate fields
	*testcluster.CA
	RootCAs *x509.CertPool

	barrierKeys  [][]byte
	recoveryKeys [][]byte
	tmpDir       string

	// rootToken is the initial root token created when the Vault cluster is
	// created.
	rootToken string
	DockerAPI *docker.Client
	ID        string
	Logger    log.Logger
	builtTags map[string]struct{}

	storage testcluster.ClusterStorage
}

func (dc *DockerCluster) NamedLogger(s string) log.Logger {
	return dc.Logger.Named(s)
}

func (dc *DockerCluster) ClusterID() string {
	return dc.ID
}

func (dc *DockerCluster) Nodes() []testcluster.VaultClusterNode {
	ret := make([]testcluster.VaultClusterNode, len(dc.ClusterNodes))
	for i := range dc.ClusterNodes {
		ret[i] = dc.ClusterNodes[i]
	}
	return ret
}

func (dc *DockerCluster) GetBarrierKeys() [][]byte {
	return dc.barrierKeys
}

func testKeyCopy(key []byte) []byte {
	result := make([]byte, len(key))
	copy(result, key)
	return result
}

func (dc *DockerCluster) GetRecoveryKeys() [][]byte {
	ret := make([][]byte, len(dc.recoveryKeys))
	for i, k := range dc.recoveryKeys {
		ret[i] = testKeyCopy(k)
	}
	return ret
}

func (dc *DockerCluster) GetBarrierOrRecoveryKeys() [][]byte {
	return dc.GetBarrierKeys()
}

func (dc *DockerCluster) SetBarrierKeys(keys [][]byte) {
	dc.barrierKeys = make([][]byte, len(keys))
	for i, k := range keys {
		dc.barrierKeys[i] = testKeyCopy(k)
	}
}

func (dc *DockerCluster) SetRecoveryKeys(keys [][]byte) {
	dc.recoveryKeys = make([][]byte, len(keys))
	for i, k := range keys {
		dc.recoveryKeys[i] = testKeyCopy(k)
	}
}

func (dc *DockerCluster) GetCACertPEMFile() string {
	return testcluster.DefaultCAFile
}

func (dc *DockerCluster) Cleanup() {
	dc.cleanup()
}

func (dc *DockerCluster) cleanup() error {
	var result *multierror.Error
	for _, node := range dc.ClusterNodes {
		if err := node.cleanup(); err != nil {
			result = multierror.Append(result, err)
		}
	}

	return result.ErrorOrNil()
}

// GetRootToken returns the root token of the cluster, if set
func (dc *DockerCluster) GetRootToken() string {
	return dc.rootToken
}

func (dc *DockerCluster) SetRootToken(s string) {
	dc.Logger.Trace("cluster root token changed", "helpful_env", fmt.Sprintf("VAULT_TOKEN=%s VAULT_CACERT=/vault/config/ca.pem", s))
	dc.rootToken = s
}

func (n *DockerClusterNode) Name() string {
	return n.Cluster.ClusterName + "-" + n.NodeID
}

func (dc *DockerCluster) setupNode0(ctx context.Context) error {
	client := dc.ClusterNodes[0].client

	var resp *api.InitResponse
	var err error
	for ctx.Err() == nil {
		resp, err = client.Sys().Init(&api.InitRequest{
			SecretShares:    3,
			SecretThreshold: 3,
		})
		if err == nil && resp != nil {
			break
		}
		time.Sleep(500 * time.Millisecond)
	}
	if err != nil {
		return err
	}
	if resp == nil {
		return fmt.Errorf("nil response to init request")
	}

	for _, k := range resp.Keys {
		raw, err := hex.DecodeString(k)
		if err != nil {
			return err
		}
		dc.barrierKeys = append(dc.barrierKeys, raw)
	}

	for _, k := range resp.RecoveryKeys {
		raw, err := hex.DecodeString(k)
		if err != nil {
			return err
		}
		dc.recoveryKeys = append(dc.recoveryKeys, raw)
	}

	dc.rootToken = resp.RootToken
	client.SetToken(dc.rootToken)
	dc.ClusterNodes[0].client = client

	err = testcluster.UnsealNode(ctx, dc, 0)
	if err != nil {
		return err
	}

	err = ensureLeaderMatches(ctx, client, func(leader *api.LeaderResponse) error {
		if !leader.IsSelf {
			return fmt.Errorf("node %d leader=%v, expected=%v", 0, leader.IsSelf, true)
		}

		return nil
	})
	if err != nil {
		return err
	}

	status, err := client.Sys().SealStatusWithContext(ctx)
	if err != nil {
		return err
	}
	dc.ID = status.ClusterID
	return err
}
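// Illustrative sketch, not part of the original file: when a cluster is
// created with ClusterOptions.SkipInit, a test can drive roughly the same
// initialization flow by hand using the helpers above. The variables
// (cluster, ctx, t) and the 1/1 key split are assumptions for brevity:
//
//	client := cluster.ClusterNodes[0].APIClient()
//	resp, err := client.Sys().Init(&api.InitRequest{SecretShares: 1, SecretThreshold: 1})
//	if err != nil {
//		t.Fatal(err)
//	}
//	key, _ := hex.DecodeString(resp.Keys[0])
//	cluster.SetBarrierKeys([][]byte{key})
//	cluster.SetRootToken(resp.RootToken)
//	if err := testcluster.UnsealNode(ctx, cluster, 0); err != nil {
//		t.Fatal(err)
//	}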
func (dc *DockerCluster) clusterReady(ctx context.Context) error {
	for i, node := range dc.ClusterNodes {
		expectLeader := i == 0
		err := ensureLeaderMatches(ctx, node.client, func(leader *api.LeaderResponse) error {
			if expectLeader != leader.IsSelf {
				return fmt.Errorf("node %d leader=%v, expected=%v", i, leader.IsSelf, expectLeader)
			}

			return nil
		})
		if err != nil {
			return err
		}
	}

	return nil
}

func (dc *DockerCluster) setupCA(opts *DockerClusterOptions) error {
	var err error
	var ca testcluster.CA

	if opts != nil && opts.CAKey != nil {
		ca.CAKey = opts.CAKey
	} else {
		ca.CAKey, err = ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
		if err != nil {
			return err
		}
	}

	var caBytes []byte
	if opts != nil && len(opts.CACert) > 0 {
		caBytes = opts.CACert
	} else {
		serialNumber := mathrand.New(mathrand.NewSource(time.Now().UnixNano())).Int63()
		CACertTemplate := &x509.Certificate{
			Subject: pkix.Name{
				CommonName: "localhost",
			},
			KeyUsage:              x509.KeyUsageCertSign | x509.KeyUsageCRLSign,
			SerialNumber:          big.NewInt(serialNumber),
			NotBefore:             time.Now().Add(-30 * time.Second),
			NotAfter:              time.Now().Add(262980 * time.Hour),
			BasicConstraintsValid: true,
			IsCA:                  true,
		}
		caBytes, err = x509.CreateCertificate(rand.Reader, CACertTemplate, CACertTemplate, ca.CAKey.Public(), ca.CAKey)
		if err != nil {
			return err
		}
	}
	CACert, err := x509.ParseCertificate(caBytes)
	if err != nil {
		return err
	}
	ca.CACert = CACert
	ca.CACertBytes = caBytes

	CACertPEMBlock := &pem.Block{
		Type:  "CERTIFICATE",
		Bytes: caBytes,
	}
	ca.CACertPEM = pem.EncodeToMemory(CACertPEMBlock)

	ca.CACertPEMFile = filepath.Join(dc.tmpDir, "ca", "ca.pem")
	err = os.WriteFile(ca.CACertPEMFile, ca.CACertPEM, 0o755)
	if err != nil {
		return err
	}

	marshaledCAKey, err := x509.MarshalECPrivateKey(ca.CAKey)
	if err != nil {
		return err
	}
	CAKeyPEMBlock := &pem.Block{
		Type:  "EC PRIVATE KEY",
		Bytes: marshaledCAKey,
	}
	ca.CAKeyPEM = pem.EncodeToMemory(CAKeyPEMBlock)

	dc.CA = &ca

	return nil
}

func (n *DockerClusterNode) setupCert(ip string) error {
	var err error

	n.ServerKey, err = ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
	if err != nil {
		return err
	}

	serialNumber := mathrand.New(mathrand.NewSource(time.Now().UnixNano())).Int63()
	certTemplate := &x509.Certificate{
		Subject: pkix.Name{
			CommonName: n.Name(),
		},
		DNSNames:    []string{"localhost", n.Name()},
		IPAddresses: []net.IP{net.IPv6loopback, net.ParseIP("127.0.0.1"), net.ParseIP(ip)},
		ExtKeyUsage: []x509.ExtKeyUsage{
			x509.ExtKeyUsageServerAuth,
			x509.ExtKeyUsageClientAuth,
		},
		KeyUsage:     x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment | x509.KeyUsageKeyAgreement,
		SerialNumber: big.NewInt(serialNumber),
		NotBefore:    time.Now().Add(-30 * time.Second),
		NotAfter:     time.Now().Add(262980 * time.Hour),
	}
	n.ServerCertBytes, err = x509.CreateCertificate(rand.Reader, certTemplate, n.Cluster.CACert, n.ServerKey.Public(), n.Cluster.CAKey)
	if err != nil {
		return err
	}
	n.ServerCert, err = x509.ParseCertificate(n.ServerCertBytes)
	if err != nil {
		return err
	}
	n.ServerCertPEM = pem.EncodeToMemory(&pem.Block{
		Type:  "CERTIFICATE",
		Bytes: n.ServerCertBytes,
	})

	marshaledKey, err := x509.MarshalECPrivateKey(n.ServerKey)
	if err != nil {
		return err
	}
	n.ServerKeyPEM = pem.EncodeToMemory(&pem.Block{
		Type:  "EC PRIVATE KEY",
		Bytes: marshaledKey,
	})

	n.ServerCertPEMFile = filepath.Join(n.WorkDir, "cert.pem")
	err = os.WriteFile(n.ServerCertPEMFile, n.ServerCertPEM, 0o755)
	if err != nil {
		return err
	}

	n.ServerKeyPEMFile = filepath.Join(n.WorkDir, "key.pem")
	err = os.WriteFile(n.ServerKeyPEMFile, n.ServerKeyPEM, 0o755)
	if err != nil {
		return err
	}

	tlsCert, err := tls.X509KeyPair(n.ServerCertPEM, n.ServerKeyPEM)
	if err != nil {
		return err
	}

	certGetter := NewCertificateGetter(n.ServerCertPEMFile, n.ServerKeyPEMFile, "")
	if err := certGetter.Reload(); err != nil {
		return err
	}
	tlsConfig := &tls.Config{
		Certificates:   []tls.Certificate{tlsCert},
		RootCAs:        n.Cluster.RootCAs,
		ClientCAs:      n.Cluster.RootCAs,
		ClientAuth:     tls.RequestClientCert,
		NextProtos:     []string{"h2", "http/1.1"},
		GetCertificate: certGetter.GetCertificate,
	}

	n.tlsConfig = tlsConfig

	err = os.WriteFile(filepath.Join(n.WorkDir, "ca.pem"), n.Cluster.CACertPEM, 0o755)
	if err != nil {
		return err
	}
	return nil
}

func NewTestDockerCluster(t *testing.T, opts *DockerClusterOptions) *DockerCluster {
	if opts == nil {
		opts = &DockerClusterOptions{}
	}
	if opts.ClusterName == "" {
		opts.ClusterName = strings.ReplaceAll(t.Name(), "/", "-")
	}
	if opts.Logger == nil {
		opts.Logger = logging.NewVaultLogger(log.Trace).Named(t.Name())
	}
	if opts.NetworkName == "" {
		opts.NetworkName = os.Getenv("TEST_DOCKER_NETWORK_NAME")
	}

	ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second)
	t.Cleanup(cancel)

	dc, err := NewDockerCluster(ctx, opts)
	if err != nil {
		t.Fatal(err)
	}
	dc.Logger.Trace("cluster started", "helpful_env", fmt.Sprintf("VAULT_TOKEN=%s VAULT_CACERT=/vault/config/ca.pem", dc.GetRootToken()))
	return dc
}

func NewDockerCluster(ctx context.Context, opts *DockerClusterOptions) (*DockerCluster, error) {
	api, err := dockhelper.NewDockerAPI()
	if err != nil {
		return nil, err
	}

	if opts == nil {
		opts = &DockerClusterOptions{}
	}
	if opts.Logger == nil {
		opts.Logger = log.NewNullLogger()
	}
	if opts.VaultLicense == "" {
		opts.VaultLicense = os.Getenv(testcluster.EnvVaultLicenseCI)
	}

	dc := &DockerCluster{
		DockerAPI:   api,
		ClusterName: opts.ClusterName,
		Logger:      opts.Logger,
		builtTags:   map[string]struct{}{},
		CA:          opts.CA,
		storage:     opts.Storage,
	}

	if err := dc.setupDockerCluster(ctx, opts); err != nil {
		dc.Cleanup()
		return nil, err
	}

	return dc, nil
}
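// Illustrative usage sketch, not part of the original file: a typical test
// spins up a cluster (three nodes by default, see DefaultNumCores), cleans it
// up itself, and talks to node 0 with a root-token client. The image repo and
// tag shown are assumptions; any locally available Vault image works:
//
//	func TestWithDockerCluster(t *testing.T) {
//		cluster := NewTestDockerCluster(t, &DockerClusterOptions{
//			ImageRepo: "hashicorp/vault",
//			ImageTag:  "latest",
//		})
//		defer cluster.Cleanup()
//
//		client := cluster.Nodes()[0].APIClient()
//		if _, err := client.Sys().Health(); err != nil {
//			t.Fatal(err)
//		}
//	}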
// DockerClusterNode represents a single instance of Vault in a cluster
type DockerClusterNode struct {
	NodeID               string
	HostPort             string
	client               *api.Client
	ServerCert           *x509.Certificate
	ServerCertBytes      []byte
	ServerCertPEM        []byte
	ServerCertPEMFile    string
	ServerKey            *ecdsa.PrivateKey
	ServerKeyPEM         []byte
	ServerKeyPEMFile     string
	tlsConfig            *tls.Config
	WorkDir              string
	Cluster              *DockerCluster
	Container            *types.ContainerJSON
	DockerAPI            *docker.Client
	runner               *dockhelper.Runner
	Logger               log.Logger
	cleanupContainer     func()
	RealAPIAddr          string
	ContainerNetworkName string
	ContainerIPAddress   string
	ImageRepo            string
	ImageTag             string
	DataVolumeName       string
	cleanupVolume        func()
	AllClients           []*api.Client
}

func (n *DockerClusterNode) TLSConfig() *tls.Config {
	return n.tlsConfig.Clone()
}

func (n *DockerClusterNode) APIClient() *api.Client {
	// We clone to ensure that whenever this method is called, the caller gets
	// back a pristine client, without e.g. any namespace or token changes that
	// might pollute a shared client. We clone the config instead of the
	// client because (1) Client.clone propagates the replicationStateStore and
	// the httpClient pointers, (2) it doesn't copy the tlsConfig at all, and
	// (3) if clone returns an error, it doesn't feel as appropriate to panic
	// below. Who knows why clone might return an error?
	cfg := n.client.CloneConfig()
	client, err := api.NewClient(cfg)
	if err != nil {
		// It seems fine to panic here, since this should be the same input
		// we provided to NewClient when we were setup, and we didn't panic then.
		// Better not to completely ignore the error though, suppose there's a
		// bug in CloneConfig?
		panic(fmt.Sprintf("NewClient error on cloned config: %v", err))
	}
	client.SetToken(n.Cluster.rootToken)
	return client
}
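// Illustrative sketch, not part of the original file: because APIClient above
// and APIClientN below each return a freshly built client, per-test mutations
// stay local to the caller, e.g. scoping one client to a namespace without
// affecting any other caller ("node" here is a hypothetical *DockerClusterNode):
//
//	c1 := node.APIClient()
//	c1.SetNamespace("team-a") // only c1 is affected
//	c2 := node.APIClient()    // still un-namespaced, root token already set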
func (n *DockerClusterNode) APIClientN(listenerNumber int) (*api.Client, error) {
	// We clone to ensure that whenever this method is called, the caller gets
	// back a pristine client, without e.g. any namespace or token changes that
	// might pollute a shared client. We clone the config instead of the
	// client because (1) Client.clone propagates the replicationStateStore and
	// the httpClient pointers, (2) it doesn't copy the tlsConfig at all, and
	// (3) if clone returns an error, it doesn't feel as appropriate to panic
	// below. Who knows why clone might return an error?
	if listenerNumber >= len(n.AllClients) {
		return nil, fmt.Errorf("invalid listener number %d", listenerNumber)
	}
	cfg := n.AllClients[listenerNumber].CloneConfig()
	client, err := api.NewClient(cfg)
	if err != nil {
		// It seems fine to panic here, since this should be the same input
		// we provided to NewClient when we were setup, and we didn't panic then.
		// Better not to completely ignore the error though, suppose there's a
		// bug in CloneConfig?
		panic(fmt.Sprintf("NewClient error on cloned config: %v", err))
	}
	client.SetToken(n.Cluster.rootToken)
	return client, nil
}

// apiConfig creates and configures a Vault API client config used to
// communicate with the running Vault cluster for this DockerClusterNode
func (n *DockerClusterNode) apiConfig() (*api.Config, error) {
	transport := cleanhttp.DefaultPooledTransport()
	transport.TLSClientConfig = n.TLSConfig()
	if err := http2.ConfigureTransport(transport); err != nil {
		return nil, err
	}
	client := &http.Client{
		Transport: transport,
		CheckRedirect: func(*http.Request, []*http.Request) error {
			// This can of course be overridden per-test by using its own client
			return fmt.Errorf("redirects not allowed in these tests")
		},
	}
	config := api.DefaultConfig()
	if config.Error != nil {
		return nil, config.Error
	}

	protocol := "https"
	if n.tlsConfig == nil {
		protocol = "http"
	}
	config.Address = fmt.Sprintf("%s://%s", protocol, n.HostPort)

	config.HttpClient = client
	config.MaxRetries = 0
	return config, nil
}

func (n *DockerClusterNode) newAPIClient() (*api.Client, error) {
	config, err := n.apiConfig()
	if err != nil {
		return nil, err
	}
	client, err := api.NewClient(config)
	if err != nil {
		return nil, err
	}
	client.SetToken(n.Cluster.GetRootToken())
	return client, nil
}

func (n *DockerClusterNode) newAPIClientForAddress(address string) (*api.Client, error) {
	config, err := n.apiConfig()
	if err != nil {
		return nil, err
	}
	config.Address = fmt.Sprintf("https://%s", address)
	client, err := api.NewClient(config)
	if err != nil {
		return nil, err
	}
	client.SetToken(n.Cluster.GetRootToken())
	return client, nil
}

// Cleanup kills the container of the node and deletes its data volume
func (n *DockerClusterNode) Cleanup() {
	n.cleanup()
}

// Stop kills the container of the node
func (n *DockerClusterNode) Stop() {
	n.cleanupContainer()
}

func (n *DockerClusterNode) cleanup() error {
	if n.Container == nil || n.Container.ID == "" {
		return nil
	}
	n.cleanupContainer()
	n.cleanupVolume()
	return nil
}

func (n *DockerClusterNode) createDefaultListenerConfig() map[string]interface{} {
	return map[string]interface{}{"tcp": map[string]interface{}{
		"address":       fmt.Sprintf("%s:%d", "0.0.0.0", 8200),
		"tls_cert_file": "/vault/config/cert.pem",
		"tls_key_file":  "/vault/config/key.pem",
		"telemetry": map[string]interface{}{
			"unauthenticated_metrics_access": true,
		},
	}}
}

func (n *DockerClusterNode) createTLSDisabledListenerConfig() map[string]interface{} {
	return map[string]interface{}{"tcp": map[string]interface{}{
		"address": fmt.Sprintf("%s:%d", "0.0.0.0", 8200),
		"telemetry": map[string]interface{}{
			"unauthenticated_metrics_access": true,
		},
		"tls_disable": true,
	}}
}
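// Illustrative sketch, not part of the original file: once Start below
// marshals the configuration into system.json, the default listener map above
// renders roughly as the following JSON stanza (key order and whitespace will
// differ):
//
//	"listener": [{
//	  "tcp": {
//	    "address": "0.0.0.0:8200",
//	    "tls_cert_file": "/vault/config/cert.pem",
//	    "tls_key_file": "/vault/config/key.pem",
//	    "telemetry": { "unauthenticated_metrics_access": true }
//	  }
//	}]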
func (n *DockerClusterNode) Start(ctx context.Context, opts *DockerClusterOptions) error {
	if n.DataVolumeName == "" {
		vol, err := n.DockerAPI.VolumeCreate(ctx, volume.CreateOptions{})
		if err != nil {
			return err
		}
		n.DataVolumeName = vol.Name
		n.cleanupVolume = func() {
			_ = n.DockerAPI.VolumeRemove(ctx, vol.Name, false)
		}
	}
	vaultCfg := map[string]interface{}{}
	var listenerConfig []map[string]interface{}

	var defaultListenerConfig map[string]interface{}
	if opts.DisableTLS {
		defaultListenerConfig = n.createTLSDisabledListenerConfig()
	} else {
		defaultListenerConfig = n.createDefaultListenerConfig()
	}

	listenerConfig = append(listenerConfig, defaultListenerConfig)
	ports := []string{"8200/tcp", "8201/tcp"}

	if opts.VaultNodeConfig != nil && opts.VaultNodeConfig.AdditionalListeners != nil {
		for _, config := range opts.VaultNodeConfig.AdditionalListeners {
			cfg := n.createDefaultListenerConfig()
			listener := cfg["tcp"].(map[string]interface{})
			listener["address"] = fmt.Sprintf("%s:%d", "0.0.0.0", config.Port)
			listener["chroot_namespace"] = config.ChrootNamespace
			listener["redact_addresses"] = config.RedactAddresses
			listener["redact_cluster_name"] = config.RedactClusterName
			listener["redact_version"] = config.RedactVersion
			listenerConfig = append(listenerConfig, cfg)
			portStr := fmt.Sprintf("%d/tcp", config.Port)
			if strutil.StrListContains(ports, portStr) {
				return fmt.Errorf("duplicate port %d specified", config.Port)
			}
			ports = append(ports, portStr)
		}
	}
	vaultCfg["listener"] = listenerConfig
	vaultCfg["telemetry"] = map[string]interface{}{
		"disable_hostname": true,
	}

	// Setup storage. Default is raft.
	storageType := "raft"
	storageOpts := map[string]interface{}{
		// TODO add options from vnc
		"path":    "/vault/file",
		"node_id": n.NodeID,
	}

	if opts.Storage != nil {
		storageType = opts.Storage.Type()
		storageOpts = opts.Storage.Opts()
	}

	if opts != nil && opts.VaultNodeConfig != nil {
		for k, v := range opts.VaultNodeConfig.StorageOptions {
			if _, ok := storageOpts[k].(string); !ok {
				storageOpts[k] = v
			}
		}
	}
	vaultCfg["storage"] = map[string]interface{}{
		storageType: storageOpts,
	}

	//// disable_mlock is required for working in the Docker environment with
	//// custom plugins
	vaultCfg["disable_mlock"] = true

	protocol := "https"
	if opts.DisableTLS {
		protocol = "http"
	}
	vaultCfg["api_addr"] = fmt.Sprintf(`%s://{{- GetAllInterfaces | exclude "flags" "loopback" | attr "address" -}}:8200`, protocol)
	vaultCfg["cluster_addr"] = `https://{{- GetAllInterfaces | exclude "flags" "loopback" | attr "address" -}}:8201`

	vaultCfg["administrative_namespace_path"] = opts.AdministrativeNamespacePath

	systemJSON, err := json.Marshal(vaultCfg)
	if err != nil {
		return err
	}
	err = os.WriteFile(filepath.Join(n.WorkDir, "system.json"), systemJSON, 0o644)
	if err != nil {
		return err
	}

	if opts.VaultNodeConfig != nil {
		localCfg := *opts.VaultNodeConfig
		if opts.VaultNodeConfig.LicensePath != "" {
			b, err := os.ReadFile(opts.VaultNodeConfig.LicensePath)
			if err != nil || len(b) == 0 {
				return fmt.Errorf("unable to read LicensePath at %q: %w", opts.VaultNodeConfig.LicensePath, err)
			}
			localCfg.LicensePath = "/vault/config/license"
			dest := filepath.Join(n.WorkDir, "license")
			err = os.WriteFile(dest, b, 0o644)
			if err != nil {
				return fmt.Errorf("error writing license to %q: %w", dest, err)
			}

		}
		userJSON, err := json.Marshal(localCfg)
		if err != nil {
			return err
		}
		err = os.WriteFile(filepath.Join(n.WorkDir, "user.json"), userJSON, 0o644)
		if err != nil {
			return err
		}
	}

	if !opts.DisableTLS {
		// Create a temporary cert so vault will start up
		err = n.setupCert("127.0.0.1")
		if err != nil {
			return err
		}
	}

	caDir := filepath.Join(n.Cluster.tmpDir, "ca")

	// setup plugin bin copy if needed
	copyFromTo := map[string]string{
		n.WorkDir: "/vault/config",
		caDir:     "/usr/local/share/ca-certificates/",
	}

	var wg sync.WaitGroup
	wg.Add(1)
	var seenLogs uberAtomic.Bool
	logConsumer := func(s string) {
		if seenLogs.CAS(false, true) {
			wg.Done()
		}
		n.Logger.Trace(s)
	}
	logStdout := &LogConsumerWriter{logConsumer}
	logStderr := &LogConsumerWriter{func(s string) {
		if seenLogs.CAS(false, true) {
			wg.Done()
		}
		testcluster.JSONLogNoTimestamp(n.Logger, s)
	}}

	postStartFunc := func(containerID string, realIP string) error {
		err := n.setupCert(realIP)
		if err != nil {
			return err
		}

		// If we signal Vault before it installs its sighup handler, it'll die.
		wg.Wait()
		n.Logger.Trace("running poststart", "containerID", containerID, "IP", realIP)
		return n.runner.RefreshFiles(ctx, containerID)
	}

	if opts.DisableTLS {
		postStartFunc = func(containerID string, realIP string) error {
			// If we signal Vault before it installs its sighup handler, it'll die.
			wg.Wait()
			n.Logger.Trace("running poststart", "containerID", containerID, "IP", realIP)
			return n.runner.RefreshFiles(ctx, containerID)
		}
	}

	r, err := dockhelper.NewServiceRunner(dockhelper.RunOptions{
		ImageRepo: n.ImageRepo,
		ImageTag:  n.ImageTag,
		// We don't need to run update-ca-certificates in the container, because
		// we're providing the CA in the raft join call, and otherwise Vault
		// servers don't talk to one another on the API port.
		Cmd: append([]string{"server"}, opts.Args...),
		Env: []string{
			// For now we're using disable_mlock, because this is for testing
			// anyway, and because it prevents us using external plugins.
818 "SKIP_SETCAP=true", 819 "VAULT_LOG_FORMAT=json", 820 "VAULT_LICENSE=" + opts.VaultLicense, 821 }, 822 Ports: ports, 823 ContainerName: n.Name(), 824 NetworkName: opts.NetworkName, 825 CopyFromTo: copyFromTo, 826 LogConsumer: logConsumer, 827 LogStdout: logStdout, 828 LogStderr: logStderr, 829 PreDelete: true, 830 DoNotAutoRemove: true, 831 PostStart: postStartFunc, 832 Capabilities: []string{"NET_ADMIN"}, 833 OmitLogTimestamps: true, 834 VolumeNameToMountPoint: map[string]string{ 835 n.DataVolumeName: "/vault/file", 836 }, 837 }) 838 if err != nil { 839 return err 840 } 841 n.runner = r 842 843 probe := opts.StartProbe 844 if probe == nil { 845 probe = func(c *api.Client) error { 846 _, err = c.Sys().SealStatus() 847 return err 848 } 849 } 850 svc, _, err := r.StartNewService(ctx, false, false, func(ctx context.Context, host string, port int) (dockhelper.ServiceConfig, error) { 851 config, err := n.apiConfig() 852 if err != nil { 853 return nil, err 854 } 855 config.Address = fmt.Sprintf("%s://%s:%d", protocol, host, port) 856 client, err := api.NewClient(config) 857 if err != nil { 858 return nil, err 859 } 860 err = probe(client) 861 if err != nil { 862 return nil, err 863 } 864 865 return dockhelper.NewServiceHostPort(host, port), nil 866 }) 867 if err != nil { 868 return err 869 } 870 871 n.HostPort = svc.Config.Address() 872 n.Container = svc.Container 873 netName := opts.NetworkName 874 if netName == "" { 875 if len(svc.Container.NetworkSettings.Networks) > 1 { 876 return fmt.Errorf("Set d.RunOptions.NetworkName instead for container with multiple networks: %v", svc.Container.NetworkSettings.Networks) 877 } 878 for netName = range svc.Container.NetworkSettings.Networks { 879 // Networks above is a map; we just need to find the first and 880 // only key of this map (network name). The range handles this 881 // for us, but we need a loop construction in order to use range. 
882 } 883 } 884 n.ContainerNetworkName = netName 885 n.ContainerIPAddress = svc.Container.NetworkSettings.Networks[netName].IPAddress 886 n.RealAPIAddr = protocol + "://" + n.ContainerIPAddress + ":8200" 887 n.cleanupContainer = svc.Cleanup 888 889 client, err := n.newAPIClient() 890 if err != nil { 891 return err 892 } 893 client.SetToken(n.Cluster.rootToken) 894 n.client = client 895 896 n.AllClients = append(n.AllClients, client) 897 898 for _, addr := range svc.StartResult.Addrs[2:] { 899 // The second element of this list of addresses is the cluster address 900 // We do not want to create a client for the cluster address mapping 901 client, err := n.newAPIClientForAddress(addr) 902 if err != nil { 903 return err 904 } 905 client.SetToken(n.Cluster.rootToken) 906 n.AllClients = append(n.AllClients, client) 907 } 908 return nil 909 } 910 911 func (n *DockerClusterNode) Pause(ctx context.Context) error { 912 return n.DockerAPI.ContainerPause(ctx, n.Container.ID) 913 } 914 915 func (n *DockerClusterNode) Restart(ctx context.Context) error { 916 timeout := 5 917 err := n.DockerAPI.ContainerRestart(ctx, n.Container.ID, container.StopOptions{Timeout: &timeout}) 918 if err != nil { 919 return err 920 } 921 922 resp, err := n.DockerAPI.ContainerInspect(ctx, n.Container.ID) 923 if err != nil { 924 return fmt.Errorf("error inspecting container after restart: %s", err) 925 } 926 927 var port int 928 if len(resp.NetworkSettings.Ports) > 0 { 929 for key, binding := range resp.NetworkSettings.Ports { 930 if len(binding) < 1 { 931 continue 932 } 933 934 if key == "8200/tcp" { 935 port, err = strconv.Atoi(binding[0].HostPort) 936 } 937 } 938 } 939 940 if port == 0 { 941 return fmt.Errorf("failed to find container port after restart") 942 } 943 944 hostPieces := strings.Split(n.HostPort, ":") 945 if len(hostPieces) < 2 { 946 return errors.New("could not parse node hostname") 947 } 948 949 n.HostPort = fmt.Sprintf("%s:%d", hostPieces[0], port) 950 951 client, err := n.newAPIClient() 952 if err != nil { 953 return err 954 } 955 client.SetToken(n.Cluster.rootToken) 956 n.client = client 957 958 return nil 959 } 960 961 func (n *DockerClusterNode) AddNetworkDelay(ctx context.Context, delay time.Duration, targetIP string) error { 962 ip := net.ParseIP(targetIP) 963 if ip == nil { 964 return fmt.Errorf("targetIP %q is not an IP address", targetIP) 965 } 966 // Let's attempt to get a unique handle for the filter rule; we'll assume that 967 // every targetIP has a unique last octet, which is true currently for how 968 // we're doing docker networking. 969 lastOctet := ip.To4()[3] 970 971 stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{ 972 "/bin/sh", 973 "-xec", strings.Join([]string{ 974 fmt.Sprintf("echo isolating node %s", targetIP), 975 "apk add iproute2", 976 // If we're running this script a second time on the same node, 977 // the add dev will fail; since we only want to run the netem 978 // command once, we'll do so in the case where the add dev doesn't fail. 979 "tc qdisc add dev eth0 root handle 1: prio && " + 980 fmt.Sprintf("tc qdisc add dev eth0 parent 1:1 handle 2: netem delay %dms", delay/time.Millisecond), 981 // Here we create a u32 filter as per https://man7.org/linux/man-pages/man8/tc-u32.8.html 982 // Its parent is 1:0 (which I guess is the root?) 
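// Illustrative sketch, not part of the original file: Restart above
// re-resolves the remapped host port and rebuilds n.client, so a test can
// bounce a node and keep using clients obtained afterwards ("node" and "t"
// are hypothetical here):
//
//	if err := node.Restart(ctx); err != nil {
//		t.Fatal(err)
//	}
//	client := node.APIClient() // picks up the new HostPort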
func (n *DockerClusterNode) AddNetworkDelay(ctx context.Context, delay time.Duration, targetIP string) error {
	ip := net.ParseIP(targetIP)
	if ip == nil {
		return fmt.Errorf("targetIP %q is not an IP address", targetIP)
	}
	// Let's attempt to get a unique handle for the filter rule; we'll assume that
	// every targetIP has a unique last octet, which is true currently for how
	// we're doing docker networking.
	lastOctet := ip.To4()[3]

	stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{
		"/bin/sh",
		"-xec", strings.Join([]string{
			fmt.Sprintf("echo isolating node %s", targetIP),
			"apk add iproute2",
			// If we're running this script a second time on the same node,
			// the add dev will fail; since we only want to run the netem
			// command once, we'll do so in the case where the add dev doesn't fail.
			"tc qdisc add dev eth0 root handle 1: prio && " +
				fmt.Sprintf("tc qdisc add dev eth0 parent 1:1 handle 2: netem delay %dms", delay/time.Millisecond),
			// Here we create a u32 filter as per https://man7.org/linux/man-pages/man8/tc-u32.8.html
			// Its parent is 1:0 (which I guess is the root?)
			// Its handle must be unique, so we base it on targetIP
			fmt.Sprintf("tc filter add dev eth0 parent 1:0 protocol ip pref 55 handle ::%x u32 match ip dst %s flowid 2:1", lastOctet, targetIP),
		}, "; "),
	})
	if err != nil {
		return err
	}

	n.Logger.Trace(string(stdout))
	n.Logger.Trace(string(stderr))
	if exitCode != 0 {
		return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode)
	}
	return nil
}

// PartitionFromCluster will cause the node to be disconnected at the network
// level from the rest of the docker cluster. It does so in a way that the node
// will not see TCP RSTs and all packets it sends will be "black holed". It
// attempts to keep packets to and from the host intact which allows docker
// daemon to continue streaming logs and any test code to continue making
// requests from the host to the partitioned node.
func (n *DockerClusterNode) PartitionFromCluster(ctx context.Context) error {
	stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{
		"/bin/sh",
		"-xec", strings.Join([]string{
			fmt.Sprintf("echo partitioning container from network"),
			"apk add iproute2",
			// Get the gateway address for the bridge so we can allow host to
			// container traffic still.
			"GW=$(ip r | grep default | grep eth0 | cut -f 3 -d' ')",
			// First delete the rules in case this is called twice otherwise we'll add
			// multiple copies and only remove one in Unpartition (yay iptables).
			// Ignore the error if it didn't exist.
			"iptables -D INPUT -i eth0 ! -s \"$GW\" -j DROP | true",
			"iptables -D OUTPUT -o eth0 ! -d \"$GW\" -j DROP | true",
			// Add rules to drop all packets in and out of the docker network
			// connection.
			"iptables -I INPUT -i eth0 ! -s \"$GW\" -j DROP",
			"iptables -I OUTPUT -o eth0 ! -d \"$GW\" -j DROP",
		}, "; "),
	})
	if err != nil {
		return err
	}

	n.Logger.Trace(string(stdout))
	n.Logger.Trace(string(stderr))
	if exitCode != 0 {
		return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode)
	}
	return nil
}

// UnpartitionFromCluster reverses a previous call to PartitionFromCluster and
// restores full connectivity. Currently assumes the default "bridge" network.
func (n *DockerClusterNode) UnpartitionFromCluster(ctx context.Context) error {
	stdout, stderr, exitCode, err := n.runner.RunCmdWithOutput(ctx, n.Container.ID, []string{
		"/bin/sh",
		"-xec", strings.Join([]string{
			fmt.Sprintf("echo un-partitioning container from network"),
			// Get the gateway address for the bridge so we can allow host to
			// container traffic still.
			"GW=$(ip r | grep default | grep eth0 | cut -f 3 -d' ')",
			// Remove the rules, ignore if they are not present or iptables wasn't
			// installed yet (i.e. no-one called PartitionFromCluster yet).
			"iptables -D INPUT -i eth0 ! -s \"$GW\" -j DROP | true",
			"iptables -D OUTPUT -o eth0 ! -d \"$GW\" -j DROP | true",
		}, "; "),
	})
	if err != nil {
		return err
	}

	n.Logger.Trace(string(stdout))
	n.Logger.Trace(string(stderr))
	if exitCode != 0 {
		return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode)
	}
	return nil
}
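// Illustrative sketch, not part of the original file: a partition test can
// black-hole the current active node, wait for leadership to move, then heal
// it ("cluster", "ctx" and "t" are hypothetical; GetActiveClusterNode is
// defined later in this file):
//
//	active := cluster.GetActiveClusterNode()
//	if err := active.PartitionFromCluster(ctx); err != nil {
//		t.Fatal(err)
//	}
//	// ... assert that another node takes over leadership ...
//	if err := active.UnpartitionFromCluster(ctx); err != nil {
//		t.Fatal(err)
//	}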
-d \"$GW\" -j DROP | true", 1051 }, "; "), 1052 }) 1053 if err != nil { 1054 return err 1055 } 1056 1057 n.Logger.Trace(string(stdout)) 1058 n.Logger.Trace(string(stderr)) 1059 if exitCode != 0 { 1060 return fmt.Errorf("got nonzero exit code from iptables: %d", exitCode) 1061 } 1062 return nil 1063 } 1064 1065 type LogConsumerWriter struct { 1066 consumer func(string) 1067 } 1068 1069 func (l LogConsumerWriter) Write(p []byte) (n int, err error) { 1070 // TODO this assumes that we're never passed partial log lines, which 1071 // seems a safe assumption for now based on how docker looks to implement 1072 // logging, but might change in the future. 1073 scanner := bufio.NewScanner(bytes.NewReader(p)) 1074 scanner.Buffer(make([]byte, 64*1024), bufio.MaxScanTokenSize) 1075 for scanner.Scan() { 1076 l.consumer(scanner.Text()) 1077 } 1078 return len(p), nil 1079 } 1080 1081 // DockerClusterOptions has options for setting up the docker cluster 1082 type DockerClusterOptions struct { 1083 testcluster.ClusterOptions 1084 CAKey *ecdsa.PrivateKey 1085 NetworkName string 1086 ImageRepo string 1087 ImageTag string 1088 CA *testcluster.CA 1089 VaultBinary string 1090 Args []string 1091 StartProbe func(*api.Client) error 1092 Storage testcluster.ClusterStorage 1093 DisableTLS bool 1094 } 1095 1096 func ensureLeaderMatches(ctx context.Context, client *api.Client, ready func(response *api.LeaderResponse) error) error { 1097 var leader *api.LeaderResponse 1098 var err error 1099 for ctx.Err() == nil { 1100 leader, err = client.Sys().Leader() 1101 switch { 1102 case err != nil: 1103 case leader == nil: 1104 err = fmt.Errorf("nil response to leader check") 1105 default: 1106 err = ready(leader) 1107 if err == nil { 1108 return nil 1109 } 1110 } 1111 time.Sleep(500 * time.Millisecond) 1112 } 1113 return fmt.Errorf("error checking leader: %v", err) 1114 } 1115 1116 const DefaultNumCores = 3 1117 1118 // creates a managed docker container running Vault 1119 func (dc *DockerCluster) setupDockerCluster(ctx context.Context, opts *DockerClusterOptions) error { 1120 if opts.TmpDir != "" { 1121 if _, err := os.Stat(opts.TmpDir); os.IsNotExist(err) { 1122 if err := os.MkdirAll(opts.TmpDir, 0o700); err != nil { 1123 return err 1124 } 1125 } 1126 dc.tmpDir = opts.TmpDir 1127 } else { 1128 tempDir, err := ioutil.TempDir("", "vault-test-cluster-") 1129 if err != nil { 1130 return err 1131 } 1132 dc.tmpDir = tempDir 1133 } 1134 caDir := filepath.Join(dc.tmpDir, "ca") 1135 if err := os.MkdirAll(caDir, 0o755); err != nil { 1136 return err 1137 } 1138 1139 var numCores int 1140 if opts.NumCores == 0 { 1141 numCores = DefaultNumCores 1142 } else { 1143 numCores = opts.NumCores 1144 } 1145 1146 if !opts.DisableTLS { 1147 if dc.CA == nil { 1148 if err := dc.setupCA(opts); err != nil { 1149 return err 1150 } 1151 } 1152 dc.RootCAs = x509.NewCertPool() 1153 dc.RootCAs.AddCert(dc.CA.CACert) 1154 } 1155 1156 if dc.storage != nil { 1157 if err := dc.storage.Start(ctx, &opts.ClusterOptions); err != nil { 1158 return err 1159 } 1160 } 1161 1162 for i := 0; i < numCores; i++ { 1163 if err := dc.addNode(ctx, opts); err != nil { 1164 return err 1165 } 1166 if opts.SkipInit { 1167 continue 1168 } 1169 if i == 0 { 1170 if err := dc.setupNode0(ctx); err != nil { 1171 return err 1172 } 1173 } else { 1174 if err := dc.joinNode(ctx, i, 0); err != nil { 1175 return err 1176 } 1177 } 1178 } 1179 1180 return nil 1181 } 1182 1183 func (dc *DockerCluster) AddNode(ctx context.Context, opts *DockerClusterOptions) error { 1184 leaderIdx, err := 
func (dc *DockerCluster) AddNode(ctx context.Context, opts *DockerClusterOptions) error {
	leaderIdx, err := testcluster.LeaderNode(ctx, dc)
	if err != nil {
		return err
	}
	if err := dc.addNode(ctx, opts); err != nil {
		return err
	}

	return dc.joinNode(ctx, len(dc.ClusterNodes)-1, leaderIdx)
}

func (dc *DockerCluster) addNode(ctx context.Context, opts *DockerClusterOptions) error {
	tag, err := dc.setupImage(ctx, opts)
	if err != nil {
		return err
	}
	i := len(dc.ClusterNodes)
	nodeID := fmt.Sprintf("core-%d", i)
	node := &DockerClusterNode{
		DockerAPI: dc.DockerAPI,
		NodeID:    nodeID,
		Cluster:   dc,
		WorkDir:   filepath.Join(dc.tmpDir, nodeID),
		Logger:    dc.Logger.Named(nodeID),
		ImageRepo: opts.ImageRepo,
		ImageTag:  tag,
	}
	dc.ClusterNodes = append(dc.ClusterNodes, node)
	if err := os.MkdirAll(node.WorkDir, 0o755); err != nil {
		return err
	}
	if err := node.Start(ctx, opts); err != nil {
		return err
	}
	return nil
}

func (dc *DockerCluster) joinNode(ctx context.Context, nodeIdx int, leaderIdx int) error {
	if dc.storage != nil && dc.storage.Type() != "raft" {
		// Storage is not raft so nothing to do but unseal.
		return testcluster.UnsealNode(ctx, dc, nodeIdx)
	}

	leader := dc.ClusterNodes[leaderIdx]

	if nodeIdx >= len(dc.ClusterNodes) {
		return fmt.Errorf("invalid node %d", nodeIdx)
	}
	node := dc.ClusterNodes[nodeIdx]
	client := node.APIClient()

	var resp *api.RaftJoinResponse
	resp, err := client.Sys().RaftJoinWithContext(ctx, &api.RaftJoinRequest{
		// When running locally on a bridge network, the containers must use their
		// actual (private) IP to talk to one another. Our code must instead use
		// the portmapped address since we're not on their network in that case.
		LeaderAPIAddr:    leader.RealAPIAddr,
		LeaderCACert:     string(dc.CACertPEM),
		LeaderClientCert: string(node.ServerCertPEM),
		LeaderClientKey:  string(node.ServerKeyPEM),
	})
	if err != nil {
		return fmt.Errorf("failed to join cluster: %w", err)
	}
	if resp == nil || !resp.Joined {
		return fmt.Errorf("nil or negative response from raft join request: %v", resp)
	}

	return testcluster.UnsealNode(ctx, dc, nodeIdx)
}

func (dc *DockerCluster) setupImage(ctx context.Context, opts *DockerClusterOptions) (string, error) {
	if opts == nil {
		opts = &DockerClusterOptions{}
	}
	sourceTag := opts.ImageTag
	if sourceTag == "" {
		sourceTag = "latest"
	}

	if opts.VaultBinary == "" {
		return sourceTag, nil
	}

	suffix := "testing"
	if sha := os.Getenv("COMMIT_SHA"); sha != "" {
		suffix = sha
	}
	tag := sourceTag + "-" + suffix
	if _, ok := dc.builtTags[tag]; ok {
		return tag, nil
	}

	f, err := os.Open(opts.VaultBinary)
	if err != nil {
		return "", err
	}
	defer f.Close()
	data, err := io.ReadAll(f)
	if err != nil {
		return "", err
	}
	bCtx := dockhelper.NewBuildContext()
	bCtx["vault"] = &dockhelper.FileContents{
		Data: data,
		Mode: 0o755,
	}

	containerFile := fmt.Sprintf(`
FROM %s:%s
COPY vault /bin/vault
`, opts.ImageRepo, sourceTag)

	_, err = dockhelper.BuildImage(ctx, dc.DockerAPI, containerFile, bCtx,
		dockhelper.BuildRemove(true), dockhelper.BuildForceRemove(true),
		dockhelper.BuildPullParent(true),
		dockhelper.BuildTags([]string{opts.ImageRepo + ":" + tag}))
	if err != nil {
		return "", err
	}
	dc.builtTags[tag] = struct{}{}
	return tag, nil
}

func (dc *DockerCluster) GetActiveClusterNode() *DockerClusterNode {
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	node, err := testcluster.WaitForActiveNode(ctx, dc)
	if err != nil {
		panic(fmt.Sprintf("no cluster node became active in timeout window: %v", err))
	}

	return dc.ClusterNodes[node]
}

/* Notes on testing the non-bridge network case:
- you need the test itself to be running in a container so that it can use
  the network; create the network using
	docker network create testvault
- this means that you need to mount the docker socket in that test container,
  but on macos there's stuff that prevents that from working; to hack that,
  on the host run
	sudo ln -s "$HOME/Library/Containers/com.docker.docker/Data/docker.raw.sock" /var/run/docker.sock.raw
- run the test container like
	docker run --rm -it --network testvault \
		-v /var/run/docker.sock.raw:/var/run/docker.sock \
		-v $(pwd):/home/circleci/go/src/github.com/hashicorp/vault/ \
		-w /home/circleci/go/src/github.com/hashicorp/vault/ \
		"docker.mirror.hashicorp.services/cimg/go:1.19.2" /bin/bash
- in the container you may need to chown/chmod /var/run/docker.sock; use `docker ps`
  to test if it's working
*/
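// Illustrative sketch, not part of the original file: GetActiveClusterNode
// pairs naturally with Stop for a simple failover test ("cluster" and "t" are
// hypothetical):
//
//	old := cluster.GetActiveClusterNode()
//	old.Stop()
//	newActive := cluster.GetActiveClusterNode() // waits until another node is active
//	if newActive.NodeID == old.NodeID {
//		t.Fatal("expected leadership to move to a different node")
//	}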