github.com/hashicorp/vault/sdk@v0.11.0/helper/testcluster/replication.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package testcluster 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "reflect" 11 "strings" 12 "time" 13 14 "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/go-secure-stdlib/strutil" 16 "github.com/hashicorp/go-uuid" 17 "github.com/hashicorp/vault/api" 18 "github.com/hashicorp/vault/sdk/helper/consts" 19 "github.com/mitchellh/mapstructure" 20 ) 21 22 func GetPerformanceToken(pri VaultCluster, id, secondaryPublicKey string) (string, error) { 23 client := pri.Nodes()[0].APIClient() 24 req := map[string]interface{}{ 25 "id": id, 26 } 27 if secondaryPublicKey != "" { 28 req["secondary_public_key"] = secondaryPublicKey 29 } 30 secret, err := client.Logical().Write("sys/replication/performance/primary/secondary-token", req) 31 if err != nil { 32 return "", err 33 } 34 35 if secondaryPublicKey != "" { 36 return secret.Data["token"].(string), nil 37 } 38 return secret.WrapInfo.Token, nil 39 } 40 41 func EnablePerfPrimary(ctx context.Context, pri VaultCluster) error { 42 client := pri.Nodes()[0].APIClient() 43 _, err := client.Logical().WriteWithContext(ctx, "sys/replication/performance/primary/enable", nil) 44 if err != nil { 45 return err 46 } 47 48 err = WaitForPerfReplicationState(ctx, pri, consts.ReplicationPerformancePrimary) 49 if err != nil { 50 return err 51 } 52 return WaitForActiveNodeAndPerfStandbys(ctx, pri) 53 } 54 55 func WaitForPerfReplicationState(ctx context.Context, cluster VaultCluster, state consts.ReplicationState) error { 56 client := cluster.Nodes()[0].APIClient() 57 var health *api.HealthResponse 58 var err error 59 for ctx.Err() == nil { 60 health, err = client.Sys().HealthWithContext(ctx) 61 if err == nil && health.ReplicationPerformanceMode == state.GetPerformanceString() { 62 return nil 63 } 64 time.Sleep(500 * time.Millisecond) 65 } 66 if err == nil { 67 err = ctx.Err() 68 } 69 return err 70 } 71 72 func EnablePerformanceSecondaryNoWait(ctx context.Context, perfToken string, pri, sec VaultCluster, updatePrimary bool) error { 73 postData := map[string]interface{}{ 74 "token": perfToken, 75 "ca_file": DefaultCAFile, 76 } 77 path := "sys/replication/performance/secondary/enable" 78 if updatePrimary { 79 path = "sys/replication/performance/secondary/update-primary" 80 } 81 err := WaitForActiveNodeAndPerfStandbys(ctx, sec) 82 if err != nil { 83 return err 84 } 85 _, err = sec.Nodes()[0].APIClient().Logical().Write(path, postData) 86 if err != nil { 87 return err 88 } 89 90 return WaitForPerfReplicationState(ctx, sec, consts.ReplicationPerformanceSecondary) 91 } 92 93 func EnablePerformanceSecondary(ctx context.Context, perfToken string, pri, sec VaultCluster, updatePrimary, skipPoisonPill bool) (string, error) { 94 if err := EnablePerformanceSecondaryNoWait(ctx, perfToken, pri, sec, updatePrimary); err != nil { 95 return "", err 96 } 97 if err := WaitForMatchingMerkleRoots(ctx, "sys/replication/performance/", pri, sec); err != nil { 98 return "", err 99 } 100 root, err := WaitForPerformanceSecondary(ctx, pri, sec, skipPoisonPill) 101 if err != nil { 102 return "", err 103 } 104 if err := WaitForPerfReplicationWorking(ctx, pri, sec); err != nil { 105 return "", err 106 } 107 return root, nil 108 } 109 110 func WaitForMatchingMerkleRoots(ctx context.Context, endpoint string, pri, sec VaultCluster) error { 111 getRoot := func(mode string, cli *api.Client) (string, error) { 112 status, err := cli.Logical().Read(endpoint + "status") 113 if err != nil { 114 return "", err 115 } 116 if status == nil || status.Data == nil || status.Data["mode"] == nil { 117 return "", fmt.Errorf("got nil secret or data") 118 } 119 if status.Data["mode"].(string) != mode { 120 return "", fmt.Errorf("expected mode=%s, got %s", mode, status.Data["mode"].(string)) 121 } 122 return status.Data["merkle_root"].(string), nil 123 } 124 125 secClient := sec.Nodes()[0].APIClient() 126 priClient := pri.Nodes()[0].APIClient() 127 for i := 0; i < 30; i++ { 128 secRoot, err := getRoot("secondary", secClient) 129 if err != nil { 130 return err 131 } 132 priRoot, err := getRoot("primary", priClient) 133 if err != nil { 134 return err 135 } 136 137 if reflect.DeepEqual(priRoot, secRoot) { 138 return nil 139 } 140 time.Sleep(time.Second) 141 } 142 143 return fmt.Errorf("roots did not become equal") 144 } 145 146 func WaitForPerformanceWAL(ctx context.Context, pri, sec VaultCluster) error { 147 endpoint := "sys/replication/performance/" 148 if err := WaitForMatchingMerkleRoots(ctx, endpoint, pri, sec); err != nil { 149 return nil 150 } 151 getWAL := func(mode, walKey string, cli *api.Client) (int64, error) { 152 status, err := cli.Logical().Read(endpoint + "status") 153 if err != nil { 154 return 0, err 155 } 156 if status == nil || status.Data == nil || status.Data["mode"] == nil { 157 return 0, fmt.Errorf("got nil secret or data") 158 } 159 if status.Data["mode"].(string) != mode { 160 return 0, fmt.Errorf("expected mode=%s, got %s", mode, status.Data["mode"].(string)) 161 } 162 return status.Data[walKey].(json.Number).Int64() 163 } 164 165 secClient := sec.Nodes()[0].APIClient() 166 priClient := pri.Nodes()[0].APIClient() 167 for ctx.Err() == nil { 168 secLastRemoteWAL, err := getWAL("secondary", "last_remote_wal", secClient) 169 if err != nil { 170 return err 171 } 172 priLastPerfWAL, err := getWAL("primary", "last_performance_wal", priClient) 173 if err != nil { 174 return err 175 } 176 177 if secLastRemoteWAL >= priLastPerfWAL { 178 return nil 179 } 180 time.Sleep(time.Second) 181 } 182 183 return fmt.Errorf("performance WALs on the secondary did not catch up with the primary, context err: %w", ctx.Err()) 184 } 185 186 func WaitForPerformanceSecondary(ctx context.Context, pri, sec VaultCluster, skipPoisonPill bool) (string, error) { 187 if len(pri.GetRecoveryKeys()) > 0 { 188 sec.SetBarrierKeys(pri.GetRecoveryKeys()) 189 sec.SetRecoveryKeys(pri.GetRecoveryKeys()) 190 } else { 191 sec.SetBarrierKeys(pri.GetBarrierKeys()) 192 sec.SetRecoveryKeys(pri.GetBarrierKeys()) 193 } 194 195 if len(sec.Nodes()) > 1 { 196 if skipPoisonPill { 197 // As part of prepareSecondary on the active node the keyring is 198 // deleted from storage. Its absence can cause standbys to seal 199 // themselves. But it's not reliable, so we'll seal them 200 // ourselves to force the issue. 201 for i := range sec.Nodes()[1:] { 202 if err := SealNode(ctx, sec, i+1); err != nil { 203 return "", err 204 } 205 } 206 } else { 207 // We want to make sure we unseal all the nodes so we first need to wait 208 // until two of the nodes seal due to the poison pill being written 209 if err := WaitForNCoresSealed(ctx, sec, len(sec.Nodes())-1); err != nil { 210 return "", err 211 } 212 } 213 } 214 if _, err := WaitForActiveNode(ctx, sec); err != nil { 215 return "", err 216 } 217 if err := UnsealAllNodes(ctx, sec); err != nil { 218 return "", err 219 } 220 221 perfSecondaryRootToken, err := GenerateRoot(sec, GenerateRootRegular) 222 if err != nil { 223 return "", err 224 } 225 sec.SetRootToken(perfSecondaryRootToken) 226 if err := WaitForActiveNodeAndPerfStandbys(ctx, sec); err != nil { 227 return "", err 228 } 229 230 return perfSecondaryRootToken, nil 231 } 232 233 func WaitForPerfReplicationWorking(ctx context.Context, pri, sec VaultCluster) error { 234 priActiveIdx, err := WaitForActiveNode(ctx, pri) 235 if err != nil { 236 return err 237 } 238 secActiveIdx, err := WaitForActiveNode(ctx, sec) 239 if err != nil { 240 return err 241 } 242 243 priClient, secClient := pri.Nodes()[priActiveIdx].APIClient(), sec.Nodes()[secActiveIdx].APIClient() 244 mountPoint, err := uuid.GenerateUUID() 245 if err != nil { 246 return err 247 } 248 err = priClient.Sys().Mount(mountPoint, &api.MountInput{ 249 Type: "kv", 250 Local: false, 251 }) 252 if err != nil { 253 return fmt.Errorf("unable to mount KV engine on primary") 254 } 255 256 path := mountPoint + "/foo" 257 _, err = priClient.Logical().Write(path, map[string]interface{}{ 258 "bar": 1, 259 }) 260 if err != nil { 261 return fmt.Errorf("unable to write KV on primary, path=%s", path) 262 } 263 264 for ctx.Err() == nil { 265 var secret *api.Secret 266 secret, err = secClient.Logical().Read(path) 267 if err == nil && secret != nil { 268 err = priClient.Sys().Unmount(mountPoint) 269 if err != nil { 270 return fmt.Errorf("unable to unmount KV engine on primary") 271 } 272 return nil 273 } 274 time.Sleep(100 * time.Millisecond) 275 } 276 if err == nil { 277 err = ctx.Err() 278 } 279 return fmt.Errorf("unable to read replicated KV on secondary, path=%s, err=%v", path, err) 280 } 281 282 func SetupTwoClusterPerfReplication(ctx context.Context, pri, sec VaultCluster) error { 283 if err := EnablePerfPrimary(ctx, pri); err != nil { 284 return err 285 } 286 perfToken, err := GetPerformanceToken(pri, sec.ClusterID(), "") 287 if err != nil { 288 return err 289 } 290 291 _, err = EnablePerformanceSecondary(ctx, perfToken, pri, sec, false, false) 292 return err 293 } 294 295 // PassiveWaitForActiveNodeAndPerfStandbys should be used instead of 296 // WaitForActiveNodeAndPerfStandbys when you don't want to do any writes 297 // as a side-effect. This returns perfStandby nodes in the cluster and 298 // an error. 299 func PassiveWaitForActiveNodeAndPerfStandbys(ctx context.Context, pri VaultCluster) (VaultClusterNode, []VaultClusterNode, error) { 300 leaderNode, standbys, err := GetActiveAndStandbys(ctx, pri) 301 if err != nil { 302 return nil, nil, fmt.Errorf("failed to derive standby nodes, %w", err) 303 } 304 305 for i, node := range standbys { 306 client := node.APIClient() 307 // Make sure we get perf standby nodes 308 if err = EnsureCoreIsPerfStandby(ctx, client); err != nil { 309 return nil, nil, fmt.Errorf("standby node %d is not a perfStandby, %w", i, err) 310 } 311 } 312 313 return leaderNode, standbys, nil 314 } 315 316 func GetActiveAndStandbys(ctx context.Context, cluster VaultCluster) (VaultClusterNode, []VaultClusterNode, error) { 317 var leaderIndex int 318 var err error 319 if leaderIndex, err = WaitForActiveNode(ctx, cluster); err != nil { 320 return nil, nil, err 321 } 322 323 var leaderNode VaultClusterNode 324 var nodes []VaultClusterNode 325 for i, node := range cluster.Nodes() { 326 if i == leaderIndex { 327 leaderNode = node 328 continue 329 } 330 nodes = append(nodes, node) 331 } 332 333 return leaderNode, nodes, nil 334 } 335 336 func EnsureCoreIsPerfStandby(ctx context.Context, client *api.Client) error { 337 var err error 338 var health *api.HealthResponse 339 for ctx.Err() == nil { 340 health, err = client.Sys().HealthWithContext(ctx) 341 if err == nil && health.PerformanceStandby { 342 return nil 343 } 344 time.Sleep(time.Millisecond * 500) 345 } 346 if err == nil { 347 err = ctx.Err() 348 } 349 return err 350 } 351 352 func WaitForDRReplicationState(ctx context.Context, cluster VaultCluster, state consts.ReplicationState) error { 353 client := cluster.Nodes()[0].APIClient() 354 var health *api.HealthResponse 355 var err error 356 for ctx.Err() == nil { 357 health, err = client.Sys().HealthWithContext(ctx) 358 if err == nil && health.ReplicationDRMode == state.GetDRString() { 359 return nil 360 } 361 time.Sleep(500 * time.Millisecond) 362 } 363 if err == nil { 364 err = ctx.Err() 365 } 366 return err 367 } 368 369 func EnableDrPrimary(ctx context.Context, pri VaultCluster) error { 370 client := pri.Nodes()[0].APIClient() 371 _, err := client.Logical().Write("sys/replication/dr/primary/enable", nil) 372 if err != nil { 373 return err 374 } 375 376 err = WaitForDRReplicationState(ctx, pri, consts.ReplicationDRPrimary) 377 if err != nil { 378 return err 379 } 380 return WaitForActiveNodeAndPerfStandbys(ctx, pri) 381 } 382 383 func GenerateDRActivationToken(pri VaultCluster, id, secondaryPublicKey string) (string, error) { 384 client := pri.Nodes()[0].APIClient() 385 req := map[string]interface{}{ 386 "id": id, 387 } 388 if secondaryPublicKey != "" { 389 req["secondary_public_key"] = secondaryPublicKey 390 } 391 secret, err := client.Logical().Write("sys/replication/dr/primary/secondary-token", req) 392 if err != nil { 393 return "", err 394 } 395 396 if secondaryPublicKey != "" { 397 return secret.Data["token"].(string), nil 398 } 399 return secret.WrapInfo.Token, nil 400 } 401 402 func WaitForDRSecondary(ctx context.Context, pri, sec VaultCluster, skipPoisonPill bool) error { 403 if len(pri.GetRecoveryKeys()) > 0 { 404 sec.SetBarrierKeys(pri.GetRecoveryKeys()) 405 sec.SetRecoveryKeys(pri.GetRecoveryKeys()) 406 } else { 407 sec.SetBarrierKeys(pri.GetBarrierKeys()) 408 sec.SetRecoveryKeys(pri.GetBarrierKeys()) 409 } 410 411 if len(sec.Nodes()) > 1 { 412 if skipPoisonPill { 413 // As part of prepareSecondary on the active node the keyring is 414 // deleted from storage. Its absence can cause standbys to seal 415 // themselves. But it's not reliable, so we'll seal them 416 // ourselves to force the issue. 417 for i := range sec.Nodes()[1:] { 418 if err := SealNode(ctx, sec, i+1); err != nil { 419 return err 420 } 421 } 422 } else { 423 // We want to make sure we unseal all the nodes so we first need to wait 424 // until two of the nodes seal due to the poison pill being written 425 if err := WaitForNCoresSealed(ctx, sec, len(sec.Nodes())-1); err != nil { 426 return err 427 } 428 } 429 } 430 if _, err := WaitForActiveNode(ctx, sec); err != nil { 431 return err 432 } 433 434 // unseal nodes 435 for i := range sec.Nodes() { 436 if err := UnsealNode(ctx, sec, i); err != nil { 437 // Sometimes when we get here it's already unsealed on its own 438 // and then this fails for DR secondaries so check again 439 // The error is "path disabled in replication DR secondary mode". 440 if healthErr := NodeHealthy(ctx, sec, i); healthErr != nil { 441 // return the original error 442 return err 443 } 444 } 445 } 446 447 sec.SetRootToken(pri.GetRootToken()) 448 449 if _, err := WaitForActiveNode(ctx, sec); err != nil { 450 return err 451 } 452 453 return nil 454 } 455 456 func EnableDRSecondaryNoWait(ctx context.Context, sec VaultCluster, drToken string) error { 457 postData := map[string]interface{}{ 458 "token": drToken, 459 "ca_file": DefaultCAFile, 460 } 461 462 _, err := sec.Nodes()[0].APIClient().Logical().Write("sys/replication/dr/secondary/enable", postData) 463 if err != nil { 464 return err 465 } 466 467 return WaitForDRReplicationState(ctx, sec, consts.ReplicationDRSecondary) 468 } 469 470 func WaitForReplicationStatus(ctx context.Context, client *api.Client, dr bool, accept func(map[string]interface{}) error) error { 471 url := "sys/replication/performance/status" 472 if dr { 473 url = "sys/replication/dr/status" 474 } 475 476 var err error 477 var secret *api.Secret 478 for ctx.Err() == nil { 479 secret, err = client.Logical().Read(url) 480 if err == nil && secret != nil && secret.Data != nil { 481 if err = accept(secret.Data); err == nil { 482 return nil 483 } 484 } 485 time.Sleep(500 * time.Millisecond) 486 } 487 if err == nil { 488 err = ctx.Err() 489 } 490 491 return fmt.Errorf("unable to get acceptable replication status: error=%v secret=%#v", err, secret) 492 } 493 494 func WaitForDRReplicationWorking(ctx context.Context, pri, sec VaultCluster) error { 495 priClient := pri.Nodes()[0].APIClient() 496 secClient := sec.Nodes()[0].APIClient() 497 498 // Make sure we've entered stream-wals mode 499 err := WaitForReplicationStatus(ctx, secClient, true, func(secret map[string]interface{}) error { 500 state := secret["state"] 501 if state == string("stream-wals") { 502 return nil 503 } 504 return fmt.Errorf("expected stream-wals replication state, got %v", state) 505 }) 506 if err != nil { 507 return err 508 } 509 510 // Now write some data and make sure that we see last_remote_wal nonzero, i.e. 511 // at least one WAL has been streamed. 512 secret, err := priClient.Auth().Token().Create(&api.TokenCreateRequest{}) 513 if err != nil { 514 return err 515 } 516 517 // Revoke the token since some tests won't be happy to see it. 518 err = priClient.Auth().Token().RevokeTree(secret.Auth.ClientToken) 519 if err != nil { 520 return err 521 } 522 523 err = WaitForReplicationStatus(ctx, secClient, true, func(secret map[string]interface{}) error { 524 state := secret["state"] 525 if state != string("stream-wals") { 526 return fmt.Errorf("expected stream-wals replication state, got %v", state) 527 } 528 529 if secret["last_remote_wal"] != nil { 530 lastRemoteWal, _ := secret["last_remote_wal"].(json.Number).Int64() 531 if lastRemoteWal <= 0 { 532 return fmt.Errorf("expected last_remote_wal to be greater than zero") 533 } 534 return nil 535 } 536 537 return fmt.Errorf("replication seems to be still catching up, maybe need to wait more") 538 }) 539 if err != nil { 540 return err 541 } 542 return nil 543 } 544 545 func EnableDrSecondary(ctx context.Context, pri, sec VaultCluster, drToken string) error { 546 err := EnableDRSecondaryNoWait(ctx, sec, drToken) 547 if err != nil { 548 return err 549 } 550 551 if err = WaitForMatchingMerkleRoots(ctx, "sys/replication/dr/", pri, sec); err != nil { 552 return err 553 } 554 555 err = WaitForDRSecondary(ctx, pri, sec, false) 556 if err != nil { 557 return err 558 } 559 560 if err = WaitForDRReplicationWorking(ctx, pri, sec); err != nil { 561 return err 562 } 563 return nil 564 } 565 566 func SetupTwoClusterDRReplication(ctx context.Context, pri, sec VaultCluster) error { 567 if err := EnableDrPrimary(ctx, pri); err != nil { 568 return err 569 } 570 571 drToken, err := GenerateDRActivationToken(pri, sec.ClusterID(), "") 572 if err != nil { 573 return err 574 } 575 err = EnableDrSecondary(ctx, pri, sec, drToken) 576 if err != nil { 577 return err 578 } 579 return nil 580 } 581 582 func DemoteDRPrimary(client *api.Client) error { 583 _, err := client.Logical().Write("sys/replication/dr/primary/demote", map[string]interface{}{}) 584 return err 585 } 586 587 func createBatchToken(client *api.Client, path string) (string, error) { 588 // TODO: should these be more random in case more than one batch token needs to be created? 589 suffix := strings.Replace(path, "/", "", -1) 590 policyName := "path-batch-policy-" + suffix 591 roleName := "path-batch-role-" + suffix 592 593 rules := fmt.Sprintf(`path "%s" { capabilities = [ "read", "update" ] }`, path) 594 595 // create policy 596 _, err := client.Logical().Write("sys/policy/"+policyName, map[string]interface{}{ 597 "policy": rules, 598 }) 599 if err != nil { 600 return "", err 601 } 602 603 // create a role 604 _, err = client.Logical().Write("auth/token/roles/"+roleName, map[string]interface{}{ 605 "allowed_policies": policyName, 606 "orphan": true, 607 "renewable": false, 608 "token_type": "batch", 609 }) 610 if err != nil { 611 return "", err 612 } 613 614 // create batch token 615 secret, err := client.Logical().Write("auth/token/create/"+roleName, nil) 616 if err != nil { 617 return "", err 618 } 619 620 return secret.Auth.ClientToken, nil 621 } 622 623 // PromoteDRSecondaryWithBatchToken creates a batch token for DR promotion 624 // before promotion, it demotes the primary cluster. The primary cluster needs 625 // to be functional for the generation of the batch token 626 func PromoteDRSecondaryWithBatchToken(ctx context.Context, pri, sec VaultCluster) error { 627 client := pri.Nodes()[0].APIClient() 628 drToken, err := createBatchToken(client, "sys/replication/dr/secondary/promote") 629 if err != nil { 630 return err 631 } 632 633 err = DemoteDRPrimary(client) 634 if err != nil { 635 return err 636 } 637 638 return promoteDRSecondaryInternal(ctx, sec, drToken) 639 } 640 641 // PromoteDRSecondary generates a DR operation token on the secondary using 642 // unseal/recovery keys. Therefore, the primary cluster could potentially 643 // be out of service. 644 func PromoteDRSecondary(ctx context.Context, sec VaultCluster) error { 645 // generate DR operation token to do update primary on vC to point to 646 // the new perfSec primary vD 647 drToken, err := GenerateRoot(sec, GenerateRootDR) 648 if err != nil { 649 return err 650 } 651 return promoteDRSecondaryInternal(ctx, sec, drToken) 652 } 653 654 func promoteDRSecondaryInternal(ctx context.Context, sec VaultCluster, drToken string) error { 655 secClient := sec.Nodes()[0].APIClient() 656 657 // Allow retries of 503s, e.g.: replication is still catching up, 658 // try again later or provide the "force" argument 659 oldMaxRetries := secClient.MaxRetries() 660 secClient.SetMaxRetries(10) 661 defer secClient.SetMaxRetries(oldMaxRetries) 662 resp, err := secClient.Logical().Write("sys/replication/dr/secondary/promote", map[string]interface{}{ 663 "dr_operation_token": drToken, 664 }) 665 if err != nil { 666 return err 667 } 668 if resp == nil { 669 return fmt.Errorf("nil status response during DR promotion") 670 } 671 672 if _, err := WaitForActiveNode(ctx, sec); err != nil { 673 return err 674 } 675 676 return WaitForDRReplicationState(ctx, sec, consts.ReplicationDRPrimary) 677 } 678 679 func checkClusterAddr(ctx context.Context, pri, sec VaultCluster) error { 680 priClient := pri.Nodes()[0].APIClient() 681 priLeader, err := priClient.Sys().LeaderWithContext(ctx) 682 if err != nil { 683 return err 684 } 685 secClient := sec.Nodes()[0].APIClient() 686 endpoint := "sys/replication/dr/" 687 status, err := secClient.Logical().Read(endpoint + "status") 688 if err != nil { 689 return err 690 } 691 if status == nil || status.Data == nil { 692 return fmt.Errorf("got nil secret or data") 693 } 694 695 var priAddrs []string 696 err = mapstructure.Decode(status.Data["known_primary_cluster_addrs"], &priAddrs) 697 if err != nil { 698 return err 699 } 700 if !strutil.StrListContains(priAddrs, priLeader.LeaderClusterAddress) { 701 return fmt.Errorf("failed to fine the expected primary cluster address %v in known_primary_cluster_addrs", priLeader.LeaderClusterAddress) 702 } 703 704 return nil 705 } 706 707 func UpdatePrimary(ctx context.Context, pri, sec VaultCluster) error { 708 // generate DR operation token to do update primary on vC to point to 709 // the new perfSec primary vD 710 rootToken, err := GenerateRoot(sec, GenerateRootDR) 711 if err != nil { 712 return err 713 } 714 715 // secondary activation token 716 drToken, err := GenerateDRActivationToken(pri, sec.ClusterID(), "") 717 if err != nil { 718 return err 719 } 720 721 // update-primary on vC (new perfSec Dr secondary) to point to 722 // the new perfSec Dr primary 723 secClient := sec.Nodes()[0].APIClient() 724 resp, err := secClient.Logical().Write("sys/replication/dr/secondary/update-primary", map[string]interface{}{ 725 "dr_operation_token": rootToken, 726 "token": drToken, 727 "ca_file": DefaultCAFile, 728 }) 729 if err != nil { 730 return err 731 } 732 if resp == nil { 733 return fmt.Errorf("nil status response during update primary") 734 } 735 736 if _, err = WaitForActiveNode(ctx, sec); err != nil { 737 return err 738 } 739 740 if err = WaitForDRReplicationState(ctx, sec, consts.ReplicationDRSecondary); err != nil { 741 return err 742 } 743 744 if err = checkClusterAddr(ctx, pri, sec); err != nil { 745 return err 746 } 747 748 return nil 749 } 750 751 func SetupFourClusterReplication(ctx context.Context, pri, sec, pridr, secdr VaultCluster) error { 752 err := SetupTwoClusterPerfReplication(ctx, pri, sec) 753 if err != nil { 754 return err 755 } 756 err = SetupTwoClusterDRReplication(ctx, pri, pridr) 757 if err != nil { 758 return err 759 } 760 err = SetupTwoClusterDRReplication(ctx, sec, secdr) 761 if err != nil { 762 return err 763 } 764 return nil 765 } 766 767 type ReplicationSet struct { 768 // By convention, we recommend the following naming scheme for 769 // clusters in this map: 770 // A: perf primary 771 // B: primary's DR 772 // C: first perf secondary of A 773 // D: C's DR 774 // E: second perf secondary of A 775 // F: E's DR 776 // ... etc. 777 // 778 // We use generic names rather than role-specific names because 779 // that's less confusing when promotions take place that result in role 780 // changes. In other words, if D gets promoted to replace C as a perf 781 // secondary, and C gets demoted and updated to become D's DR secondary, 782 // they should maintain their initial names of D and C throughout. 783 Clusters map[string]VaultCluster 784 Builder ClusterBuilder 785 Logger hclog.Logger 786 CA *CA 787 } 788 789 type ClusterBuilder func(ctx context.Context, name string, logger hclog.Logger) (VaultCluster, error) 790 791 func NewReplicationSet(b ClusterBuilder) (*ReplicationSet, error) { 792 return &ReplicationSet{ 793 Clusters: map[string]VaultCluster{}, 794 Builder: b, 795 Logger: hclog.NewNullLogger(), 796 }, nil 797 } 798 799 func (r *ReplicationSet) StandardPerfReplication(ctx context.Context) error { 800 for _, name := range []string{"A", "C"} { 801 if _, ok := r.Clusters[name]; !ok { 802 cluster, err := r.Builder(ctx, name, r.Logger) 803 if err != nil { 804 return err 805 } 806 r.Clusters[name] = cluster 807 } 808 } 809 810 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 811 defer cancel() 812 err := SetupTwoClusterPerfReplication(ctx, r.Clusters["A"], r.Clusters["C"]) 813 if err != nil { 814 return err 815 } 816 817 return nil 818 } 819 820 func (r *ReplicationSet) StandardDRReplication(ctx context.Context) error { 821 for _, name := range []string{"A", "B"} { 822 if _, ok := r.Clusters[name]; !ok { 823 cluster, err := r.Builder(ctx, name, r.Logger) 824 if err != nil { 825 return err 826 } 827 r.Clusters[name] = cluster 828 } 829 } 830 831 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 832 defer cancel() 833 err := SetupTwoClusterDRReplication(ctx, r.Clusters["A"], r.Clusters["B"]) 834 if err != nil { 835 return err 836 } 837 838 return nil 839 } 840 841 func (r *ReplicationSet) GetFourReplicationCluster(ctx context.Context) error { 842 for _, name := range []string{"A", "B", "C", "D"} { 843 if _, ok := r.Clusters[name]; !ok { 844 cluster, err := r.Builder(ctx, name, r.Logger) 845 if err != nil { 846 return err 847 } 848 r.Clusters[name] = cluster 849 } 850 } 851 852 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 853 defer cancel() 854 err := SetupFourClusterReplication(ctx, r.Clusters["A"], r.Clusters["C"], r.Clusters["B"], r.Clusters["D"]) 855 if err != nil { 856 return err 857 } 858 return nil 859 } 860 861 func (r *ReplicationSet) Cleanup() { 862 for _, cluster := range r.Clusters { 863 cluster.Cleanup() 864 } 865 } 866 867 func WaitForPerfReplicationConnectionStatus(ctx context.Context, client *api.Client) error { 868 type Primary struct { 869 APIAddress string `mapstructure:"api_address"` 870 ConnectionStatus string `mapstructure:"connection_status"` 871 ClusterAddress string `mapstructure:"cluster_address"` 872 LastHeartbeat string `mapstructure:"last_heartbeat"` 873 } 874 type Status struct { 875 Primaries []Primary `mapstructure:"primaries"` 876 } 877 return WaitForPerfReplicationStatus(ctx, client, func(m map[string]interface{}) error { 878 var status Status 879 err := mapstructure.Decode(m, &status) 880 if err != nil { 881 return err 882 } 883 if len(status.Primaries) == 0 { 884 return fmt.Errorf("primaries is zero") 885 } 886 for _, v := range status.Primaries { 887 if v.ConnectionStatus == "connected" { 888 return nil 889 } 890 } 891 return fmt.Errorf("no primaries connected") 892 }) 893 } 894 895 func WaitForPerfReplicationStatus(ctx context.Context, client *api.Client, accept func(map[string]interface{}) error) error { 896 var err error 897 var secret *api.Secret 898 for ctx.Err() == nil { 899 secret, err = client.Logical().Read("sys/replication/performance/status") 900 if err == nil && secret != nil && secret.Data != nil { 901 if err = accept(secret.Data); err == nil { 902 return nil 903 } 904 } 905 time.Sleep(500 * time.Millisecond) 906 } 907 return fmt.Errorf("unable to get acceptable replication status within allotted time: error=%v secret=%#v", err, secret) 908 }