github.com/hashicorp/vault/sdk@v0.13.0/helper/testcluster/replication.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package testcluster 5 6 import ( 7 "context" 8 "encoding/json" 9 "fmt" 10 "reflect" 11 "strings" 12 "time" 13 14 "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/go-secure-stdlib/strutil" 16 "github.com/hashicorp/go-uuid" 17 "github.com/hashicorp/vault/api" 18 "github.com/hashicorp/vault/sdk/helper/consts" 19 "github.com/mitchellh/mapstructure" 20 ) 21 22 func GetPerformanceToken(pri VaultCluster, id, secondaryPublicKey string) (string, error) { 23 client := pri.Nodes()[0].APIClient() 24 req := map[string]interface{}{ 25 "id": id, 26 } 27 if secondaryPublicKey != "" { 28 req["secondary_public_key"] = secondaryPublicKey 29 } 30 secret, err := client.Logical().Write("sys/replication/performance/primary/secondary-token", req) 31 if err != nil { 32 return "", err 33 } 34 35 if secondaryPublicKey != "" { 36 return secret.Data["token"].(string), nil 37 } 38 return secret.WrapInfo.Token, nil 39 } 40 41 func EnablePerfPrimary(ctx context.Context, pri VaultCluster) error { 42 client := pri.Nodes()[0].APIClient() 43 _, err := client.Logical().WriteWithContext(ctx, "sys/replication/performance/primary/enable", nil) 44 if err != nil { 45 return fmt.Errorf("error enabling perf primary: %w", err) 46 } 47 48 err = WaitForPerfReplicationState(ctx, pri, consts.ReplicationPerformancePrimary) 49 if err != nil { 50 return fmt.Errorf("error waiting for perf primary to have the correct state: %w", err) 51 } 52 return WaitForActiveNodeAndPerfStandbys(ctx, pri) 53 } 54 55 func WaitForPerfReplicationState(ctx context.Context, cluster VaultCluster, state consts.ReplicationState) error { 56 client := cluster.Nodes()[0].APIClient() 57 var health *api.HealthResponse 58 var err error 59 for ctx.Err() == nil { 60 health, err = client.Sys().HealthWithContext(ctx) 61 if err == nil && health.ReplicationPerformanceMode == state.GetPerformanceString() { 62 return nil 63 } 64 time.Sleep(500 * time.Millisecond) 65 } 66 if err == nil { 67 err = ctx.Err() 68 } 69 return err 70 } 71 72 func EnablePerformanceSecondaryNoWait(ctx context.Context, perfToken string, pri, sec VaultCluster, updatePrimary bool) error { 73 postData := map[string]interface{}{ 74 "token": perfToken, 75 "ca_file": pri.GetCACertPEMFile(), 76 } 77 path := "sys/replication/performance/secondary/enable" 78 if updatePrimary { 79 path = "sys/replication/performance/secondary/update-primary" 80 } 81 err := WaitForActiveNodeAndPerfStandbys(ctx, sec) 82 if err != nil { 83 return err 84 } 85 _, err = sec.Nodes()[0].APIClient().Logical().Write(path, postData) 86 if err != nil { 87 return err 88 } 89 90 return WaitForPerfReplicationState(ctx, sec, consts.ReplicationPerformanceSecondary) 91 } 92 93 func EnablePerformanceSecondary(ctx context.Context, perfToken string, pri, sec VaultCluster, updatePrimary, skipPoisonPill bool) (string, error) { 94 if err := EnablePerformanceSecondaryNoWait(ctx, perfToken, pri, sec, updatePrimary); err != nil { 95 return "", err 96 } 97 if err := WaitForMatchingMerkleRoots(ctx, "sys/replication/performance/", pri, sec); err != nil { 98 return "", err 99 } 100 root, err := WaitForPerformanceSecondary(ctx, pri, sec, skipPoisonPill) 101 if err != nil { 102 return "", err 103 } 104 if err := WaitForPerfReplicationWorking(ctx, pri, sec); err != nil { 105 return "", err 106 } 107 return root, nil 108 } 109 110 func WaitForMatchingMerkleRoots(ctx context.Context, endpoint string, pri, sec VaultCluster) error { 111 return WaitForMatchingMerkleRootsClients(ctx, endpoint, pri.Nodes()[0].APIClient(), sec.Nodes()[0].APIClient()) 112 } 113 114 func WaitForMatchingMerkleRootsClients(ctx context.Context, endpoint string, pri, sec *api.Client) error { 115 getRoot := func(mode string, cli *api.Client) (string, error) { 116 status, err := cli.Logical().Read(endpoint + "status") 117 if err != nil { 118 return "", err 119 } 120 if status == nil || status.Data == nil || status.Data["mode"] == nil { 121 return "", fmt.Errorf("got nil secret or data") 122 } 123 if status.Data["mode"].(string) != mode { 124 return "", fmt.Errorf("expected mode=%s, got %s", mode, status.Data["mode"].(string)) 125 } 126 return status.Data["merkle_root"].(string), nil 127 } 128 129 var priRoot, secRoot string 130 var err error 131 genRet := func() error { 132 return fmt.Errorf("unequal merkle roots, pri=%s sec=%s, err=%w", priRoot, secRoot, err) 133 } 134 for ctx.Err() == nil { 135 secRoot, err = getRoot("secondary", sec) 136 if err != nil { 137 return genRet() 138 } 139 priRoot, err = getRoot("primary", pri) 140 if err != nil { 141 return genRet() 142 } 143 144 if reflect.DeepEqual(priRoot, secRoot) { 145 return nil 146 } 147 time.Sleep(time.Second) 148 } 149 150 return fmt.Errorf("roots did not become equal") 151 } 152 153 func WaitForPerformanceWAL(ctx context.Context, pri, sec VaultCluster) error { 154 endpoint := "sys/replication/performance/" 155 if err := WaitForMatchingMerkleRoots(ctx, endpoint, pri, sec); err != nil { 156 return nil 157 } 158 getWAL := func(mode, walKey string, cli *api.Client) (int64, error) { 159 status, err := cli.Logical().Read(endpoint + "status") 160 if err != nil { 161 return 0, err 162 } 163 if status == nil || status.Data == nil || status.Data["mode"] == nil { 164 return 0, fmt.Errorf("got nil secret or data") 165 } 166 if status.Data["mode"].(string) != mode { 167 return 0, fmt.Errorf("expected mode=%s, got %s", mode, status.Data["mode"].(string)) 168 } 169 return status.Data[walKey].(json.Number).Int64() 170 } 171 172 secClient := sec.Nodes()[0].APIClient() 173 priClient := pri.Nodes()[0].APIClient() 174 for ctx.Err() == nil { 175 secLastRemoteWAL, err := getWAL("secondary", "last_remote_wal", secClient) 176 if err != nil { 177 return err 178 } 179 priLastPerfWAL, err := getWAL("primary", "last_performance_wal", priClient) 180 if err != nil { 181 return err 182 } 183 184 if secLastRemoteWAL >= priLastPerfWAL { 185 return nil 186 } 187 time.Sleep(time.Second) 188 } 189 190 return fmt.Errorf("performance WALs on the secondary did not catch up with the primary, context err: %w", ctx.Err()) 191 } 192 193 func WaitForPerformanceSecondary(ctx context.Context, pri, sec VaultCluster, skipPoisonPill bool) (string, error) { 194 if len(pri.GetRecoveryKeys()) > 0 { 195 sec.SetBarrierKeys(pri.GetRecoveryKeys()) 196 sec.SetRecoveryKeys(pri.GetRecoveryKeys()) 197 } else { 198 sec.SetBarrierKeys(pri.GetBarrierKeys()) 199 sec.SetRecoveryKeys(pri.GetBarrierKeys()) 200 } 201 202 if len(sec.Nodes()) > 1 { 203 if skipPoisonPill { 204 // As part of prepareSecondary on the active node the keyring is 205 // deleted from storage. Its absence can cause standbys to seal 206 // themselves. But it's not reliable, so we'll seal them 207 // ourselves to force the issue. 208 for i := range sec.Nodes()[1:] { 209 if err := SealNode(ctx, sec, i+1); err != nil { 210 return "", err 211 } 212 } 213 } else { 214 // We want to make sure we unseal all the nodes so we first need to wait 215 // until two of the nodes seal due to the poison pill being written 216 if err := WaitForNCoresSealed(ctx, sec, len(sec.Nodes())-1); err != nil { 217 return "", err 218 } 219 } 220 } 221 if _, err := WaitForActiveNode(ctx, sec); err != nil { 222 return "", err 223 } 224 if err := UnsealAllNodes(ctx, sec); err != nil { 225 return "", err 226 } 227 228 perfSecondaryRootToken, err := GenerateRoot(sec, GenerateRootRegular) 229 if err != nil { 230 return "", err 231 } 232 sec.SetRootToken(perfSecondaryRootToken) 233 if err := WaitForActiveNodeAndPerfStandbys(ctx, sec); err != nil { 234 return "", err 235 } 236 237 return perfSecondaryRootToken, nil 238 } 239 240 func WaitForPerfReplicationWorking(ctx context.Context, pri, sec VaultCluster) error { 241 priActiveIdx, err := WaitForActiveNode(ctx, pri) 242 if err != nil { 243 return err 244 } 245 secActiveIdx, err := WaitForActiveNode(ctx, sec) 246 if err != nil { 247 return err 248 } 249 250 priClient, secClient := pri.Nodes()[priActiveIdx].APIClient(), sec.Nodes()[secActiveIdx].APIClient() 251 mountPoint, err := uuid.GenerateUUID() 252 if err != nil { 253 return err 254 } 255 err = priClient.Sys().Mount(mountPoint, &api.MountInput{ 256 Type: "kv", 257 Local: false, 258 }) 259 if err != nil { 260 return fmt.Errorf("unable to mount KV engine on primary") 261 } 262 263 path := mountPoint + "/foo" 264 _, err = priClient.Logical().Write(path, map[string]interface{}{ 265 "bar": 1, 266 }) 267 if err != nil { 268 return fmt.Errorf("unable to write KV on primary, path=%s", path) 269 } 270 271 for ctx.Err() == nil { 272 var secret *api.Secret 273 secret, err = secClient.Logical().Read(path) 274 if err == nil && secret != nil { 275 err = priClient.Sys().Unmount(mountPoint) 276 if err != nil { 277 return fmt.Errorf("unable to unmount KV engine on primary") 278 } 279 return nil 280 } 281 time.Sleep(100 * time.Millisecond) 282 } 283 if err == nil { 284 err = ctx.Err() 285 } 286 return fmt.Errorf("unable to read replicated KV on secondary, path=%s, err=%v", path, err) 287 } 288 289 func SetupTwoClusterPerfReplication(ctx context.Context, pri, sec VaultCluster) error { 290 if err := EnablePerfPrimary(ctx, pri); err != nil { 291 return fmt.Errorf("failed to enable perf primary: %w", err) 292 } 293 perfToken, err := GetPerformanceToken(pri, sec.ClusterID(), "") 294 if err != nil { 295 return fmt.Errorf("failed to get performance token from perf primary: %w", err) 296 } 297 298 _, err = EnablePerformanceSecondary(ctx, perfToken, pri, sec, false, false) 299 if err != nil { 300 return fmt.Errorf("failed to enable perf secondary: %w", err) 301 } 302 return nil 303 } 304 305 // PassiveWaitForActiveNodeAndPerfStandbys should be used instead of 306 // WaitForActiveNodeAndPerfStandbys when you don't want to do any writes 307 // as a side-effect. This returns perfStandby nodes in the cluster and 308 // an error. 309 func PassiveWaitForActiveNodeAndPerfStandbys(ctx context.Context, pri VaultCluster) (VaultClusterNode, []VaultClusterNode, error) { 310 leaderNode, standbys, err := GetActiveAndStandbys(ctx, pri) 311 if err != nil { 312 return nil, nil, fmt.Errorf("failed to derive standby nodes, %w", err) 313 } 314 315 for i, node := range standbys { 316 client := node.APIClient() 317 // Make sure we get perf standby nodes 318 if err = EnsureCoreIsPerfStandby(ctx, client); err != nil { 319 return nil, nil, fmt.Errorf("standby node %d is not a perfStandby, %w", i, err) 320 } 321 } 322 323 return leaderNode, standbys, nil 324 } 325 326 func GetActiveAndStandbys(ctx context.Context, cluster VaultCluster) (VaultClusterNode, []VaultClusterNode, error) { 327 var leaderIndex int 328 var err error 329 if leaderIndex, err = WaitForActiveNode(ctx, cluster); err != nil { 330 return nil, nil, err 331 } 332 333 var leaderNode VaultClusterNode 334 var nodes []VaultClusterNode 335 for i, node := range cluster.Nodes() { 336 if i == leaderIndex { 337 leaderNode = node 338 continue 339 } 340 nodes = append(nodes, node) 341 } 342 343 return leaderNode, nodes, nil 344 } 345 346 func EnsureCoreIsPerfStandby(ctx context.Context, client *api.Client) error { 347 var err error 348 var health *api.HealthResponse 349 for ctx.Err() == nil { 350 health, err = client.Sys().HealthWithContext(ctx) 351 if err == nil && health.PerformanceStandby { 352 return nil 353 } 354 time.Sleep(time.Millisecond * 500) 355 } 356 if err == nil { 357 err = ctx.Err() 358 } 359 return err 360 } 361 362 func WaitForDRReplicationState(ctx context.Context, cluster VaultCluster, state consts.ReplicationState) error { 363 client := cluster.Nodes()[0].APIClient() 364 var health *api.HealthResponse 365 var err error 366 for ctx.Err() == nil { 367 health, err = client.Sys().HealthWithContext(ctx) 368 if err == nil && health.ReplicationDRMode == state.GetDRString() { 369 return nil 370 } 371 time.Sleep(500 * time.Millisecond) 372 } 373 if err == nil { 374 err = ctx.Err() 375 } 376 return err 377 } 378 379 func EnableDrPrimary(ctx context.Context, pri VaultCluster) error { 380 client := pri.Nodes()[0].APIClient() 381 _, err := client.Logical().Write("sys/replication/dr/primary/enable", nil) 382 if err != nil { 383 return err 384 } 385 386 err = WaitForDRReplicationState(ctx, pri, consts.ReplicationDRPrimary) 387 if err != nil { 388 return err 389 } 390 return WaitForActiveNodeAndPerfStandbys(ctx, pri) 391 } 392 393 func GenerateDRActivationToken(pri VaultCluster, id, secondaryPublicKey string) (string, error) { 394 client := pri.Nodes()[0].APIClient() 395 req := map[string]interface{}{ 396 "id": id, 397 } 398 if secondaryPublicKey != "" { 399 req["secondary_public_key"] = secondaryPublicKey 400 } 401 secret, err := client.Logical().Write("sys/replication/dr/primary/secondary-token", req) 402 if err != nil { 403 return "", err 404 } 405 406 if secondaryPublicKey != "" { 407 return secret.Data["token"].(string), nil 408 } 409 return secret.WrapInfo.Token, nil 410 } 411 412 func WaitForDRSecondary(ctx context.Context, pri, sec VaultCluster, skipPoisonPill bool) error { 413 if len(pri.GetRecoveryKeys()) > 0 { 414 sec.SetBarrierKeys(pri.GetRecoveryKeys()) 415 sec.SetRecoveryKeys(pri.GetRecoveryKeys()) 416 } else { 417 sec.SetBarrierKeys(pri.GetBarrierKeys()) 418 sec.SetRecoveryKeys(pri.GetBarrierKeys()) 419 } 420 421 if len(sec.Nodes()) > 1 { 422 if skipPoisonPill { 423 // As part of prepareSecondary on the active node the keyring is 424 // deleted from storage. Its absence can cause standbys to seal 425 // themselves. But it's not reliable, so we'll seal them 426 // ourselves to force the issue. 427 for i := range sec.Nodes()[1:] { 428 if err := SealNode(ctx, sec, i+1); err != nil { 429 return err 430 } 431 } 432 } else { 433 // We want to make sure we unseal all the nodes so we first need to wait 434 // until two of the nodes seal due to the poison pill being written 435 if err := WaitForNCoresSealed(ctx, sec, len(sec.Nodes())-1); err != nil { 436 return err 437 } 438 } 439 } 440 if _, err := WaitForActiveNode(ctx, sec); err != nil { 441 return err 442 } 443 444 // unseal nodes 445 for i := range sec.Nodes() { 446 if err := UnsealNode(ctx, sec, i); err != nil { 447 // Sometimes when we get here it's already unsealed on its own 448 // and then this fails for DR secondaries so check again 449 // The error is "path disabled in replication DR secondary mode". 450 if healthErr := NodeHealthy(ctx, sec, i); healthErr != nil { 451 // return the original error 452 return err 453 } 454 } 455 } 456 457 sec.SetRootToken(pri.GetRootToken()) 458 459 if _, err := WaitForActiveNode(ctx, sec); err != nil { 460 return err 461 } 462 463 return nil 464 } 465 466 func EnableDRSecondaryNoWait(ctx context.Context, sec VaultCluster, drToken string) error { 467 postData := map[string]interface{}{ 468 "token": drToken, 469 "ca_file": sec.GetCACertPEMFile(), 470 } 471 472 _, err := sec.Nodes()[0].APIClient().Logical().Write("sys/replication/dr/secondary/enable", postData) 473 if err != nil { 474 return err 475 } 476 477 return WaitForDRReplicationState(ctx, sec, consts.ReplicationDRSecondary) 478 } 479 480 func WaitForReplicationStatus(ctx context.Context, client *api.Client, dr bool, accept func(map[string]interface{}) error) error { 481 url := "sys/replication/performance/status" 482 if dr { 483 url = "sys/replication/dr/status" 484 } 485 486 var err error 487 var secret *api.Secret 488 for ctx.Err() == nil { 489 secret, err = client.Logical().Read(url) 490 if err == nil && secret != nil && secret.Data != nil { 491 if err = accept(secret.Data); err == nil { 492 return nil 493 } 494 } 495 time.Sleep(500 * time.Millisecond) 496 } 497 if err == nil { 498 err = ctx.Err() 499 } 500 501 return fmt.Errorf("unable to get acceptable replication status: error=%v secret=%#v", err, secret) 502 } 503 504 func WaitForDRReplicationWorking(ctx context.Context, pri, sec VaultCluster) error { 505 priClient := pri.Nodes()[0].APIClient() 506 secClient := sec.Nodes()[0].APIClient() 507 508 // Make sure we've entered stream-wals mode 509 err := WaitForReplicationStatus(ctx, secClient, true, func(secret map[string]interface{}) error { 510 state := secret["state"] 511 if state == string("stream-wals") { 512 return nil 513 } 514 return fmt.Errorf("expected stream-wals replication state, got %v", state) 515 }) 516 if err != nil { 517 return err 518 } 519 520 // Now write some data and make sure that we see last_remote_wal nonzero, i.e. 521 // at least one WAL has been streamed. 522 secret, err := priClient.Auth().Token().Create(&api.TokenCreateRequest{}) 523 if err != nil { 524 return err 525 } 526 527 // Revoke the token since some tests won't be happy to see it. 528 err = priClient.Auth().Token().RevokeTree(secret.Auth.ClientToken) 529 if err != nil { 530 return err 531 } 532 533 err = WaitForReplicationStatus(ctx, secClient, true, func(secret map[string]interface{}) error { 534 state := secret["state"] 535 if state != string("stream-wals") { 536 return fmt.Errorf("expected stream-wals replication state, got %v", state) 537 } 538 539 if secret["last_remote_wal"] != nil { 540 lastRemoteWal, _ := secret["last_remote_wal"].(json.Number).Int64() 541 if lastRemoteWal <= 0 { 542 return fmt.Errorf("expected last_remote_wal to be greater than zero") 543 } 544 return nil 545 } 546 547 return fmt.Errorf("replication seems to be still catching up, maybe need to wait more") 548 }) 549 if err != nil { 550 return err 551 } 552 return nil 553 } 554 555 func EnableDrSecondary(ctx context.Context, pri, sec VaultCluster, drToken string) error { 556 err := EnableDRSecondaryNoWait(ctx, sec, drToken) 557 if err != nil { 558 return err 559 } 560 561 if err = WaitForMatchingMerkleRoots(ctx, "sys/replication/dr/", pri, sec); err != nil { 562 return err 563 } 564 565 err = WaitForDRSecondary(ctx, pri, sec, false) 566 if err != nil { 567 return err 568 } 569 570 if err = WaitForDRReplicationWorking(ctx, pri, sec); err != nil { 571 return err 572 } 573 return nil 574 } 575 576 func SetupTwoClusterDRReplication(ctx context.Context, pri, sec VaultCluster) error { 577 if err := EnableDrPrimary(ctx, pri); err != nil { 578 return err 579 } 580 581 drToken, err := GenerateDRActivationToken(pri, sec.ClusterID(), "") 582 if err != nil { 583 return err 584 } 585 err = EnableDrSecondary(ctx, pri, sec, drToken) 586 if err != nil { 587 return err 588 } 589 return nil 590 } 591 592 func DemoteDRPrimary(client *api.Client) error { 593 _, err := client.Logical().Write("sys/replication/dr/primary/demote", map[string]interface{}{}) 594 return err 595 } 596 597 func createBatchToken(client *api.Client, path string) (string, error) { 598 // TODO: should these be more random in case more than one batch token needs to be created? 599 suffix := strings.Replace(path, "/", "", -1) 600 policyName := "path-batch-policy-" + suffix 601 roleName := "path-batch-role-" + suffix 602 603 rules := fmt.Sprintf(`path "%s" { capabilities = [ "read", "update" ] }`, path) 604 605 // create policy 606 _, err := client.Logical().Write("sys/policy/"+policyName, map[string]interface{}{ 607 "policy": rules, 608 }) 609 if err != nil { 610 return "", err 611 } 612 613 // create a role 614 _, err = client.Logical().Write("auth/token/roles/"+roleName, map[string]interface{}{ 615 "allowed_policies": policyName, 616 "orphan": true, 617 "renewable": false, 618 "token_type": "batch", 619 }) 620 if err != nil { 621 return "", err 622 } 623 624 // create batch token 625 secret, err := client.Logical().Write("auth/token/create/"+roleName, nil) 626 if err != nil { 627 return "", err 628 } 629 630 return secret.Auth.ClientToken, nil 631 } 632 633 // PromoteDRSecondaryWithBatchToken creates a batch token for DR promotion 634 // before promotion, it demotes the primary cluster. The primary cluster needs 635 // to be functional for the generation of the batch token 636 func PromoteDRSecondaryWithBatchToken(ctx context.Context, pri, sec VaultCluster) error { 637 client := pri.Nodes()[0].APIClient() 638 drToken, err := createBatchToken(client, "sys/replication/dr/secondary/promote") 639 if err != nil { 640 return err 641 } 642 643 err = DemoteDRPrimary(client) 644 if err != nil { 645 return err 646 } 647 648 return promoteDRSecondaryInternal(ctx, sec, drToken) 649 } 650 651 // PromoteDRSecondary generates a DR operation token on the secondary using 652 // unseal/recovery keys. Therefore, the primary cluster could potentially 653 // be out of service. 654 func PromoteDRSecondary(ctx context.Context, sec VaultCluster) error { 655 // generate DR operation token to do update primary on vC to point to 656 // the new perfSec primary vD 657 drToken, err := GenerateRoot(sec, GenerateRootDR) 658 if err != nil { 659 return err 660 } 661 return promoteDRSecondaryInternal(ctx, sec, drToken) 662 } 663 664 func promoteDRSecondaryInternal(ctx context.Context, sec VaultCluster, drToken string) error { 665 secClient := sec.Nodes()[0].APIClient() 666 667 // Allow retries of 503s, e.g.: replication is still catching up, 668 // try again later or provide the "force" argument 669 oldMaxRetries := secClient.MaxRetries() 670 secClient.SetMaxRetries(10) 671 defer secClient.SetMaxRetries(oldMaxRetries) 672 resp, err := secClient.Logical().Write("sys/replication/dr/secondary/promote", map[string]interface{}{ 673 "dr_operation_token": drToken, 674 }) 675 if err != nil { 676 return err 677 } 678 if resp == nil { 679 return fmt.Errorf("nil status response during DR promotion") 680 } 681 682 if _, err := WaitForActiveNode(ctx, sec); err != nil { 683 return err 684 } 685 686 return WaitForDRReplicationState(ctx, sec, consts.ReplicationDRPrimary) 687 } 688 689 func checkClusterAddr(ctx context.Context, pri, sec VaultCluster) error { 690 priClient := pri.Nodes()[0].APIClient() 691 priLeader, err := priClient.Sys().LeaderWithContext(ctx) 692 if err != nil { 693 return err 694 } 695 secClient := sec.Nodes()[0].APIClient() 696 endpoint := "sys/replication/dr/" 697 status, err := secClient.Logical().Read(endpoint + "status") 698 if err != nil { 699 return err 700 } 701 if status == nil || status.Data == nil { 702 return fmt.Errorf("got nil secret or data") 703 } 704 705 var priAddrs []string 706 err = mapstructure.Decode(status.Data["known_primary_cluster_addrs"], &priAddrs) 707 if err != nil { 708 return err 709 } 710 if !strutil.StrListContains(priAddrs, priLeader.LeaderClusterAddress) { 711 return fmt.Errorf("failed to fine the expected primary cluster address %v in known_primary_cluster_addrs", priLeader.LeaderClusterAddress) 712 } 713 714 return nil 715 } 716 717 func UpdatePrimary(ctx context.Context, pri, sec VaultCluster) error { 718 // generate DR operation token to do update primary on vC to point to 719 // the new perfSec primary vD 720 rootToken, err := GenerateRoot(sec, GenerateRootDR) 721 if err != nil { 722 return err 723 } 724 725 // secondary activation token 726 drToken, err := GenerateDRActivationToken(pri, sec.ClusterID(), "") 727 if err != nil { 728 return err 729 } 730 731 // update-primary on vC (new perfSec Dr secondary) to point to 732 // the new perfSec Dr primary 733 secClient := sec.Nodes()[0].APIClient() 734 resp, err := secClient.Logical().Write("sys/replication/dr/secondary/update-primary", map[string]interface{}{ 735 "dr_operation_token": rootToken, 736 "token": drToken, 737 "ca_file": sec.GetCACertPEMFile(), 738 }) 739 if err != nil { 740 return err 741 } 742 if resp == nil { 743 return fmt.Errorf("nil status response during update primary") 744 } 745 746 if _, err = WaitForActiveNode(ctx, sec); err != nil { 747 return err 748 } 749 750 if err = WaitForDRReplicationState(ctx, sec, consts.ReplicationDRSecondary); err != nil { 751 return err 752 } 753 754 if err = checkClusterAddr(ctx, pri, sec); err != nil { 755 return err 756 } 757 758 return nil 759 } 760 761 func SetupFourClusterReplication(ctx context.Context, pri, sec, pridr, secdr VaultCluster) error { 762 err := SetupTwoClusterPerfReplication(ctx, pri, sec) 763 if err != nil { 764 return err 765 } 766 err = SetupTwoClusterDRReplication(ctx, pri, pridr) 767 if err != nil { 768 return err 769 } 770 err = SetupTwoClusterDRReplication(ctx, sec, secdr) 771 if err != nil { 772 return err 773 } 774 return nil 775 } 776 777 type ReplicationSet struct { 778 // By convention, we recommend the following naming scheme for 779 // clusters in this map: 780 // A: perf primary 781 // B: primary's DR 782 // C: first perf secondary of A 783 // D: C's DR 784 // E: second perf secondary of A 785 // F: E's DR 786 // ... etc. 787 // 788 // We use generic names rather than role-specific names because 789 // that's less confusing when promotions take place that result in role 790 // changes. In other words, if D gets promoted to replace C as a perf 791 // secondary, and C gets demoted and updated to become D's DR secondary, 792 // they should maintain their initial names of D and C throughout. 793 Clusters map[string]VaultCluster 794 Builder ClusterBuilder 795 Logger hclog.Logger 796 CA *CA 797 } 798 799 type ClusterBuilder func(ctx context.Context, name string, logger hclog.Logger) (VaultCluster, error) 800 801 func NewReplicationSet(b ClusterBuilder) (*ReplicationSet, error) { 802 return &ReplicationSet{ 803 Clusters: map[string]VaultCluster{}, 804 Builder: b, 805 Logger: hclog.NewNullLogger(), 806 }, nil 807 } 808 809 func (r *ReplicationSet) StandardPerfReplication(ctx context.Context) error { 810 for _, name := range []string{"A", "C"} { 811 if _, ok := r.Clusters[name]; !ok { 812 cluster, err := r.Builder(ctx, name, r.Logger) 813 if err != nil { 814 return err 815 } 816 r.Clusters[name] = cluster 817 } 818 } 819 820 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 821 defer cancel() 822 err := SetupTwoClusterPerfReplication(ctx, r.Clusters["A"], r.Clusters["C"]) 823 if err != nil { 824 return err 825 } 826 827 return nil 828 } 829 830 func (r *ReplicationSet) StandardDRReplication(ctx context.Context) error { 831 for _, name := range []string{"A", "B"} { 832 if _, ok := r.Clusters[name]; !ok { 833 cluster, err := r.Builder(ctx, name, r.Logger) 834 if err != nil { 835 return err 836 } 837 r.Clusters[name] = cluster 838 } 839 } 840 841 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 842 defer cancel() 843 err := SetupTwoClusterDRReplication(ctx, r.Clusters["A"], r.Clusters["B"]) 844 if err != nil { 845 return err 846 } 847 848 return nil 849 } 850 851 func (r *ReplicationSet) GetFourReplicationCluster(ctx context.Context) error { 852 for _, name := range []string{"A", "B", "C", "D"} { 853 if _, ok := r.Clusters[name]; !ok { 854 cluster, err := r.Builder(ctx, name, r.Logger) 855 if err != nil { 856 return err 857 } 858 r.Clusters[name] = cluster 859 } 860 } 861 862 ctx, cancel := context.WithTimeout(context.Background(), time.Minute) 863 defer cancel() 864 err := SetupFourClusterReplication(ctx, r.Clusters["A"], r.Clusters["C"], r.Clusters["B"], r.Clusters["D"]) 865 if err != nil { 866 return err 867 } 868 return nil 869 } 870 871 func (r *ReplicationSet) Cleanup() { 872 for _, cluster := range r.Clusters { 873 cluster.Cleanup() 874 } 875 } 876 877 func WaitForPerfReplicationConnectionStatus(ctx context.Context, client *api.Client) error { 878 type Primary struct { 879 APIAddress string `mapstructure:"api_address"` 880 ConnectionStatus string `mapstructure:"connection_status"` 881 ClusterAddress string `mapstructure:"cluster_address"` 882 LastHeartbeat string `mapstructure:"last_heartbeat"` 883 } 884 type Status struct { 885 Primaries []Primary `mapstructure:"primaries"` 886 } 887 return WaitForPerfReplicationStatus(ctx, client, func(m map[string]interface{}) error { 888 var status Status 889 err := mapstructure.Decode(m, &status) 890 if err != nil { 891 return err 892 } 893 if len(status.Primaries) == 0 { 894 return fmt.Errorf("primaries is zero") 895 } 896 for _, v := range status.Primaries { 897 if v.ConnectionStatus == "connected" { 898 return nil 899 } 900 } 901 return fmt.Errorf("no primaries connected") 902 }) 903 } 904 905 func WaitForPerfReplicationStatus(ctx context.Context, client *api.Client, accept func(map[string]interface{}) error) error { 906 var err error 907 var secret *api.Secret 908 for ctx.Err() == nil { 909 secret, err = client.Logical().Read("sys/replication/performance/status") 910 if err == nil && secret != nil && secret.Data != nil { 911 if err = accept(secret.Data); err == nil { 912 return nil 913 } 914 } 915 time.Sleep(500 * time.Millisecond) 916 } 917 return fmt.Errorf("unable to get acceptable replication status within allotted time: error=%v secret=%#v", err, secret) 918 }