sigs.k8s.io/cluster-api@v1.6.3/cmd/clusterctl/client/cluster/mover.go

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package cluster
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"time"
    25  
    26  	"github.com/pkg/errors"
    27  	corev1 "k8s.io/api/core/v1"
    28  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    29  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    30  	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
    31  	"k8s.io/apimachinery/pkg/runtime"
    32  	"k8s.io/apimachinery/pkg/types"
    33  	kerrors "k8s.io/apimachinery/pkg/util/errors"
    34  	"k8s.io/apimachinery/pkg/util/sets"
    35  	"k8s.io/apimachinery/pkg/util/version"
    36  	"k8s.io/apimachinery/pkg/util/wait"
    37  	"k8s.io/klog/v2"
    38  	"sigs.k8s.io/controller-runtime/pkg/client"
    39  
    40  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    41  	clusterctlv1 "sigs.k8s.io/cluster-api/cmd/clusterctl/api/v1alpha3"
    42  	logf "sigs.k8s.io/cluster-api/cmd/clusterctl/log"
    43  	"sigs.k8s.io/cluster-api/util/conditions"
    44  	"sigs.k8s.io/cluster-api/util/patch"
    45  	"sigs.k8s.io/cluster-api/util/yaml"
    46  )
    47  
    48  // ResourceMutatorFunc holds the type for mutators to be applied on resources during a move operation.
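        //
        // For instance, a mutator that relocates namespaced objects into a different target
        // namespace could look like the following (illustrative sketch only; namespaceMutator
        // and targetNamespace are hypothetical names, not part of this package):
        //
        //	func namespaceMutator(targetNamespace string) ResourceMutatorFunc {
        //		return func(u *unstructured.Unstructured) error {
        //			if u.GetNamespace() != "" {
        //				u.SetNamespace(targetNamespace)
        //			}
        //			return nil
        //		}
        //	}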
    49  type ResourceMutatorFunc func(u *unstructured.Unstructured) error
    50  
    51  // ObjectMover defines methods for moving Cluster API objects to another management cluster.
    52  type ObjectMover interface {
    53  	// Move moves all the Cluster API objects existing in a namespace (or from all the namespaces if empty) to a target management cluster.
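        	//
        	// A typical invocation within this package looks roughly like the following
        	// (illustrative sketch; fromCluster, toCluster and the namespace are placeholders
        	// assumed to be provided by the clusterctl client layer):
        	//
        	//	mover := newObjectMover(fromCluster.Proxy(), fromCluster.ProviderInventory())
        	//	if err := mover.Move(ctx, "capi-workloads", toCluster, false); err != nil {
        	//		// handle the error
        	//	}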
    54  	Move(ctx context.Context, namespace string, toCluster Client, dryRun bool, mutators ...ResourceMutatorFunc) error
    55  
    56  	// ToDirectory writes all the Cluster API objects existing in a namespace (or from all the namespaces if empty) to a target directory.
    57  	ToDirectory(ctx context.Context, namespace string, directory string) error
    58  
    59  	// FromDirectory reads all the Cluster API objects existing in a configured directory and restores them to a target management cluster.
    60  	FromDirectory(ctx context.Context, toCluster Client, directory string) error
    61  }
    62  
    63  // objectMover implements the ObjectMover interface.
    64  type objectMover struct {
    65  	fromProxy             Proxy
    66  	fromProviderInventory InventoryClient
    67  	dryRun                bool
    68  }
    69  
    70  // ensure objectMover implements the ObjectMover interface.
    71  var _ ObjectMover = &objectMover{}
    72  
    73  func (o *objectMover) Move(ctx context.Context, namespace string, toCluster Client, dryRun bool, mutators ...ResourceMutatorFunc) error {
    74  	log := logf.Log
    75  	log.Info("Performing move...")
    76  	o.dryRun = dryRun
    77  	if o.dryRun {
    78  		log.Info("********************************************************")
    79  		log.Info("This is a dry-run move, will not perform any real action")
    80  		log.Info("********************************************************")
    81  	}
    82  
    83  	// Checks that all the required providers are in place in the target cluster.
    84  	if !o.dryRun {
    85  		if err := o.checkTargetProviders(ctx, toCluster.ProviderInventory()); err != nil {
    86  			return errors.Wrap(err, "failed to check providers in target cluster")
    87  		}
    88  	}
    89  
    90  	objectGraph, err := o.getObjectGraph(ctx, namespace)
    91  	if err != nil {
    92  		return errors.Wrap(err, "failed to get object graph")
    93  	}
    94  
    95  	// Move the objects to the target cluster.
    96  	var proxy Proxy
    97  	if !o.dryRun {
    98  		proxy = toCluster.Proxy()
    99  	}
   100  
   101  	return o.move(ctx, objectGraph, proxy, mutators...)
   102  }
   103  
   104  func (o *objectMover) ToDirectory(ctx context.Context, namespace string, directory string) error {
   105  	log := logf.Log
   106  	log.Info("Moving to directory...")
   107  
   108  	objectGraph, err := o.getObjectGraph(ctx, namespace)
   109  	if err != nil {
   110  		return errors.Wrap(err, "failed to get object graph")
   111  	}
   112  
   113  	return o.toDirectory(ctx, objectGraph, directory)
   114  }
   115  
   116  func (o *objectMover) FromDirectory(ctx context.Context, toCluster Client, directory string) error {
   117  	log := logf.Log
   118  	log.Info("Moving from directory...")
   119  
   120  	// Build an empty object graph used for the fromDirectory sequence; it is not tied to a specific namespace.
   121  	objectGraph := newObjectGraph(o.fromProxy, o.fromProviderInventory)
   122  
   123  	// Gets all the types defined by the CRDs installed by clusterctl plus the ConfigMap/Secret core types.
   124  	err := objectGraph.getDiscoveryTypes(ctx)
   125  	if err != nil {
   126  		return errors.Wrap(err, "failed to retrieve discovery types")
   127  	}
   128  
   129  	objs, err := o.filesToObjs(directory)
   130  	if err != nil {
   131  		return errors.Wrap(err, "failed to process object files")
   132  	}
   133  
   134  	for i := range objs {
   135  		if err = objectGraph.addRestoredObj(&objs[i]); err != nil {
   136  			return err
   137  		}
   138  	}
   139  
   140  	// Completes rebuilding the graph from file by searching for soft ownership relations such as secrets linked to the cluster
   141  	// by a naming convention (without any explicit OwnerReference).
   142  	objectGraph.setSoftOwnership()
   143  
   144  	// Completes the graph by setting for each node the list of tenants the node belongs to.
   145  	objectGraph.setTenants()
   146  
   147  	// Check whether nodes are not included in the GVKs considered for fromDirectory.
   148  	objectGraph.checkVirtualNode()
   149  
   150  	// Restore the objects to the target cluster.
   151  	proxy := toCluster.Proxy()
   152  
   153  	return o.fromDirectory(ctx, objectGraph, proxy)
   154  }
   155  
   156  func (o *objectMover) filesToObjs(dir string) ([]unstructured.Unstructured, error) {
   157  	log := logf.Log
   158  	log.Info(fmt.Sprintf("Restoring files from %s", dir))
   159  
   160  	files, err := os.ReadDir(dir)
   161  	if err != nil {
   162  		return nil, err
   163  	}
   164  
   165  	rawYAMLs := make([][]byte, 0)
   166  	for i := range files {
   167  		path := filepath.Clean(filepath.Join(dir, files[i].Name()))
   168  
   169  		byObj, err := os.ReadFile(path)
   170  		if err != nil {
   171  			return nil, err
   172  		}
   173  
   174  		rawYAMLs = append(rawYAMLs, byObj)
   175  	}
   176  
   177  	processedYAMLs := yaml.JoinYaml(rawYAMLs...)
   178  
   179  	objs, err := yaml.ToUnstructured(processedYAMLs)
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  
   184  	return objs, nil
   185  }
   186  
   187  func (o *objectMover) getObjectGraph(ctx context.Context, namespace string) (*objectGraph, error) {
   188  	objectGraph := newObjectGraph(o.fromProxy, o.fromProviderInventory)
   189  
   190  	// Gets all the types defined by the CRDs installed by clusterctl plus the ConfigMap/Secret core types.
   191  	err := objectGraph.getDiscoveryTypes(ctx)
   192  	if err != nil {
   193  		return nil, errors.Wrap(err, "failed to retrieve discovery types")
   194  	}
   195  
   196  	// Discover the object graph for the selected types:
   197  	// - Nodes are the Kubernetes objects (Clusters, Machines, etc.) identified during the discovery process.
   198  	// - Edges are derived from the OwnerReferences between nodes.
   199  	if err := objectGraph.Discovery(ctx, namespace); err != nil {
   200  		return nil, errors.Wrap(err, "failed to discover the object graph")
   201  	}
   202  
   203  	// Checks if Cluster API has already completed the provisioning of the infrastructure for the objects involved in the move/toDirectory operation.
   204  	// This is required because if the infrastructure is provisioned, then we can reasonably assume that the objects we are moving/backing up are
   205  	// not currently waiting for long-running reconciliation loops, and so we can safely rely on the pause field on the Cluster object
   206  	// for blocking any further object reconciliation on the source objects.
   207  	if err := o.checkProvisioningCompleted(ctx, objectGraph); err != nil {
   208  		return nil, errors.Wrap(err, "failed to check for provisioned infrastructure")
   209  	}
   210  
   211  	// Check whether nodes are not included in the GVKs considered for move.
   212  	objectGraph.checkVirtualNode()
   213  
   214  	return objectGraph, nil
   215  }
   216  
   217  func newObjectMover(fromProxy Proxy, fromProviderInventory InventoryClient) *objectMover {
   218  	return &objectMover{
   219  		fromProxy:             fromProxy,
   220  		fromProviderInventory: fromProviderInventory,
   221  	}
   222  }
   223  
   224  // checkProvisioningCompleted checks if Cluster API has already completed the provisioning of the infrastructure for the objects involved in the move operation.
   225  func (o *objectMover) checkProvisioningCompleted(ctx context.Context, graph *objectGraph) error {
   226  	if o.dryRun {
   227  		return nil
   228  	}
   229  	errList := []error{}
   230  
   231  	// Check that the infrastructure is ready for all the Clusters.
   232  	readClusterBackoff := newReadBackoff()
   233  	clusters := graph.getClusters()
   234  	for i := range clusters {
   235  		cluster := clusters[i]
   236  		clusterObj := &clusterv1.Cluster{}
   237  		if err := retryWithExponentialBackoff(ctx, readClusterBackoff, func(ctx context.Context) error {
   238  			return getClusterObj(ctx, o.fromProxy, cluster, clusterObj)
   239  		}); err != nil {
   240  			return err
   241  		}
   242  
   243  		if !clusterObj.Status.InfrastructureReady {
   244  			errList = append(errList, errors.Errorf("cannot start the move operation while %q %s/%s is still provisioning the infrastructure", clusterObj.GroupVersionKind(), clusterObj.GetNamespace(), clusterObj.GetName()))
   245  			continue
   246  		}
   247  
   248  		// Note: can't use IsFalse here because we need to handle the absence of the condition as well as false.
   249  		if !conditions.IsTrue(clusterObj, clusterv1.ControlPlaneInitializedCondition) {
   250  			errList = append(errList, errors.Errorf("cannot start the move operation while the control plane for %q %s/%s is not yet initialized", clusterObj.GroupVersionKind(), clusterObj.GetNamespace(), clusterObj.GetName()))
   251  			continue
   252  		}
   253  
   254  		if clusterObj.Spec.ControlPlaneRef != nil && !clusterObj.Status.ControlPlaneReady {
   255  			errList = append(errList, errors.Errorf("cannot start the move operation while the control plane for %q %s/%s is not yet ready", clusterObj.GroupVersionKind(), clusterObj.GetNamespace(), clusterObj.GetName()))
   256  			continue
   257  		}
   258  	}
   259  
   260  	// Check that all the Machines have a NodeRef.
   261  	// Nb. NodeRef is considered a better signal than InfrastructureReady, because it ensures the node in the workload cluster is up and running.
   262  	readMachinesBackoff := newReadBackoff()
   263  	machines := graph.getMachines()
   264  	for i := range machines {
   265  		machine := machines[i]
   266  		machineObj := &clusterv1.Machine{}
   267  		if err := retryWithExponentialBackoff(ctx, readMachinesBackoff, func(ctx context.Context) error {
   268  			return getMachineObj(ctx, o.fromProxy, machine, machineObj)
   269  		}); err != nil {
   270  			return err
   271  		}
   272  
   273  		if machineObj.Status.NodeRef == nil {
   274  			errList = append(errList, errors.Errorf("cannot start the move operation while %q %s/%s is still provisioning the node", machineObj.GroupVersionKind(), machineObj.GetNamespace(), machineObj.GetName()))
   275  		}
   276  	}
   277  
   278  	return kerrors.NewAggregate(errList)
   279  }
   280  
   281  // getClusterObj retrieves the clusterObj corresponding to a node with type Cluster.
   282  func getClusterObj(ctx context.Context, proxy Proxy, cluster *node, clusterObj *clusterv1.Cluster) error {
   283  	c, err := proxy.NewClient()
   284  	if err != nil {
   285  		return err
   286  	}
   287  	clusterObjKey := client.ObjectKey{
   288  		Namespace: cluster.identity.Namespace,
   289  		Name:      cluster.identity.Name,
   290  	}
   291  
   292  	if err := c.Get(ctx, clusterObjKey, clusterObj); err != nil {
   293  		return errors.Wrapf(err, "error reading Cluster %s/%s",
   294  			clusterObj.GetNamespace(), clusterObj.GetName())
   295  	}
   296  	return nil
   297  }
   298  
   299  // getMachineObj retrieves the machineObj corresponding to a node with type Machine.
   300  func getMachineObj(ctx context.Context, proxy Proxy, machine *node, machineObj *clusterv1.Machine) error {
   301  	c, err := proxy.NewClient()
   302  	if err != nil {
   303  		return err
   304  	}
   305  	machineObjKey := client.ObjectKey{
   306  		Namespace: machine.identity.Namespace,
   307  		Name:      machine.identity.Name,
   308  	}
   309  
   310  	if err := c.Get(ctx, machineObjKey, machineObj); err != nil {
   311  		return errors.Wrapf(err, "error reading Machine %s/%s",
   312  			machineObj.GetNamespace(), machineObj.GetName())
   313  	}
   314  	return nil
   315  }
   316  
   317  // Move moves all the Cluster API objects existing in a namespace (or from all the namespaces if empty) to a target management cluster.
   318  func (o *objectMover) move(ctx context.Context, graph *objectGraph, toProxy Proxy, mutators ...ResourceMutatorFunc) error {
   319  	log := logf.Log
   320  
   321  	clusters := graph.getClusters()
   322  	log.Info("Moving Cluster API objects", "Clusters", len(clusters))
   323  
   324  	clusterClasses := graph.getClusterClasses()
   325  	log.Info("Moving Cluster API objects", "ClusterClasses", len(clusterClasses))
   326  
   327  	// Sets the pause field on the Cluster object in the source management cluster, so the controllers stop reconciling it.
   328  	log.V(1).Info("Pausing the source cluster")
   329  	if err := setClusterPause(ctx, o.fromProxy, clusters, true, o.dryRun); err != nil {
   330  		return err
   331  	}
   332  
   333  	log.V(1).Info("Pausing the source ClusterClasses")
   334  	if err := setClusterClassPause(ctx, o.fromProxy, clusterClasses, true, o.dryRun); err != nil {
   335  		return errors.Wrap(err, "error pausing ClusterClasses")
   336  	}
   337  
   338  	log.Info("Waiting for all resources to be ready to move")
   339  	// exponential backoff configuration which returns durations for a total time of ~2m.
   340  	// Example: 0, 5s, 8s, 11s, 17s, 26s, 38s, 57s, 86s, 128s
   341  	waitForMoveUnblockedBackoff := wait.Backoff{
   342  		Duration: 5 * time.Second,
   343  		Factor:   1.5,
   344  		Steps:    10,
   345  		Jitter:   0.1,
   346  	}
   347  	if err := waitReadyForMove(ctx, o.fromProxy, graph.getMoveNodes(), o.dryRun, waitForMoveUnblockedBackoff); err != nil {
   348  		return errors.Wrap(err, "error waiting for resources to be ready to move")
   349  	}
   350  
   351  	// Nb. DO NOT call ensureNamespaces at this point because:
   352  	// - the namespace will be ensured to exist right before creating each resource.
   353  	// - if it were done here, we might create a namespace that ends up unused on the target cluster (due to mutators).
   354  
   355  	// Define the move sequence by processing the ownerReference chain, so we ensure that a Kubernetes object is moved only after its owners.
   356  	// The sequence is based on object graph nodes, each one representing a Kubernetes object; nodes are grouped, so batches of nodes can be moved in parallel. e.g.
   357  	// - All the Clusters should be moved first (group 1, processed in parallel)
   358  	// - All the MachineDeployments should be moved second (group 2, processed in parallel)
   359  	// - then all the MachineSets, then all the Machines, etc.
   360  	moveSequence := getMoveSequence(graph)
   361  
   362  	// Create all objects group by group, ensuring all the ownerReferences are re-created.
   363  	log.Info("Creating objects in the target cluster")
   364  	for groupIndex := 0; groupIndex < len(moveSequence.groups); groupIndex++ {
   365  		if err := o.createGroup(ctx, moveSequence.getGroup(groupIndex), toProxy, mutators...); err != nil {
   366  			return err
   367  		}
   368  	}
   369  
   370  	// Nb. Mutators used after this point (after creating the resources on the target cluster) are mainly intended for
   371  	// using the right namespace to fetch the resource from the target cluster.
   372  	// Mutators affecting non-metadata fields are a no-op after this point.
   373  
   374  	// Delete all objects group by group in reverse order.
   375  	log.Info("Deleting objects from the source cluster")
   376  	for groupIndex := len(moveSequence.groups) - 1; groupIndex >= 0; groupIndex-- {
   377  		if err := o.deleteGroup(ctx, moveSequence.getGroup(groupIndex)); err != nil {
   378  			return err
   379  		}
   380  	}
   381  
   382  	// Resume the ClusterClasses in the target management cluster, so the controllers start reconciling them.
   383  	log.V(1).Info("Resuming the target ClusterClasses")
   384  	if err := setClusterClassPause(ctx, toProxy, clusterClasses, false, o.dryRun, mutators...); err != nil {
   385  		return errors.Wrap(err, "error resuming ClusterClasses")
   386  	}
   387  
   388  	// Reset the pause field on the Cluster object in the target management cluster, so the controllers start reconciling it.
   389  	log.V(1).Info("Resuming the target cluster")
   390  	return setClusterPause(ctx, toProxy, clusters, false, o.dryRun, mutators...)
   391  }
   392  
   393  func (o *objectMover) toDirectory(ctx context.Context, graph *objectGraph, directory string) error {
   394  	log := logf.Log
   395  
   396  	clusters := graph.getClusters()
   397  	log.Info("Starting move of Cluster API objects", "Clusters", len(clusters))
   398  
   399  	clusterClasses := graph.getClusterClasses()
   400  	log.Info("Moving Cluster API objects", "ClusterClasses", len(clusterClasses))
   401  
   402  	// Sets the pause field on the Cluster object in the source management cluster, so the controllers stop reconciling it.
   403  	log.V(1).Info("Pausing the source cluster")
   404  	if err := setClusterPause(ctx, o.fromProxy, clusters, true, o.dryRun); err != nil {
   405  		return err
   406  	}
   407  
   408  	log.V(1).Info("Pausing the source ClusterClasses")
   409  	if err := setClusterClassPause(ctx, o.fromProxy, clusterClasses, true, o.dryRun); err != nil {
   410  		return errors.Wrap(err, "error pausing ClusterClasses")
   411  	}
   412  
   413  	// Define the move sequence by processing the ownerReference chain, so we ensure that a Kubernetes object is moved only after its owners.
   414  	// The sequence is based on object graph nodes, each one representing a Kubernetes object; nodes are grouped, so batches of nodes can be moved in parallel. e.g.
   415  	// - All the Clusters should be moved first (group 1, processed in parallel)
   416  	// - All the MachineDeployments should be moved second (group 2, processed in parallel)
   417  	// - then all the MachineSets, then all the Machines, etc.
   418  	moveSequence := getMoveSequence(graph)
   419  
   420  	// Save all objects group by group
   421  	log.Info(fmt.Sprintf("Saving files to %s", directory))
   422  	for groupIndex := 0; groupIndex < len(moveSequence.groups); groupIndex++ {
   423  		if err := o.backupGroup(ctx, moveSequence.getGroup(groupIndex), directory); err != nil {
   424  			return err
   425  		}
   426  	}
   427  
   428  	// Resume the ClusterClasses in the source management cluster, so the controllers start reconciling them.
   429  	log.V(1).Info("Resuming the source ClusterClasses")
   430  	if err := setClusterClassPause(ctx, o.fromProxy, clusterClasses, false, o.dryRun); err != nil {
   431  		return errors.Wrap(err, "error resuming ClusterClasses")
   432  	}
   433  
   434  	// Reset the pause field on the Cluster objects in the source management cluster, so the controllers start reconciling them.
   435  	log.V(1).Info("Resuming the source cluster")
   436  	return setClusterPause(ctx, o.fromProxy, clusters, false, o.dryRun)
   437  }
   438  
   439  func (o *objectMover) fromDirectory(ctx context.Context, graph *objectGraph, toProxy Proxy) error {
   440  	log := logf.Log
   441  
   442  	// Get clusters from graph
   443  	clusters := graph.getClusters()
   444  	// Get clusterclasses from graph
   445  	clusterClasses := graph.getClusterClasses()
   446  
   447  	// Ensure all the expected target namespaces are in place before creating objects.
   448  	log.V(1).Info("Creating target namespaces, if missing")
   449  	if err := o.ensureNamespaces(ctx, graph, toProxy); err != nil {
   450  		return err
   451  	}
   452  
   453  	// Define the move sequence by processing the ownerReference chain, so we ensure that a Kubernetes object is moved only after its owners.
   454  	// The sequence is based on object graph nodes, each one representing a Kubernetes object; nodes are grouped, so batches of nodes can be moved in parallel. e.g.
   455  	// - All the Clusters should be moved first (group 1, processed in parallel)
   456  	// - All the MachineDeployments should be moved second (group 2, processed in parallel)
   457  	// - then all the MachineSets, then all the Machines, etc.
   458  	moveSequence := getMoveSequence(graph)
   459  
   460  	// Create all objects group by group, ensuring all the ownerReferences are re-created.
   461  	log.Info("Restoring objects into the target cluster")
   462  	for groupIndex := 0; groupIndex < len(moveSequence.groups); groupIndex++ {
   463  		if err := o.restoreGroup(ctx, moveSequence.getGroup(groupIndex), toProxy); err != nil {
   464  			return err
   465  		}
   466  	}
   467  
   468  	// Resume reconciling the ClusterClasses after being restored from a backup.
   469  	// By default, during backup, ClusterClasses are paused so they must be unpaused to be used again
   470  	log.V(1).Info("Resuming the target ClusterClasses")
   471  	if err := setClusterClassPause(ctx, toProxy, clusterClasses, false, o.dryRun); err != nil {
   472  		return errors.Wrap(err, "error resuming ClusterClasses")
   473  	}
   474  
   475  	// Resume reconciling the Clusters after being restored from a directory.
   476  	// By default, when moved to a directory, Clusters are paused, so they must be unpaused to be used again.
   477  	log.V(1).Info("Resuming the target cluster")
   478  	return setClusterPause(ctx, toProxy, clusters, false, o.dryRun)
   479  }
   480  
   481  // moveSequence defines an ordered list of moveGroups.
   482  type moveSequence struct {
   483  	groups   []moveGroup
   484  	nodesMap map[*node]empty
   485  }
   486  
   487  // moveGroup is a list of nodes read from the object graph that can be moved in parallel.
   488  type moveGroup []*node
   489  
   490  func (s *moveSequence) addGroup(group moveGroup) {
   491  	// Add the group
   492  	s.groups = append(s.groups, group)
   493  	// Add all the nodes in the group to the nodesMap so we can check whether a node is already in the move sequence.
   494  	for _, n := range group {
   495  		s.nodesMap[n] = empty{}
   496  	}
   497  }
   498  
   499  func (s *moveSequence) hasNode(n *node) bool {
   500  	_, ok := s.nodesMap[n]
   501  	return ok
   502  }
   503  
   504  func (s *moveSequence) getGroup(i int) moveGroup {
   505  	return s.groups[i]
   506  }
   507  
   508  // Define the move sequence by processing the ownerReference chain.
   509  func getMoveSequence(graph *objectGraph) *moveSequence {
   510  	moveSequence := &moveSequence{
   511  		groups:   []moveGroup{},
   512  		nodesMap: make(map[*node]empty),
   513  	}
   514  
   515  	for {
   516  		// Determine the next move group by processing all the nodes in the graph that belong to a Cluster.
   517  		// NB. it is necessary to filter out nodes not belonging to a cluster because e.g. discovery reads all the secrets,
   518  		// but only a few of them are related to Clusters/Machines etc.
   519  		moveGroup := moveGroup{}
   520  
   521  		for _, n := range graph.getMoveNodes() {
   522  			// If the node was already included in the moveSequence, skip it.
   523  			if moveSequence.hasNode(n) {
   524  				continue
   525  			}
   526  
   527  			// Check if all the ownerReferences are already included in the move sequence; if yes, add the node to move group,
   528  			// otherwise skip it (the node will be re-processed in the next group).
   529  			ownersInPlace := true
   530  			for owner := range n.owners {
   531  				if !moveSequence.hasNode(owner) {
   532  					ownersInPlace = false
   533  					break
   534  				}
   535  			}
   536  			for owner := range n.softOwners {
   537  				if !moveSequence.hasNode(owner) {
   538  					ownersInPlace = false
   539  					break
   540  				}
   541  			}
   542  			if ownersInPlace {
   543  				moveGroup = append(moveGroup, n)
   544  			}
   545  		}
   546  
   547  		// If the resulting move group is empty it means that all the nodes are already in the sequence, so exit.
   548  		if len(moveGroup) == 0 {
   549  			break
   550  		}
   551  		moveSequence.addGroup(moveGroup)
   552  	}
   553  	return moveSequence
   554  }
   555  
   556  // setClusterPause sets the paused field on nodes referring to Cluster objects.
   557  func setClusterPause(ctx context.Context, proxy Proxy, clusters []*node, value bool, dryRun bool, mutators ...ResourceMutatorFunc) error {
   558  	if dryRun {
   559  		return nil
   560  	}
   561  
   562  	log := logf.Log
   563  	patchValue := "true"
   564  	if !value {
   565  		// If `value` is false, let's drop the field.
   566  		// This makes sure that clusterctl does not own the field and avoids any ownership conflicts.
   567  		patchValue = "null"
   568  	}
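        	// The resulting merge patch body is either {"spec":{"paused":true}} (pause)
        	// or {"spec":{"paused":null}} (resume, which drops the field entirely).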
   569  	patch := client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf("{\"spec\":{\"paused\":%s}}", patchValue)))
   570  
   571  	setClusterPauseBackoff := newWriteBackoff()
   572  	for i := range clusters {
   573  		cluster := clusters[i]
   574  		log.V(5).Info("Set Cluster.Spec.Paused", "paused", value, "Cluster", klog.KRef(cluster.identity.Namespace, cluster.identity.Name))
   575  
   576  		// Nb. The operation is wrapped in a retry loop to make setClusterPause more resilient to unexpected conditions.
   577  		if err := retryWithExponentialBackoff(ctx, setClusterPauseBackoff, func(ctx context.Context) error {
   578  			return patchCluster(ctx, proxy, cluster, patch, mutators...)
   579  		}); err != nil {
   580  			return errors.Wrapf(err, "error setting Cluster.Spec.Paused=%t", value)
   581  		}
   582  	}
   583  	return nil
   584  }
   585  
   586  // setClusterClassPause sets the paused annotation on nodes referring to ClusterClass objects.
   587  func setClusterClassPause(ctx context.Context, proxy Proxy, clusterclasses []*node, pause bool, dryRun bool, mutators ...ResourceMutatorFunc) error {
   588  	if dryRun {
   589  		return nil
   590  	}
   591  
   592  	log := logf.Log
   593  
   594  	setClusterClassPauseBackoff := newWriteBackoff()
   595  	for i := range clusterclasses {
   596  		clusterclass := clusterclasses[i]
   597  		if pause {
   598  			log.V(5).Info("Set Paused annotation", "ClusterClass", clusterclass.identity.Name, "Namespace", clusterclass.identity.Namespace)
   599  		} else {
   600  			log.V(5).Info("Remove Paused annotation", "ClusterClass", clusterclass.identity.Name, "Namespace", clusterclass.identity.Namespace)
   601  		}
   602  
   603  		// Nb. The operation is wrapped in a retry loop to make setClusterClassPause more resilient to unexpected conditions.
   604  		if err := retryWithExponentialBackoff(ctx, setClusterClassPauseBackoff, func(ctx context.Context) error {
   605  			return pauseClusterClass(ctx, proxy, clusterclass, pause, mutators...)
   606  		}); err != nil {
   607  			return errors.Wrapf(err, "error updating ClusterClass %s/%s", clusterclass.identity.Namespace, clusterclass.identity.Name)
   608  		}
   609  	}
   610  	return nil
   611  }
   612  
   613  func waitReadyForMove(ctx context.Context, proxy Proxy, nodes []*node, dryRun bool, backoff wait.Backoff) error {
   614  	if dryRun {
   615  		return nil
   616  	}
   617  
   618  	log := logf.Log
   619  
   620  	c, err := proxy.NewClient()
   621  	if err != nil {
   622  		return errors.Wrap(err, "error creating client")
   623  	}
   624  
   625  	for _, n := range nodes {
   626  		log := log.WithValues(
   627  			"apiVersion", n.identity.GroupVersionKind(),
   628  			"resource", klog.ObjectRef{
   629  				Name:      n.identity.Name,
   630  				Namespace: n.identity.Namespace,
   631  			},
   632  		)
   633  		if !n.blockingMove {
   634  			log.V(5).Info("Resource not blocking move")
   635  			continue
   636  		}
   637  
   638  		obj := &metav1.PartialObjectMetadata{
   639  			ObjectMeta: metav1.ObjectMeta{
   640  				Name:      n.identity.Name,
   641  				Namespace: n.identity.Namespace,
   642  			},
   643  			TypeMeta: metav1.TypeMeta{
   644  				APIVersion: n.identity.APIVersion,
   645  				Kind:       n.identity.Kind,
   646  			},
   647  		}
   648  		key := client.ObjectKeyFromObject(obj)
   649  
   650  		blockLogged := false
   651  		if err := retryWithExponentialBackoff(ctx, backoff, func(ctx context.Context) error {
   652  			if err := c.Get(ctx, key, obj); err != nil {
   653  				return errors.Wrapf(err, "error getting %s/%s", obj.GroupVersionKind(), key)
   654  			}
   655  
   656  			if _, exists := obj.GetAnnotations()[clusterctlv1.BlockMoveAnnotation]; exists {
   657  				if !blockLogged {
   658  					log.Info(fmt.Sprintf("Move blocked by %s annotation, waiting for it to be removed", clusterctlv1.BlockMoveAnnotation))
   659  					blockLogged = true
   660  				}
   661  				return errors.Errorf("resource is not ready to move: %s/%s", obj.GroupVersionKind(), key)
   662  			}
   663  			log.V(5).Info("Resource is ready to move")
   664  			return nil
   665  		}); err != nil {
   666  			return err
   667  		}
   668  	}
   669  
   670  	return nil
   671  }
   672  
   673  // patchCluster applies a patch to a node referring to a Cluster object.
   674  func patchCluster(ctx context.Context, proxy Proxy, n *node, patch client.Patch, mutators ...ResourceMutatorFunc) error {
   675  	cFrom, err := proxy.NewClient()
   676  	if err != nil {
   677  		return err
   678  	}
   679  
   680  	// Since the patch has already been generated in the caller of this function, the ONLY effect that mutators can have
   681  	// here is on the namespace of the resource.
   682  	clusterObj, err := applyMutators(&clusterv1.Cluster{
   683  		TypeMeta: metav1.TypeMeta{
   684  			Kind:       clusterv1.ClusterKind,
   685  			APIVersion: clusterv1.GroupVersion.String(),
   686  		},
   687  		ObjectMeta: metav1.ObjectMeta{
   688  			Name:      n.identity.Name,
   689  			Namespace: n.identity.Namespace,
   690  		},
   691  	}, mutators...)
   692  	if err != nil {
   693  		return err
   694  	}
   695  
   696  	if err := cFrom.Get(ctx, client.ObjectKeyFromObject(clusterObj), clusterObj); err != nil {
   697  		return errors.Wrapf(err, "error reading Cluster %s/%s",
   698  			clusterObj.GetNamespace(), clusterObj.GetName())
   699  	}
   700  
   701  	if err := cFrom.Patch(ctx, clusterObj, patch); err != nil {
   702  		return errors.Wrapf(err, "error patching Cluster %s/%s",
   703  			clusterObj.GetNamespace(), clusterObj.GetName())
   704  	}
   705  
   706  	return nil
   707  }
   708  
   709  func pauseClusterClass(ctx context.Context, proxy Proxy, n *node, pause bool, mutators ...ResourceMutatorFunc) error {
   710  	cFrom, err := proxy.NewClient()
   711  	if err != nil {
   712  		return errors.Wrap(err, "error creating client")
   713  	}
   714  
   715  	// Get a mutated copy of the ClusterClass to identify the target namespace.
   716  	// The ClusterClass could have ended up in a different namespace after the move (e.g. due to mutators).
   717  	mutatedClusterClass, err := applyMutators(&clusterv1.ClusterClass{
   718  		TypeMeta: metav1.TypeMeta{
   719  			Kind:       clusterv1.ClusterClassKind,
   720  			APIVersion: clusterv1.GroupVersion.String(),
   721  		},
   722  		ObjectMeta: metav1.ObjectMeta{
   723  			Name:      n.identity.Name,
   724  			Namespace: n.identity.Namespace,
   725  		}}, mutators...)
   726  	if err != nil {
   727  		return err
   728  	}
   729  
   730  	clusterClass := &clusterv1.ClusterClass{}
   731  	// Construct an object key using the mutatedClusterClass reflecting any changes to the namespace.
   732  	clusterClassObjKey := client.ObjectKey{
   733  		Name:      mutatedClusterClass.GetName(),
   734  		Namespace: mutatedClusterClass.GetNamespace(),
   735  	}
   736  	// Get a copy of the ClusterClass.
   737  	// This will ensure that any other changes from the mutator are ignored here as we work with a fresh copy of the cluster class.
   738  	if err := cFrom.Get(ctx, clusterClassObjKey, clusterClass); err != nil {
   739  		return errors.Wrapf(err, "error reading ClusterClass %s/%s", n.identity.Namespace, n.identity.Name)
   740  	}
   741  
   742  	patchHelper, err := patch.NewHelper(clusterClass, cFrom)
   743  	if err != nil {
   744  		return errors.Wrapf(err, "error creating patcher for ClusterClass %s/%s", n.identity.Namespace, n.identity.Name)
   745  	}
   746  
   747  	// Update the annotation to the desired state
   748  	ccAnnotations := clusterClass.GetAnnotations()
   749  	if ccAnnotations == nil {
   750  		ccAnnotations = map[string]string{}
   751  	}
   752  	if pause {
   753  		// Set the pause annotation.
   754  		ccAnnotations[clusterv1.PausedAnnotation] = ""
   755  	} else {
   756  		// Delete the pause annotation.
   757  		delete(ccAnnotations, clusterv1.PausedAnnotation)
   758  	}
   759  
   760  	// Update the ClusterClass with the new annotations.
   761  	clusterClass.SetAnnotations(ccAnnotations)
   762  	if err := patchHelper.Patch(ctx, clusterClass); err != nil {
   763  		return errors.Wrapf(err, "error patching ClusterClass %s/%s", n.identity.Namespace, n.identity.Name)
   764  	}
   765  
   766  	return nil
   767  }
   768  
   769  // ensureNamespaces ensures all the expected target namespaces are in place before creating objects.
   770  func (o *objectMover) ensureNamespaces(ctx context.Context, graph *objectGraph, toProxy Proxy) error {
   771  	if o.dryRun {
   772  		return nil
   773  	}
   774  
   775  	ensureNamespaceBackoff := newWriteBackoff()
   776  	namespaces := sets.Set[string]{}
   777  	for _, node := range graph.getMoveNodes() {
   778  		// ignore global/cluster-wide objects
   779  		if node.isGlobal {
   780  			continue
   781  		}
   782  
   783  		namespace := node.identity.Namespace
   784  
   785  		// If the namespace was already processed, skip it.
   786  		if namespaces.Has(namespace) {
   787  			continue
   788  		}
   789  		namespaces.Insert(namespace)
   790  
   791  		if err := retryWithExponentialBackoff(ctx, ensureNamespaceBackoff, func(ctx context.Context) error {
   792  			return o.ensureNamespace(ctx, toProxy, namespace)
   793  		}); err != nil {
   794  			return err
   795  		}
   796  	}
   797  
   798  	return nil
   799  }
   800  
   801  // ensureNamespace ensures a target namespace is in place before creating objects.
   802  func (o *objectMover) ensureNamespace(ctx context.Context, toProxy Proxy, namespace string) error {
   803  	log := logf.Log
   804  
   805  	cs, err := toProxy.NewClient()
   806  	if err != nil {
   807  		return err
   808  	}
   809  
   810  	// Check if the namespace exists (also dealing with RBAC restrictions).
   811  	ns := &corev1.Namespace{}
   812  	key := client.ObjectKey{
   813  		Name: namespace,
   814  	}
   815  
   816  	err = cs.Get(ctx, key, ns)
   817  	if err == nil {
   818  		return nil
   819  	}
   820  	if apierrors.IsForbidden(err) {
   821  		namespaces := &corev1.NamespaceList{}
   822  		namespaceExists := false
   823  		for {
   824  			if err := cs.List(ctx, namespaces, client.Continue(namespaces.Continue)); err != nil {
   825  				return err
   826  			}
   827  
   828  			for _, ns := range namespaces.Items {
   829  				if ns.Name == namespace {
   830  					namespaceExists = true
   831  					break
   832  				}
   833  			}
   834  
   835  			if namespaces.Continue == "" {
   836  				break
   837  			}
   838  		}
   839  		if namespaceExists {
   840  			return nil
   841  		}
   842  	}
   843  	if !apierrors.IsNotFound(err) {
   844  		return err
   845  	}
   846  
   847  	// If the namespace does not exist, create it.
   848  	ns = &corev1.Namespace{
   849  		TypeMeta: metav1.TypeMeta{
   850  			APIVersion: "v1",
   851  			Kind:       "Namespace",
   852  		},
   853  		ObjectMeta: metav1.ObjectMeta{
   854  			Name: namespace,
   855  		},
   856  	}
   857  	log.V(1).Info("Creating", ns.Kind, ns.Name)
   858  	if err := cs.Create(ctx, ns); err != nil && !apierrors.IsAlreadyExists(err) {
   859  		return err
   860  	}
   861  	return nil
   862  }
   863  
   864  // createGroup creates all the Kubernetes objects into the target management cluster corresponding to the object graph nodes in a moveGroup.
   865  func (o *objectMover) createGroup(ctx context.Context, group moveGroup, toProxy Proxy, mutators ...ResourceMutatorFunc) error {
   866  	createTargetObjectBackoff := newWriteBackoff()
   867  	errList := []error{}
   868  
   869  	// Maintain a cache of namespaces that have been verified to already exist.
   870  	// Nb. This prevents us from making repetitive (and expensive) calls to list all namespaces to ensure a namespace exists before creating a resource.
   871  	existingNamespaces := sets.New[string]()
   872  	for _, nodeToCreate := range group {
   873  		// Creates the Kubernetes object corresponding to the nodeToCreate.
   874  		// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
   875  		err := retryWithExponentialBackoff(ctx, createTargetObjectBackoff, func(ctx context.Context) error {
   876  			return o.createTargetObject(ctx, nodeToCreate, toProxy, mutators, existingNamespaces)
   877  		})
   878  		if err != nil {
   879  			errList = append(errList, err)
   880  		}
   881  	}
   882  
   883  	if len(errList) > 0 {
   884  		return kerrors.NewAggregate(errList)
   885  	}
   886  
   887  	return nil
   888  }
   889  
   890  func (o *objectMover) backupGroup(ctx context.Context, group moveGroup, directory string) error {
   891  	backupTargetObjectBackoff := newWriteBackoff()
   892  	errList := []error{}
   893  
   894  	for _, nodeToBackup := range group {
   895  		// Backs-up the Kubernetes object corresponding to the nodeToBackup.
   896  		// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
   897  		err := retryWithExponentialBackoff(ctx, backupTargetObjectBackoff, func(ctx context.Context) error {
   898  			return o.backupTargetObject(ctx, nodeToBackup, directory)
   899  		})
   900  		if err != nil {
   901  			errList = append(errList, err)
   902  		}
   903  	}
   904  
   905  	if len(errList) > 0 {
   906  		return kerrors.NewAggregate(errList)
   907  	}
   908  
   909  	return nil
   910  }
   911  
   912  func (o *objectMover) restoreGroup(ctx context.Context, group moveGroup, toProxy Proxy) error {
   913  	restoreTargetObjectBackoff := newWriteBackoff()
   914  	errList := []error{}
   915  
   916  	for _, nodeToRestore := range group {
   917  		// Creates the Kubernetes object corresponding to the nodeToRestore.
   918  		// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
   919  		err := retryWithExponentialBackoff(ctx, restoreTargetObjectBackoff, func(ctx context.Context) error {
   920  			return o.restoreTargetObject(ctx, nodeToRestore, toProxy)
   921  		})
   922  		if err != nil {
   923  			errList = append(errList, err)
   924  		}
   925  	}
   926  
   927  	if len(errList) > 0 {
   928  		return kerrors.NewAggregate(errList)
   929  	}
   930  
   931  	return nil
   932  }
   933  
   934  // createTargetObject creates the Kubernetes object in the target Management cluster corresponding to the object graph node, taking care of restoring the OwnerReference with the owner nodes, if any.
   935  func (o *objectMover) createTargetObject(ctx context.Context, nodeToCreate *node, toProxy Proxy, mutators []ResourceMutatorFunc, existingNamespaces sets.Set[string]) error {
   936  	log := logf.Log
   937  	log.V(1).Info("Creating", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
   938  
   939  	if o.dryRun {
   940  		return nil
   941  	}
   942  
   943  	cFrom, err := o.fromProxy.NewClient()
   944  	if err != nil {
   945  		return err
   946  	}
   947  
   948  	// Get the source object
   949  	obj := &unstructured.Unstructured{}
   950  	obj.SetAPIVersion(nodeToCreate.identity.APIVersion)
   951  	obj.SetKind(nodeToCreate.identity.Kind)
   952  	objKey := client.ObjectKey{
   953  		Namespace: nodeToCreate.identity.Namespace,
   954  		Name:      nodeToCreate.identity.Name,
   955  	}
   956  
   957  	if err := cFrom.Get(ctx, objKey, obj); err != nil {
   958  		return errors.Wrapf(err, "error reading %q %s/%s",
   959  			obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
   960  	}
   961  
   962  	// New objects cannot have a specified resource version. Clear it out.
   963  	obj.SetResourceVersion("")
   964  
   965  	// Removes current OwnerReferences
   966  	obj.SetOwnerReferences(nil)
   967  
   968  	// Rebuild the owner reference chain
   969  	o.buildOwnerChain(obj, nodeToCreate)
   970  
   971  	// FIXME Workaround for https://github.com/kubernetes/kubernetes/issues/32220. Remove when the issue is fixed.
   972  	// If the resource already exists, the API server ordinarily returns an AlreadyExists error. Due to the above issue, if the resource has a non-empty metadata.generateName field, the API server returns a ServerTimeoutError. To ensure that the API server returns an AlreadyExists error, we set the metadata.generateName field to an empty string.
   973  	if len(obj.GetName()) > 0 && len(obj.GetGenerateName()) > 0 {
   974  		obj.SetGenerateName("")
   975  	}
   976  
   977  	// Creates the targetObj into the target management cluster.
   978  	cTo, err := toProxy.NewClient()
   979  	if err != nil {
   980  		return err
   981  	}
   982  
   983  	obj, err = applyMutators(obj, mutators...)
   984  	if err != nil {
   985  		return err
   986  	}
   987  	// Applying mutators MAY change the namespace, so ensure the namespace exists before creating the resource.
   988  	if !nodeToCreate.isGlobal && !existingNamespaces.Has(obj.GetNamespace()) {
   989  		if err = o.ensureNamespace(ctx, toProxy, obj.GetNamespace()); err != nil {
   990  			return err
   991  		}
   992  		existingNamespaces.Insert(obj.GetNamespace())
   993  	}
   994  	oldManagedFields := obj.GetManagedFields()
   995  	if err := cTo.Create(ctx, obj); err != nil {
   996  		if !apierrors.IsAlreadyExists(err) {
   997  			return errors.Wrapf(err, "error creating %q %s/%s",
   998  				obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
   999  		}
  1000  
  1001  		// If the object already exists, try to update it if it is not a global object / something belonging to a global object hierarchy (e.g. a secret owned by a global identity object).
  1002  		if nodeToCreate.isGlobal || nodeToCreate.isGlobalHierarchy {
  1003  			log.V(5).Info("Object already exists, skipping upgrade because it is global/it is owned by a global object", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
  1004  		} else {
  1005  			// Nb. This should not happen, but it is supported to make move more resilient to unexpected interrupt/restarts of the move process.
  1006  			log.V(5).Info("Object already exists, updating", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
  1007  
  1008  			// Retrieve the UID and the resource version for the update.
  1009  			existingTargetObj := &unstructured.Unstructured{}
  1010  			existingTargetObj.SetAPIVersion(obj.GetAPIVersion())
  1011  			existingTargetObj.SetKind(obj.GetKind())
  1012  			if err := cTo.Get(ctx, objKey, existingTargetObj); err != nil {
  1013  				return errors.Wrapf(err, "error reading resource for %q %s/%s",
  1014  					existingTargetObj.GroupVersionKind(), existingTargetObj.GetNamespace(), existingTargetObj.GetName())
  1015  			}
  1016  
  1017  			obj.SetUID(existingTargetObj.GetUID())
  1018  			obj.SetResourceVersion(existingTargetObj.GetResourceVersion())
  1019  			if err := cTo.Update(ctx, obj); err != nil {
  1020  				return errors.Wrapf(err, "error updating %q %s/%s",
  1021  					obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
  1022  			}
  1023  		}
  1024  	}
  1025  
  1026  	// Stores the newUID assigned to the newly created object.
  1027  	nodeToCreate.newUID = obj.GetUID()
  1028  
  1029  	if err := patchTopologyManagedFields(ctx, oldManagedFields, obj, cTo); err != nil {
  1030  		return errors.Wrap(err, "error patching the managed fields")
  1031  	}
  1032  
  1033  	return nil
  1034  }
  1035  
  1036  func (o *objectMover) backupTargetObject(ctx context.Context, nodeToCreate *node, directory string) error {
  1037  	log := logf.Log
  1038  	log.V(1).Info("Saving", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
  1039  
  1040  	cFrom, err := o.fromProxy.NewClient()
  1041  	if err != nil {
  1042  		return err
  1043  	}
  1044  
  1045  	// Get the source object
  1046  	obj := &unstructured.Unstructured{}
  1047  	obj.SetAPIVersion(nodeToCreate.identity.APIVersion)
  1048  	obj.SetKind(nodeToCreate.identity.Kind)
  1049  	objKey := client.ObjectKey{
  1050  		Namespace: nodeToCreate.identity.Namespace,
  1051  		Name:      nodeToCreate.identity.Name,
  1052  	}
  1053  
  1054  	if err := cFrom.Get(ctx, objKey, obj); err != nil {
  1055  		return errors.Wrapf(err, "error reading %q %s/%s",
  1056  			obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
  1057  	}
  1058  
  1059  	// Get JSON for object and write it into the configured directory
  1060  	byObj, err := obj.MarshalJSON()
  1061  	if err != nil {
  1062  		return err
  1063  	}
  1064  
  1065  	filenameObj := nodeToCreate.getFilename()
  1066  	objectFile := filepath.Join(directory, filenameObj)
  1067  
  1068  	// If the file exists, remove it so it can be written again.
  1069  	_, err = os.Stat(objectFile)
  1070  	if err != nil && !os.IsNotExist(err) {
  1071  		return err
  1072  	}
  1073  	if err == nil {
  1074  		if err := os.Remove(objectFile); err != nil {
  1075  			return err
  1076  		}
  1077  	}
  1078  
  1079  	err = os.WriteFile(objectFile, byObj, 0600)
  1080  	if err != nil {
  1081  		return err
  1082  	}
  1083  
  1084  	return nil
  1085  }
  1086  
  1087  func (o *objectMover) restoreTargetObject(ctx context.Context, nodeToCreate *node, toProxy Proxy) error {
  1088  	log := logf.Log
  1089  	log.V(1).Info("Restoring", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
  1090  
  1091  	// Creates the targetObj into the target management cluster.
  1092  	cTo, err := toProxy.NewClient()
  1093  	if err != nil {
  1094  		return err
  1095  	}
  1096  
  1097  	// Attempt to retrieve an existing object. If it exists, update the UID to rebuild the owner chain
  1098  	objKey := client.ObjectKey{
  1099  		Namespace: nodeToCreate.identity.Namespace,
  1100  		Name:      nodeToCreate.identity.Name,
  1101  	}
  1102  
  1103  	existingTargetObj := &unstructured.Unstructured{}
  1104  	existingTargetObj.SetAPIVersion(nodeToCreate.restoreObject.GetAPIVersion())
  1105  	existingTargetObj.SetKind(nodeToCreate.restoreObject.GetKind())
  1106  	if err := cTo.Get(ctx, objKey, existingTargetObj); err == nil {
  1107  		log.V(5).Info("Object already exists, skipping moving from directory", nodeToCreate.identity.Kind, nodeToCreate.identity.Name, "Namespace", nodeToCreate.identity.Namespace)
  1108  
  1109  		// Update the node's UID since the object already exists. Any nodes owned by this existing node will be updated when the owner chain is rebuilt.
  1110  		nodeToCreate.newUID = existingTargetObj.GetUID()
  1111  
  1112  		// Return early since the object already exists
  1113  		return nil
  1114  	}
  1115  
  1116  	// Rebuild the source object
  1117  	obj := nodeToCreate.restoreObject
  1118  
  1119  	obj.SetAPIVersion(nodeToCreate.identity.APIVersion)
  1120  	obj.SetKind(nodeToCreate.identity.Kind)
  1121  
  1122  	// New objects cannot have a specified resource version. Clear it out.
  1123  	obj.SetResourceVersion("")
  1124  
  1125  	// Removes current OwnerReferences
  1126  	obj.SetOwnerReferences(nil)
  1127  
  1128  	// Rebuild the owner reference chain
  1129  	o.buildOwnerChain(obj, nodeToCreate)
  1130  
  1131  	if err := cTo.Create(ctx, obj); err != nil {
  1132  		if !apierrors.IsAlreadyExists(err) {
  1133  			return errors.Wrapf(err, "error creating %q %s/%s",
  1134  				obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
  1135  		}
  1136  	}
  1137  
  1138  	// Stores the newUID assigned to the newly created object.
  1139  	nodeToCreate.newUID = obj.GetUID()
  1140  
  1141  	return nil
  1142  }
  1143  
  1144  // Recreate all the OwnerReferences using the newUID of the owner nodes.
  1145  func (o *objectMover) buildOwnerChain(obj *unstructured.Unstructured, n *node) {
  1146  	if len(n.owners) > 0 {
  1147  		ownerRefs := []metav1.OwnerReference{}
  1148  		for ownerNode := range n.owners {
  1149  			ownerRef := metav1.OwnerReference{
  1150  				APIVersion: ownerNode.identity.APIVersion,
  1151  				Kind:       ownerNode.identity.Kind,
  1152  				Name:       ownerNode.identity.Name,
  1153  				UID:        ownerNode.newUID, // Use the owner's newUID read from the target management cluster (instead of the UID read during discovery).
  1154  			}
  1155  
  1156  			// Restores the attributes of the OwnerReference.
  1157  			if attributes, ok := n.owners[ownerNode]; ok {
  1158  				ownerRef.Controller = attributes.Controller
  1159  				ownerRef.BlockOwnerDeletion = attributes.BlockOwnerDeletion
  1160  			}
  1161  
  1162  			ownerRefs = append(ownerRefs, ownerRef)
  1163  		}
  1164  		obj.SetOwnerReferences(ownerRefs)
  1165  	}
  1166  }
  1167  
  1168  // deleteGroup deletes all the Kubernetes objects from the source management cluster corresponding to the object graph nodes in a moveGroup.
  1169  func (o *objectMover) deleteGroup(ctx context.Context, group moveGroup) error {
  1170  	deleteSourceObjectBackoff := newWriteBackoff()
  1171  	errList := []error{}
  1172  	for i := range group {
  1173  		nodeToDelete := group[i]
  1174  
  1175  		// Delete the Kubernetes object corresponding to the current node.
  1176  		// Nb. The operation is wrapped in a retry loop to make move more resilient to unexpected conditions.
  1177  		err := retryWithExponentialBackoff(ctx, deleteSourceObjectBackoff, func(ctx context.Context) error {
  1178  			return o.deleteSourceObject(ctx, nodeToDelete)
  1179  		})
  1180  
  1181  		if err != nil {
  1182  			errList = append(errList, err)
  1183  		}
  1184  	}
  1185  
  1186  	return kerrors.NewAggregate(errList)
  1187  }
  1188  
  1189  var (
  1190  	removeFinalizersPatch           = client.RawPatch(types.MergePatchType, []byte("{\"metadata\":{\"finalizers\":[]}}"))
  1191  	addDeleteForMoveAnnotationPatch = client.RawPatch(types.JSONPatchType, []byte(fmt.Sprintf("[{\"op\": \"add\", \"path\":\"/metadata/annotations\", \"value\":{%q:\"\"}}]", clusterctlv1.DeleteForMoveAnnotation)))
  1192  )
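        // For reference, the patch bodies above render as:
        //
        //	removeFinalizersPatch:           {"metadata":{"finalizers":[]}}
        //	addDeleteForMoveAnnotationPatch: [{"op": "add", "path":"/metadata/annotations", "value":{<DeleteForMoveAnnotation>:""}}]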
  1193  
  1194  // deleteSourceObject deletes the Kubernetes object corresponding to the node from the source management cluster, taking care of removing all the finalizers so
  1195  // the object gets immediately deleted (force delete).
  1196  func (o *objectMover) deleteSourceObject(ctx context.Context, nodeToDelete *node) error {
  1197  	// Don't delete cluster-wide nodes or nodes that are below a hierarchy that starts with a global object (e.g. a secret owned by a global identity object).
  1198  	if nodeToDelete.isGlobal || nodeToDelete.isGlobalHierarchy {
  1199  		return nil
  1200  	}
  1201  
  1202  	log := logf.Log
  1203  	log.V(1).Info("Deleting", nodeToDelete.identity.Kind, nodeToDelete.identity.Name, "Namespace", nodeToDelete.identity.Namespace)
  1204  
  1205  	if o.dryRun {
  1206  		return nil
  1207  	}
  1208  
  1209  	cFrom, err := o.fromProxy.NewClient()
  1210  	if err != nil {
  1211  		return err
  1212  	}
  1213  
  1214  	// Get the source object
  1215  	sourceObj := &unstructured.Unstructured{}
  1216  	sourceObj.SetAPIVersion(nodeToDelete.identity.APIVersion)
  1217  	sourceObj.SetKind(nodeToDelete.identity.Kind)
  1218  	sourceObjKey := client.ObjectKey{
  1219  		Namespace: nodeToDelete.identity.Namespace,
  1220  		Name:      nodeToDelete.identity.Name,
  1221  	}
  1222  
  1223  	if err := cFrom.Get(ctx, sourceObjKey, sourceObj); err != nil {
  1224  		if apierrors.IsNotFound(err) {
  1225  			// If the object is already deleted, move on.
  1226  			log.V(5).Info("Object already deleted, skipping delete for", nodeToDelete.identity.Kind, nodeToDelete.identity.Name, "Namespace", nodeToDelete.identity.Namespace)
  1227  			return nil
  1228  		}
  1229  		return errors.Wrapf(err, "error reading %q %s/%s",
  1230  			sourceObj.GroupVersionKind(), sourceObj.GetNamespace(), sourceObj.GetName())
  1231  	}
  1232  
  1233  	if err := cFrom.Patch(ctx, sourceObj, addDeleteForMoveAnnotationPatch); err != nil {
  1234  		return errors.Wrapf(err, "error adding delete-for-move annotation from %q %s/%s",
  1235  			sourceObj.GroupVersionKind(), sourceObj.GetNamespace(), sourceObj.GetName())
  1236  	}
  1237  
  1238  	if len(sourceObj.GetFinalizers()) > 0 {
  1239  		if err := cFrom.Patch(ctx, sourceObj, removeFinalizersPatch); err != nil {
  1240  			return errors.Wrapf(err, "error removing finalizers from %q %s/%s",
  1241  				sourceObj.GroupVersionKind(), sourceObj.GetNamespace(), sourceObj.GetName())
  1242  		}
  1243  	}
  1244  
  1245  	if err := cFrom.Delete(ctx, sourceObj); err != nil {
  1246  		return errors.Wrapf(err, "error deleting %q %s/%s",
  1247  			sourceObj.GroupVersionKind(), sourceObj.GetNamespace(), sourceObj.GetName())
  1248  	}
  1249  
  1250  	return nil
  1251  }
  1252  
  1253  // checkTargetProviders checks that all the providers installed in the source cluster exist in the target cluster as well (with a version >= the version installed in the source cluster).
  1254  func (o *objectMover) checkTargetProviders(ctx context.Context, toInventory InventoryClient) error {
  1255  	if o.dryRun {
  1256  		return nil
  1257  	}
  1258  
  1259  	// Gets the list of providers in the source/target cluster.
  1260  	fromProviders, err := o.fromProviderInventory.List(ctx)
  1261  	if err != nil {
  1262  		return errors.Wrapf(err, "failed to get provider list from the source cluster")
  1263  	}
  1264  
  1265  	toProviders, err := toInventory.List(ctx)
  1266  	if err != nil {
  1267  		return errors.Wrapf(err, "failed to get provider list from the target cluster")
  1268  	}
  1269  
  1270  	// Checks all the providers installed in the source cluster
  1271  	errList := []error{}
  1272  	for _, sourceProvider := range fromProviders.Items {
  1273  		sourceVersion, err := version.ParseSemantic(sourceProvider.Version)
  1274  		if err != nil {
  1275  			return errors.Wrapf(err, "unable to parse version %q for the %s provider in the source cluster", sourceProvider.Version, sourceProvider.InstanceName())
  1276  		}
  1277  
  1278  		// Check the corresponding providers in the target cluster and get the latest version installed.
  1279  		var maxTargetVersion *version.Version
  1280  		for _, targetProvider := range toProviders.Items {
  1281  			// Skips other providers.
  1282  			if !sourceProvider.SameAs(targetProvider) {
  1283  				continue
  1284  			}
  1285  
  1286  			targetVersion, err := version.ParseSemantic(targetProvider.Version)
  1287  			if err != nil {
  1288  				return errors.Wrapf(err, "unable to parse version %q for the %s provider in the target cluster", targetProvider.Version, targetProvider.InstanceName())
  1289  			}
  1290  			if maxTargetVersion == nil || maxTargetVersion.LessThan(targetVersion) {
  1291  				maxTargetVersion = targetVersion
  1292  			}
  1293  		}
  1294  		if maxTargetVersion == nil {
  1295  			errList = append(errList, errors.Errorf("provider %s not found in the target cluster", sourceProvider.Name))
  1296  			continue
  1297  		}
  1298  
  1299  		if !maxTargetVersion.AtLeast(sourceVersion) {
  1300  			errList = append(errList, errors.Errorf("provider %s in the target cluster is older than in the source cluster (source: %s, target: %s)", sourceProvider.Name, sourceVersion.String(), maxTargetVersion.String()))
  1301  		}
  1302  	}
  1303  
  1304  	return kerrors.NewAggregate(errList)
  1305  }
  1306  
  1307  // patchTopologyManagedFields patches the managed fields of obj.
  1308  // Without patching the managed fields, clusterctl would be the owner of the fields,
  1309  // which would lead to co-ownership and prevent other controllers using SSA from deleting fields.
  1310  func patchTopologyManagedFields(ctx context.Context, oldManagedFields []metav1.ManagedFieldsEntry, obj *unstructured.Unstructured, cTo client.Client) error {
  1311  	base := obj.DeepCopy()
  1312  	obj.SetManagedFields(oldManagedFields)
  1313  
  1314  	if err := cTo.Patch(ctx, obj, client.MergeFrom(base)); err != nil {
  1315  		return errors.Wrapf(err, "error patching managed fields %q %s/%s",
  1316  			obj.GroupVersionKind(), obj.GetNamespace(), obj.GetName())
  1317  	}
  1318  	return nil
  1319  }
  1320  
  1321  func applyMutators(object client.Object, mutators ...ResourceMutatorFunc) (*unstructured.Unstructured, error) {
  1322  	if object == nil {
  1323  		return nil, nil
  1324  	}
  1325  	u := &unstructured.Unstructured{}
  1326  	to, err := runtime.DefaultUnstructuredConverter.ToUnstructured(object)
  1327  	if err != nil {
  1328  		return nil, err
  1329  	}
  1330  	u.SetUnstructuredContent(to)
  1331  	for _, mutator := range mutators {
  1332  		if err := mutator(u); err != nil {
  1333  			return nil, errors.Wrapf(err, "error applying resource mutator to %q %s/%s",
  1334  				u.GroupVersionKind(), object.GetNamespace(), object.GetName())
  1335  		}
  1336  	}
  1337  	return u, nil
  1338  }