sigs.k8s.io/cluster-api-provider-aws@v1.5.5/pkg/cloud/scope/machinepool.go (about)

     1  /*
     2  Copyright 2018 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scope
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"strings"
    23  
    24  	"github.com/go-logr/logr"
    25  	"github.com/pkg/errors"
    26  	corev1 "k8s.io/api/core/v1"
    27  	"k8s.io/apimachinery/pkg/runtime"
    28  	"k8s.io/apimachinery/pkg/types"
    29  	"k8s.io/klog/v2/klogr"
    30  	"k8s.io/utils/pointer"
    31  	"sigs.k8s.io/controller-runtime/pkg/client"
    32  
    33  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    34  	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/exp/api/v1beta1"
    35  	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
    36  	"sigs.k8s.io/cluster-api/controllers/remote"
    37  	capierrors "sigs.k8s.io/cluster-api/errors"
    38  	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
    39  	"sigs.k8s.io/cluster-api/util"
    40  	"sigs.k8s.io/cluster-api/util/patch"
    41  )
    42  
const (
	// ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling.
	// The practical effect of this is that the capi "replica" count is derived from the number of observed infra machines,
	// instead of being a source of truth for eventual consistency.
	//
	// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is merged.
	ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by"

	// ExternalAutoscalerReplicasManagedByAnnotationValue is used with the "cluster.x-k8s.io/replicas-managed-by" annotation
	// to indicate an external autoscaler enforces replica count.
	//
	// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is merged.
	ExternalAutoscalerReplicasManagedByAnnotationValue = "external-autoscaler"
)
    57  
// MachinePoolScope defines a scope around an AWSMachinePool, the Cluster API
// MachinePool it is paired with, and the cluster they belong to.
//
// The embedded Logger and Client are the ones supplied to NewMachinePoolScope.
// patchHelper is used by PatchObject to patch the AWSMachinePool, and
// capiMachinePoolPatchHelper is used by PatchCAPIMachinePoolObject to patch
// the Cluster API MachinePool.
type MachinePoolScope struct {
	logr.Logger
	client.Client
	patchHelper                *patch.Helper
	capiMachinePoolPatchHelper *patch.Helper

	Cluster        *clusterv1.Cluster
	MachinePool    *expclusterv1.MachinePool
	InfraCluster   EC2Scope
	AWSMachinePool *expinfrav1.AWSMachinePool
}
    70  
// MachinePoolScopeParams holds the inputs for NewMachinePoolScope.
//
// Client, Cluster, MachinePool, InfraCluster and AWSMachinePool are required;
// NewMachinePoolScope returns an error if any of them is nil. Logger is
// optional: when nil, NewMachinePoolScope falls back to a klogr logger.
type MachinePoolScopeParams struct {
	Client client.Client
	Logger *logr.Logger

	Cluster        *clusterv1.Cluster
	MachinePool    *expclusterv1.MachinePool
	InfraCluster   EC2Scope
	AWSMachinePool *expinfrav1.AWSMachinePool
}
    81  
    82  // GetProviderID returns the AWSMachine providerID from the spec.
    83  func (m *MachinePoolScope) GetProviderID() string {
    84  	if m.AWSMachinePool.Spec.ProviderID != "" {
    85  		return m.AWSMachinePool.Spec.ProviderID
    86  	}
    87  	return ""
    88  }
    89  
    90  // NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters.
    91  // This is meant to be called for each reconcile iteration.
    92  func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) {
    93  	if params.Client == nil {
    94  		return nil, errors.New("client is required when creating a MachinePoolScope")
    95  	}
    96  	if params.MachinePool == nil {
    97  		return nil, errors.New("machinepool is required when creating a MachinePoolScope")
    98  	}
    99  	if params.Cluster == nil {
   100  		return nil, errors.New("cluster is required when creating a MachinePoolScope")
   101  	}
   102  	if params.AWSMachinePool == nil {
   103  		return nil, errors.New("aws machine pool is required when creating a MachinePoolScope")
   104  	}
   105  	if params.InfraCluster == nil {
   106  		return nil, errors.New("aws cluster is required when creating a MachinePoolScope")
   107  	}
   108  
   109  	if params.Logger == nil {
   110  		log := klogr.New()
   111  		params.Logger = &log
   112  	}
   113  
   114  	ampHelper, err := patch.NewHelper(params.AWSMachinePool, params.Client)
   115  	if err != nil {
   116  		return nil, errors.Wrap(err, "failed to init AWSMachinePool patch helper")
   117  	}
   118  	mpHelper, err := patch.NewHelper(params.MachinePool, params.Client)
   119  	if err != nil {
   120  		return nil, errors.Wrap(err, "failed to init MachinePool patch helper")
   121  	}
   122  
   123  	return &MachinePoolScope{
   124  		Logger:                     *params.Logger,
   125  		Client:                     params.Client,
   126  		patchHelper:                ampHelper,
   127  		capiMachinePoolPatchHelper: mpHelper,
   128  		Cluster:                    params.Cluster,
   129  		MachinePool:                params.MachinePool,
   130  		InfraCluster:               params.InfraCluster,
   131  		AWSMachinePool:             params.AWSMachinePool,
   132  	}, nil
   133  }
   134  
   135  // Name returns the AWSMachinePool name.
   136  func (m *MachinePoolScope) Name() string {
   137  	return m.AWSMachinePool.Name
   138  }
   139  
   140  // Namespace returns the namespace name.
   141  func (m *MachinePoolScope) Namespace() string {
   142  	return m.AWSMachinePool.Namespace
   143  }
   144  
   145  // GetRawBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName.
   146  // todo(rudoi): stolen from MachinePool - any way to reuse?
   147  func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, error) {
   148  	data, _, err := m.getBootstrapData()
   149  
   150  	return data, err
   151  }
   152  
   153  func (m *MachinePoolScope) GetRawBootstrapDataWithFormat() ([]byte, string, error) {
   154  	return m.getBootstrapData()
   155  }
   156  
   157  func (m *MachinePoolScope) getBootstrapData() ([]byte, string, error) {
   158  	if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil {
   159  		return nil, "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil")
   160  	}
   161  
   162  	secret := &corev1.Secret{}
   163  	key := types.NamespacedName{Namespace: m.Namespace(), Name: *m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName}
   164  
   165  	if err := m.Client.Get(context.TODO(), key, secret); err != nil {
   166  		return nil, "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSMachine %s/%s", m.Namespace(), m.Name())
   167  	}
   168  
   169  	value, ok := secret.Data["value"]
   170  	if !ok {
   171  		return nil, "", errors.New("error retrieving bootstrap data: secret value key is missing")
   172  	}
   173  
   174  	return value, string(secret.Data["format"]), nil
   175  }
   176  
   177  // AdditionalTags merges AdditionalTags from the scope's AWSCluster and AWSMachinePool. If the same key is present in both,
   178  // the value from AWSMachinePool takes precedence. The returned Tags will never be nil.
   179  func (m *MachinePoolScope) AdditionalTags() infrav1.Tags {
   180  	tags := make(infrav1.Tags)
   181  
   182  	// Start with the cluster-wide tags...
   183  	tags.Merge(m.InfraCluster.AdditionalTags())
   184  	// ... and merge in the Machine's
   185  	tags.Merge(m.AWSMachinePool.Spec.AdditionalTags)
   186  
   187  	return tags
   188  }
   189  
   190  // PatchObject persists the machinepool spec and status.
   191  func (m *MachinePoolScope) PatchObject() error {
   192  	return m.patchHelper.Patch(
   193  		context.TODO(),
   194  		m.AWSMachinePool,
   195  		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
   196  			expinfrav1.ASGReadyCondition,
   197  			expinfrav1.LaunchTemplateReadyCondition,
   198  		}})
   199  }
   200  
   201  // PatchCAPIMachinePoolObject persists the capi machinepool configuration and status.
   202  func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error {
   203  	return m.capiMachinePoolPatchHelper.Patch(
   204  		ctx,
   205  		m.MachinePool,
   206  	)
   207  }
   208  
   209  // Close the MachinePoolScope by updating the machinepool spec, machine status.
   210  func (m *MachinePoolScope) Close() error {
   211  	return m.PatchObject()
   212  }
   213  
   214  // SetAnnotation sets a key value annotation on the AWSMachine.
   215  func (m *MachinePoolScope) SetAnnotation(key, value string) {
   216  	if m.AWSMachinePool.Annotations == nil {
   217  		m.AWSMachinePool.Annotations = map[string]string{}
   218  	}
   219  	m.AWSMachinePool.Annotations[key] = value
   220  }
   221  
   222  // SetFailureMessage sets the AWSMachine status failure message.
   223  func (m *MachinePoolScope) SetFailureMessage(v error) {
   224  	m.AWSMachinePool.Status.FailureMessage = pointer.StringPtr(v.Error())
   225  }
   226  
   227  // SetFailureReason sets the AWSMachine status failure reason.
   228  func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) {
   229  	m.AWSMachinePool.Status.FailureReason = &v
   230  }
   231  
   232  // HasFailed returns true when the AWSMachinePool's Failure reason or Failure message is populated.
   233  func (m *MachinePoolScope) HasFailed() bool {
   234  	return m.AWSMachinePool.Status.FailureReason != nil || m.AWSMachinePool.Status.FailureMessage != nil
   235  }
   236  
   237  // SetNotReady sets the AWSMachinePool Ready Status to false.
   238  func (m *MachinePoolScope) SetNotReady() {
   239  	m.AWSMachinePool.Status.Ready = false
   240  }
   241  
   242  // GetASGStatus returns the AWSMachinePool instance state from the status.
   243  func (m *MachinePoolScope) GetASGStatus() *expinfrav1.ASGStatus {
   244  	return m.AWSMachinePool.Status.ASGStatus
   245  }
   246  
   247  // SetASGStatus sets the AWSMachinePool status instance state.
   248  func (m *MachinePoolScope) SetASGStatus(v expinfrav1.ASGStatus) {
   249  	m.AWSMachinePool.Status.ASGStatus = &v
   250  }
   251  
   252  // SetLaunchTemplateIDStatus sets the AWSMachinePool LaunchTemplateID status.
   253  func (m *MachinePoolScope) SetLaunchTemplateIDStatus(id string) {
   254  	m.AWSMachinePool.Status.LaunchTemplateID = id
   255  }
   256  
   257  // IsEKSManaged checks if the AWSMachinePool is EKS managed.
   258  func (m *MachinePoolScope) IsEKSManaged() bool {
   259  	return m.InfraCluster.InfraCluster().GetObjectKind().GroupVersionKind().Kind == "AWSManagedControlPlane"
   260  }
   261  
   262  // SubnetIDs returns the machine pool subnet IDs.
   263  func (m *MachinePoolScope) SubnetIDs(subnetIDs []string) ([]string, error) {
   264  	strategy, err := newDefaultSubnetPlacementStrategy(&m.Logger)
   265  	if err != nil {
   266  		return subnetIDs, fmt.Errorf("getting subnet placement strategy: %w", err)
   267  	}
   268  
   269  	return strategy.Place(&placementInput{
   270  		SpecSubnetIDs:           subnetIDs,
   271  		SpecAvailabilityZones:   m.AWSMachinePool.Spec.AvailabilityZones,
   272  		ParentAvailabilityZones: m.MachinePool.Spec.FailureDomains,
   273  		ControlplaneSubnets:     m.InfraCluster.Subnets(),
   274  	})
   275  }
   276  
// NodeStatus represents the status of a Kubernetes node.
//
// In this file, Ready is filled from the node's NodeReady condition and
// Version from the kubelet version reported in the node's status.
type NodeStatus struct {
	Ready   bool
	Version string
}
   282  
   283  // UpdateInstanceStatuses ties ASG instances and Node status data together and updates AWSMachinePool
   284  // This updates if ASG instances ready and kubelet version running on the node..
   285  func (m *MachinePoolScope) UpdateInstanceStatuses(ctx context.Context, instances []infrav1.Instance) error {
   286  	providerIDs := make([]string, len(instances))
   287  	for i, instance := range instances {
   288  		providerIDs[i] = fmt.Sprintf("aws:////%s", instance.ID)
   289  	}
   290  
   291  	nodeStatusByProviderID, err := m.getNodeStatusByProviderID(ctx, providerIDs)
   292  	if err != nil {
   293  		return errors.Wrap(err, "failed to get node status by provider id")
   294  	}
   295  
   296  	var readyReplicas int32
   297  	instanceStatuses := make([]expinfrav1.AWSMachinePoolInstanceStatus, len(instances))
   298  	for i, instance := range instances {
   299  		instanceStatuses[i] = expinfrav1.AWSMachinePoolInstanceStatus{
   300  			InstanceID: instance.ID,
   301  		}
   302  
   303  		instanceStatus := instanceStatuses[i]
   304  		if nodeStatus, ok := nodeStatusByProviderID[fmt.Sprintf("aws:////%s", instanceStatus.InstanceID)]; ok {
   305  			instanceStatus.Version = &nodeStatus.Version
   306  			if nodeStatus.Ready {
   307  				readyReplicas++
   308  			}
   309  		}
   310  	}
   311  
   312  	// TODO: readyReplicas can be used as status.replicas but this will delay machinepool to become ready. next reconcile updates this.
   313  	m.AWSMachinePool.Status.Instances = instanceStatuses
   314  	return nil
   315  }
   316  
   317  func (m *MachinePoolScope) getNodeStatusByProviderID(ctx context.Context, providerIDList []string) (map[string]*NodeStatus, error) {
   318  	nodeStatusMap := map[string]*NodeStatus{}
   319  	for _, id := range providerIDList {
   320  		nodeStatusMap[id] = &NodeStatus{}
   321  	}
   322  
   323  	workloadClient, err := remote.NewClusterClient(ctx, "", m.Client, util.ObjectKey(m.Cluster))
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  
   328  	nodeList := corev1.NodeList{}
   329  	for {
   330  		if err := workloadClient.List(ctx, &nodeList, client.Continue(nodeList.Continue)); err != nil {
   331  			return nil, errors.Wrapf(err, "failed to List nodes")
   332  		}
   333  
   334  		for _, node := range nodeList.Items {
   335  			strList := strings.Split(node.Spec.ProviderID, "/")
   336  
   337  			if status, ok := nodeStatusMap[fmt.Sprintf("aws:////%s", strList[len(strList)-1])]; ok {
   338  				status.Ready = nodeIsReady(node)
   339  				status.Version = node.Status.NodeInfo.KubeletVersion
   340  			}
   341  		}
   342  
   343  		if nodeList.Continue == "" {
   344  			break
   345  		}
   346  	}
   347  
   348  	return nodeStatusMap, nil
   349  }
   350  
   351  func nodeIsReady(node corev1.Node) bool {
   352  	for _, n := range node.Status.Conditions {
   353  		if n.Type == corev1.NodeReady {
   354  			return n.Status == corev1.ConditionTrue
   355  		}
   356  	}
   357  	return false
   358  }
   359  
   360  func (m *MachinePoolScope) GetLaunchTemplate() *expinfrav1.AWSLaunchTemplate {
   361  	return &m.AWSMachinePool.Spec.AWSLaunchTemplate
   362  }
   363  
   364  func (m *MachinePoolScope) GetMachinePool() *expclusterv1.MachinePool {
   365  	return m.MachinePool
   366  }
   367  
   368  func (m *MachinePoolScope) LaunchTemplateName() string {
   369  	return m.Name()
   370  }
   371  
   372  func (m *MachinePoolScope) GetRuntimeObject() runtime.Object {
   373  	return m.AWSMachinePool
   374  }
   375  
   376  func ReplicasExternallyManaged(mp *expclusterv1.MachinePool) bool {
   377  	val, ok := mp.Annotations[ReplicasManagedByAnnotation]
   378  	return ok && val == ExternalAutoscalerReplicasManagedByAnnotationValue
   379  }