/*
Copyright 2018 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package scope

import (
	"context"
	"fmt"
	"strings"

	"github.com/go-logr/logr"
	"github.com/pkg/errors"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/klog/v2/klogr"
	"k8s.io/utils/pointer"
	"sigs.k8s.io/controller-runtime/pkg/client"

	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/exp/api/v1beta1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/controllers/remote"
	capierrors "sigs.k8s.io/cluster-api/errors"
	expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
	"sigs.k8s.io/cluster-api/util"
	"sigs.k8s.io/cluster-api/util/patch"
)

const (
	// ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling.
	// The practical effect of this is that the capi "replica" count is derived from the number of observed infra machines,
	// instead of being a source of truth for eventual consistency.
	//
	// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is merged.
	ReplicasManagedByAnnotation = "cluster.x-k8s.io/replicas-managed-by"

	// ExternalAutoscalerReplicasManagedByAnnotationValue is used with the "cluster.x-k8s.io/replicas-managed-by" annotation
	// to indicate an external autoscaler enforces replica count.
	//
	// N.B. this is to be replaced by a direct reference to CAPI once https://github.com/kubernetes-sigs/cluster-api/pull/7107 is merged.
	ExternalAutoscalerReplicasManagedByAnnotationValue = "external-autoscaler"
)

// MachinePoolScope defines a scope defined around a machine pool and its cluster.
type MachinePoolScope struct {
	logr.Logger
	client.Client
	// patchHelper persists changes to the AWSMachinePool object.
	patchHelper *patch.Helper
	// capiMachinePoolPatchHelper persists changes to the CAPI MachinePool object.
	capiMachinePoolPatchHelper *patch.Helper

	Cluster        *clusterv1.Cluster
	MachinePool    *expclusterv1.MachinePool
	InfraCluster   EC2Scope
	AWSMachinePool *expinfrav1.AWSMachinePool
}

// MachinePoolScopeParams defines the input parameters used to create a new MachinePoolScope.
type MachinePoolScopeParams struct {
	Client client.Client
	Logger *logr.Logger

	Cluster        *clusterv1.Cluster
	MachinePool    *expclusterv1.MachinePool
	InfraCluster   EC2Scope
	AWSMachinePool *expinfrav1.AWSMachinePool
}

// GetProviderID returns the AWSMachinePool providerID from the spec.
func (m *MachinePoolScope) GetProviderID() string {
	if m.AWSMachinePool.Spec.ProviderID != "" {
		return m.AWSMachinePool.Spec.ProviderID
	}
	return ""
}

// NewMachinePoolScope creates a new MachinePoolScope from the supplied parameters.
// This is meant to be called for each reconcile iteration.
92 func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) { 93 if params.Client == nil { 94 return nil, errors.New("client is required when creating a MachinePoolScope") 95 } 96 if params.MachinePool == nil { 97 return nil, errors.New("machinepool is required when creating a MachinePoolScope") 98 } 99 if params.Cluster == nil { 100 return nil, errors.New("cluster is required when creating a MachinePoolScope") 101 } 102 if params.AWSMachinePool == nil { 103 return nil, errors.New("aws machine pool is required when creating a MachinePoolScope") 104 } 105 if params.InfraCluster == nil { 106 return nil, errors.New("aws cluster is required when creating a MachinePoolScope") 107 } 108 109 if params.Logger == nil { 110 log := klogr.New() 111 params.Logger = &log 112 } 113 114 ampHelper, err := patch.NewHelper(params.AWSMachinePool, params.Client) 115 if err != nil { 116 return nil, errors.Wrap(err, "failed to init AWSMachinePool patch helper") 117 } 118 mpHelper, err := patch.NewHelper(params.MachinePool, params.Client) 119 if err != nil { 120 return nil, errors.Wrap(err, "failed to init MachinePool patch helper") 121 } 122 123 return &MachinePoolScope{ 124 Logger: *params.Logger, 125 Client: params.Client, 126 patchHelper: ampHelper, 127 capiMachinePoolPatchHelper: mpHelper, 128 Cluster: params.Cluster, 129 MachinePool: params.MachinePool, 130 InfraCluster: params.InfraCluster, 131 AWSMachinePool: params.AWSMachinePool, 132 }, nil 133 } 134 135 // Name returns the AWSMachinePool name. 136 func (m *MachinePoolScope) Name() string { 137 return m.AWSMachinePool.Name 138 } 139 140 // Namespace returns the namespace name. 141 func (m *MachinePoolScope) Namespace() string { 142 return m.AWSMachinePool.Namespace 143 } 144 145 // GetRawBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName. 146 // todo(rudoi): stolen from MachinePool - any way to reuse? 
147 func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, error) { 148 data, _, err := m.getBootstrapData() 149 150 return data, err 151 } 152 153 func (m *MachinePoolScope) GetRawBootstrapDataWithFormat() ([]byte, string, error) { 154 return m.getBootstrapData() 155 } 156 157 func (m *MachinePoolScope) getBootstrapData() ([]byte, string, error) { 158 if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { 159 return nil, "", errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") 160 } 161 162 secret := &corev1.Secret{} 163 key := types.NamespacedName{Namespace: m.Namespace(), Name: *m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} 164 165 if err := m.Client.Get(context.TODO(), key, secret); err != nil { 166 return nil, "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSMachine %s/%s", m.Namespace(), m.Name()) 167 } 168 169 value, ok := secret.Data["value"] 170 if !ok { 171 return nil, "", errors.New("error retrieving bootstrap data: secret value key is missing") 172 } 173 174 return value, string(secret.Data["format"]), nil 175 } 176 177 // AdditionalTags merges AdditionalTags from the scope's AWSCluster and AWSMachinePool. If the same key is present in both, 178 // the value from AWSMachinePool takes precedence. The returned Tags will never be nil. 179 func (m *MachinePoolScope) AdditionalTags() infrav1.Tags { 180 tags := make(infrav1.Tags) 181 182 // Start with the cluster-wide tags... 183 tags.Merge(m.InfraCluster.AdditionalTags()) 184 // ... and merge in the Machine's 185 tags.Merge(m.AWSMachinePool.Spec.AdditionalTags) 186 187 return tags 188 } 189 190 // PatchObject persists the machinepool spec and status. 
func (m *MachinePoolScope) PatchObject() error {
	return m.patchHelper.Patch(
		context.TODO(),
		m.AWSMachinePool,
		// Conditions owned by this controller; the patch helper resolves
		// conflicts on these in favor of the local object.
		patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{
			expinfrav1.ASGReadyCondition,
			expinfrav1.LaunchTemplateReadyCondition,
		}})
}

// PatchCAPIMachinePoolObject persists the capi machinepool configuration and status.
func (m *MachinePoolScope) PatchCAPIMachinePoolObject(ctx context.Context) error {
	return m.capiMachinePoolPatchHelper.Patch(
		ctx,
		m.MachinePool,
	)
}

// Close the MachinePoolScope by updating the machinepool spec, machine status.
func (m *MachinePoolScope) Close() error {
	return m.PatchObject()
}

// SetAnnotation sets a key value annotation on the AWSMachinePool.
func (m *MachinePoolScope) SetAnnotation(key, value string) {
	// Lazily initialize the map; annotations may be nil on a fresh object.
	if m.AWSMachinePool.Annotations == nil {
		m.AWSMachinePool.Annotations = map[string]string{}
	}
	m.AWSMachinePool.Annotations[key] = value
}

// SetFailureMessage sets the AWSMachinePool status failure message.
func (m *MachinePoolScope) SetFailureMessage(v error) {
	m.AWSMachinePool.Status.FailureMessage = pointer.StringPtr(v.Error())
}

// SetFailureReason sets the AWSMachinePool status failure reason.
func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) {
	m.AWSMachinePool.Status.FailureReason = &v
}

// HasFailed returns true when the AWSMachinePool's Failure reason or Failure message is populated.
func (m *MachinePoolScope) HasFailed() bool {
	return m.AWSMachinePool.Status.FailureReason != nil || m.AWSMachinePool.Status.FailureMessage != nil
}

// SetNotReady sets the AWSMachinePool Ready Status to false.
func (m *MachinePoolScope) SetNotReady() {
	m.AWSMachinePool.Status.Ready = false
}

// GetASGStatus returns the AWSMachinePool instance state from the status.
func (m *MachinePoolScope) GetASGStatus() *expinfrav1.ASGStatus {
	return m.AWSMachinePool.Status.ASGStatus
}

// SetASGStatus sets the AWSMachinePool status instance state.
func (m *MachinePoolScope) SetASGStatus(v expinfrav1.ASGStatus) {
	m.AWSMachinePool.Status.ASGStatus = &v
}

// SetLaunchTemplateIDStatus sets the AWSMachinePool LaunchTemplateID status.
func (m *MachinePoolScope) SetLaunchTemplateIDStatus(id string) {
	m.AWSMachinePool.Status.LaunchTemplateID = id
}

// IsEKSManaged checks if the AWSMachinePool is EKS managed, i.e. the owning
// infra cluster object is an AWSManagedControlPlane rather than an AWSCluster.
func (m *MachinePoolScope) IsEKSManaged() bool {
	return m.InfraCluster.InfraCluster().GetObjectKind().GroupVersionKind().Kind == "AWSManagedControlPlane"
}

// SubnetIDs returns the machine pool subnet IDs.
func (m *MachinePoolScope) SubnetIDs(subnetIDs []string) ([]string, error) {
	strategy, err := newDefaultSubnetPlacementStrategy(&m.Logger)
	if err != nil {
		return subnetIDs, fmt.Errorf("getting subnet placement strategy: %w", err)
	}

	// Placement considers the explicit subnet IDs/AZs on the AWSMachinePool,
	// the MachinePool's failure domains, and the cluster's subnets.
	return strategy.Place(&placementInput{
		SpecSubnetIDs:           subnetIDs,
		SpecAvailabilityZones:   m.AWSMachinePool.Spec.AvailabilityZones,
		ParentAvailabilityZones: m.MachinePool.Spec.FailureDomains,
		ControlplaneSubnets:     m.InfraCluster.Subnets(),
	})
}

// NodeStatus represents the status of a Kubernetes node.
type NodeStatus struct {
	Ready   bool
	Version string
}

// UpdateInstanceStatuses ties ASG instances and Node status data together and updates AWSMachinePool.
// This records whether each ASG instance's node is ready and the kubelet version running on it.
285 func (m *MachinePoolScope) UpdateInstanceStatuses(ctx context.Context, instances []infrav1.Instance) error { 286 providerIDs := make([]string, len(instances)) 287 for i, instance := range instances { 288 providerIDs[i] = fmt.Sprintf("aws:////%s", instance.ID) 289 } 290 291 nodeStatusByProviderID, err := m.getNodeStatusByProviderID(ctx, providerIDs) 292 if err != nil { 293 return errors.Wrap(err, "failed to get node status by provider id") 294 } 295 296 var readyReplicas int32 297 instanceStatuses := make([]expinfrav1.AWSMachinePoolInstanceStatus, len(instances)) 298 for i, instance := range instances { 299 instanceStatuses[i] = expinfrav1.AWSMachinePoolInstanceStatus{ 300 InstanceID: instance.ID, 301 } 302 303 instanceStatus := instanceStatuses[i] 304 if nodeStatus, ok := nodeStatusByProviderID[fmt.Sprintf("aws:////%s", instanceStatus.InstanceID)]; ok { 305 instanceStatus.Version = &nodeStatus.Version 306 if nodeStatus.Ready { 307 readyReplicas++ 308 } 309 } 310 } 311 312 // TODO: readyReplicas can be used as status.replicas but this will delay machinepool to become ready. next reconcile updates this. 
313 m.AWSMachinePool.Status.Instances = instanceStatuses 314 return nil 315 } 316 317 func (m *MachinePoolScope) getNodeStatusByProviderID(ctx context.Context, providerIDList []string) (map[string]*NodeStatus, error) { 318 nodeStatusMap := map[string]*NodeStatus{} 319 for _, id := range providerIDList { 320 nodeStatusMap[id] = &NodeStatus{} 321 } 322 323 workloadClient, err := remote.NewClusterClient(ctx, "", m.Client, util.ObjectKey(m.Cluster)) 324 if err != nil { 325 return nil, err 326 } 327 328 nodeList := corev1.NodeList{} 329 for { 330 if err := workloadClient.List(ctx, &nodeList, client.Continue(nodeList.Continue)); err != nil { 331 return nil, errors.Wrapf(err, "failed to List nodes") 332 } 333 334 for _, node := range nodeList.Items { 335 strList := strings.Split(node.Spec.ProviderID, "/") 336 337 if status, ok := nodeStatusMap[fmt.Sprintf("aws:////%s", strList[len(strList)-1])]; ok { 338 status.Ready = nodeIsReady(node) 339 status.Version = node.Status.NodeInfo.KubeletVersion 340 } 341 } 342 343 if nodeList.Continue == "" { 344 break 345 } 346 } 347 348 return nodeStatusMap, nil 349 } 350 351 func nodeIsReady(node corev1.Node) bool { 352 for _, n := range node.Status.Conditions { 353 if n.Type == corev1.NodeReady { 354 return n.Status == corev1.ConditionTrue 355 } 356 } 357 return false 358 } 359 360 func (m *MachinePoolScope) GetLaunchTemplate() *expinfrav1.AWSLaunchTemplate { 361 return &m.AWSMachinePool.Spec.AWSLaunchTemplate 362 } 363 364 func (m *MachinePoolScope) GetMachinePool() *expclusterv1.MachinePool { 365 return m.MachinePool 366 } 367 368 func (m *MachinePoolScope) LaunchTemplateName() string { 369 return m.Name() 370 } 371 372 func (m *MachinePoolScope) GetRuntimeObject() runtime.Object { 373 return m.AWSMachinePool 374 } 375 376 func ReplicasExternallyManaged(mp *expclusterv1.MachinePool) bool { 377 val, ok := mp.Annotations[ReplicasManagedByAnnotation] 378 return ok && val == ExternalAutoscalerReplicasManagedByAnnotationValue 379 }