sigs.k8s.io/cluster-api-provider-aws@v1.5.5/exp/instancestate/awsinstancestate_controller.go (about) 1 /* 2 Copyright 2020 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package instancestate 18 19 import ( 20 "context" 21 "encoding/json" 22 "sync" 23 "time" 24 25 "github.com/aws/aws-sdk-go/aws" 26 "github.com/aws/aws-sdk-go/aws/awserr" 27 "github.com/aws/aws-sdk-go/service/sqs" 28 "github.com/aws/aws-sdk-go/service/sqs/sqsiface" 29 "github.com/go-logr/logr" 30 apierrors "k8s.io/apimachinery/pkg/api/errors" 31 ctrl "sigs.k8s.io/controller-runtime" 32 "sigs.k8s.io/controller-runtime/pkg/client" 33 "sigs.k8s.io/controller-runtime/pkg/controller" 34 "sigs.k8s.io/controller-runtime/pkg/reconcile" 35 36 infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1" 37 "sigs.k8s.io/cluster-api-provider-aws/controllers" 38 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope" 39 "sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/instancestate" 40 "sigs.k8s.io/cluster-api/util/patch" 41 "sigs.k8s.io/cluster-api/util/predicates" 42 ) 43 44 // Ec2InstanceStateLabelKey is the AWSMachine label key under which the EC2 instance state taken from a state-change notification is recorded. 45 const Ec2InstanceStateLabelKey = "ec2-instance-state" 46 47 // AwsInstanceStateReconciler reconciles AWSCluster objects, tracking the SQS queue of each cluster and polling those queues for EC2 instance state-change events. 
48 type AwsInstanceStateReconciler struct { 49 client.Client 50 Log logr.Logger 51 sqsServiceFactory func() sqsiface.SQSAPI 52 queueURLs sync.Map 53 Endpoints []scope.ServiceEndpoint 54 WatchFilterValue string 55 } 56 57 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch 58 // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=get;list;watch 59 60 func (r *AwsInstanceStateReconciler) getSQSService(region string) (sqsiface.SQSAPI, error) { 61 if r.sqsServiceFactory != nil { 62 return r.sqsServiceFactory(), nil 63 } 64 65 globalScope, err := scope.NewGlobalScope(scope.GlobalScopeParams{ 66 ControllerName: "awsinstancestate", 67 Region: region, 68 Endpoints: r.Endpoints, 69 }) 70 71 if err != nil { 72 return nil, err 73 } 74 return scope.NewGlobalSQSClient(globalScope, globalScope), nil 75 } 76 77 func (r *AwsInstanceStateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { 78 // Fetch the AWSCluster instance 79 awsCluster := &infrav1.AWSCluster{} 80 err := r.Get(ctx, req.NamespacedName, awsCluster) 81 if err != nil { 82 if apierrors.IsNotFound(err) { 83 r.Log.Info("cluster not found, removing queue URL", "cluster", req.Name) 84 r.queueURLs.Delete(req.Name) 85 return reconcile.Result{}, nil 86 } 87 return reconcile.Result{}, err 88 } 89 90 // Handle deleted clusters 91 if !awsCluster.DeletionTimestamp.IsZero() { 92 r.queueURLs.Delete(req.Name) 93 return reconcile.Result{}, nil 94 } 95 96 // retrieve queue URL if it isn't already tracked 97 if _, ok := r.queueURLs.Load(awsCluster.Name); !ok { 98 URL, err := r.getQueueURL(awsCluster) 99 if err != nil { 100 if queueNotFoundError(err) { 101 return reconcile.Result{}, nil 102 } 103 return reconcile.Result{}, err 104 } 105 r.queueURLs.Store(awsCluster.Name, queueParams{region: awsCluster.Spec.Region, URL: URL}) 106 } 107 108 return ctrl.Result{}, nil 109 } 110 111 func (r *AwsInstanceStateReconciler) 
SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { 112 go func() { 113 r.watchQueuesForInstanceEvents() 114 }() 115 return ctrl.NewControllerManagedBy(mgr). 116 For(&infrav1.AWSCluster{}). 117 WithOptions(options). 118 WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)). 119 Complete(r) 120 } 121 122 func (r *AwsInstanceStateReconciler) watchQueuesForInstanceEvents() { 123 ctx := context.TODO() 124 awsClusterList := &infrav1.AWSClusterList{} 125 if err := r.Client.List(ctx, awsClusterList); err == nil { 126 for i, cluster := range awsClusterList.Items { 127 if URL, err := r.getQueueURL(&awsClusterList.Items[i]); err == nil { 128 r.queueURLs.Store(cluster.Name, queueParams{region: cluster.Spec.Region, URL: URL}) 129 } 130 } 131 } 132 for range time.Tick(1 * time.Second) { 133 // go through each cluster and check for messages on its queue 134 r.queueURLs.Range(func(key, val interface{}) bool { 135 go func() { 136 qp := val.(queueParams) 137 sqsSvs, err := r.getSQSService(qp.region) 138 if err != nil { 139 r.Log.Error(err, "unable to create SQS client") 140 return 141 } 142 resp, err := sqsSvs.ReceiveMessage(&sqs.ReceiveMessageInput{QueueUrl: aws.String(qp.URL)}) 143 if err != nil { 144 r.Log.Error(err, "failed to receive messages") 145 return 146 } 147 for _, msg := range resp.Messages { 148 m := message{} 149 err := json.Unmarshal([]byte(*msg.Body), &m) 150 151 if err != nil { 152 r.Log.Error(err, "unable to marshall") 153 return 154 } 155 // TODO: handle errors during process message. We currently deletes the message regardless. 
156 r.processMessage(ctx, m) 157 158 _, err = sqsSvs.DeleteMessage(&sqs.DeleteMessageInput{ 159 QueueUrl: aws.String(qp.URL), 160 ReceiptHandle: msg.ReceiptHandle, 161 }) 162 163 if err != nil { 164 r.Log.Error(err, "error deleting message", "queueURL", qp.URL, "messageReceiptHandle", msg.ReceiptHandle) 165 } 166 } 167 }() 168 169 return true 170 }) 171 } 172 } 173 174 // processMessage triggers a reconcile on an AWSMachine if its EC2 instance state changed. 175 func (r *AwsInstanceStateReconciler) processMessage(ctx context.Context, msg message) { 176 if msg.Source != "aws.ec2" || msg.DetailType != instancestate.Ec2StateChangeNotification || msg.MessageDetail == nil { 177 return 178 } 179 180 // Fetch the awsMachine instance by InstanceID 181 awsMachines := &infrav1.AWSMachineList{} 182 err := r.List(ctx, awsMachines, client.MatchingFields{controllers.InstanceIDIndex: msg.MessageDetail.InstanceID}) 183 184 if err != nil { 185 r.Log.Error(err, "unable to list machines by instance ID", "instanceID", msg.MessageDetail.InstanceID) 186 } 187 188 if len(awsMachines.Items) > 0 { 189 machine := awsMachines.Items[0] 190 if !machine.ObjectMeta.DeletionTimestamp.IsZero() { 191 return 192 } 193 patchHelper, err := patch.NewHelper(&machine, r.Client) 194 if err != nil { 195 r.Log.Error(err, "unable to create patch helper") 196 } 197 // Trigger an update on the machine 198 labels := machine.GetLabels() 199 if labels == nil { 200 labels = make(map[string]string) 201 } 202 203 labels[Ec2InstanceStateLabelKey] = string(msg.MessageDetail.State) 204 machine.SetLabels(labels) 205 206 err = patchHelper.Patch(ctx, &machine) 207 if err != nil { 208 r.Log.Error(err, "unable to patch AWS machine") 209 } 210 } 211 } 212 213 // getQueueURL retrieves the SQS queue URL for a given cluster. 
214 func (r *AwsInstanceStateReconciler) getQueueURL(cluster *infrav1.AWSCluster) (string, error) { 215 sqsSvs, err := r.getSQSService(cluster.Spec.Region) 216 if err != nil { 217 return "", err 218 } 219 queueName := instancestate.GenerateQueueName(cluster.Name) 220 resp, err := sqsSvs.GetQueueUrl(&sqs.GetQueueUrlInput{QueueName: aws.String(queueName)}) 221 222 if err != nil { 223 return "", err 224 } 225 226 return *resp.QueueUrl, nil 227 } 228 229 func queueNotFoundError(err error) bool { 230 if aerr, ok := err.(awserr.Error); ok { 231 if aerr.Code() == sqs.ErrCodeQueueDoesNotExist { 232 return true 233 } 234 } 235 return false 236 } 237 238 type queueParams struct { 239 region string 240 URL string 241 } 242 243 type message struct { 244 Source string `json:"source"` 245 DetailType string `json:"detail-type,omitempty"` 246 MessageDetail *messageDetail `json:"detail,omitempty"` 247 } 248 249 type messageDetail struct { 250 InstanceID string `json:"instance-id,omitempty"` 251 State infrav1.InstanceState `json:"state,omitempty"` 252 }