sigs.k8s.io/cluster-api-provider-aws@v1.5.5/exp/instancestate/awsinstancestate_controller.go (about)

     1  /*
     2  Copyright 2020 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package instancestate
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/aws/aws-sdk-go/aws"
    26  	"github.com/aws/aws-sdk-go/aws/awserr"
    27  	"github.com/aws/aws-sdk-go/service/sqs"
    28  	"github.com/aws/aws-sdk-go/service/sqs/sqsiface"
    29  	"github.com/go-logr/logr"
    30  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    31  	ctrl "sigs.k8s.io/controller-runtime"
    32  	"sigs.k8s.io/controller-runtime/pkg/client"
    33  	"sigs.k8s.io/controller-runtime/pkg/controller"
    34  	"sigs.k8s.io/controller-runtime/pkg/reconcile"
    35  
    36  	infrav1 "sigs.k8s.io/cluster-api-provider-aws/api/v1beta1"
    37  	"sigs.k8s.io/cluster-api-provider-aws/controllers"
    38  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/scope"
    39  	"sigs.k8s.io/cluster-api-provider-aws/pkg/cloud/services/instancestate"
    40  	"sigs.k8s.io/cluster-api/util/patch"
    41  	"sigs.k8s.io/cluster-api/util/predicates"
    42  )
    43  
// Ec2InstanceStateLabelKey is the label key under which the most recently
// observed EC2 instance state is recorded on an AWSMachine. Writing this
// label is how processMessage triggers an update on the machine.
const Ec2InstanceStateLabelKey = "ec2-instance-state"
    46  
// AwsInstanceStateReconciler watches AWSCluster objects to keep track of the
// SQS queue belonging to each cluster, and polls those queues for EC2 instance
// state change messages so that the matching AWSMachine can be updated.
type AwsInstanceStateReconciler struct {
	// Client is used to get AWSClusters and to list and patch AWSMachines.
	client.Client
	// Log receives all diagnostics emitted by the reconciler and the queue poller.
	Log               logr.Logger
	// sqsServiceFactory, when non-nil, is used by getSQSService instead of
	// building a client from a global scope.
	sqsServiceFactory func() sqsiface.SQSAPI
	// queueURLs maps a cluster name to the queueParams of its SQS queue.
	queueURLs         sync.Map
	// Endpoints is passed through to scope.GlobalScopeParams when an SQS client is created.
	Endpoints         []scope.ServiceEndpoint
	// WatchFilterValue is handed to the event filter predicate in SetupWithManager.
	WatchFilterValue  string
}
    56  
    57  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsclusters,verbs=get;list;watch
    58  // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachines,verbs=get;list;watch
    59  
    60  func (r *AwsInstanceStateReconciler) getSQSService(region string) (sqsiface.SQSAPI, error) {
    61  	if r.sqsServiceFactory != nil {
    62  		return r.sqsServiceFactory(), nil
    63  	}
    64  
    65  	globalScope, err := scope.NewGlobalScope(scope.GlobalScopeParams{
    66  		ControllerName: "awsinstancestate",
    67  		Region:         region,
    68  		Endpoints:      r.Endpoints,
    69  	})
    70  
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  	return scope.NewGlobalSQSClient(globalScope, globalScope), nil
    75  }
    76  
    77  func (r *AwsInstanceStateReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
    78  	// Fetch the AWSCluster instance
    79  	awsCluster := &infrav1.AWSCluster{}
    80  	err := r.Get(ctx, req.NamespacedName, awsCluster)
    81  	if err != nil {
    82  		if apierrors.IsNotFound(err) {
    83  			r.Log.Info("cluster not found, removing queue URL", "cluster", req.Name)
    84  			r.queueURLs.Delete(req.Name)
    85  			return reconcile.Result{}, nil
    86  		}
    87  		return reconcile.Result{}, err
    88  	}
    89  
    90  	// Handle deleted clusters
    91  	if !awsCluster.DeletionTimestamp.IsZero() {
    92  		r.queueURLs.Delete(req.Name)
    93  		return reconcile.Result{}, nil
    94  	}
    95  
    96  	// retrieve queue URL if it isn't already tracked
    97  	if _, ok := r.queueURLs.Load(awsCluster.Name); !ok {
    98  		URL, err := r.getQueueURL(awsCluster)
    99  		if err != nil {
   100  			if queueNotFoundError(err) {
   101  				return reconcile.Result{}, nil
   102  			}
   103  			return reconcile.Result{}, err
   104  		}
   105  		r.queueURLs.Store(awsCluster.Name, queueParams{region: awsCluster.Spec.Region, URL: URL})
   106  	}
   107  
   108  	return ctrl.Result{}, nil
   109  }
   110  
   111  func (r *AwsInstanceStateReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error {
   112  	go func() {
   113  		r.watchQueuesForInstanceEvents()
   114  	}()
   115  	return ctrl.NewControllerManagedBy(mgr).
   116  		For(&infrav1.AWSCluster{}).
   117  		WithOptions(options).
   118  		WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
   119  		Complete(r)
   120  }
   121  
   122  func (r *AwsInstanceStateReconciler) watchQueuesForInstanceEvents() {
   123  	ctx := context.TODO()
   124  	awsClusterList := &infrav1.AWSClusterList{}
   125  	if err := r.Client.List(ctx, awsClusterList); err == nil {
   126  		for i, cluster := range awsClusterList.Items {
   127  			if URL, err := r.getQueueURL(&awsClusterList.Items[i]); err == nil {
   128  				r.queueURLs.Store(cluster.Name, queueParams{region: cluster.Spec.Region, URL: URL})
   129  			}
   130  		}
   131  	}
   132  	for range time.Tick(1 * time.Second) {
   133  		// go through each cluster and check for messages on its queue
   134  		r.queueURLs.Range(func(key, val interface{}) bool {
   135  			go func() {
   136  				qp := val.(queueParams)
   137  				sqsSvs, err := r.getSQSService(qp.region)
   138  				if err != nil {
   139  					r.Log.Error(err, "unable to create SQS client")
   140  					return
   141  				}
   142  				resp, err := sqsSvs.ReceiveMessage(&sqs.ReceiveMessageInput{QueueUrl: aws.String(qp.URL)})
   143  				if err != nil {
   144  					r.Log.Error(err, "failed to receive messages")
   145  					return
   146  				}
   147  				for _, msg := range resp.Messages {
   148  					m := message{}
   149  					err := json.Unmarshal([]byte(*msg.Body), &m)
   150  
   151  					if err != nil {
   152  						r.Log.Error(err, "unable to marshall")
   153  						return
   154  					}
   155  					// TODO: handle errors during process message. We currently deletes the message regardless.
   156  					r.processMessage(ctx, m)
   157  
   158  					_, err = sqsSvs.DeleteMessage(&sqs.DeleteMessageInput{
   159  						QueueUrl:      aws.String(qp.URL),
   160  						ReceiptHandle: msg.ReceiptHandle,
   161  					})
   162  
   163  					if err != nil {
   164  						r.Log.Error(err, "error deleting message", "queueURL", qp.URL, "messageReceiptHandle", msg.ReceiptHandle)
   165  					}
   166  				}
   167  			}()
   168  
   169  			return true
   170  		})
   171  	}
   172  }
   173  
   174  // processMessage triggers a reconcile on an AWSMachine if its EC2 instance state changed.
   175  func (r *AwsInstanceStateReconciler) processMessage(ctx context.Context, msg message) {
   176  	if msg.Source != "aws.ec2" || msg.DetailType != instancestate.Ec2StateChangeNotification || msg.MessageDetail == nil {
   177  		return
   178  	}
   179  
   180  	// Fetch the awsMachine instance by InstanceID
   181  	awsMachines := &infrav1.AWSMachineList{}
   182  	err := r.List(ctx, awsMachines, client.MatchingFields{controllers.InstanceIDIndex: msg.MessageDetail.InstanceID})
   183  
   184  	if err != nil {
   185  		r.Log.Error(err, "unable to list machines by instance ID", "instanceID", msg.MessageDetail.InstanceID)
   186  	}
   187  
   188  	if len(awsMachines.Items) > 0 {
   189  		machine := awsMachines.Items[0]
   190  		if !machine.ObjectMeta.DeletionTimestamp.IsZero() {
   191  			return
   192  		}
   193  		patchHelper, err := patch.NewHelper(&machine, r.Client)
   194  		if err != nil {
   195  			r.Log.Error(err, "unable to create patch helper")
   196  		}
   197  		// Trigger an update on the machine
   198  		labels := machine.GetLabels()
   199  		if labels == nil {
   200  			labels = make(map[string]string)
   201  		}
   202  
   203  		labels[Ec2InstanceStateLabelKey] = string(msg.MessageDetail.State)
   204  		machine.SetLabels(labels)
   205  
   206  		err = patchHelper.Patch(ctx, &machine)
   207  		if err != nil {
   208  			r.Log.Error(err, "unable to patch AWS machine")
   209  		}
   210  	}
   211  }
   212  
   213  // getQueueURL retrieves the SQS queue URL for a given cluster.
   214  func (r *AwsInstanceStateReconciler) getQueueURL(cluster *infrav1.AWSCluster) (string, error) {
   215  	sqsSvs, err := r.getSQSService(cluster.Spec.Region)
   216  	if err != nil {
   217  		return "", err
   218  	}
   219  	queueName := instancestate.GenerateQueueName(cluster.Name)
   220  	resp, err := sqsSvs.GetQueueUrl(&sqs.GetQueueUrlInput{QueueName: aws.String(queueName)})
   221  
   222  	if err != nil {
   223  		return "", err
   224  	}
   225  
   226  	return *resp.QueueUrl, nil
   227  }
   228  
   229  func queueNotFoundError(err error) bool {
   230  	if aerr, ok := err.(awserr.Error); ok {
   231  		if aerr.Code() == sqs.ErrCodeQueueDoesNotExist {
   232  			return true
   233  		}
   234  	}
   235  	return false
   236  }
   237  
// queueParams is the value stored in the reconciler's queueURLs map for each
// tracked cluster.
type queueParams struct {
	// region is the cluster's Spec.Region; it selects the SQS client used to poll the queue.
	region string
	// URL is the SQS queue URL returned by getQueueURL.
	URL    string
}
   242  
// message is the JSON envelope decoded from the body of an SQS message.
// processMessage only acts on messages whose Source is "aws.ec2" and whose
// DetailType is instancestate.Ec2StateChangeNotification.
type message struct {
	// Source is decoded from the "source" key.
	Source        string         `json:"source"`
	// DetailType is decoded from the "detail-type" key.
	DetailType    string         `json:"detail-type,omitempty"`
	// MessageDetail is decoded from the "detail" key; it is nil when the key is absent.
	MessageDetail *messageDetail `json:"detail,omitempty"`
}
   248  
// messageDetail is the "detail" payload of a message: the instance the event
// refers to and the state reported for it.
type messageDetail struct {
	// InstanceID is used to look up the matching AWSMachine.
	InstanceID string                `json:"instance-id,omitempty"`
	// State is written to the machine's Ec2InstanceStateLabelKey label.
	State      infrav1.InstanceState `json:"state,omitempty"`
}