github.com/crossplane/upjet@v1.3.0/pkg/controller/external.go (about)

     1  // SPDX-FileCopyrightText: 2023 The Crossplane Authors <https://crossplane.io>
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  
     5  package controller
     6  
     7  import (
     8  	"context"
     9  	"time"
    10  
    11  	xpv1 "github.com/crossplane/crossplane-runtime/apis/common/v1"
    12  	"github.com/crossplane/crossplane-runtime/pkg/logging"
    13  	"github.com/crossplane/crossplane-runtime/pkg/reconciler/managed"
    14  	xpresource "github.com/crossplane/crossplane-runtime/pkg/resource"
    15  	"github.com/pkg/errors"
    16  	"k8s.io/apimachinery/pkg/util/sets"
    17  	"sigs.k8s.io/controller-runtime/pkg/client"
    18  
    19  	"github.com/crossplane/upjet/pkg/config"
    20  	"github.com/crossplane/upjet/pkg/controller/handler"
    21  	"github.com/crossplane/upjet/pkg/metrics"
    22  	"github.com/crossplane/upjet/pkg/resource"
    23  	"github.com/crossplane/upjet/pkg/resource/json"
    24  	"github.com/crossplane/upjet/pkg/terraform"
    25  	tferrors "github.com/crossplane/upjet/pkg/terraform/errors"
    26  )
    27  
// Error messages used by the Connector and the external client in this file.
// Apart from errUnexpectedObject, which is returned as a standalone error,
// they are meant to be used as the wrapping context of an underlying error.
const (
	errUnexpectedObject  = "the custom resource is not a Terraformed resource"
	errGetTerraformSetup = "cannot get terraform setup"
	errGetWorkspace      = "cannot get a terraform workspace for resource"
	errRefresh           = "cannot run refresh"
	errImport            = "cannot run import"
	errPlan              = "cannot run plan"
	errStartAsyncApply   = "cannot start async apply"
	errStartAsyncDestroy = "cannot start async destroy"
	errApply             = "cannot apply"
	errDestroy           = "cannot destroy"
	errScheduleProvider  = "cannot schedule native Terraform provider process, please consider increasing its TTL with the --provider-ttl command-line option"
	errUpdateAnnotations = "cannot update managed resource annotations"
)
    42  
const (
	// rateLimiterScheduler is the rate limiter name passed to the event
	// handler when a reconcile request is requeued (or forgotten) in
	// relation to scheduling the native provider.
	rateLimiterScheduler = "scheduler"
	// rateLimiterStatus is the rate limiter name passed to the event handler
	// when a reconcile request is requeued after the resource is marked as
	// available in Observe, and forgotten once Observe reaches the plan step.
	rateLimiterStatus = "status"
	// retryLimit is handed (by pointer) to the event handler's
	// RequestReconcile call made after a retryable scheduling error.
	retryLimit = 20
)
    48  
// Option allows you to configure Connector. NewConnector applies the given
// options in order, after it has populated the Connector with its defaults.
type Option func(*Connector)
    51  
    52  // WithCallbackProvider configures the controller to use async variant of the functions
    53  // of the Terraform client and run given callbacks once those operations are
    54  // completed.
    55  func WithCallbackProvider(ac CallbackProvider) Option {
    56  	return func(c *Connector) {
    57  		c.callback = ac
    58  	}
    59  }
    60  
    61  // WithLogger configures a logger for the Connector.
    62  func WithLogger(l logging.Logger) Option {
    63  	return func(c *Connector) {
    64  		c.logger = l
    65  	}
    66  }
    67  
    68  // WithConnectorEventHandler configures the EventHandler so that
    69  // the external clients can requeue reconciliation requests.
    70  func WithConnectorEventHandler(e *handler.EventHandler) Option {
    71  	return func(c *Connector) {
    72  		c.eventHandler = e
    73  	}
    74  }
    75  
    76  // NewConnector returns a new Connector object.
    77  func NewConnector(kube client.Client, ws Store, sf terraform.SetupFn, cfg *config.Resource, opts ...Option) *Connector {
    78  	c := &Connector{
    79  		kube:              kube,
    80  		getTerraformSetup: sf,
    81  		store:             ws,
    82  		config:            cfg,
    83  		logger:            logging.NewNopLogger(),
    84  	}
    85  	for _, f := range opts {
    86  		f(c)
    87  	}
    88  	return c
    89  }
    90  
// Connector initializes the external client with credentials and other configuration
// parameters.
type Connector struct {
	// kube is passed to the Terraform setup function, wrapped into the
	// secret client given to the store, and handed to the external client.
	kube client.Client
	// store supplies the Terraform workspace for a managed resource.
	store Store
	// getTerraformSetup is called on every Connect to obtain the Terraform
	// setup for the managed resource.
	getTerraformSetup terraform.SetupFn
	// config is the resource configuration handed to the store and to the
	// external client.
	config *config.Resource
	// callback is set via WithCallbackProvider and handed to the external
	// client; it is nil unless that option is used.
	callback CallbackProvider
	// eventHandler is set via WithConnectorEventHandler and handed to the
	// external client; it is nil unless that option is used.
	eventHandler *handler.EventHandler
	// logger defaults to a no-op logger and can be replaced via WithLogger.
	logger logging.Logger
}
   102  
   103  // Connect makes sure the underlying client is ready to issue requests to the
   104  // provider API.
   105  func (c *Connector) Connect(ctx context.Context, mg xpresource.Managed) (managed.ExternalClient, error) {
   106  	tr, ok := mg.(resource.Terraformed)
   107  	if !ok {
   108  		return nil, errors.New(errUnexpectedObject)
   109  	}
   110  
   111  	ts, err := c.getTerraformSetup(ctx, c.kube, mg)
   112  	if err != nil {
   113  		return nil, errors.Wrap(err, errGetTerraformSetup)
   114  	}
   115  
   116  	ws, err := c.store.Workspace(ctx, &APISecretClient{kube: c.kube}, tr, ts, c.config)
   117  	if err != nil {
   118  		return nil, errors.Wrap(err, errGetWorkspace)
   119  	}
   120  	return &external{
   121  		workspace:         ws,
   122  		config:            c.config,
   123  		callback:          c.callback,
   124  		providerScheduler: ts.Scheduler,
   125  		providerHandle:    ws.ProviderHandle,
   126  		eventHandler:      c.eventHandler,
   127  		kube:              c.kube,
   128  		logger:            c.logger.WithValues("uid", mg.GetUID(), "name", mg.GetName(), "gvk", mg.GetObjectKind().GroupVersionKind().String()),
   129  	}, nil
   130  }
   131  
// external is the external client returned by Connector.Connect. It carries
// out the observe, create, update and delete operations of a managed resource
// through a Terraform workspace.
type external struct {
	// workspace is the Terraform workspace the operations are run against.
	workspace Workspace
	// config is the resource configuration; its UseAsync field selects
	// between the sync and async variants of apply and destroy.
	config *config.Resource
	// callback supplies the callbacks passed to the async operations.
	callback CallbackProvider
	// providerScheduler, when non-nil, is started before and stopped after
	// each operation using providerHandle.
	providerScheduler terraform.ProviderScheduler
	// providerHandle identifies the provider to the scheduler.
	providerHandle terraform.ProviderHandle
	// eventHandler, when non-nil, is used to requeue reconcile requests and
	// to forget previously requeued ones.
	eventHandler *handler.EventHandler
	// kube is used in Observe to persist updated critical annotations when
	// the management policies do not include late-initialization.
	kube client.Client
	// logger is the logger used by the client.
	logger logging.Logger
}
   142  
   143  func (e *external) scheduleProvider(name string) (bool, error) {
   144  	if e.providerScheduler == nil || e.workspace == nil {
   145  		return false, nil
   146  	}
   147  	inuse, attachmentConfig, err := e.providerScheduler.Start(e.providerHandle)
   148  	if err != nil {
   149  		retryLimit := retryLimit
   150  		if tferrors.IsRetryScheduleError(err) && (e.eventHandler != nil && e.eventHandler.RequestReconcile(rateLimiterScheduler, name, &retryLimit)) {
   151  			// the reconcile request has been requeued for a rate-limited retry
   152  			return true, nil
   153  		}
   154  		return false, errors.Wrap(err, errScheduleProvider)
   155  	}
   156  	if e.eventHandler != nil {
   157  		e.eventHandler.Forget(rateLimiterScheduler, name)
   158  	}
   159  	if ps, ok := e.workspace.(ProviderSharer); ok {
   160  		ps.UseProvider(inuse, attachmentConfig)
   161  	}
   162  	return false, nil
   163  }
   164  
   165  func (e *external) stopProvider() {
   166  	if e.providerScheduler == nil {
   167  		return
   168  	}
   169  	if err := e.providerScheduler.Stop(e.providerHandle); err != nil {
   170  		e.logger.Info("ExternalClient failed to stop the native provider", "error", err)
   171  	}
   172  }
   173  
// Observe reports the state of the external resource backing mg.
//
// It first schedules the native provider; if the reconcile request had to be
// requeued because of that, it returns a no-op observation (exists and
// up-to-date). If the management policies contain none of Create, Update or
// All, the observation is delegated to Import. Otherwise the workspace is
// refreshed and, unless an async operation is in progress or the resource
// does not exist, the Terraform state is written to the observation of the
// resource, critical annotations and connection details are derived from it,
// parameters are late-initialized when the policies allow it, and finally a
// plan is run to determine whether the resource is up-to-date.
func (e *external) Observe(ctx context.Context, mg xpresource.Managed) (managed.ExternalObservation, error) { //nolint:gocyclo
	// We skip the gocyclo check because most of the operations are straight-forward
	// and serial.
	// TODO(muvaf): Look for ways to reduce the cyclomatic complexity without
	// increasing the difficulty of understanding the flow.
	requeued, err := e.scheduleProvider(mg.GetName())
	if err != nil {
		return managed.ExternalObservation{}, errors.Wrapf(err, "cannot schedule a native provider during observe: %s", mg.GetUID())
	}
	if requeued {
		// return a noop for Observe after requeuing the reconcile request
		// for a retry.
		return managed.ExternalObservation{
			ResourceExists:   true,
			ResourceUpToDate: true,
		}, nil
	}
	defer e.stopProvider()

	tr, ok := mg.(resource.Terraformed)
	if !ok {
		return managed.ExternalObservation{}, errors.New(errUnexpectedObject)
	}

	policySet := sets.New[xpv1.ManagementAction](tr.GetManagementPolicies()...)

	// Note(turkenh): We don't need to check if the management policies are
	// enabled or not because the crossplane-runtime's managed reconciler already
	// does that for us. In other words, if the management policies are set
	// without management policies being enabled, the managed
	// reconciler will error out before reaching this point.
	// https://github.com/crossplane/crossplane-runtime/pull/384/files#diff-97300a2543f95f5a2ada3560bf47dd7334e237e27976574d15d1cddef2e66c01R696
	// Note (lsviben) We are only using import instead of refresh if the
	// management policies do not contain create or update as they need the
	// required fields to be set, which is not the case for import.
	if !policySet.HasAny(xpv1.ManagementActionCreate, xpv1.ManagementActionUpdate, xpv1.ManagementActionAll) {
		return e.Import(ctx, tr)
	}

	res, err := e.workspace.Refresh(ctx)
	if err != nil {
		return managed.ExternalObservation{}, errors.Wrap(err, errRefresh)
	}

	switch {
	case res.ASyncInProgress:
		mg.SetConditions(resource.AsyncOperationOngoingCondition())
		return managed.ExternalObservation{
			ResourceExists:   true,
			ResourceUpToDate: true,
		}, nil
	case !res.Exists:
		return managed.ExternalObservation{
			ResourceExists: false,
		}, nil
	}
	// There might be a case where async operation is finished and the status
	// update marking it as finished didn't go through. At this point, we are
	// sure that there is no ongoing operation.
	if e.config.UseAsync {
		tr.SetConditions(resource.AsyncOperationFinishedCondition())
	}

	// No operation was in progress, our observation completed successfully, and
	// we have an observation to consume.
	tfstate := map[string]any{}
	if err := json.JSParser.Unmarshal(res.State.GetAttributes(), &tfstate); err != nil {
		return managed.ExternalObservation{}, errors.Wrap(err, "cannot unmarshal state attributes")
	}
	if err := tr.SetObservation(tfstate); err != nil {
		return managed.ExternalObservation{}, errors.Wrap(err, "cannot set observation")
	}

	// NOTE(lsviben) although the annotations were supposed to be set and the
	// managed resource updated during the Create step, we are checking and
	// updating the annotations here due to the fact that in most cases, the
	// Create step is done asynchronously and the managed resource is not
	// updated with the annotations. That is why below we are prioritizing the
	// annotations update before anything else. We are setting lateInitialized
	// to true so that the reconciler updates the managed resource. This
	// behavior conflicts with management policies in which LateInitialize is
	// turned off. To circumvent this, we are checking if the management policy
	// does not contain LateInitialize and if it does not, we are updating the
	// annotations manually.
	annotationsUpdated, err := resource.SetCriticalAnnotations(tr, e.config, tfstate, string(res.State.GetPrivateRaw()))
	if err != nil {
		return managed.ExternalObservation{}, errors.Wrap(err, "cannot set critical annotations")
	}
	policyHasLateInit := policySet.HasAny(xpv1.ManagementActionLateInitialize, xpv1.ManagementActionAll)
	if annotationsUpdated && !policyHasLateInit {
		if err := e.kube.Update(ctx, mg); err != nil {
			return managed.ExternalObservation{}, errors.Wrap(err, errUpdateAnnotations)
		}
		// The annotations have just been persisted, so there is no need to
		// report them as pending in the switch block below.
		annotationsUpdated = false
	}
	conn, err := resource.GetConnectionDetails(tfstate, tr, e.config)
	if err != nil {
		return managed.ExternalObservation{}, errors.Wrap(err, "cannot get connection details")
	}

	var lateInitedParams bool
	if policyHasLateInit {
		lateInitedParams, err = tr.LateInitialize(res.State.GetAttributes())
		if err != nil {
			return managed.ExternalObservation{}, errors.Wrap(err, "cannot late initialize parameters")
		}
	}
	markedAvailable := tr.GetCondition(xpv1.TypeReady).Equal(xpv1.Available())

	// In the following switch block, before running a relatively costly
	// Terraform plan that may fail before critical annotations are
	// updated, or late-initialized configuration is written to main.tf.json,
	// we try to perform the following in the given order:
	// 1. Update critical annotations if they have changed
	// 2. Update status
	// 3. Update spec with late-initialized fields
	// We prioritize critical annotation updates most not to lose certain info
	// (like the Cloud provider generated ID) before anything else. Then we
	// prioritize status updates over late-initialization spec updates to
	// mark the resource as available as soon as possible because a spec
	// update due to late-initialized fields will void the status update.
	switch {
	// we prioritize critical annotation updates over status updates
	case annotationsUpdated:
		e.logger.Debug("Critical annotations have been updated.")
		return managed.ExternalObservation{
			ResourceExists:          true,
			ResourceUpToDate:        true,
			ConnectionDetails:       conn,
			ResourceLateInitialized: true,
		}, nil
	// we prioritize status updates over late-init'ed spec updates
	case !markedAvailable:
		addTTR(tr)
		tr.SetConditions(xpv1.Available())
		e.logger.Debug("Resource is marked as available.")
		if e.eventHandler != nil {
			e.eventHandler.RequestReconcile(rateLimiterStatus, mg.GetName(), nil)
		}
		return managed.ExternalObservation{
			ResourceExists:    true,
			ResourceUpToDate:  true,
			ConnectionDetails: conn,
		}, nil
	// with the least priority wrt critical annotation updates and status updates
	// we allow a late-initialization before the Workspace.Plan call
	case lateInitedParams:
		e.logger.Debug("Resource is late-initialized.")
		return managed.ExternalObservation{
			ResourceExists:          true,
			ResourceUpToDate:        true,
			ConnectionDetails:       conn,
			ResourceLateInitialized: true,
		}, nil
	// now we do a Workspace.Plan
	default:
		if e.eventHandler != nil {
			e.eventHandler.Forget(rateLimiterStatus, mg.GetName())
		}
		plan, err := e.workspace.Plan(ctx)
		if err != nil {
			return managed.ExternalObservation{}, errors.Wrap(err, errPlan)
		}

		resource.SetUpToDateCondition(mg, plan.UpToDate)
		e.logger.Debug("Called plan on the resource.", "upToDate", plan.UpToDate)

		return managed.ExternalObservation{
			ResourceExists:    true,
			ResourceUpToDate:  plan.UpToDate,
			ConnectionDetails: conn,
		}, nil
	}
}
   348  
   349  func addTTR(mg xpresource.Managed) {
   350  	gvk := mg.GetObjectKind().GroupVersionKind()
   351  	metrics.TTRMeasurements.WithLabelValues(gvk.Group, gvk.Version, gvk.Kind).Observe(time.Since(mg.GetCreationTimestamp().Time).Seconds())
   352  }
   353  
   354  func (e *external) Create(ctx context.Context, mg xpresource.Managed) (managed.ExternalCreation, error) {
   355  	requeued, err := e.scheduleProvider(mg.GetName())
   356  	if err != nil {
   357  		return managed.ExternalCreation{}, errors.Wrapf(err, "cannot schedule a native provider during create: %s", mg.GetUID())
   358  	}
   359  	if requeued {
   360  		return managed.ExternalCreation{}, nil
   361  	}
   362  	defer e.stopProvider()
   363  	if e.config.UseAsync {
   364  		return managed.ExternalCreation{}, errors.Wrap(e.workspace.ApplyAsync(e.callback.Create(mg.GetName())), errStartAsyncApply)
   365  	}
   366  	tr, ok := mg.(resource.Terraformed)
   367  	if !ok {
   368  		return managed.ExternalCreation{}, errors.New(errUnexpectedObject)
   369  	}
   370  	res, err := e.workspace.Apply(ctx)
   371  	if err != nil {
   372  		return managed.ExternalCreation{}, errors.Wrap(err, errApply)
   373  	}
   374  	tfstate := map[string]any{}
   375  	if err := json.JSParser.Unmarshal(res.State.GetAttributes(), &tfstate); err != nil {
   376  		return managed.ExternalCreation{}, errors.Wrap(err, "cannot unmarshal state attributes")
   377  	}
   378  
   379  	conn, err := resource.GetConnectionDetails(tfstate, tr, e.config)
   380  	if err != nil {
   381  		return managed.ExternalCreation{}, errors.Wrap(err, "cannot get connection details")
   382  	}
   383  
   384  	// NOTE(muvaf): Only spec and metadata changes are saved after Create call.
   385  	_, err = resource.SetCriticalAnnotations(tr, e.config, tfstate, string(res.State.GetPrivateRaw()))
   386  	return managed.ExternalCreation{ConnectionDetails: conn}, errors.Wrap(err, "cannot set critical annotations")
   387  }
   388  
   389  func (e *external) Update(ctx context.Context, mg xpresource.Managed) (managed.ExternalUpdate, error) {
   390  	requeued, err := e.scheduleProvider(mg.GetName())
   391  	if err != nil {
   392  		return managed.ExternalUpdate{}, errors.Wrapf(err, "cannot schedule a native provider during update: %s", mg.GetUID())
   393  	}
   394  	if requeued {
   395  		return managed.ExternalUpdate{}, nil
   396  	}
   397  	defer e.stopProvider()
   398  	if e.config.UseAsync {
   399  		return managed.ExternalUpdate{}, errors.Wrap(e.workspace.ApplyAsync(e.callback.Update(mg.GetName())), errStartAsyncApply)
   400  	}
   401  	tr, ok := mg.(resource.Terraformed)
   402  	if !ok {
   403  		return managed.ExternalUpdate{}, errors.New(errUnexpectedObject)
   404  	}
   405  	res, err := e.workspace.Apply(ctx)
   406  	if err != nil {
   407  		return managed.ExternalUpdate{}, errors.Wrap(err, errApply)
   408  	}
   409  	attr := map[string]any{}
   410  	if err := json.JSParser.Unmarshal(res.State.GetAttributes(), &attr); err != nil {
   411  		return managed.ExternalUpdate{}, errors.Wrap(err, "cannot unmarshal state attributes")
   412  	}
   413  	return managed.ExternalUpdate{}, errors.Wrap(tr.SetObservation(attr), "cannot set observation")
   414  }
   415  
   416  func (e *external) Delete(ctx context.Context, mg xpresource.Managed) error {
   417  	requeued, err := e.scheduleProvider(mg.GetName())
   418  	if err != nil {
   419  		return errors.Wrapf(err, "cannot schedule a native provider during delete: %s", mg.GetUID())
   420  	}
   421  	if requeued {
   422  		return nil
   423  	}
   424  	defer e.stopProvider()
   425  	if e.config.UseAsync {
   426  		return errors.Wrap(e.workspace.DestroyAsync(e.callback.Destroy(mg.GetName())), errStartAsyncDestroy)
   427  	}
   428  	return errors.Wrap(e.workspace.Destroy(ctx), errDestroy)
   429  }
   430  
   431  func (e *external) Import(ctx context.Context, tr resource.Terraformed) (managed.ExternalObservation, error) {
   432  	res, err := e.workspace.Import(ctx, tr)
   433  	if err != nil {
   434  		return managed.ExternalObservation{}, errors.Wrap(err, errImport)
   435  	}
   436  	// We normally don't expect apply/destroy to be in progress when the
   437  	// management policy is set to "ObserveOnly". However, this could happen
   438  	// if the policy is changed to "ObserveOnly" while an async operation is
   439  	// in progress. In that case, we want to wait for the operation to finish
   440  	// before we start observing.
   441  	if res.ASyncInProgress {
   442  		tr.SetConditions(resource.AsyncOperationOngoingCondition())
   443  		return managed.ExternalObservation{
   444  			ResourceExists:   true,
   445  			ResourceUpToDate: true,
   446  		}, nil
   447  	}
   448  	// If the resource doesn't exist, we don't need to do anything else.
   449  	// We report it to the managed reconciler as a non-existent resource and
   450  	// it will take care of reporting it to the user as an error case for
   451  	// observe-only policy.
   452  	if !res.Exists {
   453  		return managed.ExternalObservation{
   454  			ResourceExists: false,
   455  		}, nil
   456  	}
   457  
   458  	// No operation was in progress, our observation completed successfully, and
   459  	// we have an observation to consume.
   460  	tfstate := map[string]any{}
   461  	if err := json.JSParser.Unmarshal(res.State.GetAttributes(), &tfstate); err != nil {
   462  		return managed.ExternalObservation{}, errors.Wrap(err, "cannot unmarshal state attributes")
   463  	}
   464  	if err := tr.SetObservation(tfstate); err != nil {
   465  		return managed.ExternalObservation{}, errors.Wrap(err, "cannot set observation")
   466  	}
   467  	conn, err := resource.GetConnectionDetails(tfstate, tr, e.config)
   468  	if err != nil {
   469  		return managed.ExternalObservation{}, errors.Wrap(err, "cannot get connection details")
   470  	}
   471  
   472  	tr.SetConditions(xpv1.Available())
   473  	return managed.ExternalObservation{
   474  		ResourceExists:    true,
   475  		ResourceUpToDate:  true,
   476  		ConnectionDetails: conn,
   477  	}, nil
   478  }