istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/webhooks/validation/controller/controller.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package controller implements a k8s controller for managing the lifecycle of a validating webhook.
    16  package controller
    17  
    18  import (
    19  	"bytes"
    20  	"context"
    21  	"errors"
    22  	"fmt"
    23  	"math"
    24  	"strings"
    25  	"time"
    26  
    27  	"github.com/hashicorp/go-multierror"
    28  	kubeApiAdmission "k8s.io/api/admissionregistration/v1"
    29  	kerrors "k8s.io/apimachinery/pkg/api/errors"
    30  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    31  	klabels "k8s.io/apimachinery/pkg/labels"
    32  	"k8s.io/apimachinery/pkg/types"
    33  	"k8s.io/client-go/util/workqueue"
    34  
    35  	"istio.io/api/label"
    36  	networking "istio.io/api/networking/v1alpha3"
    37  	"istio.io/client-go/pkg/apis/networking/v1alpha3"
    38  	"istio.io/istio/pilot/pkg/keycertbundle"
    39  	"istio.io/istio/pkg/config/constants"
    40  	"istio.io/istio/pkg/config/labels"
    41  	"istio.io/istio/pkg/kube"
    42  	"istio.io/istio/pkg/kube/controllers"
    43  	"istio.io/istio/pkg/kube/kclient"
    44  	"istio.io/istio/pkg/log"
    45  	"istio.io/istio/pkg/ptr"
    46  	"istio.io/istio/pkg/webhooks/util"
    47  )
    48  
// scope is the package-level logging scope, registered under the name
// "validationController", used by the validation webhook controller.
var scope = log.RegisterScope("validationController", "validation webhook controller")
    50  
// Options holds the settings used by the validation webhook Controller.
type Options struct {
	// Istio system namespace where istiod resides.
	WatchedNamespace string

	// Watcher for the x509 CA certificate bundle. The bundle loaded from it
	// is patched into the webhook config, and bundle updates cause all
	// webhooks to be re-queued.
	CABundleWatcher *keycertbundle.Watcher

	// Revision for control plane performing patching on the validating webhook.
	// Only webhook configurations labeled with this revision are watched.
	Revision string

	// Name of the service running the webhook server.
	ServiceName string
}
    65  
    66  // Validate the options that exposed to end users
    67  func (o Options) Validate() error {
    68  	var errs *multierror.Error
    69  	if o.WatchedNamespace == "" || !labels.IsDNS1123Label(o.WatchedNamespace) {
    70  		errs = multierror.Append(errs, fmt.Errorf("invalid namespace: %q", o.WatchedNamespace))
    71  	}
    72  	if o.ServiceName == "" || !labels.IsDNS1123Label(o.ServiceName) {
    73  		errs = multierror.Append(errs, fmt.Errorf("invalid service name: %q", o.ServiceName))
    74  	}
    75  	if o.CABundleWatcher == nil {
    76  		errs = multierror.Append(errs, errors.New("CA bundle watcher not specified"))
    77  	}
    78  	return errs.ErrorOrNil()
    79  }
    80  
    81  // String produces a string field version of the arguments for debugging.
    82  func (o Options) String() string {
    83  	buf := &bytes.Buffer{}
    84  	_, _ = fmt.Fprintf(buf, "WatchedNamespace: %v\n", o.WatchedNamespace)
    85  	_, _ = fmt.Fprintf(buf, "Revision: %v\n", o.Revision)
    86  	_, _ = fmt.Fprintf(buf, "ServiceName: %v\n", o.ServiceName)
    87  	return buf.String()
    88  }
    89  
// Controller reconciles validating webhook configurations: it patches the CA
// bundle into each webhook and, once the endpoint has been confirmed to reject
// invalid config, sets the failure policy to Fail.
type Controller struct {
	// o holds the options the controller was created with.
	o      Options
	// client is passed to kclient.NewFiltered and used for the dry-run Gateway requests.
	client kube.Client

	// queue holds pending reconcile requests; Reconcile is its reconciler.
	queue                         controllers.Queue
	// dryRunOfInvalidConfigRejected records that the readiness check has
	// already succeeded, so it is not repeated on later reconciles.
	dryRunOfInvalidConfigRejected bool
	// webhooks gives access to the validating webhook configurations whose
	// revision label matches o.Revision.
	webhooks                      kclient.Client[*kubeApiAdmission.ValidatingWebhookConfiguration]
}
    98  
    99  // NewValidatingWebhookController creates a new Controller.
   100  func NewValidatingWebhookController(client kube.Client,
   101  	revision, ns string, caBundleWatcher *keycertbundle.Watcher,
   102  ) *Controller {
   103  	o := Options{
   104  		WatchedNamespace: ns,
   105  		CABundleWatcher:  caBundleWatcher,
   106  		Revision:         revision,
   107  		ServiceName:      "istiod",
   108  	}
   109  	return newController(o, client)
   110  }
   111  
   112  func newController(o Options, client kube.Client) *Controller {
   113  	c := &Controller{
   114  		o:      o,
   115  		client: client,
   116  	}
   117  
   118  	c.queue = controllers.NewQueue("validation",
   119  		controllers.WithReconciler(c.Reconcile),
   120  		// Webhook patching has to be retried forever. But the retries would be rate limited.
   121  		controllers.WithMaxAttempts(math.MaxInt),
   122  		// Retry with backoff. Failures could be from conflicts of other instances (quick retry helps), or
   123  		// longer lasting concerns which will eventually be retried on 1min interval.
   124  		// Unlike the mutating webhook controller, we do not use NewItemFastSlowRateLimiter. This is because
   125  		// the validation controller waits for its own service to be ready, so typically this takes a few seconds
   126  		// before we are ready; using FastSlow means we tend to always take the Slow time (1min).
   127  		controllers.WithRateLimiter(workqueue.NewItemExponentialFailureRateLimiter(100*time.Millisecond, 1*time.Minute)))
   128  
   129  	c.webhooks = kclient.NewFiltered[*kubeApiAdmission.ValidatingWebhookConfiguration](client, kclient.Filter{
   130  		LabelSelector: fmt.Sprintf("%s=%s", label.IoIstioRev.Name, o.Revision),
   131  	})
   132  	c.webhooks.AddEventHandler(controllers.ObjectHandler(c.queue.AddObject))
   133  
   134  	return c
   135  }
   136  
   137  func (c *Controller) Reconcile(key types.NamespacedName) error {
   138  	name := key.Name
   139  	whc := c.webhooks.Get(name, "")
   140  	scope := scope.WithLabels("webhook", name)
   141  	// Stop early if webhook is not present, rather than attempting (and failing) to reconcile permanently
   142  	// If the webhook is later added a new reconciliation request will trigger it to update
   143  	if whc == nil {
   144  		scope.Infof("Skip patching webhook, not found")
   145  		return nil
   146  	}
   147  
   148  	scope.Debugf("Reconcile(enter)")
   149  	defer func() { scope.Debugf("Reconcile(exit)") }()
   150  
   151  	caBundle, err := util.LoadCABundle(c.o.CABundleWatcher)
   152  	if err != nil {
   153  		scope.Errorf("Failed to load CA bundle: %v", err)
   154  		reportValidationConfigLoadError(err.(*util.ConfigError).Reason())
   155  		// no point in retrying unless cert file changes.
   156  		return nil
   157  	}
   158  	ready := c.readyForFailClose()
   159  	if err := c.updateValidatingWebhookConfiguration(whc, caBundle, ready); err != nil {
   160  		return fmt.Errorf("fail to update webhook: %v", err)
   161  	}
   162  	if !ready {
   163  		return fmt.Errorf("webhook is not ready, retry")
   164  	}
   165  	return nil
   166  }
   167  
// Run starts the controller, using stop as the shutdown signal for every step.
// The CA bundle watcher runs in its own goroutine and returns once stop is
// signalled; the work queue is run on the calling goroutine.
func (c *Controller) Run(stop <-chan struct{}) {
	// NOTE(review): presumably blocks until the webhook cache has synced or stop
	// fires; the result is not checked here. Confirm against kube.WaitForCacheSync.
	kube.WaitForCacheSync("validation", stop, c.webhooks.HasSynced)
	go c.startCaBundleWatcher(stop)
	c.queue.Run(stop)
}
   173  
   174  // startCaBundleWatcher listens for updates to the CA bundle and patches the webhooks.
   175  // shouldn't we be doing this for both validating and mutating webhooks...?
   176  func (c *Controller) startCaBundleWatcher(stop <-chan struct{}) {
   177  	if c.o.CABundleWatcher == nil {
   178  		return
   179  	}
   180  	id, watchCh := c.o.CABundleWatcher.AddWatcher()
   181  	defer c.o.CABundleWatcher.RemoveWatcher(id)
   182  
   183  	for {
   184  		select {
   185  		case <-watchCh:
   186  			c.syncAll()
   187  		case <-stop:
   188  			return
   189  		}
   190  	}
   191  }
   192  
   193  func (c *Controller) readyForFailClose() bool {
   194  	if !c.dryRunOfInvalidConfigRejected {
   195  		if rejected, reason := c.isDryRunOfInvalidConfigRejected(); !rejected {
   196  			scope.Infof("Not ready to switch validation to fail-closed: %v", reason)
   197  			return false
   198  		}
   199  		scope.Info("Endpoint successfully rejected invalid config. Switching to fail-close.")
   200  		c.dryRunOfInvalidConfigRejected = true
   201  		// Sync all webhooks; this ensures if we have multiple webhooks all of them are updated
   202  		c.syncAll()
   203  	}
   204  	return true
   205  }
   206  
// Substrings looked for in the error text returned by the dry-run request in
// isDryRunOfInvalidConfigRejected, used to classify why the request failed.
const (
	// The invalid config was rejected, which is the expected outcome.
	deniedRequestMessageFragment     = `denied the request`
	// The requested resource could not be found; treated as the Gateway CRD being absent.
	missingResourceMessageFragment   = `the server could not find the requested resource`
	// Some validating webhook does not support dry-run requests.
	unsupportedDryRunMessageFragment = `does not support dry run`
)
   212  
   213  // Confirm invalid configuration is successfully rejected before switching to FAIL-CLOSE.
   214  func (c *Controller) isDryRunOfInvalidConfigRejected() (rejected bool, reason string) {
   215  	invalidGateway := &v1alpha3.Gateway{
   216  		ObjectMeta: metav1.ObjectMeta{
   217  			Name:      "invalid-gateway",
   218  			Namespace: c.o.WatchedNamespace,
   219  			// Must ensure that this is the revision validating the known-bad config
   220  			Labels: map[string]string{
   221  				label.IoIstioRev.Name: c.o.Revision,
   222  			},
   223  			Annotations: map[string]string{
   224  				// Add always-reject annotation. For now, we are invalid for two reasons: missing `spec.servers`, and this
   225  				// annotation. In the future, the CRD will reject a missing `spec.servers` before we hit the webhook, so we will
   226  				// only have that annotation. For backwards compatibility, we keep both methods for some time.
   227  				constants.AlwaysReject: "true",
   228  			},
   229  		},
   230  		Spec: networking.Gateway{},
   231  	}
   232  
   233  	createOptions := metav1.CreateOptions{DryRun: []string{metav1.DryRunAll}}
   234  	istioClient := c.client.Istio().NetworkingV1alpha3()
   235  	_, err := istioClient.Gateways(c.o.WatchedNamespace).Create(context.TODO(), invalidGateway, createOptions)
   236  	if kerrors.IsAlreadyExists(err) {
   237  		updateOptions := metav1.UpdateOptions{DryRun: []string{metav1.DryRunAll}}
   238  		_, err = istioClient.Gateways(c.o.WatchedNamespace).Update(context.TODO(), invalidGateway, updateOptions)
   239  	}
   240  	if err == nil {
   241  		return false, "dummy invalid config not rejected"
   242  	}
   243  	// We expect to get deniedRequestMessageFragment (the config was rejected, as expected)
   244  	if strings.Contains(err.Error(), deniedRequestMessageFragment) {
   245  		return true, ""
   246  	}
   247  	// If the CRD does not exist, we will get this error. This is to handle when Pilot is run
   248  	// without CRDs - in this case, this check will not be possible.
   249  	if strings.Contains(err.Error(), missingResourceMessageFragment) {
   250  		scope.Warnf("Missing Gateway CRD, cannot perform validation check. Assuming validation is ready")
   251  		return true, ""
   252  	}
   253  	// If some validating webhooks does not support dryRun(sideEffects=Unknown or Some), we will get this error.
   254  	// We should assume valdiation is ready because there is no point in retrying this request.
   255  	if strings.Contains(err.Error(), unsupportedDryRunMessageFragment) {
   256  		scope.Warnf("One of the validating webhooks does not support DryRun, cannot perform validation check. Assuming validation is ready. Details: %v", err)
   257  		return true, ""
   258  	}
   259  	return false, fmt.Sprintf("dummy invalid rejected for the wrong reason: %v", err)
   260  }
   261  
   262  func (c *Controller) updateValidatingWebhookConfiguration(current *kubeApiAdmission.ValidatingWebhookConfiguration,
   263  	caBundle []byte, ready bool,
   264  ) error {
   265  	dirty := false
   266  	for i := range current.Webhooks {
   267  		caNeed := !bytes.Equal(current.Webhooks[i].ClientConfig.CABundle, caBundle)
   268  		failureNeed := ready && (current.Webhooks[i].FailurePolicy != nil && *current.Webhooks[i].FailurePolicy != kubeApiAdmission.Fail)
   269  		if caNeed || failureNeed {
   270  			dirty = true
   271  			break
   272  		}
   273  	}
   274  	scope := scope.WithLabels(
   275  		"name", current.Name,
   276  		"fail closed", ready,
   277  		"resource version", current.ResourceVersion,
   278  	)
   279  	if !dirty {
   280  		scope.Debugf("up-to-date, no change required")
   281  		return nil
   282  	}
   283  	updated := current.DeepCopy()
   284  	for i := range updated.Webhooks {
   285  		updated.Webhooks[i].ClientConfig.CABundle = caBundle
   286  		if ready {
   287  			updated.Webhooks[i].FailurePolicy = ptr.Of(kubeApiAdmission.Fail)
   288  		}
   289  	}
   290  
   291  	latest, err := c.webhooks.Update(updated)
   292  	if err != nil {
   293  		scope.Errorf("failed to updated: %v", err)
   294  		reportValidationConfigUpdateError(kerrors.ReasonForError(err))
   295  		return err
   296  	}
   297  
   298  	scope.WithLabels("resource version", latest.ResourceVersion).Infof("successfully updated")
   299  	reportValidationConfigUpdate()
   300  	return nil
   301  }
   302  
   303  func (c *Controller) syncAll() {
   304  	for _, whc := range c.webhooks.List("", klabels.Everything()) {
   305  		c.queue.AddObject(whc)
   306  	}
   307  }