istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pkg/webhooks/validation/controller/controller.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package controller implements a k8s controller for managing the lifecycle of a validating webhook. 16 package controller 17 18 import ( 19 "bytes" 20 "context" 21 "errors" 22 "fmt" 23 "math" 24 "strings" 25 "time" 26 27 "github.com/hashicorp/go-multierror" 28 kubeApiAdmission "k8s.io/api/admissionregistration/v1" 29 kerrors "k8s.io/apimachinery/pkg/api/errors" 30 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 31 klabels "k8s.io/apimachinery/pkg/labels" 32 "k8s.io/apimachinery/pkg/types" 33 "k8s.io/client-go/util/workqueue" 34 35 "istio.io/api/label" 36 networking "istio.io/api/networking/v1alpha3" 37 "istio.io/client-go/pkg/apis/networking/v1alpha3" 38 "istio.io/istio/pilot/pkg/keycertbundle" 39 "istio.io/istio/pkg/config/constants" 40 "istio.io/istio/pkg/config/labels" 41 "istio.io/istio/pkg/kube" 42 "istio.io/istio/pkg/kube/controllers" 43 "istio.io/istio/pkg/kube/kclient" 44 "istio.io/istio/pkg/log" 45 "istio.io/istio/pkg/ptr" 46 "istio.io/istio/pkg/webhooks/util" 47 ) 48 49 var scope = log.RegisterScope("validationController", "validation webhook controller") 50 51 type Options struct { 52 // Istio system namespace where istiod resides. 53 WatchedNamespace string 54 55 // File path to the x509 certificate bundle used by the webhook server 56 // and patched into the webhook config. 57 CABundleWatcher *keycertbundle.Watcher 58 59 // Revision for control plane performing patching on the validating webhook. 60 Revision string 61 62 // Name of the service running the webhook server. 63 ServiceName string 64 } 65 66 // Validate the options that exposed to end users 67 func (o Options) Validate() error { 68 var errs *multierror.Error 69 if o.WatchedNamespace == "" || !labels.IsDNS1123Label(o.WatchedNamespace) { 70 errs = multierror.Append(errs, fmt.Errorf("invalid namespace: %q", o.WatchedNamespace)) 71 } 72 if o.ServiceName == "" || !labels.IsDNS1123Label(o.ServiceName) { 73 errs = multierror.Append(errs, fmt.Errorf("invalid service name: %q", o.ServiceName)) 74 } 75 if o.CABundleWatcher == nil { 76 errs = multierror.Append(errs, errors.New("CA bundle watcher not specified")) 77 } 78 return errs.ErrorOrNil() 79 } 80 81 // String produces a string field version of the arguments for debugging. 82 func (o Options) String() string { 83 buf := &bytes.Buffer{} 84 _, _ = fmt.Fprintf(buf, "WatchedNamespace: %v\n", o.WatchedNamespace) 85 _, _ = fmt.Fprintf(buf, "Revision: %v\n", o.Revision) 86 _, _ = fmt.Fprintf(buf, "ServiceName: %v\n", o.ServiceName) 87 return buf.String() 88 } 89 90 type Controller struct { 91 o Options 92 client kube.Client 93 94 queue controllers.Queue 95 dryRunOfInvalidConfigRejected bool 96 webhooks kclient.Client[*kubeApiAdmission.ValidatingWebhookConfiguration] 97 } 98 99 // NewValidatingWebhookController creates a new Controller. 100 func NewValidatingWebhookController(client kube.Client, 101 revision, ns string, caBundleWatcher *keycertbundle.Watcher, 102 ) *Controller { 103 o := Options{ 104 WatchedNamespace: ns, 105 CABundleWatcher: caBundleWatcher, 106 Revision: revision, 107 ServiceName: "istiod", 108 } 109 return newController(o, client) 110 } 111 112 func newController(o Options, client kube.Client) *Controller { 113 c := &Controller{ 114 o: o, 115 client: client, 116 } 117 118 c.queue = controllers.NewQueue("validation", 119 controllers.WithReconciler(c.Reconcile), 120 // Webhook patching has to be retried forever. But the retries would be rate limited. 121 controllers.WithMaxAttempts(math.MaxInt), 122 // Retry with backoff. Failures could be from conflicts of other instances (quick retry helps), or 123 // longer lasting concerns which will eventually be retried on 1min interval. 124 // Unlike the mutating webhook controller, we do not use NewItemFastSlowRateLimiter. This is because 125 // the validation controller waits for its own service to be ready, so typically this takes a few seconds 126 // before we are ready; using FastSlow means we tend to always take the Slow time (1min). 127 controllers.WithRateLimiter(workqueue.NewItemExponentialFailureRateLimiter(100*time.Millisecond, 1*time.Minute))) 128 129 c.webhooks = kclient.NewFiltered[*kubeApiAdmission.ValidatingWebhookConfiguration](client, kclient.Filter{ 130 LabelSelector: fmt.Sprintf("%s=%s", label.IoIstioRev.Name, o.Revision), 131 }) 132 c.webhooks.AddEventHandler(controllers.ObjectHandler(c.queue.AddObject)) 133 134 return c 135 } 136 137 func (c *Controller) Reconcile(key types.NamespacedName) error { 138 name := key.Name 139 whc := c.webhooks.Get(name, "") 140 scope := scope.WithLabels("webhook", name) 141 // Stop early if webhook is not present, rather than attempting (and failing) to reconcile permanently 142 // If the webhook is later added a new reconciliation request will trigger it to update 143 if whc == nil { 144 scope.Infof("Skip patching webhook, not found") 145 return nil 146 } 147 148 scope.Debugf("Reconcile(enter)") 149 defer func() { scope.Debugf("Reconcile(exit)") }() 150 151 caBundle, err := util.LoadCABundle(c.o.CABundleWatcher) 152 if err != nil { 153 scope.Errorf("Failed to load CA bundle: %v", err) 154 reportValidationConfigLoadError(err.(*util.ConfigError).Reason()) 155 // no point in retrying unless cert file changes. 156 return nil 157 } 158 ready := c.readyForFailClose() 159 if err := c.updateValidatingWebhookConfiguration(whc, caBundle, ready); err != nil { 160 return fmt.Errorf("fail to update webhook: %v", err) 161 } 162 if !ready { 163 return fmt.Errorf("webhook is not ready, retry") 164 } 165 return nil 166 } 167 168 func (c *Controller) Run(stop <-chan struct{}) { 169 kube.WaitForCacheSync("validation", stop, c.webhooks.HasSynced) 170 go c.startCaBundleWatcher(stop) 171 c.queue.Run(stop) 172 } 173 174 // startCaBundleWatcher listens for updates to the CA bundle and patches the webhooks. 175 // shouldn't we be doing this for both validating and mutating webhooks...? 176 func (c *Controller) startCaBundleWatcher(stop <-chan struct{}) { 177 if c.o.CABundleWatcher == nil { 178 return 179 } 180 id, watchCh := c.o.CABundleWatcher.AddWatcher() 181 defer c.o.CABundleWatcher.RemoveWatcher(id) 182 183 for { 184 select { 185 case <-watchCh: 186 c.syncAll() 187 case <-stop: 188 return 189 } 190 } 191 } 192 193 func (c *Controller) readyForFailClose() bool { 194 if !c.dryRunOfInvalidConfigRejected { 195 if rejected, reason := c.isDryRunOfInvalidConfigRejected(); !rejected { 196 scope.Infof("Not ready to switch validation to fail-closed: %v", reason) 197 return false 198 } 199 scope.Info("Endpoint successfully rejected invalid config. Switching to fail-close.") 200 c.dryRunOfInvalidConfigRejected = true 201 // Sync all webhooks; this ensures if we have multiple webhooks all of them are updated 202 c.syncAll() 203 } 204 return true 205 } 206 207 const ( 208 deniedRequestMessageFragment = `denied the request` 209 missingResourceMessageFragment = `the server could not find the requested resource` 210 unsupportedDryRunMessageFragment = `does not support dry run` 211 ) 212 213 // Confirm invalid configuration is successfully rejected before switching to FAIL-CLOSE. 214 func (c *Controller) isDryRunOfInvalidConfigRejected() (rejected bool, reason string) { 215 invalidGateway := &v1alpha3.Gateway{ 216 ObjectMeta: metav1.ObjectMeta{ 217 Name: "invalid-gateway", 218 Namespace: c.o.WatchedNamespace, 219 // Must ensure that this is the revision validating the known-bad config 220 Labels: map[string]string{ 221 label.IoIstioRev.Name: c.o.Revision, 222 }, 223 Annotations: map[string]string{ 224 // Add always-reject annotation. For now, we are invalid for two reasons: missing `spec.servers`, and this 225 // annotation. In the future, the CRD will reject a missing `spec.servers` before we hit the webhook, so we will 226 // only have that annotation. For backwards compatibility, we keep both methods for some time. 227 constants.AlwaysReject: "true", 228 }, 229 }, 230 Spec: networking.Gateway{}, 231 } 232 233 createOptions := metav1.CreateOptions{DryRun: []string{metav1.DryRunAll}} 234 istioClient := c.client.Istio().NetworkingV1alpha3() 235 _, err := istioClient.Gateways(c.o.WatchedNamespace).Create(context.TODO(), invalidGateway, createOptions) 236 if kerrors.IsAlreadyExists(err) { 237 updateOptions := metav1.UpdateOptions{DryRun: []string{metav1.DryRunAll}} 238 _, err = istioClient.Gateways(c.o.WatchedNamespace).Update(context.TODO(), invalidGateway, updateOptions) 239 } 240 if err == nil { 241 return false, "dummy invalid config not rejected" 242 } 243 // We expect to get deniedRequestMessageFragment (the config was rejected, as expected) 244 if strings.Contains(err.Error(), deniedRequestMessageFragment) { 245 return true, "" 246 } 247 // If the CRD does not exist, we will get this error. This is to handle when Pilot is run 248 // without CRDs - in this case, this check will not be possible. 249 if strings.Contains(err.Error(), missingResourceMessageFragment) { 250 scope.Warnf("Missing Gateway CRD, cannot perform validation check. Assuming validation is ready") 251 return true, "" 252 } 253 // If some validating webhooks does not support dryRun(sideEffects=Unknown or Some), we will get this error. 254 // We should assume valdiation is ready because there is no point in retrying this request. 255 if strings.Contains(err.Error(), unsupportedDryRunMessageFragment) { 256 scope.Warnf("One of the validating webhooks does not support DryRun, cannot perform validation check. Assuming validation is ready. Details: %v", err) 257 return true, "" 258 } 259 return false, fmt.Sprintf("dummy invalid rejected for the wrong reason: %v", err) 260 } 261 262 func (c *Controller) updateValidatingWebhookConfiguration(current *kubeApiAdmission.ValidatingWebhookConfiguration, 263 caBundle []byte, ready bool, 264 ) error { 265 dirty := false 266 for i := range current.Webhooks { 267 caNeed := !bytes.Equal(current.Webhooks[i].ClientConfig.CABundle, caBundle) 268 failureNeed := ready && (current.Webhooks[i].FailurePolicy != nil && *current.Webhooks[i].FailurePolicy != kubeApiAdmission.Fail) 269 if caNeed || failureNeed { 270 dirty = true 271 break 272 } 273 } 274 scope := scope.WithLabels( 275 "name", current.Name, 276 "fail closed", ready, 277 "resource version", current.ResourceVersion, 278 ) 279 if !dirty { 280 scope.Debugf("up-to-date, no change required") 281 return nil 282 } 283 updated := current.DeepCopy() 284 for i := range updated.Webhooks { 285 updated.Webhooks[i].ClientConfig.CABundle = caBundle 286 if ready { 287 updated.Webhooks[i].FailurePolicy = ptr.Of(kubeApiAdmission.Fail) 288 } 289 } 290 291 latest, err := c.webhooks.Update(updated) 292 if err != nil { 293 scope.Errorf("failed to updated: %v", err) 294 reportValidationConfigUpdateError(kerrors.ReasonForError(err)) 295 return err 296 } 297 298 scope.WithLabels("resource version", latest.ResourceVersion).Infof("successfully updated") 299 reportValidationConfigUpdate() 300 return nil 301 } 302 303 func (c *Controller) syncAll() { 304 for _, whc := range c.webhooks.List("", klabels.Everything()) { 305 c.queue.AddObject(whc) 306 } 307 }