k8s.io/apiserver@v0.31.1/pkg/server/options/encryptionconfig/config.go (about) 1 /* 2 Copyright 2017 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package encryptionconfig 18 19 import ( 20 "context" 21 "crypto/aes" 22 "crypto/cipher" 23 "crypto/sha256" 24 "encoding/base64" 25 "errors" 26 "fmt" 27 "net/http" 28 "os" 29 "sync" 30 "sync/atomic" 31 "time" 32 33 "k8s.io/apimachinery/pkg/runtime" 34 "k8s.io/apimachinery/pkg/runtime/schema" 35 "k8s.io/apimachinery/pkg/runtime/serializer" 36 utilerrors "k8s.io/apimachinery/pkg/util/errors" 37 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 38 "k8s.io/apimachinery/pkg/util/uuid" 39 "k8s.io/apimachinery/pkg/util/wait" 40 "k8s.io/apiserver/pkg/apis/apiserver" 41 apiserverv1 "k8s.io/apiserver/pkg/apis/apiserver/v1" 42 "k8s.io/apiserver/pkg/apis/apiserver/validation" 43 "k8s.io/apiserver/pkg/features" 44 "k8s.io/apiserver/pkg/server/healthz" 45 "k8s.io/apiserver/pkg/server/options/encryptionconfig/metrics" 46 storagevalue "k8s.io/apiserver/pkg/storage/value" 47 aestransformer "k8s.io/apiserver/pkg/storage/value/encrypt/aes" 48 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope" 49 envelopekmsv2 "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2" 50 kmstypes "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/kmsv2/v2" 51 envelopemetrics "k8s.io/apiserver/pkg/storage/value/encrypt/envelope/metrics" 52 "k8s.io/apiserver/pkg/storage/value/encrypt/identity" 53 "k8s.io/apiserver/pkg/storage/value/encrypt/secretbox" 54 utilfeature "k8s.io/apiserver/pkg/util/feature" 55 "k8s.io/klog/v2" 56 kmsservice "k8s.io/kms/pkg/service" 57 ) 58 59 const ( 60 aesCBCTransformerPrefixV1 = "k8s:enc:aescbc:v1:" 61 aesGCMTransformerPrefixV1 = "k8s:enc:aesgcm:v1:" 62 secretboxTransformerPrefixV1 = "k8s:enc:secretbox:v1:" 63 kmsTransformerPrefixV1 = "k8s:enc:kms:v1:" 64 kmsTransformerPrefixV2 = "k8s:enc:kms:v2:" 65 66 // these constants relate to how the KMS v2 plugin status poll logic 67 // and the DEK/seed generation logic behave. In particular, the positive 68 // interval and max TTL are closely related as the difference between 69 // these values defines the worst case window in which the write DEK/seed 70 // could expire due to the plugin going into an error state. The 71 // worst case window divided by the negative interval defines the 72 // minimum amount of times the server will attempt to return to a 73 // healthy state before the DEK/seed expires and writes begin to fail. 74 // 75 // For now, these values are kept small and hardcoded to support being 76 // able to perform a "passive" storage migration while tolerating some 77 // amount of plugin downtime. 78 // 79 // With the current approach, a user can update the key ID their plugin 80 // is using and then can simply schedule a migration for 3 + N + M minutes 81 // later where N is how long it takes their plugin to pick up new config 82 // and M is extra buffer to allow the API server to process the config. 83 // At that point, they are guaranteed to either migrate to the new key 84 // or get errors during the migration. 85 // 86 // If the API server coasted forever on the last DEK/seed, they would need 87 // to actively check if it had observed the new key ID before starting 88 // a migration - otherwise it could keep using the old DEK/seed and their 89 // storage migration would not do what they thought it did. 90 kmsv2PluginHealthzPositiveInterval = 1 * time.Minute 91 kmsv2PluginHealthzNegativeInterval = 10 * time.Second 92 kmsv2PluginWriteDEKSourceMaxTTL = 3 * time.Minute 93 94 kmsPluginHealthzNegativeTTL = 3 * time.Second 95 kmsPluginHealthzPositiveTTL = 20 * time.Second 96 kmsAPIVersionV1 = "v1" 97 kmsAPIVersionV2 = "v2" 98 // this name is used for two different healthz endpoints: 99 // - when one or more KMS v2 plugins are in use and no KMS v1 plugins are in use 100 // in this case, all v2 plugins are probed via this single endpoint 101 // - when automatic reload of encryption config is enabled 102 // in this case, all KMS plugins are probed via this single endpoint 103 // the endpoint is present even if there are no KMS plugins configured (it is a no-op then) 104 kmsReloadHealthCheckName = "kms-providers" 105 ) 106 107 var codecs serializer.CodecFactory 108 109 // this atomic bool allows us to swap enablement of the KMSv2KDF feature in tests 110 // as the feature gate is now locked to true starting with v1.29 111 // Note: it cannot be set by an end user 112 var kdfDisabled atomic.Bool 113 114 // this function should only be called in tests to swap enablement of the KMSv2KDF feature 115 func SetKDFForTests(b bool) func() { 116 kdfDisabled.Store(!b) 117 return func() { 118 kdfDisabled.Store(false) 119 } 120 } 121 122 // this function should be used to determine enablement of the KMSv2KDF feature 123 // instead of getting it from DefaultFeatureGate as the feature gate is now locked 124 // to true starting with v1.29 125 func GetKDF() bool { 126 return !kdfDisabled.Load() 127 } 128 129 func init() { 130 configScheme := runtime.NewScheme() 131 utilruntime.Must(apiserver.AddToScheme(configScheme)) 132 utilruntime.Must(apiserverv1.AddToScheme(configScheme)) 133 codecs = serializer.NewCodecFactory(configScheme, serializer.EnableStrict) 134 envelopemetrics.RegisterMetrics() 135 storagevalue.RegisterMetrics() 136 metrics.RegisterMetrics() 137 } 138 139 type kmsPluginHealthzResponse struct { 140 err error 141 received time.Time 142 } 143 144 type kmsPluginProbe struct { 145 name string 146 ttl time.Duration 147 service envelope.Service 148 lastResponse *kmsPluginHealthzResponse 149 l *sync.Mutex 150 } 151 152 type kmsv2PluginProbe struct { 153 state atomic.Pointer[envelopekmsv2.State] 154 name string 155 ttl time.Duration 156 service kmsservice.Service 157 lastResponse *kmsPluginHealthzResponse 158 l *sync.Mutex 159 apiServerID string 160 version string 161 } 162 163 type kmsHealthChecker []healthz.HealthChecker 164 165 func (k kmsHealthChecker) Name() string { 166 return kmsReloadHealthCheckName 167 } 168 169 func (k kmsHealthChecker) Check(req *http.Request) error { 170 var errs []error 171 172 for i := range k { 173 checker := k[i] 174 if err := checker.Check(req); err != nil { 175 errs = append(errs, fmt.Errorf("%s: %w", checker.Name(), err)) 176 } 177 } 178 179 return utilerrors.Reduce(utilerrors.NewAggregate(errs)) 180 } 181 182 func (h *kmsPluginProbe) toHealthzCheck(idx int) healthz.HealthChecker { 183 return healthz.NamedCheck(fmt.Sprintf("kms-provider-%d", idx), func(r *http.Request) error { 184 return h.check() 185 }) 186 } 187 188 func (h *kmsv2PluginProbe) toHealthzCheck(idx int) healthz.HealthChecker { 189 return healthz.NamedCheck(fmt.Sprintf("kms-provider-%d", idx), func(r *http.Request) error { 190 return h.check(r.Context()) 191 }) 192 } 193 194 // EncryptionConfiguration represents the parsed and normalized encryption configuration for the apiserver. 195 type EncryptionConfiguration struct { 196 // Transformers is a list of value.Transformer that will be used to encrypt and decrypt data. 197 Transformers map[schema.GroupResource]storagevalue.Transformer 198 199 // HealthChecks is a list of healthz.HealthChecker that will be used to check the health of the encryption providers. 200 HealthChecks []healthz.HealthChecker 201 202 // EncryptionFileContentHash is the hash of the encryption config file. 203 EncryptionFileContentHash string 204 205 // KMSCloseGracePeriod is the duration we will wait before closing old transformers. 206 // We wait for any in-flight requests to finish by using the duration which is longer than their timeout. 207 KMSCloseGracePeriod time.Duration 208 } 209 210 // LoadEncryptionConfig parses and validates the encryption config specified by filepath. 211 // It may launch multiple go routines whose lifecycle is controlled by ctx. 212 // In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched. 213 // If reload is true, or KMS v2 plugins are used with no KMS v1 plugins, the returned slice of health checkers will always be of length 1. 214 func LoadEncryptionConfig(ctx context.Context, filepath string, reload bool, apiServerID string) (*EncryptionConfiguration, error) { 215 config, contentHash, err := loadConfig(filepath, reload) 216 if err != nil { 217 return nil, fmt.Errorf("error while parsing file: %w", err) 218 } 219 220 transformers, kmsHealthChecks, kmsUsed, err := getTransformerOverridesAndKMSPluginHealthzCheckers(ctx, config, apiServerID) 221 if err != nil { 222 return nil, fmt.Errorf("error while building transformers: %w", err) 223 } 224 225 if reload || (kmsUsed.v2Used && !kmsUsed.v1Used) { 226 kmsHealthChecks = []healthz.HealthChecker{kmsHealthChecker(kmsHealthChecks)} 227 } 228 229 // KMSCloseGracePeriod is the duration we will wait before closing old transformers. 230 // The way we calculate is as follows: 231 // 1. Sum all timeouts across all KMS plugins. (check kmsPrefixTransformer for differences between v1 and v2) 232 // 2. Multiply that by 2 (to allow for some buffer) 233 // The reason we sum all timeout is because kmsHealthChecker() will run all health checks serially 234 return &EncryptionConfiguration{ 235 Transformers: transformers, 236 HealthChecks: kmsHealthChecks, 237 EncryptionFileContentHash: contentHash, 238 KMSCloseGracePeriod: 2 * kmsUsed.kmsTimeoutSum, 239 }, nil 240 } 241 242 // getTransformerOverridesAndKMSPluginHealthzCheckers creates the set of transformers and KMS healthz checks based on the given config. 243 // It may launch multiple go routines whose lifecycle is controlled by ctx. 244 // In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched. 245 func getTransformerOverridesAndKMSPluginHealthzCheckers(ctx context.Context, config *apiserver.EncryptionConfiguration, apiServerID string) (map[schema.GroupResource]storagevalue.Transformer, []healthz.HealthChecker, *kmsState, error) { 246 var kmsHealthChecks []healthz.HealthChecker 247 transformers, probes, kmsUsed, err := getTransformerOverridesAndKMSPluginProbes(ctx, config, apiServerID) 248 if err != nil { 249 return nil, nil, nil, err 250 } 251 for i := range probes { 252 probe := probes[i] 253 kmsHealthChecks = append(kmsHealthChecks, probe.toHealthzCheck(i)) 254 } 255 256 return transformers, kmsHealthChecks, kmsUsed, nil 257 } 258 259 type healthChecker interface { 260 toHealthzCheck(idx int) healthz.HealthChecker 261 } 262 263 // getTransformerOverridesAndKMSPluginProbes creates the set of transformers and KMS probes based on the given config. 264 // It may launch multiple go routines whose lifecycle is controlled by ctx. 265 // In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched. 266 func getTransformerOverridesAndKMSPluginProbes(ctx context.Context, config *apiserver.EncryptionConfiguration, apiServerID string) (map[schema.GroupResource]storagevalue.Transformer, []healthChecker, *kmsState, error) { 267 resourceToPrefixTransformer := map[schema.GroupResource][]storagevalue.PrefixTransformer{} 268 var probes []healthChecker 269 var kmsUsed kmsState 270 271 // For each entry in the configuration 272 for _, resourceConfig := range config.Resources { 273 resourceConfig := resourceConfig 274 275 transformers, p, used, err := prefixTransformersAndProbes(ctx, resourceConfig, apiServerID) 276 if err != nil { 277 return nil, nil, nil, err 278 } 279 kmsUsed.accumulate(used) 280 281 // For each resource, create a list of providers to use 282 for _, resource := range resourceConfig.Resources { 283 resource := resource 284 gr := schema.ParseGroupResource(resource) 285 286 // check if resource is masked by *.group rule 287 anyResourceInGroup := schema.GroupResource{Group: gr.Group, Resource: "*"} 288 if _, masked := resourceToPrefixTransformer[anyResourceInGroup]; masked { 289 // an earlier rule already configured a transformer for *.group, masking this rule 290 // return error since this is not allowed 291 return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyResourceInGroup)) 292 } 293 294 if _, masked := resourceToPrefixTransformer[anyGroupAnyResource]; masked { 295 // an earlier rule already configured a transformer for *.*, masking this rule 296 // return error since this is not allowed 297 return nil, nil, nil, fmt.Errorf("resource %q is masked by earlier rule %q", grYAMLString(gr), grYAMLString(anyGroupAnyResource)) 298 } 299 300 resourceToPrefixTransformer[gr] = append(resourceToPrefixTransformer[gr], transformers...) 301 } 302 303 probes = append(probes, p...) 304 } 305 306 transformers := make(map[schema.GroupResource]storagevalue.Transformer, len(resourceToPrefixTransformer)) 307 for gr, transList := range resourceToPrefixTransformer { 308 gr := gr 309 transList := transList 310 transformers[gr] = storagevalue.NewPrefixTransformers(fmt.Errorf("no matching prefix found"), transList...) 311 } 312 313 return transformers, probes, &kmsUsed, nil 314 } 315 316 // check encrypts and decrypts test data against KMS-Plugin's gRPC endpoint. 317 func (h *kmsPluginProbe) check() error { 318 h.l.Lock() 319 defer h.l.Unlock() 320 321 if (time.Since(h.lastResponse.received)) < h.ttl { 322 return h.lastResponse.err 323 } 324 325 p, err := h.service.Encrypt([]byte("ping")) 326 if err != nil { 327 h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()} 328 h.ttl = kmsPluginHealthzNegativeTTL 329 return fmt.Errorf("failed to perform encrypt section of the healthz check for KMS Provider %s, error: %w", h.name, err) 330 } 331 332 if _, err := h.service.Decrypt(p); err != nil { 333 h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()} 334 h.ttl = kmsPluginHealthzNegativeTTL 335 return fmt.Errorf("failed to perform decrypt section of the healthz check for KMS Provider %s, error: %w", h.name, err) 336 } 337 338 h.lastResponse = &kmsPluginHealthzResponse{err: nil, received: time.Now()} 339 h.ttl = kmsPluginHealthzPositiveTTL 340 return nil 341 } 342 343 // check gets the healthz status of the KMSv2-Plugin using the Status() method. 344 func (h *kmsv2PluginProbe) check(ctx context.Context) error { 345 h.l.Lock() 346 defer h.l.Unlock() 347 348 if time.Since(h.lastResponse.received) < h.ttl { 349 return h.lastResponse.err 350 } 351 352 p, err := h.service.Status(ctx) 353 if err != nil { 354 h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()} 355 h.ttl = kmsPluginHealthzNegativeTTL 356 return fmt.Errorf("failed to perform status section of the healthz check for KMS Provider %s, error: %w", h.name, err) 357 } 358 359 if err := h.isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx, p); err != nil { 360 h.lastResponse = &kmsPluginHealthzResponse{err: err, received: time.Now()} 361 h.ttl = kmsPluginHealthzNegativeTTL 362 return err 363 } 364 365 h.lastResponse = &kmsPluginHealthzResponse{err: nil, received: time.Now()} 366 h.ttl = kmsPluginHealthzPositiveTTL 367 return nil 368 } 369 370 // rotateDEKOnKeyIDChange tries to rotate to a new DEK/seed if the key ID returned by Status does not match the 371 // current state. If a successful rotation is performed, the new DEK/seed and keyID overwrite the existing state. 372 // On any failure during rotation (including mismatch between status and encrypt calls), the current state is 373 // preserved and will remain valid to use for encryption until its expiration (the system attempts to coast). 374 // If the key ID returned by Status matches the current state, the expiration of the current state is extended 375 // and no rotation is performed. 376 func (h *kmsv2PluginProbe) rotateDEKOnKeyIDChange(ctx context.Context, statusKeyID, uid string) error { 377 // we do not check ValidateEncryptCapability here because it is fine to re-use an old key 378 // that was marked as expired during an unhealthy period. As long as the key ID matches 379 // what we expect then there is no need to rotate here. 380 state, errState := h.getCurrentState() 381 382 // allow reads indefinitely in all cases 383 // allow writes indefinitely as long as there is no error 384 // allow writes for only up to kmsv2PluginWriteDEKSourceMaxTTL from now when there are errors 385 // we start the timer before we make the network call because kmsv2PluginWriteDEKSourceMaxTTL is meant to be the upper bound 386 expirationTimestamp := envelopekmsv2.NowFunc().Add(kmsv2PluginWriteDEKSourceMaxTTL) 387 388 // dynamically check if we want to use KDF seed to derive DEKs or just a single DEK 389 // this gate can only change during tests, but the check is cheap enough to always make 390 // this allows us to easily exercise both modes without restarting the API server 391 // TODO integration test that this dynamically takes effect 392 useSeed := GetKDF() 393 stateUseSeed := state.EncryptedObject.EncryptedDEKSourceType == kmstypes.EncryptedDEKSourceType_HKDF_SHA256_XNONCE_AES_GCM_SEED 394 395 // state is valid and status keyID is unchanged from when we generated this DEK/seed so there is no need to rotate it 396 // just move the expiration of the current state forward by the reuse interval 397 // useSeed can only change at runtime during tests, so we check it here to allow us to easily exercise both modes 398 if errState == nil && state.EncryptedObject.KeyID == statusKeyID && stateUseSeed == useSeed { 399 state.ExpirationTimestamp = expirationTimestamp 400 h.state.Store(&state) 401 return nil 402 } 403 404 transformer, encObject, cacheKey, errGen := envelopekmsv2.GenerateTransformer(ctx, uid, h.service, useSeed) 405 406 if encObject == nil { 407 encObject = &kmstypes.EncryptedObject{} // avoid nil panics 408 } 409 410 // happy path, should be the common case 411 // TODO maybe add success metrics? 412 if errGen == nil && encObject.KeyID == statusKeyID { 413 h.state.Store(&envelopekmsv2.State{ 414 Transformer: transformer, 415 EncryptedObject: *encObject, 416 UID: uid, 417 ExpirationTimestamp: expirationTimestamp, 418 CacheKey: cacheKey, 419 }) 420 421 // it should be logically impossible for the new state to be invalid but check just in case 422 _, errGen = h.getCurrentState() 423 if errGen == nil { 424 klogV6 := klog.V(6) 425 if klogV6.Enabled() { 426 klogV6.InfoS("successfully rotated DEK", 427 "uid", uid, 428 "useSeed", useSeed, 429 "newKeyIDHash", envelopekmsv2.GetHashIfNotEmpty(encObject.KeyID), 430 "oldKeyIDHash", envelopekmsv2.GetHashIfNotEmpty(state.EncryptedObject.KeyID), 431 "expirationTimestamp", expirationTimestamp.Format(time.RFC3339), 432 ) 433 } 434 return nil 435 } 436 } 437 438 return fmt.Errorf("failed to rotate DEK uid=%q, useSeed=%v, errState=%v, errGen=%v, statusKeyIDHash=%q, encryptKeyIDHash=%q, stateKeyIDHash=%q, expirationTimestamp=%s", 439 uid, useSeed, errState, errGen, envelopekmsv2.GetHashIfNotEmpty(statusKeyID), envelopekmsv2.GetHashIfNotEmpty(encObject.KeyID), envelopekmsv2.GetHashIfNotEmpty(state.EncryptedObject.KeyID), state.ExpirationTimestamp.Format(time.RFC3339)) 440 } 441 442 // getCurrentState returns the latest state from the last status and encrypt calls. 443 // If the returned error is nil, the state is considered valid indefinitely for read requests. 444 // For write requests, the caller must also check that state.ValidateEncryptCapability does not error. 445 func (h *kmsv2PluginProbe) getCurrentState() (envelopekmsv2.State, error) { 446 state := *h.state.Load() 447 448 if state.Transformer == nil { 449 return envelopekmsv2.State{}, fmt.Errorf("got unexpected nil transformer") 450 } 451 452 encryptedObjectCopy := state.EncryptedObject 453 if len(encryptedObjectCopy.EncryptedData) != 0 { 454 return envelopekmsv2.State{}, fmt.Errorf("got unexpected non-empty EncryptedData") 455 } 456 encryptedObjectCopy.EncryptedData = []byte{0} // any non-empty value to pass validation 457 if err := envelopekmsv2.ValidateEncryptedObject(&encryptedObjectCopy); err != nil { 458 return envelopekmsv2.State{}, fmt.Errorf("got invalid EncryptedObject: %w", err) 459 } 460 461 if state.ExpirationTimestamp.IsZero() { 462 return envelopekmsv2.State{}, fmt.Errorf("got unexpected zero expirationTimestamp") 463 } 464 465 if len(state.CacheKey) == 0 { 466 return envelopekmsv2.State{}, fmt.Errorf("got unexpected empty cacheKey") 467 } 468 469 return state, nil 470 } 471 472 func (h *kmsv2PluginProbe) isKMSv2ProviderHealthyAndMaybeRotateDEK(ctx context.Context, response *kmsservice.StatusResponse) error { 473 var errs []error 474 if response.Healthz != "ok" { 475 errs = append(errs, fmt.Errorf("got unexpected healthz status: %s", response.Healthz)) 476 } 477 if response.Version != envelopekmsv2.KMSAPIVersionv2 && response.Version != envelopekmsv2.KMSAPIVersionv2beta1 { 478 errs = append(errs, fmt.Errorf("expected KMSv2 API version %s, got %s", envelopekmsv2.KMSAPIVersionv2, response.Version)) 479 } else { 480 // set version for the first status response 481 if len(h.version) == 0 { 482 h.version = response.Version 483 } 484 if h.version != response.Version { 485 errs = append(errs, fmt.Errorf("KMSv2 API version should not change after the initial status response version %s, got %s", h.version, response.Version)) 486 } 487 } 488 489 if errCode, err := envelopekmsv2.ValidateKeyID(response.KeyID); err != nil { 490 envelopemetrics.RecordInvalidKeyIDFromStatus(h.name, string(errCode)) 491 errs = append(errs, fmt.Errorf("got invalid KMSv2 KeyID hash %q: %w", envelopekmsv2.GetHashIfNotEmpty(response.KeyID), err)) 492 } else { 493 envelopemetrics.RecordKeyIDFromStatus(h.name, response.KeyID, h.apiServerID) 494 // unconditionally append as we filter out nil errors below 495 errs = append(errs, h.rotateDEKOnKeyIDChange(ctx, response.KeyID, string(uuid.NewUUID()))) 496 } 497 498 if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil { 499 return fmt.Errorf("kmsv2 Provider %s is not healthy, error: %w", h.name, err) 500 } 501 return nil 502 } 503 504 // loadConfig parses the encryption configuration file at filepath and returns the parsed config and hash of the file. 505 func loadConfig(filepath string, reload bool) (*apiserver.EncryptionConfiguration, string, error) { 506 data, contentHash, err := loadDataAndHash(filepath) 507 if err != nil { 508 return nil, "", fmt.Errorf("error while loading file: %w", err) 509 } 510 511 configObj, gvk, err := codecs.UniversalDecoder().Decode(data, nil, nil) 512 if err != nil { 513 return nil, "", fmt.Errorf("error decoding encryption provider configuration file %q: %w", filepath, err) 514 } 515 config, ok := configObj.(*apiserver.EncryptionConfiguration) 516 if !ok { 517 return nil, "", fmt.Errorf("got unexpected config type: %v", gvk) 518 } 519 520 return config, contentHash, validation.ValidateEncryptionConfiguration(config, reload).ToAggregate() 521 } 522 523 func loadDataAndHash(filepath string) ([]byte, string, error) { 524 data, err := os.ReadFile(filepath) 525 if err != nil { 526 return nil, "", fmt.Errorf("error reading encryption provider configuration file %q: %w", filepath, err) 527 } 528 if len(data) == 0 { 529 return nil, "", fmt.Errorf("encryption provider configuration file %q is empty", filepath) 530 } 531 532 return data, computeEncryptionConfigHash(data), nil 533 } 534 535 // GetEncryptionConfigHash reads the encryption configuration file at filepath and returns the hash of the file. 536 // It does not attempt to decode or load the config, and serves as a cheap check to determine if the file has changed. 537 func GetEncryptionConfigHash(filepath string) (string, error) { 538 _, contentHash, err := loadDataAndHash(filepath) 539 return contentHash, err 540 } 541 542 // prefixTransformersAndProbes creates the set of transformers and KMS probes based on the given resource config. 543 // It may launch multiple go routines whose lifecycle is controlled by ctx. 544 // In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched. 545 func prefixTransformersAndProbes(ctx context.Context, config apiserver.ResourceConfiguration, apiServerID string) ([]storagevalue.PrefixTransformer, []healthChecker, *kmsState, error) { 546 var transformers []storagevalue.PrefixTransformer 547 var probes []healthChecker 548 var kmsUsed kmsState 549 550 for _, provider := range config.Providers { 551 provider := provider 552 var ( 553 transformer storagevalue.PrefixTransformer 554 transformerErr error 555 probe healthChecker 556 used *kmsState 557 ) 558 559 switch { 560 case provider.AESGCM != nil: 561 transformer, transformerErr = aesPrefixTransformer(provider.AESGCM, aestransformer.NewGCMTransformer, aesGCMTransformerPrefixV1) 562 563 case provider.AESCBC != nil: 564 cbcTransformer := func(block cipher.Block) (storagevalue.Transformer, error) { 565 return aestransformer.NewCBCTransformer(block), nil 566 } 567 transformer, transformerErr = aesPrefixTransformer(provider.AESCBC, cbcTransformer, aesCBCTransformerPrefixV1) 568 569 case provider.Secretbox != nil: 570 transformer, transformerErr = secretboxPrefixTransformer(provider.Secretbox) 571 572 case provider.KMS != nil: 573 transformer, probe, used, transformerErr = kmsPrefixTransformer(ctx, provider.KMS, apiServerID) 574 if transformerErr == nil { 575 probes = append(probes, probe) 576 kmsUsed.accumulate(used) 577 } 578 579 case provider.Identity != nil: 580 transformer = storagevalue.PrefixTransformer{ 581 Transformer: identity.NewEncryptCheckTransformer(), 582 Prefix: []byte{}, 583 } 584 585 default: 586 return nil, nil, nil, errors.New("provider does not contain any of the expected providers: KMS, AESGCM, AESCBC, Secretbox, Identity") 587 } 588 589 if transformerErr != nil { 590 return nil, nil, nil, transformerErr 591 } 592 593 transformers = append(transformers, transformer) 594 } 595 596 return transformers, probes, &kmsUsed, nil 597 } 598 599 type blockTransformerFunc func(cipher.Block) (storagevalue.Transformer, error) 600 601 func aesPrefixTransformer(config *apiserver.AESConfiguration, fn blockTransformerFunc, prefix string) (storagevalue.PrefixTransformer, error) { 602 var result storagevalue.PrefixTransformer 603 604 if len(config.Keys) == 0 { 605 return result, fmt.Errorf("aes provider has no valid keys") 606 } 607 for _, key := range config.Keys { 608 key := key 609 if key.Name == "" { 610 return result, fmt.Errorf("key with invalid name provided") 611 } 612 if key.Secret == "" { 613 return result, fmt.Errorf("key %v has no provided secret", key.Name) 614 } 615 } 616 617 keyTransformers := []storagevalue.PrefixTransformer{} 618 619 for _, keyData := range config.Keys { 620 keyData := keyData 621 key, err := base64.StdEncoding.DecodeString(keyData.Secret) 622 if err != nil { 623 return result, fmt.Errorf("could not obtain secret for named key %s: %w", keyData.Name, err) 624 } 625 block, err := aes.NewCipher(key) 626 if err != nil { 627 return result, fmt.Errorf("error while creating cipher for named key %s: %w", keyData.Name, err) 628 } 629 transformer, err := fn(block) 630 if err != nil { 631 return result, fmt.Errorf("error while creating transformer for named key %s: %w", keyData.Name, err) 632 } 633 634 // Create a new PrefixTransformer for this key 635 keyTransformers = append(keyTransformers, 636 storagevalue.PrefixTransformer{ 637 Transformer: transformer, 638 Prefix: []byte(keyData.Name + ":"), 639 }) 640 } 641 642 // Create a prefixTransformer which can choose between these keys 643 keyTransformer := storagevalue.NewPrefixTransformers( 644 fmt.Errorf("no matching key was found for the provided AES transformer"), keyTransformers...) 645 646 // Create a PrefixTransformer which shall later be put in a list with other providers 647 result = storagevalue.PrefixTransformer{ 648 Transformer: keyTransformer, 649 Prefix: []byte(prefix), 650 } 651 return result, nil 652 } 653 654 func secretboxPrefixTransformer(config *apiserver.SecretboxConfiguration) (storagevalue.PrefixTransformer, error) { 655 var result storagevalue.PrefixTransformer 656 657 if len(config.Keys) == 0 { 658 return result, fmt.Errorf("secretbox provider has no valid keys") 659 } 660 for _, key := range config.Keys { 661 key := key 662 if key.Name == "" { 663 return result, fmt.Errorf("key with invalid name provided") 664 } 665 if key.Secret == "" { 666 return result, fmt.Errorf("key %v has no provided secret", key.Name) 667 } 668 } 669 670 keyTransformers := []storagevalue.PrefixTransformer{} 671 672 for _, keyData := range config.Keys { 673 keyData := keyData 674 key, err := base64.StdEncoding.DecodeString(keyData.Secret) 675 if err != nil { 676 return result, fmt.Errorf("could not obtain secret for named key %s: %s", keyData.Name, err) 677 } 678 679 if len(key) != 32 { 680 return result, fmt.Errorf("expected key size 32 for secretbox provider, got %v", len(key)) 681 } 682 683 keyArray := [32]byte{} 684 copy(keyArray[:], key) 685 686 // Create a new PrefixTransformer for this key 687 keyTransformers = append(keyTransformers, 688 storagevalue.PrefixTransformer{ 689 Transformer: secretbox.NewSecretboxTransformer(keyArray), 690 Prefix: []byte(keyData.Name + ":"), 691 }) 692 } 693 694 // Create a prefixTransformer which can choose between these keys 695 keyTransformer := storagevalue.NewPrefixTransformers( 696 fmt.Errorf("no matching key was found for the provided Secretbox transformer"), keyTransformers...) 697 698 // Create a PrefixTransformer which shall later be put in a list with other providers 699 result = storagevalue.PrefixTransformer{ 700 Transformer: keyTransformer, 701 Prefix: []byte(secretboxTransformerPrefixV1), 702 } 703 return result, nil 704 } 705 706 var ( 707 // The factory to create kms service. This is to make writing test easier. 708 envelopeServiceFactory = envelope.NewGRPCService 709 710 // The factory to create kmsv2 service. Exported for integration tests. 711 EnvelopeKMSv2ServiceFactory = envelopekmsv2.NewGRPCService 712 ) 713 714 type kmsState struct { 715 v1Used, v2Used bool 716 kmsTimeoutSum time.Duration 717 } 718 719 // accumulate computes the KMS state by: 720 // - determining which KMS plugin versions are in use 721 // - calculating kmsTimeoutSum which is used as transformTracker.kmsCloseGracePeriod 722 // DynamicTransformers.Set waits for this period before closing old transformers after a config reload 723 func (s *kmsState) accumulate(other *kmsState) { 724 s.v1Used = s.v1Used || other.v1Used 725 s.v2Used = s.v2Used || other.v2Used 726 s.kmsTimeoutSum += other.kmsTimeoutSum 727 } 728 729 // kmsPrefixTransformer creates a KMS transformer and probe based on the given KMS config. 730 // It may launch multiple go routines whose lifecycle is controlled by ctx. 731 // In case of an error, the caller is responsible for canceling ctx to clean up any go routines that may have been launched. 732 func kmsPrefixTransformer(ctx context.Context, config *apiserver.KMSConfiguration, apiServerID string) (storagevalue.PrefixTransformer, healthChecker, *kmsState, error) { 733 kmsName := config.Name 734 switch config.APIVersion { 735 case kmsAPIVersionV1: 736 if !utilfeature.DefaultFeatureGate.Enabled(features.KMSv1) { 737 return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("KMSv1 is deprecated and will only receive security updates going forward. Use KMSv2 instead. Set --feature-gates=KMSv1=true to use the deprecated KMSv1 feature.") 738 } 739 klog.InfoS("KMSv1 is deprecated and will only receive security updates going forward. Use KMSv2 instead.") 740 741 envelopeService, err := envelopeServiceFactory(ctx, config.Endpoint, config.Timeout.Duration) 742 if err != nil { 743 return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv1-Plugin's probe %q, error: %w", kmsName, err) 744 } 745 746 probe := &kmsPluginProbe{ 747 name: kmsName, 748 ttl: kmsPluginHealthzNegativeTTL, 749 service: envelopeService, 750 l: &sync.Mutex{}, 751 lastResponse: &kmsPluginHealthzResponse{}, 752 } 753 754 transformer := envelopePrefixTransformer(config, envelopeService, kmsTransformerPrefixV1) 755 756 return transformer, probe, &kmsState{ 757 v1Used: true, 758 // for v1 we will do encrypt and decrypt for health check. Since these are serial operations, we will double the timeout. 759 kmsTimeoutSum: 2 * config.Timeout.Duration, 760 }, nil 761 762 case kmsAPIVersionV2: 763 if !utilfeature.DefaultFeatureGate.Enabled(features.KMSv2) { 764 return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2 plugin %q, KMSv2 feature is not enabled", kmsName) 765 } 766 767 envelopeService, err := EnvelopeKMSv2ServiceFactory(ctx, config.Endpoint, config.Name, config.Timeout.Duration) 768 if err != nil { 769 return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMSv2-Plugin's probe %q, error: %w", kmsName, err) 770 } 771 772 probe := &kmsv2PluginProbe{ 773 name: kmsName, 774 ttl: kmsPluginHealthzNegativeTTL, 775 service: envelopeService, 776 l: &sync.Mutex{}, 777 lastResponse: &kmsPluginHealthzResponse{}, 778 apiServerID: apiServerID, 779 } 780 // initialize state so that Load always works 781 probe.state.Store(&envelopekmsv2.State{}) 782 783 primeAndProbeKMSv2(ctx, probe, kmsName) 784 transformer := storagevalue.PrefixTransformer{ 785 Transformer: envelopekmsv2.NewEnvelopeTransformer(envelopeService, kmsName, probe.getCurrentState, apiServerID), 786 Prefix: []byte(kmsTransformerPrefixV2 + kmsName + ":"), 787 } 788 789 return transformer, probe, &kmsState{ 790 v2Used: true, 791 kmsTimeoutSum: config.Timeout.Duration, 792 }, nil 793 794 default: 795 return storagevalue.PrefixTransformer{}, nil, nil, fmt.Errorf("could not configure KMS plugin %q, unsupported KMS API version %q", kmsName, config.APIVersion) 796 } 797 } 798 799 func primeAndProbeKMSv2(ctx context.Context, probe *kmsv2PluginProbe, kmsName string) { 800 runProbeCheckAndLog := func(ctx context.Context, depth int) error { 801 if err := probe.check(ctx); err != nil { 802 klog.VDepth(1+depth, 2).ErrorS(err, "kms plugin failed health check probe", "name", kmsName) 803 return err 804 } 805 return nil 806 } 807 808 blockAndProbeFastUntilSuccess := func(ctx context.Context) { 809 _ = wait.PollUntilWithContext( 810 ctx, 811 kmsv2PluginHealthzNegativeInterval, 812 func(ctx context.Context) (bool, error) { 813 return runProbeCheckAndLog(ctx, 1) == nil, nil 814 }, 815 ) 816 } 817 818 // on the happy path where the plugin is healthy and available on server start, 819 // prime keyID and DEK by running the check inline once (this also prevents unit tests from flaking) 820 errPrime := runProbeCheckAndLog(ctx, 0) 821 822 // if our initial attempt to prime failed, start trying to get to a valid state in the background ASAP 823 // this prevents a slow start when the external healthz checker is configured to ignore the KMS healthz endpoint 824 // since we want to support the plugin starting up async with the API server, this error is not fatal 825 if errPrime != nil { 826 go blockAndProbeFastUntilSuccess(ctx) // separate go routine to avoid blocking 827 } 828 829 // make sure that the plugin's key ID is reasonably up-to-date 830 // also, make sure that our DEK is up-to-date to with said key ID (if it expires the server will fail all writes) 831 // if this background loop ever stops running, the server will become unfunctional after kmsv2PluginWriteDEKSourceMaxTTL 832 go wait.PollUntilWithContext( 833 ctx, 834 kmsv2PluginHealthzPositiveInterval, 835 func(ctx context.Context) (bool, error) { 836 if err := runProbeCheckAndLog(ctx, 0); err == nil { 837 return false, nil 838 } 839 840 // TODO add integration test for quicker error poll on failure 841 // if we fail, block the outer polling and start a new quicker poll inline 842 // this limits the chance that our DEK expires during a transient failure 843 blockAndProbeFastUntilSuccess(ctx) 844 845 return false, nil 846 }) 847 } 848 849 func envelopePrefixTransformer(config *apiserver.KMSConfiguration, envelopeService envelope.Service, prefix string) storagevalue.PrefixTransformer { 850 baseTransformerFunc := func(block cipher.Block) (storagevalue.Transformer, error) { 851 gcm, err := aestransformer.NewGCMTransformer(block) 852 if err != nil { 853 return nil, err 854 } 855 856 // v1.24: write using AES-CBC only but support reads via AES-CBC and AES-GCM (so we can move to AES-GCM) 857 // v1.25: write using AES-GCM only but support reads via AES-GCM and fallback to AES-CBC for backwards compatibility 858 // TODO(aramase): Post v1.25: We cannot drop CBC read support until we automate storage migration. 859 // We could have a release note that hard requires users to perform storage migration. 860 return unionTransformers{gcm, aestransformer.NewCBCTransformer(block)}, nil 861 } 862 863 return storagevalue.PrefixTransformer{ 864 Transformer: envelope.NewEnvelopeTransformer(envelopeService, int(*config.CacheSize), baseTransformerFunc), 865 Prefix: []byte(prefix + config.Name + ":"), 866 } 867 } 868 869 type unionTransformers []storagevalue.Transformer 870 871 func (u unionTransformers) TransformFromStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) (out []byte, stale bool, err error) { 872 var errs []error 873 for i := range u { 874 transformer := u[i] 875 result, stale, err := transformer.TransformFromStorage(ctx, data, dataCtx) 876 if err != nil { 877 errs = append(errs, err) 878 continue 879 } 880 // when i != 0, we have transformed the data from storage using the new transformer, 881 // we want to issue a write to etcd even if the contents of the data haven't changed 882 return result, stale || i != 0, nil 883 } 884 if err := utilerrors.Reduce(utilerrors.NewAggregate(errs)); err != nil { 885 return nil, false, err 886 } 887 return nil, false, fmt.Errorf("unionTransformers: unable to transform from storage") 888 } 889 890 func (u unionTransformers) TransformToStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) (out []byte, err error) { 891 return u[0].TransformToStorage(ctx, data, dataCtx) 892 } 893 894 // computeEncryptionConfigHash returns the expected hash for an encryption config file that has been loaded as bytes. 895 // We use a hash instead of the raw file contents when tracking changes to avoid holding any encryption keys in memory outside of their associated transformers. 896 // This hash must be used in-memory and not externalized to the process because it has no cross-release stability guarantees. 897 func computeEncryptionConfigHash(data []byte) string { 898 return fmt.Sprintf("k8s:enc:unstable:1:%x", sha256.Sum256(data)) 899 } 900 901 var _ storagevalue.ResourceTransformers = &DynamicTransformers{} 902 var _ healthz.HealthChecker = &DynamicTransformers{} 903 904 // DynamicTransformers holds transformers that may be dynamically updated via a single external actor, likely a controller. 905 // This struct must avoid locks (even read write locks) as it is inline to all calls to storage. 906 type DynamicTransformers struct { 907 transformTracker *atomic.Value 908 } 909 910 type transformTracker struct { 911 transformerOverrides map[schema.GroupResource]storagevalue.Transformer 912 kmsPluginHealthzCheck healthz.HealthChecker 913 closeTransformers context.CancelFunc 914 kmsCloseGracePeriod time.Duration 915 } 916 917 // NewDynamicTransformers returns transformers, health checks for kms providers and an ability to close transformers. 918 func NewDynamicTransformers( 919 transformerOverrides map[schema.GroupResource]storagevalue.Transformer, 920 kmsPluginHealthzCheck healthz.HealthChecker, 921 closeTransformers context.CancelFunc, 922 kmsCloseGracePeriod time.Duration, 923 ) *DynamicTransformers { 924 dynamicTransformers := &DynamicTransformers{ 925 transformTracker: &atomic.Value{}, 926 } 927 928 tracker := &transformTracker{ 929 transformerOverrides: transformerOverrides, 930 kmsPluginHealthzCheck: kmsPluginHealthzCheck, 931 closeTransformers: closeTransformers, 932 kmsCloseGracePeriod: kmsCloseGracePeriod, 933 } 934 dynamicTransformers.transformTracker.Store(tracker) 935 936 return dynamicTransformers 937 } 938 939 // Check implements healthz.HealthChecker 940 func (d *DynamicTransformers) Check(req *http.Request) error { 941 return d.transformTracker.Load().(*transformTracker).kmsPluginHealthzCheck.Check(req) 942 } 943 944 // Name implements healthz.HealthChecker 945 func (d *DynamicTransformers) Name() string { 946 return kmsReloadHealthCheckName 947 } 948 949 // TransformerForResource returns the transformer for the given resource. 950 func (d *DynamicTransformers) TransformerForResource(resource schema.GroupResource) storagevalue.Transformer { 951 return &resourceTransformer{ 952 resource: resource, 953 transformTracker: d.transformTracker, 954 } 955 } 956 957 // Set sets the transformer overrides. This method is not go routine safe and must only be called by the same, single caller throughout the lifetime of this object. 958 func (d *DynamicTransformers) Set( 959 transformerOverrides map[schema.GroupResource]storagevalue.Transformer, 960 closeTransformers context.CancelFunc, 961 kmsPluginHealthzCheck healthz.HealthChecker, 962 kmsCloseGracePeriod time.Duration, 963 ) { 964 // store new values 965 newTransformTracker := &transformTracker{ 966 transformerOverrides: transformerOverrides, 967 closeTransformers: closeTransformers, 968 kmsPluginHealthzCheck: kmsPluginHealthzCheck, 969 kmsCloseGracePeriod: kmsCloseGracePeriod, 970 } 971 972 // update new transformer overrides 973 oldTransformTracker := d.transformTracker.Swap(newTransformTracker).(*transformTracker) 974 975 // close old transformers once we wait for grpc request to finish any in-flight requests. 976 // by the time we spawn this go routine, the new transformers have already been set and will be used for new requests. 977 // if the server starts shutting down during sleep duration then the transformers will correctly closed early because their lifetime is tied to the api-server drain notifier. 978 go func() { 979 time.Sleep(oldTransformTracker.kmsCloseGracePeriod) 980 oldTransformTracker.closeTransformers() 981 }() 982 } 983 984 var _ storagevalue.Transformer = &resourceTransformer{} 985 986 type resourceTransformer struct { 987 resource schema.GroupResource 988 transformTracker *atomic.Value 989 } 990 991 func (r *resourceTransformer) TransformFromStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) ([]byte, bool, error) { 992 return r.transformer().TransformFromStorage(ctx, data, dataCtx) 993 } 994 995 func (r *resourceTransformer) TransformToStorage(ctx context.Context, data []byte, dataCtx storagevalue.Context) ([]byte, error) { 996 return r.transformer().TransformToStorage(ctx, data, dataCtx) 997 } 998 999 func (r *resourceTransformer) transformer() storagevalue.Transformer { 1000 return transformerFromOverrides(r.transformTracker.Load().(*transformTracker).transformerOverrides, r.resource) 1001 } 1002 1003 var _ storagevalue.ResourceTransformers = &StaticTransformers{} 1004 1005 type StaticTransformers map[schema.GroupResource]storagevalue.Transformer 1006 1007 func (s StaticTransformers) TransformerForResource(resource schema.GroupResource) storagevalue.Transformer { 1008 return transformerFromOverrides(s, resource) 1009 } 1010 1011 var anyGroupAnyResource = schema.GroupResource{ 1012 Group: "*", 1013 Resource: "*", 1014 } 1015 1016 func transformerFromOverrides(transformerOverrides map[schema.GroupResource]storagevalue.Transformer, resource schema.GroupResource) storagevalue.Transformer { 1017 if transformer := transformerOverrides[resource]; transformer != nil { 1018 return transformer 1019 } 1020 1021 if transformer := transformerOverrides[schema.GroupResource{ 1022 Group: resource.Group, 1023 Resource: "*", 1024 }]; transformer != nil { 1025 return transformer 1026 } 1027 1028 if transformer := transformerOverrides[anyGroupAnyResource]; transformer != nil { 1029 return transformer 1030 } 1031 1032 return identity.NewEncryptCheckTransformer() 1033 } 1034 1035 func grYAMLString(gr schema.GroupResource) string { 1036 if gr.Group == "" && gr.Resource == "*" { 1037 return "*." 1038 } 1039 1040 return gr.String() 1041 }