k8s.io/apiserver@v0.31.1/pkg/server/options/encryptionconfig/controller/controller.go (about)

     1  /*
     2  Copyright 2022 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package controller
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"net/http"
    23  	"sync"
    24  	"time"
    25  
    26  	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
    27  	"k8s.io/apimachinery/pkg/util/wait"
    28  	"k8s.io/apiserver/pkg/server/healthz"
    29  	"k8s.io/apiserver/pkg/server/options/encryptionconfig"
    30  	"k8s.io/apiserver/pkg/server/options/encryptionconfig/metrics"
    31  	"k8s.io/client-go/util/workqueue"
    32  	"k8s.io/klog/v2"
    33  )
    34  
// workqueueKey is the dummy key used to process change in encryption config file.
// The controller only ever enqueues this one key: an item in the queue carries no
// data of its own, it just signals that the encryption config file should be
// checked again.
const workqueueKey = "key"
    37  
// EncryptionConfigFileChangePollDuration is the interval at which Run enqueues a
// check of the encryption config file.
// It is exposed so that integration tests can crank up the reload speed.
var EncryptionConfigFileChangePollDuration = time.Minute
    40  
// DynamicEncryptionConfigContent is a controller which can dynamically handle
// changes in the encryption config file: it re-checks the file at filePath and,
// when the file's content hash differs from the last applied one, loads the new
// config and hands the resulting transformers to dynamicTransformers.
type DynamicEncryptionConfigContent struct {
	// name identifies this controller in log messages and is used as the name of its work queue.
	name string

	// filePath is the path of the file to read.
	filePath string

	// lastLoadedEncryptionConfigHash stores the content hash of the last encryption
	// config that was successfully applied (initially the hash passed to the
	// constructor). It is compared against the hash of the file on disk to decide
	// whether a reload is needed.
	lastLoadedEncryptionConfigHash string

	// queue for processing changes in encryption config file.
	// Only the dummy workqueueKey is ever added to it.
	queue workqueue.TypedRateLimitingInterface[string]

	// dynamicTransformers updates the transformers when encryption config file changes.
	dynamicTransformers *encryptionconfig.DynamicTransformers

	// identity of the api server.
	// It is passed to loadEncryptionConfig and to the automatic reload metrics.
	apiServerID string

	// can be swapped during testing
	getEncryptionConfigHash func(ctx context.Context, filepath string) (string, error)
	loadEncryptionConfig    func(ctx context.Context, filepath string, reload bool, apiServerID string) (*encryptionconfig.EncryptionConfiguration, error)
}
    64  
// init calls metrics.RegisterMetrics when this package is loaded, so the
// encryption config metrics recorded by the controller are registered before use.
func init() {
	metrics.RegisterMetrics()
}
    68  
    69  // NewDynamicEncryptionConfiguration returns controller that dynamically reacts to changes in encryption config file.
    70  func NewDynamicEncryptionConfiguration(
    71  	name, filePath string,
    72  	dynamicTransformers *encryptionconfig.DynamicTransformers,
    73  	configContentHash string,
    74  	apiServerID string,
    75  ) *DynamicEncryptionConfigContent {
    76  	return &DynamicEncryptionConfigContent{
    77  		name:                           name,
    78  		filePath:                       filePath,
    79  		lastLoadedEncryptionConfigHash: configContentHash,
    80  		dynamicTransformers:            dynamicTransformers,
    81  		queue: workqueue.NewTypedRateLimitingQueueWithConfig(
    82  			workqueue.DefaultTypedControllerRateLimiter[string](),
    83  			workqueue.TypedRateLimitingQueueConfig[string]{Name: name},
    84  		),
    85  		apiServerID: apiServerID,
    86  		getEncryptionConfigHash: func(_ context.Context, filepath string) (string, error) {
    87  			return encryptionconfig.GetEncryptionConfigHash(filepath)
    88  		},
    89  		loadEncryptionConfig: encryptionconfig.LoadEncryptionConfig,
    90  	}
    91  }
    92  
    93  // Run starts the controller and blocks until ctx is canceled.
    94  func (d *DynamicEncryptionConfigContent) Run(ctx context.Context) {
    95  	defer utilruntime.HandleCrash()
    96  
    97  	klog.InfoS("Starting controller", "name", d.name)
    98  	defer klog.InfoS("Shutting down controller", "name", d.name)
    99  
   100  	var wg sync.WaitGroup
   101  
   102  	wg.Add(1)
   103  	go func() {
   104  		defer utilruntime.HandleCrash()
   105  		defer wg.Done()
   106  		defer d.queue.ShutDown()
   107  		<-ctx.Done()
   108  	}()
   109  
   110  	wg.Add(1)
   111  	go func() {
   112  		defer utilruntime.HandleCrash()
   113  		defer wg.Done()
   114  		d.runWorker(ctx)
   115  	}()
   116  
   117  	// this function polls changes in the encryption config file by placing a dummy key in the queue.
   118  	// the 'runWorker' function then picks up this dummy key and processes the changes.
   119  	// the goroutine terminates when 'ctx' is canceled.
   120  	_ = wait.PollUntilContextCancel(
   121  		ctx,
   122  		EncryptionConfigFileChangePollDuration,
   123  		true,
   124  		func(ctx context.Context) (bool, error) {
   125  			// add dummy item to the queue to trigger file content processing.
   126  			d.queue.Add(workqueueKey)
   127  
   128  			// return false to continue polling.
   129  			return false, nil
   130  		},
   131  	)
   132  
   133  	wg.Wait()
   134  }
   135  
   136  // runWorker to process file content
   137  func (d *DynamicEncryptionConfigContent) runWorker(ctx context.Context) {
   138  	for d.processNextWorkItem(ctx) {
   139  	}
   140  }
   141  
   142  // processNextWorkItem processes file content when there is a message in the queue.
   143  func (d *DynamicEncryptionConfigContent) processNextWorkItem(serverCtx context.Context) bool {
   144  	// key here is dummy item in the queue to trigger file content processing.
   145  	key, quit := d.queue.Get()
   146  	if quit {
   147  		return false
   148  	}
   149  	defer d.queue.Done(key)
   150  
   151  	d.processWorkItem(serverCtx, key)
   152  
   153  	return true
   154  }
   155  
// processWorkItem performs one reload attempt of the encryption config file.
//
// It loads the config if its hash changed, requires exactly one health check on
// the loaded config, waits for that health check to pass, and only then swaps the
// new transformers into d.dynamicTransformers and records the new content hash.
//
// On any error the failure metric is recorded, the error is reported through
// utilruntime.HandleError and workqueueKey is re-added to the queue with rate
// limiting so the reload is retried. Nothing is returned to the caller.
//
// Note that the workqueueKey parameter shadows the package-level constant of the
// same name inside this function.
func (d *DynamicEncryptionConfigContent) processWorkItem(serverCtx context.Context, workqueueKey string) {
	// these are declared up front because the deferred function below inspects
	// their final values when processWorkItem returns.
	var (
		updatedEffectiveConfig  bool
		err                     error
		encryptionConfiguration *encryptionconfig.EncryptionConfiguration
		configChanged           bool
	)

	// get context to close the new transformers (on error cases and on the next reload)
	// serverCtx is attached to the API server's lifecycle so we will always close transformers on shut down
	ctx, closeTransformers := context.WithCancel(serverCtx)

	defer func() {
		// TODO can work queue metrics help here?

		if !updatedEffectiveConfig {
			// avoid leaking if we're not using the newly constructed transformers (due to an error or them not being changed)
			closeTransformers()
		}

		if updatedEffectiveConfig && err == nil {
			metrics.RecordEncryptionConfigAutomaticReloadSuccess(d.apiServerID)
		}

		if err != nil {
			metrics.RecordEncryptionConfigAutomaticReloadFailure(d.apiServerID)
			utilruntime.HandleError(fmt.Errorf("error processing encryption config file %s: %v", d.filePath, err))
			// add dummy item back to the queue to trigger file content processing.
			d.queue.AddRateLimited(workqueueKey)
		}
	}()

	// err is assigned with '=' (not ':=') throughout so the deferred function sees it.
	encryptionConfiguration, configChanged, err = d.processEncryptionConfig(ctx)
	if err != nil {
		return
	}
	if !configChanged {
		// nothing to apply; the deferred function cancels ctx.
		return
	}

	if len(encryptionConfiguration.HealthChecks) != 1 {
		err = fmt.Errorf("unexpected number of healthz checks: %d. Should have only one", len(encryptionConfiguration.HealthChecks))
		return
	}
	// get healthz checks for all new KMS plugins.
	if err = d.validateNewTransformersHealth(ctx, encryptionConfiguration.HealthChecks[0], encryptionConfiguration.KMSCloseGracePeriod); err != nil {
		return
	}

	// update transformers.
	// when reload=true there must always be one healthz check.
	// closeTransformers is handed over to dynamicTransformers here, which is why
	// the deferred function must not call it once updatedEffectiveConfig is true.
	d.dynamicTransformers.Set(
		encryptionConfiguration.Transformers,
		closeTransformers,
		encryptionConfiguration.HealthChecks[0],
		encryptionConfiguration.KMSCloseGracePeriod,
	)

	// update local copy of recent config content once update is successful.
	d.lastLoadedEncryptionConfigHash = encryptionConfiguration.EncryptionFileContentHash
	klog.V(2).InfoS("Loaded new kms encryption config content", "name", d.name)

	updatedEffectiveConfig = true
}
   220  
   221  // loadEncryptionConfig processes the next set of content from the file.
   222  func (d *DynamicEncryptionConfigContent) processEncryptionConfig(ctx context.Context) (
   223  	_ *encryptionconfig.EncryptionConfiguration,
   224  	configChanged bool,
   225  	_ error,
   226  ) {
   227  	contentHash, err := d.getEncryptionConfigHash(ctx, d.filePath)
   228  	if err != nil {
   229  		return nil, false, err
   230  	}
   231  
   232  	// check if encryptionConfig is different from the current. Do nothing if they are the same.
   233  	if contentHash == d.lastLoadedEncryptionConfigHash {
   234  		klog.V(4).InfoS("Encryption config has not changed (before load)", "name", d.name)
   235  		return nil, false, nil
   236  	}
   237  
   238  	// this code path will only execute if reload=true. So passing true explicitly.
   239  	encryptionConfiguration, err := d.loadEncryptionConfig(ctx, d.filePath, true, d.apiServerID)
   240  	if err != nil {
   241  		return nil, false, err
   242  	}
   243  
   244  	// check if encryptionConfig is different from the current (again to avoid TOCTOU). Do nothing if they are the same.
   245  	if encryptionConfiguration.EncryptionFileContentHash == d.lastLoadedEncryptionConfigHash {
   246  		klog.V(4).InfoS("Encryption config has not changed (after load)", "name", d.name)
   247  		return nil, false, nil
   248  	}
   249  
   250  	return encryptionConfiguration, true, nil
   251  }
   252  
// minKMSPluginCloseGracePeriod is the lower bound that validateNewTransformersHealth
// applies to the KMS plugin close grace period before using it as the health check
// timeout.
// It can be lowered in unit tests to make the health check poll faster.
var minKMSPluginCloseGracePeriod = 10 * time.Second
   255  
   256  func (d *DynamicEncryptionConfigContent) validateNewTransformersHealth(
   257  	ctx context.Context,
   258  	kmsPluginHealthzCheck healthz.HealthChecker,
   259  	kmsPluginCloseGracePeriod time.Duration,
   260  ) error {
   261  	// test if new transformers are healthy
   262  	var healthCheckError error
   263  
   264  	if kmsPluginCloseGracePeriod < minKMSPluginCloseGracePeriod {
   265  		kmsPluginCloseGracePeriod = minKMSPluginCloseGracePeriod
   266  	}
   267  
   268  	// really make sure that the immediate check does not hang
   269  	var cancel context.CancelFunc
   270  	ctx, cancel = context.WithTimeout(ctx, kmsPluginCloseGracePeriod)
   271  	defer cancel()
   272  
   273  	pollErr := wait.PollImmediateWithContext(ctx, 100*time.Millisecond, kmsPluginCloseGracePeriod, func(ctx context.Context) (bool, error) {
   274  		// create a fake http get request to health check endpoint
   275  		req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/healthz/%s", kmsPluginHealthzCheck.Name()), nil)
   276  		if err != nil {
   277  			return false, err
   278  		}
   279  
   280  		healthCheckError = kmsPluginHealthzCheck.Check(req)
   281  		return healthCheckError == nil, nil
   282  	})
   283  	if pollErr != nil {
   284  		return fmt.Errorf("health check for new transformers failed, polling error %v: %w", pollErr, healthCheckError)
   285  	}
   286  	klog.V(2).InfoS("Health check succeeded")
   287  	return nil
   288  }