k8s.io/apiserver@v0.31.1/pkg/server/options/encryptionconfig/controller/controller.go (about) 1 /* 2 Copyright 2022 The Kubernetes Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package controller 18 19 import ( 20 "context" 21 "fmt" 22 "net/http" 23 "sync" 24 "time" 25 26 utilruntime "k8s.io/apimachinery/pkg/util/runtime" 27 "k8s.io/apimachinery/pkg/util/wait" 28 "k8s.io/apiserver/pkg/server/healthz" 29 "k8s.io/apiserver/pkg/server/options/encryptionconfig" 30 "k8s.io/apiserver/pkg/server/options/encryptionconfig/metrics" 31 "k8s.io/client-go/util/workqueue" 32 "k8s.io/klog/v2" 33 ) 34 35 // workqueueKey is the dummy key used to process change in encryption config file. 36 const workqueueKey = "key" 37 38 // EncryptionConfigFileChangePollDuration is exposed so that integration tests can crank up the reload speed. 39 var EncryptionConfigFileChangePollDuration = time.Minute 40 41 // DynamicEncryptionConfigContent which can dynamically handle changes in encryption config file. 42 type DynamicEncryptionConfigContent struct { 43 name string 44 45 // filePath is the path of the file to read. 46 filePath string 47 48 // lastLoadedEncryptionConfigHash stores last successfully read encryption config file content. 49 lastLoadedEncryptionConfigHash string 50 51 // queue for processing changes in encryption config file. 52 queue workqueue.TypedRateLimitingInterface[string] 53 54 // dynamicTransformers updates the transformers when encryption config file changes. 55 dynamicTransformers *encryptionconfig.DynamicTransformers 56 57 // identity of the api server 58 apiServerID string 59 60 // can be swapped during testing 61 getEncryptionConfigHash func(ctx context.Context, filepath string) (string, error) 62 loadEncryptionConfig func(ctx context.Context, filepath string, reload bool, apiServerID string) (*encryptionconfig.EncryptionConfiguration, error) 63 } 64 65 func init() { 66 metrics.RegisterMetrics() 67 } 68 69 // NewDynamicEncryptionConfiguration returns controller that dynamically reacts to changes in encryption config file. 70 func NewDynamicEncryptionConfiguration( 71 name, filePath string, 72 dynamicTransformers *encryptionconfig.DynamicTransformers, 73 configContentHash string, 74 apiServerID string, 75 ) *DynamicEncryptionConfigContent { 76 return &DynamicEncryptionConfigContent{ 77 name: name, 78 filePath: filePath, 79 lastLoadedEncryptionConfigHash: configContentHash, 80 dynamicTransformers: dynamicTransformers, 81 queue: workqueue.NewTypedRateLimitingQueueWithConfig( 82 workqueue.DefaultTypedControllerRateLimiter[string](), 83 workqueue.TypedRateLimitingQueueConfig[string]{Name: name}, 84 ), 85 apiServerID: apiServerID, 86 getEncryptionConfigHash: func(_ context.Context, filepath string) (string, error) { 87 return encryptionconfig.GetEncryptionConfigHash(filepath) 88 }, 89 loadEncryptionConfig: encryptionconfig.LoadEncryptionConfig, 90 } 91 } 92 93 // Run starts the controller and blocks until ctx is canceled. 94 func (d *DynamicEncryptionConfigContent) Run(ctx context.Context) { 95 defer utilruntime.HandleCrash() 96 97 klog.InfoS("Starting controller", "name", d.name) 98 defer klog.InfoS("Shutting down controller", "name", d.name) 99 100 var wg sync.WaitGroup 101 102 wg.Add(1) 103 go func() { 104 defer utilruntime.HandleCrash() 105 defer wg.Done() 106 defer d.queue.ShutDown() 107 <-ctx.Done() 108 }() 109 110 wg.Add(1) 111 go func() { 112 defer utilruntime.HandleCrash() 113 defer wg.Done() 114 d.runWorker(ctx) 115 }() 116 117 // this function polls changes in the encryption config file by placing a dummy key in the queue. 118 // the 'runWorker' function then picks up this dummy key and processes the changes. 119 // the goroutine terminates when 'ctx' is canceled. 120 _ = wait.PollUntilContextCancel( 121 ctx, 122 EncryptionConfigFileChangePollDuration, 123 true, 124 func(ctx context.Context) (bool, error) { 125 // add dummy item to the queue to trigger file content processing. 126 d.queue.Add(workqueueKey) 127 128 // return false to continue polling. 129 return false, nil 130 }, 131 ) 132 133 wg.Wait() 134 } 135 136 // runWorker to process file content 137 func (d *DynamicEncryptionConfigContent) runWorker(ctx context.Context) { 138 for d.processNextWorkItem(ctx) { 139 } 140 } 141 142 // processNextWorkItem processes file content when there is a message in the queue. 143 func (d *DynamicEncryptionConfigContent) processNextWorkItem(serverCtx context.Context) bool { 144 // key here is dummy item in the queue to trigger file content processing. 145 key, quit := d.queue.Get() 146 if quit { 147 return false 148 } 149 defer d.queue.Done(key) 150 151 d.processWorkItem(serverCtx, key) 152 153 return true 154 } 155 156 func (d *DynamicEncryptionConfigContent) processWorkItem(serverCtx context.Context, workqueueKey string) { 157 var ( 158 updatedEffectiveConfig bool 159 err error 160 encryptionConfiguration *encryptionconfig.EncryptionConfiguration 161 configChanged bool 162 ) 163 164 // get context to close the new transformers (on error cases and on the next reload) 165 // serverCtx is attached to the API server's lifecycle so we will always close transformers on shut down 166 ctx, closeTransformers := context.WithCancel(serverCtx) 167 168 defer func() { 169 // TODO can work queue metrics help here? 170 171 if !updatedEffectiveConfig { 172 // avoid leaking if we're not using the newly constructed transformers (due to an error or them not being changed) 173 closeTransformers() 174 } 175 176 if updatedEffectiveConfig && err == nil { 177 metrics.RecordEncryptionConfigAutomaticReloadSuccess(d.apiServerID) 178 } 179 180 if err != nil { 181 metrics.RecordEncryptionConfigAutomaticReloadFailure(d.apiServerID) 182 utilruntime.HandleError(fmt.Errorf("error processing encryption config file %s: %v", d.filePath, err)) 183 // add dummy item back to the queue to trigger file content processing. 184 d.queue.AddRateLimited(workqueueKey) 185 } 186 }() 187 188 encryptionConfiguration, configChanged, err = d.processEncryptionConfig(ctx) 189 if err != nil { 190 return 191 } 192 if !configChanged { 193 return 194 } 195 196 if len(encryptionConfiguration.HealthChecks) != 1 { 197 err = fmt.Errorf("unexpected number of healthz checks: %d. Should have only one", len(encryptionConfiguration.HealthChecks)) 198 return 199 } 200 // get healthz checks for all new KMS plugins. 201 if err = d.validateNewTransformersHealth(ctx, encryptionConfiguration.HealthChecks[0], encryptionConfiguration.KMSCloseGracePeriod); err != nil { 202 return 203 } 204 205 // update transformers. 206 // when reload=true there must always be one healthz check. 207 d.dynamicTransformers.Set( 208 encryptionConfiguration.Transformers, 209 closeTransformers, 210 encryptionConfiguration.HealthChecks[0], 211 encryptionConfiguration.KMSCloseGracePeriod, 212 ) 213 214 // update local copy of recent config content once update is successful. 215 d.lastLoadedEncryptionConfigHash = encryptionConfiguration.EncryptionFileContentHash 216 klog.V(2).InfoS("Loaded new kms encryption config content", "name", d.name) 217 218 updatedEffectiveConfig = true 219 } 220 221 // loadEncryptionConfig processes the next set of content from the file. 222 func (d *DynamicEncryptionConfigContent) processEncryptionConfig(ctx context.Context) ( 223 _ *encryptionconfig.EncryptionConfiguration, 224 configChanged bool, 225 _ error, 226 ) { 227 contentHash, err := d.getEncryptionConfigHash(ctx, d.filePath) 228 if err != nil { 229 return nil, false, err 230 } 231 232 // check if encryptionConfig is different from the current. Do nothing if they are the same. 233 if contentHash == d.lastLoadedEncryptionConfigHash { 234 klog.V(4).InfoS("Encryption config has not changed (before load)", "name", d.name) 235 return nil, false, nil 236 } 237 238 // this code path will only execute if reload=true. So passing true explicitly. 239 encryptionConfiguration, err := d.loadEncryptionConfig(ctx, d.filePath, true, d.apiServerID) 240 if err != nil { 241 return nil, false, err 242 } 243 244 // check if encryptionConfig is different from the current (again to avoid TOCTOU). Do nothing if they are the same. 245 if encryptionConfiguration.EncryptionFileContentHash == d.lastLoadedEncryptionConfigHash { 246 klog.V(4).InfoS("Encryption config has not changed (after load)", "name", d.name) 247 return nil, false, nil 248 } 249 250 return encryptionConfiguration, true, nil 251 } 252 253 // minKMSPluginCloseGracePeriod can be lowered in unit tests to make the health check poll faster 254 var minKMSPluginCloseGracePeriod = 10 * time.Second 255 256 func (d *DynamicEncryptionConfigContent) validateNewTransformersHealth( 257 ctx context.Context, 258 kmsPluginHealthzCheck healthz.HealthChecker, 259 kmsPluginCloseGracePeriod time.Duration, 260 ) error { 261 // test if new transformers are healthy 262 var healthCheckError error 263 264 if kmsPluginCloseGracePeriod < minKMSPluginCloseGracePeriod { 265 kmsPluginCloseGracePeriod = minKMSPluginCloseGracePeriod 266 } 267 268 // really make sure that the immediate check does not hang 269 var cancel context.CancelFunc 270 ctx, cancel = context.WithTimeout(ctx, kmsPluginCloseGracePeriod) 271 defer cancel() 272 273 pollErr := wait.PollImmediateWithContext(ctx, 100*time.Millisecond, kmsPluginCloseGracePeriod, func(ctx context.Context) (bool, error) { 274 // create a fake http get request to health check endpoint 275 req, err := http.NewRequestWithContext(ctx, http.MethodGet, fmt.Sprintf("/healthz/%s", kmsPluginHealthzCheck.Name()), nil) 276 if err != nil { 277 return false, err 278 } 279 280 healthCheckError = kmsPluginHealthzCheck.Check(req) 281 return healthCheckError == nil, nil 282 }) 283 if pollErr != nil { 284 return fmt.Errorf("health check for new transformers failed, polling error %v: %w", pollErr, healthCheckError) 285 } 286 klog.V(2).InfoS("Health check succeeded") 287 return nil 288 }