/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package server

import (
	"context"
	"errors"
	"fmt"
	"net/http"
	gpath "path"
	"strings"
	"sync"
	"time"

	systemd "github.com/coreos/go-systemd/v22/daemon"

	"golang.org/x/time/rate"
	apidiscoveryv2 "k8s.io/api/apidiscovery/v2"
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/runtime/serializer"
	"k8s.io/apimachinery/pkg/util/managedfields"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/apimachinery/pkg/util/wait"
	utilwaitgroup "k8s.io/apimachinery/pkg/util/waitgroup"
	"k8s.io/apiserver/pkg/admission"
	"k8s.io/apiserver/pkg/audit"
	"k8s.io/apiserver/pkg/authorization/authorizer"
	genericapi "k8s.io/apiserver/pkg/endpoints"
	"k8s.io/apiserver/pkg/endpoints/discovery"
	discoveryendpoint "k8s.io/apiserver/pkg/endpoints/discovery/aggregated"
	"k8s.io/apiserver/pkg/features"
	"k8s.io/apiserver/pkg/registry/rest"
	"k8s.io/apiserver/pkg/server/healthz"
	"k8s.io/apiserver/pkg/server/routes"
	"k8s.io/apiserver/pkg/storageversion"
	utilversion "k8s.io/apiserver/pkg/util/version"
	restclient "k8s.io/client-go/rest"
	"k8s.io/component-base/featuregate"
	"k8s.io/klog/v2"
	openapibuilder3 "k8s.io/kube-openapi/pkg/builder3"
	openapicommon "k8s.io/kube-openapi/pkg/common"
	"k8s.io/kube-openapi/pkg/handler"
	"k8s.io/kube-openapi/pkg/handler3"
	openapiutil "k8s.io/kube-openapi/pkg/util"
	"k8s.io/kube-openapi/pkg/validation/spec"
)

// APIGroupInfo holds information about an API group.
type APIGroupInfo struct {
	// PrioritizedVersions lists the group versions of this group; installAPIResources
	// iterates over it in order when installing REST storage.
	PrioritizedVersions []schema.GroupVersion
	// Info about the resources in this group. It's a map from version to resource to the storage.
	VersionedResourcesStorageMap map[string]map[string]rest.Storage
	// OptionsExternalVersion controls the APIVersion used for common objects in the
	// schema like api.Status, api.DeleteOptions, and metav1.ListOptions. Other implementors may
	// define a version "v1beta1" but want to use the Kubernetes "v1" internal objects.
	// If nil, defaults to groupMeta.GroupVersion.
	// TODO: Remove this when https://github.com/kubernetes/kubernetes/issues/19018 is fixed.
	OptionsExternalVersion *schema.GroupVersion
	// MetaGroupVersion defaults to "meta.k8s.io/v1" and is the scheme group version used to decode
	// common API implementations like ListOptions. Future changes will allow this to vary by group
	// version (for when the inevitable meta/v2 group emerges).
	MetaGroupVersion *schema.GroupVersion

	// Scheme includes all of the types used by this group and how to convert between them (or
	// to convert objects from outside of this group that are accepted in this API).
	// TODO: replace with interfaces
	Scheme *runtime.Scheme
	// NegotiatedSerializer controls how this group encodes and decodes data
	NegotiatedSerializer runtime.NegotiatedSerializer
	// ParameterCodec performs conversions for query parameters passed to API calls
	ParameterCodec runtime.ParameterCodec

	// StaticOpenAPISpec is the spec derived from the definitions of all resources installed together.
	// It is set during InstallAPIGroups, InstallAPIGroup, and InstallLegacyAPIGroup.
	StaticOpenAPISpec map[string]*spec.Schema
}

// destroyStorage calls Destroy on every storage registered in
// VersionedResourcesStorageMap, across all versions of the group.
func (a *APIGroupInfo) destroyStorage() {
	for _, stores := range a.VersionedResourcesStorageMap {
		for _, store := range stores {
			store.Destroy()
		}
	}
}

// GenericAPIServer contains state for a Kubernetes cluster api server.
type GenericAPIServer struct {
	// discoveryAddresses is used to build cluster IPs for discovery.
	discoveryAddresses discovery.Addresses

	// LoopbackClientConfig is a config for a privileged loopback connection to the API server
	LoopbackClientConfig *restclient.Config

	// minRequestTimeout is how short the request timeout can be.  This is used to build the RESTHandler
	minRequestTimeout time.Duration

	// ShutdownTimeout is the timeout used for server shutdown. This specifies the timeout before server
	// gracefully shutdown returns.
	ShutdownTimeout time.Duration

	// legacyAPIGroupPrefixes is used to set up URL parsing for authorization and for validating requests
	// to InstallLegacyAPIGroup
	legacyAPIGroupPrefixes sets.String

	// admissionControl is used to build the RESTStorage that backs an API Group.
	admissionControl admission.Interface

	// SecureServingInfo holds configuration of the TLS server.
	SecureServingInfo *SecureServingInfo

	// ExternalAddress is the address (hostname or IP and port) that should be used in
	// external (public internet) URLs for this GenericAPIServer.
	ExternalAddress string

	// Serializer controls how common API objects not in a group/version prefix are serialized for this server.
	// Individual APIGroups may define their own serializers.
	Serializer runtime.NegotiatedSerializer

	// "Outputs"
	// Handler holds the handlers being used by this API server
	Handler *APIServerHandler

	// UnprotectedDebugSocket is used to serve pprof information in a unix-domain socket. This socket is
	// not protected by authentication/authorization.
	UnprotectedDebugSocket *routes.DebugSocket

	// listedPathProvider is a lister which provides the set of paths to show at /
	listedPathProvider routes.ListedPathProvider

	// DiscoveryGroupManager serves /apis in an unaggregated form.
	DiscoveryGroupManager discovery.GroupManager

	// AggregatedDiscoveryGroupManager serves /apis in an aggregated form.
	AggregatedDiscoveryGroupManager discoveryendpoint.ResourceManager

	// AggregatedLegacyDiscoveryGroupManager serves /api in an aggregated form.
	AggregatedLegacyDiscoveryGroupManager discoveryendpoint.ResourceManager

	// Enable swagger and/or OpenAPI if these configs are non-nil.
	openAPIConfig *openapicommon.Config

	// Enable swagger and/or OpenAPI V3 if these configs are non-nil.
	openAPIV3Config *openapicommon.OpenAPIV3Config

	// skipOpenAPIInstallation indicates not to install the OpenAPI handler
	// during PrepareRun.
	// Set this to true when the specific API Server has its own OpenAPI handler
	// (e.g. kube-aggregator)
	skipOpenAPIInstallation bool

	// OpenAPIVersionedService controls the /openapi/v2 endpoint, and can be used to update the served spec.
	// It is set during PrepareRun if `openAPIConfig` is non-nil unless `skipOpenAPIInstallation` is true.
	OpenAPIVersionedService *handler.OpenAPIService

	// OpenAPIV3VersionedService controls the /openapi/v3 endpoint and can be used to update the served spec.
	// It is set during PrepareRun if `openAPIV3Config` is non-nil unless `skipOpenAPIInstallation` is true.
	OpenAPIV3VersionedService *handler3.OpenAPIService

	// StaticOpenAPISpec is the spec derived from the restful container endpoints.
	// It is set during PrepareRun.
	StaticOpenAPISpec *spec.Swagger

	// PostStartHooks are each called after the server has started listening, in a separate go func for each
	// with no guarantee of ordering between them.  The map key is a name used for error reporting.
	// It may kill the process with a panic if it wishes to by returning an error.
	postStartHookLock      sync.Mutex
	postStartHooks         map[string]postStartHookEntry
	postStartHooksCalled   bool
	disabledPostStartHooks sets.String

	preShutdownHookLock    sync.Mutex
	preShutdownHooks       map[string]preShutdownHookEntry
	preShutdownHooksCalled bool

	// healthz checks
	healthzRegistry healthCheckRegistry
	readyzRegistry  healthCheckRegistry
	livezRegistry   healthCheckRegistry

	livezGracePeriod time.Duration

	// auditing. The backend is started before the server starts listening.
	AuditBackend audit.Backend

	// Authorizer determines whether a user is allowed to make a certain request. The Handler does a preliminary
	// authorization check using the request URI but it may be necessary to make additional checks, such as in
	// the create-on-update case
	Authorizer authorizer.Authorizer

	// EquivalentResourceRegistry provides information about resources equivalent to a given resource,
	// and the kind associated with a given resource. As resources are installed, they are registered here.
	EquivalentResourceRegistry runtime.EquivalentResourceRegistry

	// delegationTarget is the next delegate in the chain. This is never nil.
	delegationTarget DelegationTarget

	// NonLongRunningRequestWaitGroup allows you to wait for all chain
	// handlers associated with non long-running requests
	// to complete while the server is shutting down.
	NonLongRunningRequestWaitGroup *utilwaitgroup.SafeWaitGroup
	// WatchRequestWaitGroup allows us to wait for all chain
	// handlers associated with active watch requests to
	// complete while the server is shutting down.
	WatchRequestWaitGroup *utilwaitgroup.RateLimitedSafeWaitGroup

	// ShutdownDelayDuration allows to block shutdown for some time, e.g. until endpoints pointing to this API server
	// have converged on all nodes. During this time, the API server keeps serving, /healthz will return 200,
	// but /readyz will return failure.
	ShutdownDelayDuration time.Duration

	// The limit on the request body size that would be accepted and decoded in a write request.
	// 0 means no limit.
	maxRequestBodyBytes int64

	// APIServerID is the ID of this API server
	APIServerID string

	// StorageReadinessHook implements post-start-hook functionality for checking readiness
	// of underlying storage for registered resources.
	StorageReadinessHook *StorageReadinessHook

	// StorageVersionManager holds the storage versions of the API resources installed by this server.
	StorageVersionManager storageversion.Manager

	// EffectiveVersion determines which apis and features are available
	// based on the api/feature lifecycle.
	EffectiveVersion utilversion.EffectiveVersion
	// FeatureGate is a way to plumb feature gate through if you have them.
	FeatureGate featuregate.FeatureGate

	// lifecycleSignals provides access to the various signals that happen during the life cycle of the apiserver.
	lifecycleSignals lifecycleSignals

	// destroyFns contains a list of functions that should be called on shutdown to clean up resources.
	destroyFns []func()

	// muxAndDiscoveryCompleteSignals holds signals that indicate all known HTTP paths have been registered.
	// it exists primarily to avoid returning a 404 response when a resource actually exists but we haven't installed the path to a handler.
	// it is exposed for easier composition of the individual servers.
	// the primary users of this field are the WithMuxCompleteProtection filter and the NotFoundHandler
	muxAndDiscoveryCompleteSignals map[string]<-chan struct{}

	// ShutdownSendRetryAfter dictates when to initiate shutdown of the HTTP
	// Server during the graceful termination of the apiserver. If true, we wait
	// for non longrunning requests in flight to be drained and then initiate a
	// shutdown of the HTTP Server. If false, we initiate a shutdown of the HTTP
	// Server as soon as ShutdownDelayDuration has elapsed.
	// If enabled, after ShutdownDelayDuration elapses, any incoming request is
	// rejected with a 429 status code and a 'Retry-After' response.
	ShutdownSendRetryAfter bool

	// ShutdownWatchTerminationGracePeriod, if set to a positive value,
	// is the maximum duration the apiserver will wait for all active
	// watch request(s) to drain.
	// Once this grace period elapses, the apiserver will no longer
	// wait for any active watch request(s) in flight to drain, it will
	// proceed to the next step in the graceful server shutdown process.
	// If set to a positive value, the apiserver will keep track of the
	// number of active watch request(s) in flight and during shutdown
	// it will wait, at most, for the specified duration and allow these
	// active watch requests to drain with some rate limiting in effect.
	// The default is zero, which implies the apiserver will not keep
	// track of active watch request(s) in flight and will not wait
	// for them to drain, this maintains backward compatibility.
	// This grace period is orthogonal to other grace periods, and
	// it is not overridden by any other grace period.
	ShutdownWatchTerminationGracePeriod time.Duration
}

// DelegationTarget is an interface which allows for composition of API servers with top level handling that works
// as expected.
type DelegationTarget interface {
	// UnprotectedHandler returns a handler that is NOT protected by a normal chain
	UnprotectedHandler() http.Handler

	// PostStartHooks returns the post-start hooks that need to be combined
	PostStartHooks() map[string]postStartHookEntry

	// PreShutdownHooks returns the pre-stop hooks that need to be combined
	PreShutdownHooks() map[string]preShutdownHookEntry

	// HealthzChecks returns the healthz checks that need to be combined
	HealthzChecks() []healthz.HealthChecker

	// ListedPaths returns the paths for supporting an index
	ListedPaths() []string

	// NextDelegate returns the next delegationTarget in the chain of delegations
	NextDelegate() DelegationTarget

	// PrepareRun does post API installation setup steps. It calls recursively the same function of the delegates.
	PrepareRun() preparedGenericAPIServer

	// MuxAndDiscoveryCompleteSignals exposes registered signals that indicate if all known HTTP paths have been installed.
	MuxAndDiscoveryCompleteSignals() map[string]<-chan struct{}

	// Destroy cleans up its resources on shutdown.
	// Destroy has to be implemented in thread-safe way and be prepared
	// for being called more than once.
	Destroy()
}

// UnprotectedHandler returns the Director of this server's Handler.
func (s *GenericAPIServer) UnprotectedHandler() http.Handler {
	// when we delegate, we need the server we're delegating to choose whether or not to use gorestful
	return s.Handler.Director
}

// PostStartHooks returns the server's post-start hook map itself, not a copy.
func (s *GenericAPIServer) PostStartHooks() map[string]postStartHookEntry {
	return s.postStartHooks
}

// PreShutdownHooks returns the server's pre-shutdown hook map itself, not a copy.
func (s *GenericAPIServer) PreShutdownHooks() map[string]preShutdownHookEntry {
	return s.preShutdownHooks
}

// HealthzChecks returns the checks currently held by the healthz registry.
func (s *GenericAPIServer) HealthzChecks() []healthz.HealthChecker {
	return s.healthzRegistry.checks
}

// ListedPaths returns the paths reported by the server's listedPathProvider.
func (s *GenericAPIServer) ListedPaths() []string {
	return s.listedPathProvider.ListedPaths()
}

// NextDelegate returns the delegation target this server was configured with.
func (s *GenericAPIServer) NextDelegate() DelegationTarget {
	return s.delegationTarget
}

// RegisterMuxAndDiscoveryCompleteSignal registers the given signal that will be used to determine if all known
// HTTP paths have been registered. It is okay to call this method after instantiating the generic server but before running.
// An error is returned when a signal with the same name has already been registered.
func (s *GenericAPIServer) RegisterMuxAndDiscoveryCompleteSignal(signalName string, signal <-chan struct{}) error {
	if _, exists := s.muxAndDiscoveryCompleteSignals[signalName]; exists {
		return fmt.Errorf("%s already registered", signalName)
	}
	s.muxAndDiscoveryCompleteSignals[signalName] = signal
	return nil
}

// MuxAndDiscoveryCompleteSignals returns the map of registered signals itself, not a copy.
func (s *GenericAPIServer) MuxAndDiscoveryCompleteSignals() map[string]<-chan struct{} {
	return s.muxAndDiscoveryCompleteSignals
}

// RegisterDestroyFunc registers a function that will be called during Destroy().
// The function has to be idempotent and prepared to be called more than once.
func (s *GenericAPIServer) RegisterDestroyFunc(destroyFn func()) {
	s.destroyFns = append(s.destroyFns, destroyFn)
}

// Destroy cleans up all its and its delegation target resources on shutdown.
363 // It starts with destroying its own resources and later proceeds with 364 // its delegation target. 365 func (s *GenericAPIServer) Destroy() { 366 for _, destroyFn := range s.destroyFns { 367 destroyFn() 368 } 369 if s.delegationTarget != nil { 370 s.delegationTarget.Destroy() 371 } 372 } 373 374 type emptyDelegate struct { 375 // handler is called at the end of the delegation chain 376 // when a request has been made against an unregistered HTTP path the individual servers will simply pass it through until it reaches the handler. 377 handler http.Handler 378 } 379 380 func NewEmptyDelegate() DelegationTarget { 381 return emptyDelegate{} 382 } 383 384 // NewEmptyDelegateWithCustomHandler allows for registering a custom handler usually for special handling of 404 requests 385 func NewEmptyDelegateWithCustomHandler(handler http.Handler) DelegationTarget { 386 return emptyDelegate{handler} 387 } 388 389 func (s emptyDelegate) UnprotectedHandler() http.Handler { 390 return s.handler 391 } 392 func (s emptyDelegate) PostStartHooks() map[string]postStartHookEntry { 393 return map[string]postStartHookEntry{} 394 } 395 func (s emptyDelegate) PreShutdownHooks() map[string]preShutdownHookEntry { 396 return map[string]preShutdownHookEntry{} 397 } 398 func (s emptyDelegate) HealthzChecks() []healthz.HealthChecker { 399 return []healthz.HealthChecker{} 400 } 401 func (s emptyDelegate) ListedPaths() []string { 402 return []string{} 403 } 404 func (s emptyDelegate) NextDelegate() DelegationTarget { 405 return nil 406 } 407 func (s emptyDelegate) PrepareRun() preparedGenericAPIServer { 408 return preparedGenericAPIServer{nil} 409 } 410 func (s emptyDelegate) MuxAndDiscoveryCompleteSignals() map[string]<-chan struct{} { 411 return map[string]<-chan struct{}{} 412 } 413 func (s emptyDelegate) Destroy() { 414 } 415 416 // preparedGenericAPIServer is a private wrapper that enforces a call of PrepareRun() before Run can be invoked. 
417 type preparedGenericAPIServer struct { 418 *GenericAPIServer 419 } 420 421 // PrepareRun does post API installation setup steps. It calls recursively the same function of the delegates. 422 func (s *GenericAPIServer) PrepareRun() preparedGenericAPIServer { 423 s.delegationTarget.PrepareRun() 424 425 if s.openAPIConfig != nil && !s.skipOpenAPIInstallation { 426 s.OpenAPIVersionedService, s.StaticOpenAPISpec = routes.OpenAPI{ 427 Config: s.openAPIConfig, 428 }.InstallV2(s.Handler.GoRestfulContainer, s.Handler.NonGoRestfulMux) 429 } 430 431 if s.openAPIV3Config != nil && !s.skipOpenAPIInstallation { 432 s.OpenAPIV3VersionedService = routes.OpenAPI{ 433 V3Config: s.openAPIV3Config, 434 }.InstallV3(s.Handler.GoRestfulContainer, s.Handler.NonGoRestfulMux) 435 } 436 437 s.installHealthz() 438 s.installLivez() 439 440 // as soon as shutdown is initiated, readiness should start failing 441 readinessStopCh := s.lifecycleSignals.ShutdownInitiated.Signaled() 442 err := s.addReadyzShutdownCheck(readinessStopCh) 443 if err != nil { 444 klog.Errorf("Failed to install readyz shutdown check %s", err) 445 } 446 s.installReadyz() 447 448 return preparedGenericAPIServer{s} 449 } 450 451 // Run spawns the secure http server. It only returns if stopCh is closed 452 // or the secure port cannot be listened on initially. 453 // 454 // Deprecated: use RunWithContext instead. Run will not get removed to avoid 455 // breaking consumers, but should not be used in new code. 456 func (s preparedGenericAPIServer) Run(stopCh <-chan struct{}) error { 457 ctx := wait.ContextForChannel(stopCh) 458 return s.RunWithContext(ctx) 459 } 460 461 // RunWithContext spawns the secure http server. It only returns if ctx is canceled 462 // or the secure port cannot be listened on initially. 
// This is the diagram of what contexts/channels/signals are dependent on each other:
//
// |                                   ctx
// |                                    |
// |           ---------------------------------------------------------
// |           |                                                       |
// |    ShutdownInitiated (shutdownInitiatedCh)                        |
// |           |                                                       |
// | (ShutdownDelayDuration)                                    (PreShutdownHooks)
// |           |                                                       |
// |  AfterShutdownDelayDuration (delayedStopCh)   PreShutdownHooksStopped (preShutdownHooksHasStoppedCh)
// |           |                                                       |
// |           |-------------------------------------------------------|
// |                                    |
// |                                    |
// |               NotAcceptingNewRequest (notAcceptingNewRequestCh)
// |                                    |
// |                                    |
// |           |----------------------------------------------------------------------------------|
// |           |                        |              |                                          |
// |        [without                 [with             |                                          |
// | ShutdownSendRetryAfter]  ShutdownSendRetryAfter]  |                                          |
// |           |                        |              |                                          |
// |           |                        ---------------|                                          |
// |           |                                       |                                          |
// |           |                      |----------------|-----------------------|                  |
// |           |                      |                                        |                  |
// |           |         (NonLongRunningRequestWaitGroup::Wait)   (WatchRequestWaitGroup::Wait)   |
// |           |                      |                                        |                  |
// |           |                      |------------------|---------------------|                  |
// |           |                                         |                                        |
// |           |                         InFlightRequestsDrained (drainedCh)                      |
// |           |                                         |                                        |
// |           |-------------------|---------------------|----------------------------------------|
// |                               |                     |
// |                       stopHttpServerCtx   (AuditBackend::Shutdown())
// |                               |
// |                       listenerStoppedCh
// |                               |
// |      HTTPServerStoppedListening (httpServerStoppedListeningCh)
func (s preparedGenericAPIServer) RunWithContext(ctx context.Context) error {
	stopCh := ctx.Done()
	delayedStopCh := s.lifecycleSignals.AfterShutdownDelayDuration
	shutdownInitiatedCh := s.lifecycleSignals.ShutdownInitiated

	// Clean up resources on shutdown.
	defer s.Destroy()

	// If UDS profiling is enabled, start a local http server listening on that socket
	if s.UnprotectedDebugSocket != nil {
		go func() {
			defer utilruntime.HandleCrash()
			klog.Error(s.UnprotectedDebugSocket.Run(stopCh))
		}()
	}

	// spawn a new goroutine for closing the MuxAndDiscoveryComplete signal
	// registration happens during construction of the generic api server
	// the last server in the chain aggregates signals from the previous instances
	go func() {
		// Wait for every registered signal in turn; give up without signaling
		// MuxAndDiscoveryComplete if a stop is requested first.
		for _, muxAndDiscoveryCompletedSignal := range s.GenericAPIServer.MuxAndDiscoveryCompleteSignals() {
			select {
			case <-muxAndDiscoveryCompletedSignal:
				continue
			case <-stopCh:
				klog.V(1).Infof("haven't completed %s, stop requested", s.lifecycleSignals.MuxAndDiscoveryComplete.Name())
				return
			}
		}
		s.lifecycleSignals.MuxAndDiscoveryComplete.Signal()
		klog.V(1).Infof("%s has all endpoints registered and discovery information is complete", s.lifecycleSignals.MuxAndDiscoveryComplete.Name())
	}()

	// This goroutine signals ShutdownInitiated as soon as stopCh closes and
	// AfterShutdownDelayDuration once ShutdownDelayDuration has elapsed after that.
	go func() {
		defer delayedStopCh.Signal()
		defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", delayedStopCh.Name())

		<-stopCh

		// As soon as shutdown is initiated, /readyz should start returning failure.
		// This gives the load balancer a window defined by ShutdownDelayDuration to detect that /readyz is red
		// and stop sending traffic to this server.
		shutdownInitiatedCh.Signal()
		klog.V(1).InfoS("[graceful-termination] shutdown event", "name", shutdownInitiatedCh.Name())

		time.Sleep(s.ShutdownDelayDuration)
	}()

	// close socket after delayed stopCh
	shutdownTimeout := s.ShutdownTimeout
	if s.ShutdownSendRetryAfter {
		// when this mode is enabled, we do the following:
		// - the server will continue to listen until all existing requests in flight
		//   (not including active long running requests) have been drained.
		// - once drained, http Server Shutdown is invoked with a timeout of 2s,
		//   net/http waits for 1s for the peer to respond to a GO_AWAY frame, so
		//   we should wait for a minimum of 2s
		shutdownTimeout = 2 * time.Second
		klog.V(1).InfoS("[graceful-termination] using HTTP Server shutdown timeout", "shutdownTimeout", shutdownTimeout)
	}

	notAcceptingNewRequestCh := s.lifecycleSignals.NotAcceptingNewRequest
	drainedCh := s.lifecycleSignals.InFlightRequestsDrained
	// Canceling the parent context does not immediately cancel the HTTP server.
	// We only inherit context values here and deal with cancellation ourselves.
	stopHTTPServerCtx, stopHTTPServer := context.WithCancelCause(context.WithoutCancel(ctx))
	go func() {
		defer stopHTTPServer(errors.New("time to stop HTTP server"))

		// Without ShutdownSendRetryAfter the HTTP server is stopped as soon as new
		// requests are no longer accepted; with it, only after in-flight requests drained.
		timeToStopHttpServerCh := notAcceptingNewRequestCh.Signaled()
		if s.ShutdownSendRetryAfter {
			timeToStopHttpServerCh = drainedCh.Signaled()
		}

		<-timeToStopHttpServerCh
	}()

	// Start the audit backend before any request comes in. This means we must call Backend.Run
	// before http server start serving. Otherwise the Backend.ProcessEvents call might block.
	// AuditBackend.Run will stop as soon as all in-flight requests are drained.
	if s.AuditBackend != nil {
		if err := s.AuditBackend.Run(drainedCh.Signaled()); err != nil {
			return fmt.Errorf("failed to run the audit backend: %v", err)
		}
	}

	stoppedCh, listenerStoppedCh, err := s.NonBlockingRunWithContext(stopHTTPServerCtx, shutdownTimeout)
	if err != nil {
		return err
	}

	httpServerStoppedListeningCh := s.lifecycleSignals.HTTPServerStoppedListening
	go func() {
		<-listenerStoppedCh
		httpServerStoppedListeningCh.Signal()
		klog.V(1).InfoS("[graceful-termination] shutdown event", "name", httpServerStoppedListeningCh.Name())
	}()

	// we don't accept new request as soon as both ShutdownDelayDuration has
	// elapsed and preshutdown hooks have completed.
	preShutdownHooksHasStoppedCh := s.lifecycleSignals.PreShutdownHooksStopped
	go func() {
		defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", notAcceptingNewRequestCh.Name())
		defer notAcceptingNewRequestCh.Signal()

		// wait for the delayed stopCh before closing the handler chain
		<-delayedStopCh.Signaled()

		// Additionally wait for preshutdown hooks to also be finished, as some of them need
		// to send API calls to clean up after themselves (e.g. lease reconcilers removing
		// itself from the active servers).
		<-preShutdownHooksHasStoppedCh.Signaled()
	}()

	// wait for all in-flight non-long running requests to finish
	nonLongRunningRequestDrainedCh := make(chan struct{})
	go func() {
		defer close(nonLongRunningRequestDrainedCh)
		defer klog.V(1).Info("[graceful-termination] in-flight non long-running request(s) have drained")

		// wait for the delayed stopCh before closing the handler chain (it rejects everything after Wait has been called).
		<-notAcceptingNewRequestCh.Signaled()

		// Wait for all requests to finish, which are bounded by the RequestTimeout variable.
		// once NonLongRunningRequestWaitGroup.Wait is invoked, the apiserver is
		// expected to reject any incoming request with a {503, Retry-After}
		// response via the WithWaitGroup filter. On the contrary, we observe
		// that incoming request(s) get a 'connection refused' error, this is
		// because, at this point, we have called 'Server.Shutdown' and
		// net/http server has stopped listening. This causes incoming
		// request to get a 'connection refused' error.
		// On the other hand, if 'ShutdownSendRetryAfter' is enabled incoming
		// requests will be rejected with a {429, Retry-After} since
		// 'Server.Shutdown' will be invoked only after in-flight requests
		// have been drained.
		// TODO: can we consolidate these two modes of graceful termination?
		s.NonLongRunningRequestWaitGroup.Wait()
	}()

	// wait for all in-flight watches to finish
	activeWatchesDrainedCh := make(chan struct{})
	go func() {
		defer close(activeWatchesDrainedCh)

		<-notAcceptingNewRequestCh.Signaled()
		// A non-positive grace period means watches are not waited for at all.
		if s.ShutdownWatchTerminationGracePeriod <= time.Duration(0) {
			klog.V(1).InfoS("[graceful-termination] not going to wait for active watch request(s) to drain")
			return
		}

		// Wait for all active watches to finish
		grace := s.ShutdownWatchTerminationGracePeriod
		activeBefore, activeAfter, err := s.WatchRequestWaitGroup.Wait(func(count int) (utilwaitgroup.RateLimiter, context.Context, context.CancelFunc) {
			qps := float64(count) / grace.Seconds()
			// TODO: we don't want the QPS (max requests drained per second) to
			//  get below a certain floor value, since we want the server to
			//  drain the active watch requests as soon as possible.
			//  For now, it's hard coded to 200, and it is subject to change
			//  based on the result from the scale testing.
			if qps < 200 {
				qps = 200
			}

			ctx, cancel := context.WithTimeout(context.Background(), grace)
			// We don't expect more than one token to be consumed
			// in a single Wait call, so setting burst to 1.
			return rate.NewLimiter(rate.Limit(qps), 1), ctx, cancel
		})
		klog.V(1).InfoS("[graceful-termination] active watch request(s) have drained",
			"duration", grace, "activeWatchesBefore", activeBefore, "activeWatchesAfter", activeAfter, "error", err)
	}()

	// InFlightRequestsDrained is signaled once both the non long-running
	// requests and the watch requests goroutines above have finished.
	go func() {
		defer klog.V(1).InfoS("[graceful-termination] shutdown event", "name", drainedCh.Name())
		defer drainedCh.Signal()

		<-nonLongRunningRequestDrainedCh
		<-activeWatchesDrainedCh
	}()

	klog.V(1).Info("[graceful-termination] waiting for shutdown to be initiated")
	<-stopCh

	// run shutdown hooks directly. This includes deregistering from
	// the kubernetes endpoint in case of kube-apiserver.
	func() {
		// The deferred signal fires whether or not the hooks returned an error.
		defer func() {
			preShutdownHooksHasStoppedCh.Signal()
			klog.V(1).InfoS("[graceful-termination] pre-shutdown hooks completed", "name", preShutdownHooksHasStoppedCh.Name())
		}()
		err = s.RunPreShutdownHooks()
	}()
	// NOTE(review): on a hook error we return here without waiting for
	// in-flight requests to drain or for the HTTP server to stop.
	if err != nil {
		return err
	}

	// Wait for all requests in flight to drain, bounded by the RequestTimeout variable.
	<-drainedCh.Signaled()

	if s.AuditBackend != nil {
		s.AuditBackend.Shutdown()
		klog.V(1).InfoS("[graceful-termination] audit backend shutdown completed")
	}

	// wait for stoppedCh that is closed when the graceful termination (server.Shutdown) is finished.
	<-listenerStoppedCh
	<-stoppedCh

	klog.V(1).Info("[graceful-termination] apiserver is exiting")
	return nil
}

// NonBlockingRun spawns the secure http server. An error is
// returned if the secure port cannot be listened on.
// The returned channel is closed when the (asynchronous) termination is finished.
//
// Deprecated: use NonBlockingRunWithContext instead.
Run will not get removed to avoid 720 // breaking consumers, but should not be used in new code. 721 func (s preparedGenericAPIServer) NonBlockingRun(stopCh <-chan struct{}, shutdownTimeout time.Duration) (<-chan struct{}, <-chan struct{}, error) { 722 ctx := wait.ContextForChannel(stopCh) 723 return s.NonBlockingRunWithContext(ctx, shutdownTimeout) 724 } 725 726 // NonBlockingRunWithContext spawns the secure http server. An error is 727 // returned if the secure port cannot be listened on. 728 // The returned channel is closed when the (asynchronous) termination is finished. 729 func (s preparedGenericAPIServer) NonBlockingRunWithContext(ctx context.Context, shutdownTimeout time.Duration) (<-chan struct{}, <-chan struct{}, error) { 730 // Use an internal stop channel to allow cleanup of the listeners on error. 731 internalStopCh := make(chan struct{}) 732 var stoppedCh <-chan struct{} 733 var listenerStoppedCh <-chan struct{} 734 if s.SecureServingInfo != nil && s.Handler != nil { 735 var err error 736 stoppedCh, listenerStoppedCh, err = s.SecureServingInfo.Serve(s.Handler, shutdownTimeout, internalStopCh) 737 if err != nil { 738 close(internalStopCh) 739 return nil, nil, err 740 } 741 } 742 743 // Now that listener have bound successfully, it is the 744 // responsibility of the caller to close the provided channel to 745 // ensure cleanup. 
746 go func() { 747 <-ctx.Done() 748 close(internalStopCh) 749 }() 750 751 s.RunPostStartHooks(ctx) 752 753 if _, err := systemd.SdNotify(true, "READY=1\n"); err != nil { 754 klog.Errorf("Unable to send systemd daemon successful start message: %v\n", err) 755 } 756 757 return stoppedCh, listenerStoppedCh, nil 758 } 759 760 // installAPIResources is a private method for installing the REST storage backing each api groupversionresource 761 func (s *GenericAPIServer) installAPIResources(apiPrefix string, apiGroupInfo *APIGroupInfo, typeConverter managedfields.TypeConverter) error { 762 var resourceInfos []*storageversion.ResourceInfo 763 for _, groupVersion := range apiGroupInfo.PrioritizedVersions { 764 if len(apiGroupInfo.VersionedResourcesStorageMap[groupVersion.Version]) == 0 { 765 klog.Warningf("Skipping API %v because it has no resources.", groupVersion) 766 continue 767 } 768 769 apiGroupVersion, err := s.getAPIGroupVersion(apiGroupInfo, groupVersion, apiPrefix) 770 if err != nil { 771 return err 772 } 773 if apiGroupInfo.OptionsExternalVersion != nil { 774 apiGroupVersion.OptionsExternalVersion = apiGroupInfo.OptionsExternalVersion 775 } 776 apiGroupVersion.TypeConverter = typeConverter 777 apiGroupVersion.MaxRequestBodyBytes = s.maxRequestBodyBytes 778 779 discoveryAPIResources, r, err := apiGroupVersion.InstallREST(s.Handler.GoRestfulContainer) 780 781 if err != nil { 782 return fmt.Errorf("unable to setup API %v: %v", apiGroupInfo, err) 783 } 784 resourceInfos = append(resourceInfos, r...) 
785 786 if s.FeatureGate.Enabled(features.AggregatedDiscoveryEndpoint) { 787 // Aggregated discovery only aggregates resources under /apis 788 if apiPrefix == APIGroupPrefix { 789 s.AggregatedDiscoveryGroupManager.AddGroupVersion( 790 groupVersion.Group, 791 apidiscoveryv2.APIVersionDiscovery{ 792 Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent, 793 Version: groupVersion.Version, 794 Resources: discoveryAPIResources, 795 }, 796 ) 797 } else { 798 // There is only one group version for legacy resources, priority can be defaulted to 0. 799 s.AggregatedLegacyDiscoveryGroupManager.AddGroupVersion( 800 groupVersion.Group, 801 apidiscoveryv2.APIVersionDiscovery{ 802 Freshness: apidiscoveryv2.DiscoveryFreshnessCurrent, 803 Version: groupVersion.Version, 804 Resources: discoveryAPIResources, 805 }, 806 ) 807 } 808 } 809 810 } 811 812 s.RegisterDestroyFunc(apiGroupInfo.destroyStorage) 813 814 if s.FeatureGate.Enabled(features.StorageVersionAPI) && 815 s.FeatureGate.Enabled(features.APIServerIdentity) { 816 // API installation happens before we start listening on the handlers, 817 // therefore it is safe to register ResourceInfos here. The handler will block 818 // write requests until the storage versions of the targeting resources are updated. 819 s.StorageVersionManager.AddResourceInfo(resourceInfos...) 820 } 821 822 return nil 823 } 824 825 // InstallLegacyAPIGroup exposes the given legacy api group in the API. 826 // The <apiGroupInfo> passed into this function shouldn't be used elsewhere as the 827 // underlying storage will be destroyed on this servers shutdown. 
828 func (s *GenericAPIServer) InstallLegacyAPIGroup(apiPrefix string, apiGroupInfo *APIGroupInfo) error { 829 if !s.legacyAPIGroupPrefixes.Has(apiPrefix) { 830 return fmt.Errorf("%q is not in the allowed legacy API prefixes: %v", apiPrefix, s.legacyAPIGroupPrefixes.List()) 831 } 832 833 openAPIModels, err := s.getOpenAPIModels(apiPrefix, apiGroupInfo) 834 if err != nil { 835 return fmt.Errorf("unable to get openapi models: %v", err) 836 } 837 838 if err := s.installAPIResources(apiPrefix, apiGroupInfo, openAPIModels); err != nil { 839 return err 840 } 841 842 // Install the version handler. 843 // Add a handler at /<apiPrefix> to enumerate the supported api versions. 844 legacyRootAPIHandler := discovery.NewLegacyRootAPIHandler(s.discoveryAddresses, s.Serializer, apiPrefix) 845 if s.FeatureGate.Enabled(features.AggregatedDiscoveryEndpoint) { 846 wrapped := discoveryendpoint.WrapAggregatedDiscoveryToHandler(legacyRootAPIHandler, s.AggregatedLegacyDiscoveryGroupManager) 847 s.Handler.GoRestfulContainer.Add(wrapped.GenerateWebService("/api", metav1.APIVersions{})) 848 } else { 849 s.Handler.GoRestfulContainer.Add(legacyRootAPIHandler.WebService()) 850 } 851 s.registerStorageReadinessCheck("", apiGroupInfo) 852 853 return nil 854 } 855 856 // InstallAPIGroups exposes given api groups in the API. 857 // The <apiGroupInfos> passed into this function shouldn't be used elsewhere as the 858 // underlying storage will be destroyed on this servers shutdown. 859 func (s *GenericAPIServer) InstallAPIGroups(apiGroupInfos ...*APIGroupInfo) error { 860 for _, apiGroupInfo := range apiGroupInfos { 861 if len(apiGroupInfo.PrioritizedVersions) == 0 { 862 return fmt.Errorf("no version priority set for %#v", *apiGroupInfo) 863 } 864 // Do not register empty group or empty version. Doing so claims /apis/ for the wrong entity to be returned. 
865 // Catching these here places the error much closer to its origin 866 if len(apiGroupInfo.PrioritizedVersions[0].Group) == 0 { 867 return fmt.Errorf("cannot register handler with an empty group for %#v", *apiGroupInfo) 868 } 869 if len(apiGroupInfo.PrioritizedVersions[0].Version) == 0 { 870 return fmt.Errorf("cannot register handler with an empty version for %#v", *apiGroupInfo) 871 } 872 } 873 874 openAPIModels, err := s.getOpenAPIModels(APIGroupPrefix, apiGroupInfos...) 875 if err != nil { 876 return fmt.Errorf("unable to get openapi models: %v", err) 877 } 878 879 for _, apiGroupInfo := range apiGroupInfos { 880 if err := s.installAPIResources(APIGroupPrefix, apiGroupInfo, openAPIModels); err != nil { 881 return fmt.Errorf("unable to install api resources: %v", err) 882 } 883 884 // setup discovery 885 // Install the version handler. 886 // Add a handler at /apis/<groupName> to enumerate all versions supported by this group. 887 apiVersionsForDiscovery := []metav1.GroupVersionForDiscovery{} 888 for _, groupVersion := range apiGroupInfo.PrioritizedVersions { 889 // Check the config to make sure that we elide versions that don't have any resources 890 if len(apiGroupInfo.VersionedResourcesStorageMap[groupVersion.Version]) == 0 { 891 continue 892 } 893 apiVersionsForDiscovery = append(apiVersionsForDiscovery, metav1.GroupVersionForDiscovery{ 894 GroupVersion: groupVersion.String(), 895 Version: groupVersion.Version, 896 }) 897 } 898 preferredVersionForDiscovery := metav1.GroupVersionForDiscovery{ 899 GroupVersion: apiGroupInfo.PrioritizedVersions[0].String(), 900 Version: apiGroupInfo.PrioritizedVersions[0].Version, 901 } 902 apiGroup := metav1.APIGroup{ 903 Name: apiGroupInfo.PrioritizedVersions[0].Group, 904 Versions: apiVersionsForDiscovery, 905 PreferredVersion: preferredVersionForDiscovery, 906 } 907 908 s.DiscoveryGroupManager.AddGroup(apiGroup) 909 s.Handler.GoRestfulContainer.Add(discovery.NewAPIGroupHandler(s.Serializer, apiGroup).WebService()) 910 
s.registerStorageReadinessCheck(apiGroupInfo.PrioritizedVersions[0].Group, apiGroupInfo) 911 } 912 return nil 913 } 914 915 // registerStorageReadinessCheck registers the readiness checks for all underlying storages 916 // for a given APIGroup. 917 func (s *GenericAPIServer) registerStorageReadinessCheck(groupName string, apiGroupInfo *APIGroupInfo) { 918 for version, storageMap := range apiGroupInfo.VersionedResourcesStorageMap { 919 for resource, storage := range storageMap { 920 if withReadiness, ok := storage.(rest.StorageWithReadiness); ok { 921 gvr := metav1.GroupVersionResource{ 922 Group: groupName, 923 Version: version, 924 Resource: resource, 925 } 926 s.StorageReadinessHook.RegisterStorage(gvr, withReadiness) 927 } 928 } 929 } 930 } 931 932 // InstallAPIGroup exposes the given api group in the API. 933 // The <apiGroupInfo> passed into this function shouldn't be used elsewhere as the 934 // underlying storage will be destroyed on this servers shutdown. 935 func (s *GenericAPIServer) InstallAPIGroup(apiGroupInfo *APIGroupInfo) error { 936 return s.InstallAPIGroups(apiGroupInfo) 937 } 938 939 func (s *GenericAPIServer) getAPIGroupVersion(apiGroupInfo *APIGroupInfo, groupVersion schema.GroupVersion, apiPrefix string) (*genericapi.APIGroupVersion, error) { 940 storage := make(map[string]rest.Storage) 941 for k, v := range apiGroupInfo.VersionedResourcesStorageMap[groupVersion.Version] { 942 if strings.ToLower(k) != k { 943 return nil, fmt.Errorf("resource names must be lowercase only, not %q", k) 944 } 945 storage[k] = v 946 } 947 version := s.newAPIGroupVersion(apiGroupInfo, groupVersion) 948 version.Root = apiPrefix 949 version.Storage = storage 950 return version, nil 951 } 952 953 func (s *GenericAPIServer) newAPIGroupVersion(apiGroupInfo *APIGroupInfo, groupVersion schema.GroupVersion) *genericapi.APIGroupVersion { 954 955 allServedVersionsByResource := map[string][]string{} 956 for version, resourcesInVersion := range 
apiGroupInfo.VersionedResourcesStorageMap { 957 for resource := range resourcesInVersion { 958 if len(groupVersion.Group) == 0 { 959 allServedVersionsByResource[resource] = append(allServedVersionsByResource[resource], version) 960 } else { 961 allServedVersionsByResource[resource] = append(allServedVersionsByResource[resource], fmt.Sprintf("%s/%s", groupVersion.Group, version)) 962 } 963 } 964 } 965 966 return &genericapi.APIGroupVersion{ 967 GroupVersion: groupVersion, 968 AllServedVersionsByResource: allServedVersionsByResource, 969 MetaGroupVersion: apiGroupInfo.MetaGroupVersion, 970 971 ParameterCodec: apiGroupInfo.ParameterCodec, 972 Serializer: apiGroupInfo.NegotiatedSerializer, 973 Creater: apiGroupInfo.Scheme, 974 Convertor: apiGroupInfo.Scheme, 975 ConvertabilityChecker: apiGroupInfo.Scheme, 976 UnsafeConvertor: runtime.UnsafeObjectConvertor(apiGroupInfo.Scheme), 977 Defaulter: apiGroupInfo.Scheme, 978 Typer: apiGroupInfo.Scheme, 979 Namer: runtime.Namer(meta.NewAccessor()), 980 981 EquivalentResourceRegistry: s.EquivalentResourceRegistry, 982 983 Admit: s.admissionControl, 984 MinRequestTimeout: s.minRequestTimeout, 985 Authorizer: s.Authorizer, 986 } 987 } 988 989 // NewDefaultAPIGroupInfo returns an APIGroupInfo stubbed with "normal" values 990 // exposed for easier composition from other packages 991 func NewDefaultAPIGroupInfo(group string, scheme *runtime.Scheme, parameterCodec runtime.ParameterCodec, codecs serializer.CodecFactory) APIGroupInfo { 992 return APIGroupInfo{ 993 PrioritizedVersions: scheme.PrioritizedVersionsForGroup(group), 994 VersionedResourcesStorageMap: map[string]map[string]rest.Storage{}, 995 // TODO unhardcode this. 
It was hardcoded before, but we need to re-evaluate 996 OptionsExternalVersion: &schema.GroupVersion{Version: "v1"}, 997 Scheme: scheme, 998 ParameterCodec: parameterCodec, 999 NegotiatedSerializer: codecs, 1000 } 1001 } 1002 1003 // getOpenAPIModels is a private method for getting the OpenAPI models 1004 func (s *GenericAPIServer) getOpenAPIModels(apiPrefix string, apiGroupInfos ...*APIGroupInfo) (managedfields.TypeConverter, error) { 1005 if s.openAPIV3Config == nil { 1006 // SSA is GA and requires OpenAPI config to be set 1007 // to create models. 1008 return nil, errors.New("OpenAPIV3 config must not be nil") 1009 } 1010 pathsToIgnore := openapiutil.NewTrie(s.openAPIV3Config.IgnorePrefixes) 1011 resourceNames := make([]string, 0) 1012 for _, apiGroupInfo := range apiGroupInfos { 1013 groupResources, err := getResourceNamesForGroup(apiPrefix, apiGroupInfo, pathsToIgnore) 1014 if err != nil { 1015 return nil, err 1016 } 1017 resourceNames = append(resourceNames, groupResources...) 1018 } 1019 1020 // Build the openapi definitions for those resources and convert it to proto models 1021 openAPISpec, err := openapibuilder3.BuildOpenAPIDefinitionsForResources(s.openAPIV3Config, resourceNames...) 
1022 if err != nil { 1023 return nil, err 1024 } 1025 for _, apiGroupInfo := range apiGroupInfos { 1026 apiGroupInfo.StaticOpenAPISpec = openAPISpec 1027 } 1028 1029 typeConverter, err := managedfields.NewTypeConverter(openAPISpec, false) 1030 if err != nil { 1031 return nil, err 1032 } 1033 1034 return typeConverter, nil 1035 } 1036 1037 // getResourceNamesForGroup is a private method for getting the canonical names for each resource to build in an api group 1038 func getResourceNamesForGroup(apiPrefix string, apiGroupInfo *APIGroupInfo, pathsToIgnore openapiutil.Trie) ([]string, error) { 1039 // Get the canonical names of every resource we need to build in this api group 1040 resourceNames := make([]string, 0) 1041 for _, groupVersion := range apiGroupInfo.PrioritizedVersions { 1042 for resource, storage := range apiGroupInfo.VersionedResourcesStorageMap[groupVersion.Version] { 1043 path := gpath.Join(apiPrefix, groupVersion.Group, groupVersion.Version, resource) 1044 if !pathsToIgnore.HasPrefix(path) { 1045 kind, err := genericapi.GetResourceKind(groupVersion, storage, apiGroupInfo.Scheme) 1046 if err != nil { 1047 return nil, err 1048 } 1049 sampleObject, err := apiGroupInfo.Scheme.New(kind) 1050 if err != nil { 1051 return nil, err 1052 } 1053 name := openapiutil.GetCanonicalTypeName(sampleObject) 1054 resourceNames = append(resourceNames, name) 1055 } 1056 } 1057 } 1058 1059 return resourceNames, nil 1060 }