k8s.io/kubernetes@v1.29.3/pkg/registry/core/service/storage/alloc.go

/*
Copyright 2014 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package storage

import (
	"fmt"
	"net"

	"k8s.io/apimachinery/pkg/api/errors"
	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
	"k8s.io/apimachinery/pkg/util/validation/field"
	utilfeature "k8s.io/apiserver/pkg/util/feature"
	"k8s.io/klog/v2"
	apiservice "k8s.io/kubernetes/pkg/api/service"
	api "k8s.io/kubernetes/pkg/apis/core"
	"k8s.io/kubernetes/pkg/apis/core/validation"
	"k8s.io/kubernetes/pkg/features"
	"k8s.io/kubernetes/pkg/registry/core/service/ipallocator"
	"k8s.io/kubernetes/pkg/registry/core/service/portallocator"
	netutils "k8s.io/utils/net"
)

// Allocators encapsulates the various allocators (IPs, ports) used in
// Services.
type Allocators struct {
	serviceIPAllocatorsByFamily map[api.IPFamily]ipallocator.Interface
	defaultServiceIPFamily      api.IPFamily // --service-cluster-ip-range[0]
	serviceNodePorts            portallocator.Interface
}

// ServiceNodePort includes the protocol and port number of a service NodePort.
type ServiceNodePort struct {
	// The IP protocol for this port. Supports "TCP" and "UDP".
	Protocol api.Protocol

	// The port on each node on which this service is exposed.
	// Default is to auto-allocate a port if the ServiceType of this Service requires one.
	NodePort int32
}

// makeAlloc is a transitional function to facilitate service REST flattening.
func makeAlloc(defaultFamily api.IPFamily, ipAllocs map[api.IPFamily]ipallocator.Interface, portAlloc portallocator.Interface) Allocators {
	return Allocators{
		defaultServiceIPFamily:      defaultFamily,
		serviceIPAllocatorsByFamily: ipAllocs,
		serviceNodePorts:            portAlloc,
	}
}

func (al *Allocators) allocateCreate(service *api.Service, dryRun bool) (transaction, error) {
	result := metaTransaction{}
	success := false

	defer func() {
		if !success {
			result.Revert()
		}
	}()

	// Ensure IP family fields are correctly initialized. We do it here, since
	// we want this to be visible even when dryRun == true.
	if err := al.initIPFamilyFields(After{service}, Before{nil}); err != nil {
		return nil, err
	}

	// Allocate ClusterIPs
	//TODO(thockin): validation should not pass with empty clusterIP, but it
	//does (and is tested!). Fixing that all is a big PR and will have to
	//happen later.
	if txn, err := al.txnAllocClusterIPs(service, dryRun); err != nil {
		return nil, err
	} else {
		result = append(result, txn)
	}

	// Allocate ports
	if txn, err := al.txnAllocNodePorts(service, dryRun); err != nil {
		return nil, err
	} else {
		result = append(result, txn)
	}

	success = true
	return result, nil
}
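
// Illustrative sketch (not part of the original file): the commit/revert
// contract that callers of allocateCreate rely on. The doCreate and persist
// names below are hypothetical.
//
//	func doCreate(al *Allocators, svc *api.Service, dryRun bool) error {
//		txn, err := al.allocateCreate(svc, dryRun)
//		if err != nil {
//			return err // any partial allocations were already reverted
//		}
//		if err := persist(svc); err != nil { // hypothetical storage write
//			txn.Revert() // release the IPs/ports allocated above
//			return err
//		}
//		txn.Commit()
//		return nil
//	}
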
// initIPFamilyFields attempts to default the service's IP families according
// to the cluster configuration, while ensuring that any provided families are
// configured on the cluster.
func (al *Allocators) initIPFamilyFields(after After, before Before) error {
	oldService, service := before.Service, after.Service

	// There is nothing to do for ExternalName services.
	if service.Spec.Type == api.ServiceTypeExternalName {
		return nil
	}

	// We don't want to auto-upgrade (add an IP) or downgrade (remove an IP)
	// PreferDualStack services following a cluster change to/from
	// dual-stackness.
	//
	// That means a PreferDualStack service will only be upgraded/downgraded
	// when:
	// - changing ipFamilyPolicy to "RequireDualStack" or "SingleStack" AND
	// - adding or removing a secondary clusterIP or ipFamily
	if isMatchingPreferDualStackClusterIPFields(after, before) {
		return nil // nothing more to do.
	}

	// If the user didn't specify ipFamilyPolicy, we can infer a default. We
	// don't want a static default because we want to make sure that we never
	// change between single- and dual-stack modes without explicit direction,
	// as provided by ipFamilyPolicy. Consider these cases:
	// * Create (POST): If they didn't specify a policy we can assume it's
	//   always SingleStack.
	// * Update (PUT): If they didn't specify a policy we need to adopt the
	//   policy from before. This is better than always assuming SingleStack
	//   because a PUT that changes clusterIPs from 2 to 1 value but doesn't
	//   specify ipFamily would work.
	// * Update (PATCH): If they didn't specify a policy it will adopt the
	//   policy from before.
	if service.Spec.IPFamilyPolicy == nil {
		if oldService != nil && oldService.Spec.IPFamilyPolicy != nil {
			// Update from an object with policy, use the old policy
			service.Spec.IPFamilyPolicy = oldService.Spec.IPFamilyPolicy
		} else if service.Spec.ClusterIP == api.ClusterIPNone && len(service.Spec.Selector) == 0 {
			// Special-case: headless + selectorless defaults to dual.
			requireDualStack := api.IPFamilyPolicyRequireDualStack
			service.Spec.IPFamilyPolicy = &requireDualStack
		} else {
			// create or update from an object without policy (e.g.
			// ExternalName) to one that needs policy
			singleStack := api.IPFamilyPolicySingleStack
			service.Spec.IPFamilyPolicy = &singleStack
		}
	}
	// Henceforth we can assume ipFamilyPolicy is set.

	// Do some loose pre-validation of the input. This makes it easier in the
	// rest of allocation code to not have to consider corner cases.
	// TODO(thockin): when we tighten validation (e.g. to require IPs) we will
	// need a "strict" and a "loose" form of this.
	if el := validation.ValidateServiceClusterIPsRelatedFields(service); len(el) != 0 {
		return errors.NewInvalid(api.Kind("Service"), service.Name, el)
	}

	//TODO(thockin): Move this logic to validation?
	el := make(field.ErrorList, 0)

	// Update-only prep work.
	if oldService != nil {
		if getIPFamilyPolicy(service) == api.IPFamilyPolicySingleStack {
			// As long as ClusterIPs and IPFamilies have not changed, setting
			// the policy to single-stack is clear intent.
			// ClusterIPs[0] is immutable, so it is safe to keep.
			if sameClusterIPs(oldService, service) && len(service.Spec.ClusterIPs) > 1 {
				service.Spec.ClusterIPs = service.Spec.ClusterIPs[0:1]
			}
			if sameIPFamilies(oldService, service) && len(service.Spec.IPFamilies) > 1 {
				service.Spec.IPFamilies = service.Spec.IPFamilies[0:1]
			}
		} else {
			// If the policy is anything but single-stack AND they reduced
			// these fields, it's an error. They need to specify policy.
			if reducedClusterIPs(After{service}, Before{oldService}) {
				el = append(el, field.Invalid(field.NewPath("spec", "ipFamilyPolicy"), service.Spec.IPFamilyPolicy,
					"must be 'SingleStack' to release the secondary cluster IP"))
			}
			if reducedIPFamilies(After{service}, Before{oldService}) {
				el = append(el, field.Invalid(field.NewPath("spec", "ipFamilyPolicy"), service.Spec.IPFamilyPolicy,
					"must be 'SingleStack' to release the secondary IP family"))
			}
		}
	}
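
	// Worked example (assumed values, not from the original file) of the prep
	// work above: an update that sets ipFamilyPolicy to "SingleStack" while
	// leaving ClusterIPs and IPFamilies untouched trims both slices to their
	// first element; an update that instead shortens the slices while keeping
	// a dual-stack policy accumulates the errors above.
	//
	//	before: ClusterIPs: ["10.0.0.10", "fd00::10"], IPFamilies: ["IPv4", "IPv6"]
	//	update: ipFamilyPolicy: "SingleStack" (other fields unchanged)
	//	after:  ClusterIPs: ["10.0.0.10"], IPFamilies: ["IPv4"]
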
	// Make sure ipFamilyPolicy makes sense for the provided ipFamilies and
	// clusterIPs. Further checks happen below - after the special cases.
	if getIPFamilyPolicy(service) == api.IPFamilyPolicySingleStack {
		if len(service.Spec.ClusterIPs) == 2 {
			el = append(el, field.Invalid(field.NewPath("spec", "ipFamilyPolicy"), service.Spec.IPFamilyPolicy,
				"must be 'RequireDualStack' or 'PreferDualStack' when multiple cluster IPs are specified"))
		}
		if len(service.Spec.IPFamilies) == 2 {
			el = append(el, field.Invalid(field.NewPath("spec", "ipFamilyPolicy"), service.Spec.IPFamilyPolicy,
				"must be 'RequireDualStack' or 'PreferDualStack' when multiple IP families are specified"))
		}
	}

	// Infer IPFamilies[] from ClusterIPs[]. Further checks happen below,
	// after the special cases.
	for i, ip := range service.Spec.ClusterIPs {
		if ip == api.ClusterIPNone {
			break
		}

		// We previously validated that IPs are well-formed and that if an
		// ipFamilies[] entry exists it matches the IP.
		fam := familyOf(ip)

		// If the corresponding family is not specified, add it.
		if i >= len(service.Spec.IPFamilies) {
			// Families are checked more thoroughly later, but this is a
			// better error in this specific case (indicating the
			// user-provided IP, rather than the auto-assigned family).
			if _, found := al.serviceIPAllocatorsByFamily[fam]; !found {
				el = append(el, field.Invalid(field.NewPath("spec", "clusterIPs").Index(i), service.Spec.ClusterIPs,
					fmt.Sprintf("%s is not configured on this cluster", fam)))
			} else {
				// OK to infer.
				service.Spec.IPFamilies = append(service.Spec.IPFamilies, fam)
			}
		}
	}

	// If we have validation errors, bail out now so we don't make them worse.
	if len(el) > 0 {
		return errors.NewInvalid(api.Kind("Service"), service.Name, el)
	}
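
	// Worked example (assumed values, not from the original file) of the
	// inference above: a service created with ClusterIPs ["fd00::10"] and no
	// ipFamilies gets IPFamilies ["IPv6"] inferred from the address; if no
	// IPv6 allocator is configured, the same input is instead rejected with
	// an error on spec.clusterIPs[0].
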
	// Special-case: headless + selectorless. This has to happen before other
	// checks because it explicitly allows combinations of inputs that would
	// otherwise be errors.
	if service.Spec.ClusterIP == api.ClusterIPNone && len(service.Spec.Selector) == 0 {
		// If IPFamilies was not set by the user, start with the default
		// family.
		if len(service.Spec.IPFamilies) == 0 {
			service.Spec.IPFamilies = []api.IPFamily{al.defaultServiceIPFamily}
		}

		// This mirrors headful services, with one exception: even on a
		// single-stack cluster the user is allowed to create a headless
		// service with multiple families, and validation allows it.
		if len(service.Spec.IPFamilies) < 2 {
			if *(service.Spec.IPFamilyPolicy) != api.IPFamilyPolicySingleStack {
				// Add the alternate IP family.
				if service.Spec.IPFamilies[0] == api.IPv4Protocol {
					service.Spec.IPFamilies = append(service.Spec.IPFamilies, api.IPv6Protocol)
				} else {
					service.Spec.IPFamilies = append(service.Spec.IPFamilies, api.IPv4Protocol)
				}
			}
		}

		// nothing more needed here
		return nil
	}

	//
	// Everything below this MUST happen *after* the above special cases.
	//

	// Demanding dual-stack on a non-dual-stack cluster.
	if getIPFamilyPolicy(service) == api.IPFamilyPolicyRequireDualStack {
		if len(al.serviceIPAllocatorsByFamily) < 2 {
			el = append(el, field.Invalid(field.NewPath("spec", "ipFamilyPolicy"), service.Spec.IPFamilyPolicy,
				"this cluster is not configured for dual-stack services"))
		}
	}

	// If there is a family requested then it has to be configured on cluster.
	for i, ipFamily := range service.Spec.IPFamilies {
		if _, found := al.serviceIPAllocatorsByFamily[ipFamily]; !found {
			el = append(el, field.Invalid(field.NewPath("spec", "ipFamilies").Index(i), ipFamily, "not configured on this cluster"))
		}
	}

	// If we have validation errors, don't bother with the rest.
	if len(el) > 0 {
		return errors.NewInvalid(api.Kind("Service"), service.Name, el)
	}

	// Nil families get the cluster default.
	if len(service.Spec.IPFamilies) == 0 {
		service.Spec.IPFamilies = []api.IPFamily{al.defaultServiceIPFamily}
	}

	// If this service is looking for dual-stack and this cluster does have
	// two families, append the missing family.
	if *(service.Spec.IPFamilyPolicy) != api.IPFamilyPolicySingleStack &&
		len(service.Spec.IPFamilies) == 1 &&
		len(al.serviceIPAllocatorsByFamily) == 2 {

		if service.Spec.IPFamilies[0] == api.IPv4Protocol {
			service.Spec.IPFamilies = append(service.Spec.IPFamilies, api.IPv6Protocol)
		} else if service.Spec.IPFamilies[0] == api.IPv6Protocol {
			service.Spec.IPFamilies = append(service.Spec.IPFamilies, api.IPv4Protocol)
		}
	}

	return nil
}
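
// Illustrative sketch (assumed values, not from the original file): how the
// defaulting in initIPFamilyFields plays out on a cluster configured with
// both IPv4 and IPv6 allocators, IPv4 being the default family.
//
//	input:  ipFamilyPolicy: "PreferDualStack", ipFamilies: []
//	result: ipFamilies: ["IPv4", "IPv6"] // default family, then the alternate
//
//	input:  ipFamilyPolicy: "SingleStack", ipFamilies: ["IPv6"]
//	result: ipFamilies: ["IPv6"] // explicit family kept, nothing appended
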
func (al *Allocators) txnAllocClusterIPs(service *api.Service, dryRun bool) (transaction, error) {
	// ClusterIPs that were allocated may need to be released in case of
	// failure at a higher level.
	allocated, err := al.allocClusterIPs(service, dryRun)
	if err != nil {
		return nil, err
	}

	txn := callbackTransaction{
		revert: func() {
			if dryRun {
				return
			}
			actuallyReleased, err := al.releaseIPs(allocated)
			if err != nil {
				klog.ErrorS(err, "failed to clean up after failed service create",
					"service", klog.KObj(service),
					"shouldRelease", allocated,
					"released", actuallyReleased)
			}
		},
		commit: func() {
			if !dryRun {
				if len(allocated) > 0 {
					klog.InfoS("allocated clusterIPs",
						"service", klog.KObj(service),
						"clusterIPs", allocated)
				}
			}
		},
	}
	return txn, nil
}
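
// Minimal sketch (hypothetical caller, not part of the original file): how
// the transaction returned by txnAllocClusterIPs is meant to be driven.
//
//	txn, err := al.txnAllocClusterIPs(svc, false)
//	if err != nil {
//		return err // nothing to roll back; allocation itself failed
//	}
//	if restOfCreateFailed { // hypothetical condition
//		txn.Revert() // releases the IPs that were just allocated
//	} else {
//		txn.Commit() // logs the allocated clusterIPs
//	}
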
// allocClusterIPs allocates ClusterIPs for a service.
func (al *Allocators) allocClusterIPs(service *api.Service, dryRun bool) (map[api.IPFamily]string, error) {
	// ExternalName services don't get ClusterIPs.
	if service.Spec.Type == api.ServiceTypeExternalName {
		return nil, nil
	}

	// Headless services don't get ClusterIPs.
	if len(service.Spec.ClusterIPs) > 0 && service.Spec.ClusterIPs[0] == api.ClusterIPNone {
		return nil, nil
	}

	toAlloc := make(map[api.IPFamily]string)
	// At this stage, the only fact we know is that the service has the
	// correct IP families assigned to it (and that this cluster has those
	// families configured). It may have partially assigned ClusterIPs (an
	// upgrade to dual-stack), or no IPs at all. The loop below fixes this.

	// if there is no slice to work with
	if service.Spec.ClusterIPs == nil {
		service.Spec.ClusterIPs = make([]string, 0, len(service.Spec.IPFamilies))
	}

	for i, ipFamily := range service.Spec.IPFamilies {
		if i > (len(service.Spec.ClusterIPs) - 1) {
			service.Spec.ClusterIPs = append(service.Spec.ClusterIPs, "" /* just a marker */)
		}

		toAlloc[ipFamily] = service.Spec.ClusterIPs[i]
	}

	// allocate
	allocated, err := al.allocIPs(service, toAlloc, dryRun)

	// set if successful
	if err == nil {
		for family, ip := range allocated {
			for i, check := range service.Spec.IPFamilies {
				if family == check {
					service.Spec.ClusterIPs[i] = ip
					// While we technically don't need to set ClusterIP here,
					// the REST tests do not go through conversion logic, only
					// through validation *sigh*, and they patch fields
					// expecting them to be valid (as they would be when
					// output from conversion). So we set ClusterIP as well.
					if i == 0 {
						service.Spec.ClusterIP = ip
					}
				}
			}
		}
	}

	return allocated, err
}

func (al *Allocators) allocIPs(service *api.Service, toAlloc map[api.IPFamily]string, dryRun bool) (map[api.IPFamily]string, error) {
	allocated := make(map[api.IPFamily]string)

	for family, ip := range toAlloc {
		allocator := al.serviceIPAllocatorsByFamily[family] // should always be there, as we pre-validate
		if dryRun {
			allocator = allocator.DryRun()
		}
		if ip == "" {
			var allocatedIP net.IP
			var err error
			if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
				// TODO: simplify this and avoid all this duplicate code
				svcAllocator, ok := allocator.(*ipallocator.MetaAllocator)
				if ok {
					allocatedIP, err = svcAllocator.AllocateNextService(service)
				} else {
					allocatedIP, err = allocator.AllocateNext()
				}
			} else {
				allocatedIP, err = allocator.AllocateNext()
			}
			if err != nil {
				return allocated, errors.NewInternalError(fmt.Errorf("failed to allocate a serviceIP: %v", err))
			}
			allocated[family] = allocatedIP.String()
		} else {
			parsedIP := netutils.ParseIPSloppy(ip)
			if parsedIP == nil {
				return allocated, errors.NewInternalError(fmt.Errorf("failed to parse service IP %q", ip))
			}
			var err error
			if utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRServiceAllocator) {
				// TODO: simplify this and avoid all this duplicate code
				svcAllocator, ok := allocator.(*ipallocator.MetaAllocator)
				if ok {
					err = svcAllocator.AllocateService(service, parsedIP)
				} else {
					err = allocator.Allocate(parsedIP)
				}
			} else {
				err = allocator.Allocate(parsedIP)
			}
			if err != nil {
				el := field.ErrorList{field.Invalid(field.NewPath("spec", "clusterIPs"), service.Spec.ClusterIPs, fmt.Sprintf("failed to allocate IP %v: %v", ip, err))}
				return allocated, errors.NewInvalid(api.Kind("Service"), service.Name, el)
			}
			allocated[family] = ip
		}
	}
	return allocated, nil
}

// releaseIPs releases ClusterIPs, keyed by family.
func (al *Allocators) releaseIPs(toRelease map[api.IPFamily]string) (map[api.IPFamily]string, error) {
	if toRelease == nil {
		return nil, nil
	}

	released := make(map[api.IPFamily]string)
	for family, ip := range toRelease {
		allocator, ok := al.serviceIPAllocatorsByFamily[family]
		if !ok {
			// Maybe the cluster was previously configured for dual-stack,
			// then switched to single-stack?
			klog.InfoS("Not releasing ClusterIP because related family is not enabled", "clusterIP", ip, "family", family)
			continue
		}

		parsedIP := netutils.ParseIPSloppy(ip)
		if parsedIP == nil {
			return released, errors.NewInternalError(fmt.Errorf("failed to parse service IP %q", ip))
		}
		if err := allocator.Release(parsedIP); err != nil {
			return released, err
		}
		released[family] = ip
	}

	return released, nil
}
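
// Worked example (assumed values, not from the original file) for allocIPs:
// an empty string requests auto-allocation for that family, while a concrete
// address asks the allocator to reserve exactly that IP.
//
//	toAlloc:   {IPv4: "", IPv6: "fd00::10"}
//	allocated: {IPv4: "10.0.113.7", IPv6: "fd00::10"} // IPv4 picked by the allocator
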
func (al *Allocators) txnAllocNodePorts(service *api.Service, dryRun bool) (transaction, error) {
	// The allocator tracks dry-run-ness internally.
	nodePortOp := portallocator.StartOperation(al.serviceNodePorts, dryRun)

	txn := callbackTransaction{
		commit: func() {
			nodePortOp.Commit()
			// We don't NEED to call Finish() here, but the package says to,
			// so for future-safety, we will.
			nodePortOp.Finish()
		},
		revert: func() {
			// Weirdly named, but this will revert if Commit() wasn't called.
			nodePortOp.Finish()
		},
	}

	// Allocate NodePorts, if needed.
	if service.Spec.Type == api.ServiceTypeNodePort || service.Spec.Type == api.ServiceTypeLoadBalancer {
		if err := initNodePorts(service, nodePortOp); err != nil {
			txn.Revert()
			return nil, err
		}
	}

	// Handle ExternalTraffic related fields during service creation.
	if apiservice.NeedsHealthCheck(service) {
		if err := al.allocHealthCheckNodePort(service, nodePortOp); err != nil {
			txn.Revert()
			return nil, errors.NewInternalError(err)
		}
	}

	return txn, nil
}

func initNodePorts(service *api.Service, nodePortOp *portallocator.PortAllocationOperation) error {
	svcPortToNodePort := map[int]int{}
	for i := range service.Spec.Ports {
		servicePort := &service.Spec.Ports[i]
		if servicePort.NodePort == 0 && !shouldAllocateNodePorts(service) {
			// Don't allocate new ports, but do respect specific requests.
			continue
		}
		allocatedNodePort := svcPortToNodePort[int(servicePort.Port)]
		if allocatedNodePort == 0 {
			// This will only scan forward in the service.Spec.Ports list
			// because any matches before the current port would have been
			// found in svcPortToNodePort. This is really looking for any
			// user-provided values.
			np := findRequestedNodePort(int(servicePort.Port), service.Spec.Ports)
			if np != 0 {
				err := nodePortOp.Allocate(np)
				if err != nil {
					// TODO: when validation becomes versioned, this gets more complicated.
					el := field.ErrorList{field.Invalid(field.NewPath("spec", "ports").Index(i).Child("nodePort"), np, err.Error())}
					return errors.NewInvalid(api.Kind("Service"), service.Name, el)
				}
				servicePort.NodePort = int32(np)
				svcPortToNodePort[int(servicePort.Port)] = np
			} else {
				nodePort, err := nodePortOp.AllocateNext()
				if err != nil {
					// TODO: what error should be returned here? It's not a
					// field-level validation failure (the field is valid),
					// and it's not really an internal error.
					return errors.NewInternalError(fmt.Errorf("failed to allocate a nodePort: %v", err))
				}
				servicePort.NodePort = int32(nodePort)
				svcPortToNodePort[int(servicePort.Port)] = nodePort
			}
		} else if int(servicePort.NodePort) != allocatedNodePort {
			// TODO(xiangpengzhao): do we need to allocate a new NodePort in this case?
			// Note: the current implementation is better, because it saves a NodePort.
			if servicePort.NodePort == 0 {
				servicePort.NodePort = int32(allocatedNodePort)
			} else {
				err := nodePortOp.Allocate(int(servicePort.NodePort))
				if err != nil {
					// TODO: when validation becomes versioned, this gets more complicated.
					el := field.ErrorList{field.Invalid(field.NewPath("spec", "ports").Index(i).Child("nodePort"), servicePort.NodePort, err.Error())}
					return errors.NewInvalid(api.Kind("Service"), service.Name, el)
				}
			}
		}
	}

	return nil
}
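
// Worked example (assumed values, not from the original file) for
// initNodePorts: entries that share a service port number share one node
// port, and a user-provided value anywhere in the list wins.
//
//	ports:  [{Port: 80, NodePort: 0}, {Port: 80, NodePort: 30080}]
//	result: both entries get NodePort 30080 (found via findRequestedNodePort)
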
// allocHealthCheckNodePort allocates a health check node port to the service.
func (al *Allocators) allocHealthCheckNodePort(service *api.Service, nodePortOp *portallocator.PortAllocationOperation) error {
	healthCheckNodePort := service.Spec.HealthCheckNodePort
	if healthCheckNodePort != 0 {
		// If the request has a health check nodePort in mind, attempt to
		// reserve it.
		err := nodePortOp.Allocate(int(healthCheckNodePort))
		if err != nil {
			return fmt.Errorf("failed to allocate requested HealthCheck NodePort %v: %v",
				healthCheckNodePort, err)
		}
	} else {
		// If the request has no health check nodePort specified, allocate one.
		healthCheckNodePort, err := nodePortOp.AllocateNext()
		if err != nil {
			return fmt.Errorf("failed to allocate a HealthCheck NodePort %v: %v", healthCheckNodePort, err)
		}
		service.Spec.HealthCheckNodePort = int32(healthCheckNodePort)
	}
	return nil
}

func (al *Allocators) allocateUpdate(after After, before Before, dryRun bool) (transaction, error) {
	result := metaTransaction{}
	success := false

	defer func() {
		if !success {
			result.Revert()
		}
	}()

	// Ensure IP family fields are correctly initialized. We do it here, since
	// we want this to be visible even when dryRun == true.
	if err := al.initIPFamilyFields(after, before); err != nil {
		return nil, err
	}

	// Allocate ClusterIPs
	//TODO(thockin): validation should not pass with empty clusterIP, but it
	//does (and is tested!). Fixing that all is a big PR and will have to
	//happen later.
	if txn, err := al.txnUpdateClusterIPs(after, before, dryRun); err != nil {
		return nil, err
	} else {
		result = append(result, txn)
	}

	// Allocate ports
	if txn, err := al.txnUpdateNodePorts(after, before, dryRun); err != nil {
		return nil, err
	} else {
		result = append(result, txn)
	}

	success = true
	return result, nil
}

func (al *Allocators) txnUpdateClusterIPs(after After, before Before, dryRun bool) (transaction, error) {
	service := after.Service

	allocated, released, err := al.updateClusterIPs(after, before, dryRun)
	if err != nil {
		return nil, err
	}

	// on failure: any newly allocated IP must be released back
	// on failure: any previously allocated IP that would have been released
	//             must *not* be released
	// on success: any previously allocated IP that should be released will be
	//             released
	txn := callbackTransaction{
		commit: func() {
			if dryRun {
				return
			}
			if len(allocated) > 0 {
				klog.InfoS("allocated clusterIPs",
					"service", klog.KObj(service),
					"clusterIPs", allocated)
			}
			if actuallyReleased, err := al.releaseIPs(released); err != nil {
				klog.ErrorS(err, "failed to clean up after successful service update",
					"service", klog.KObj(service),
					"shouldRelease", released,
					"released", actuallyReleased)
			}
		},
		revert: func() {
			if dryRun {
				return
			}
			if actuallyReleased, err := al.releaseIPs(allocated); err != nil {
				klog.ErrorS(err, "failed to clean up after failed service update",
					"service", klog.KObj(service),
					"shouldRelease", allocated,
					"released", actuallyReleased)
			}
		},
	}
	return txn, nil
}
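
// Sketch of the commit/revert asymmetry above (not part of the original
// file): for an update, newly allocated IPs and to-be-released old IPs move
// in opposite directions depending on the outcome.
//
//	Commit(): keep the newly allocated IPs, release the old ones
//	Revert(): release the newly allocated IPs, keep the old ones
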
// updateClusterIPs handles type changes and dual-stack upgrades/downgrades
// for a service update. This func does not perform the actual release of
// ClusterIPs; it returns a map[family]ip for the caller to release when
// everything else has executed successfully.
func (al *Allocators) updateClusterIPs(after After, before Before, dryRun bool) (allocated map[api.IPFamily]string, toRelease map[api.IPFamily]string, err error) {
	oldService, service := before.Service, after.Service

	// We don't want to auto-upgrade (add an IP) or downgrade (remove an IP)
	// PreferDualStack services following a cluster change to/from
	// dual-stackness.
	//
	// That means a PreferDualStack service will only be upgraded/downgraded
	// when:
	// - changing ipFamilyPolicy to "RequireDualStack" or "SingleStack" AND
	// - adding or removing a secondary clusterIP or ipFamily
	if isMatchingPreferDualStackClusterIPFields(after, before) {
		return allocated, toRelease, nil // nothing more to do.
	}

	// Use cases:
	// A: service changing types from ExternalName to a ClusterIP type ==> allocate all new
	// B: service changing types from a ClusterIP type to ExternalName ==> release all allocated
	// C: service upgrading to dual-stack ==> partial allocation
	// D: service downgrading from dual-stack ==> partial release

	// CASE A:
	// Update service from ExternalName to non-ExternalName, should initialize ClusterIP.
	if oldService.Spec.Type == api.ServiceTypeExternalName && service.Spec.Type != api.ServiceTypeExternalName {
		allocated, err := al.allocClusterIPs(service, dryRun)
		return allocated, nil, err
	}

	// If this is a headless service, bail out early (no ClusterIP management
	// is needed).
	if len(oldService.Spec.ClusterIPs) > 0 && oldService.Spec.ClusterIPs[0] == api.ClusterIPNone {
		return nil, nil, nil
	}

	// CASE B:
	// Update service from non-ExternalName to ExternalName, should release ClusterIP if it exists.
	if oldService.Spec.Type != api.ServiceTypeExternalName && service.Spec.Type == api.ServiceTypeExternalName {
		toRelease = make(map[api.IPFamily]string)
		for i, family := range oldService.Spec.IPFamilies {
			toRelease[family] = oldService.Spec.ClusterIPs[i]
		}
		return nil, toRelease, nil
	}

	upgraded := len(oldService.Spec.IPFamilies) == 1 && len(service.Spec.IPFamilies) == 2
	downgraded := len(oldService.Spec.IPFamilies) == 2 && len(service.Spec.IPFamilies) == 1

	// CASE C:
	if upgraded {
		toAllocate := make(map[api.IPFamily]string)
		// If the secondary IP was named, just take it; if not, add a marker.
		if len(service.Spec.ClusterIPs) < 2 {
			service.Spec.ClusterIPs = append(service.Spec.ClusterIPs, "" /* marker */)
		}

		toAllocate[service.Spec.IPFamilies[1]] = service.Spec.ClusterIPs[1]

		// allocate
		allocated, err := al.allocIPs(service, toAllocate, dryRun)
		// set if successful
		if err == nil {
			service.Spec.ClusterIPs[1] = allocated[service.Spec.IPFamilies[1]]
		}

		return allocated, nil, err
	}

	// CASE D:
	if downgraded {
		toRelease = make(map[api.IPFamily]string)
		toRelease[oldService.Spec.IPFamilies[1]] = oldService.Spec.ClusterIPs[1]
		// Note: we don't release the ClusterIP here; that is left to the
		// cleanup in the action itself.
		return nil, toRelease, err
	}

	// It was neither an upgrade nor a downgrade.
	return nil, nil, nil
}
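
// Worked example (assumed values, not from the original file) of CASE C
// above, a single- to dual-stack upgrade:
//
//	old: IPFamilies: ["IPv4"],         ClusterIPs: ["10.0.0.10"]
//	new: IPFamilies: ["IPv4", "IPv6"], ClusterIPs: ["10.0.0.10"]
//	==>  only the IPv6 entry is allocated; ClusterIPs becomes
//	     ["10.0.0.10", "<allocated IPv6>"] and nothing is released.
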
func (al *Allocators) txnUpdateNodePorts(after After, before Before, dryRun bool) (transaction, error) {
	oldService, service := before.Service, after.Service

	// The allocator tracks dry-run-ness internally.
	nodePortOp := portallocator.StartOperation(al.serviceNodePorts, dryRun)

	txn := callbackTransaction{
		commit: func() {
			nodePortOp.Commit()
			// We don't NEED to call Finish() here, but the package says to,
			// so for future-safety, we will.
			nodePortOp.Finish()
		},
		revert: func() {
			// Weirdly named, but this will revert if Commit() wasn't called.
			nodePortOp.Finish()
		},
	}

	// Update service from NodePort or LoadBalancer to ExternalName or
	// ClusterIP; should release NodePorts if they exist.
	if (oldService.Spec.Type == api.ServiceTypeNodePort || oldService.Spec.Type == api.ServiceTypeLoadBalancer) &&
		(service.Spec.Type == api.ServiceTypeExternalName || service.Spec.Type == api.ServiceTypeClusterIP) {
		al.releaseNodePorts(oldService, nodePortOp)
	}

	// Update service from any type to NodePort or LoadBalancer; should update
	// NodePorts.
	if service.Spec.Type == api.ServiceTypeNodePort || service.Spec.Type == api.ServiceTypeLoadBalancer {
		if err := al.updateNodePorts(After{service}, Before{oldService}, nodePortOp); err != nil {
			txn.Revert()
			return nil, err
		}
	}

	// Handle ExternalTraffic related updates.
	success, err := al.updateHealthCheckNodePort(After{service}, Before{oldService}, nodePortOp)
	if !success || err != nil {
		txn.Revert()
		return nil, err
	}

	return txn, nil
}

func (al *Allocators) releaseNodePorts(service *api.Service, nodePortOp *portallocator.PortAllocationOperation) {
	nodePorts := collectServiceNodePorts(service)

	for _, nodePort := range nodePorts {
		nodePortOp.ReleaseDeferred(nodePort)
	}
}
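
// Illustrative note (assumed values, not from the original file): a
// LoadBalancer-to-ClusterIP change above only defers the release of the old
// node ports; nothing is freed unless the transaction commits.
//
//	old: Type: LoadBalancer, Ports: [{Port: 80, NodePort: 30080}]
//	new: Type: ClusterIP
//	==>  ReleaseDeferred(30080): freed on Commit(), kept on Revert()
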
func (al *Allocators) updateNodePorts(after After, before Before, nodePortOp *portallocator.PortAllocationOperation) error {
	oldService, newService := before.Service, after.Service

	oldNodePortsNumbers := collectServiceNodePorts(oldService)
	newNodePorts := []ServiceNodePort{}
	portAllocated := map[int]bool{}

	for i := range newService.Spec.Ports {
		servicePort := &newService.Spec.Ports[i]
		if servicePort.NodePort == 0 && !shouldAllocateNodePorts(newService) {
			// Don't allocate new ports, but do respect specific requests.
			continue
		}
		nodePort := ServiceNodePort{Protocol: servicePort.Protocol, NodePort: servicePort.NodePort}
		if nodePort.NodePort != 0 {
			if !containsNumber(oldNodePortsNumbers, int(nodePort.NodePort)) && !portAllocated[int(nodePort.NodePort)] {
				err := nodePortOp.Allocate(int(nodePort.NodePort))
				if err != nil {
					el := field.ErrorList{field.Invalid(field.NewPath("spec", "ports").Index(i).Child("nodePort"), nodePort.NodePort, err.Error())}
					return errors.NewInvalid(api.Kind("Service"), newService.Name, el)
				}
				portAllocated[int(nodePort.NodePort)] = true
			}
		} else {
			nodePortNumber, err := nodePortOp.AllocateNext()
			if err != nil {
				// TODO: what error should be returned here? It's not a
				// field-level validation failure (the field is valid), and
				// it's not really an internal error.
				return errors.NewInternalError(fmt.Errorf("failed to allocate a nodePort: %v", err))
			}
			servicePort.NodePort = int32(nodePortNumber)
			nodePort.NodePort = servicePort.NodePort
		}
		if containsNodePort(newNodePorts, nodePort) {
			return fmt.Errorf("duplicate nodePort: %v", nodePort)
		}
		newNodePorts = append(newNodePorts, nodePort)
	}

	newNodePortsNumbers := collectServiceNodePorts(newService)

	// The comparison loops are O(N^2), but we don't expect N to be huge
	// (there's a hard limit at 2^16, because they're ports; and even 4 ports
	// would be a lot).
	for _, oldNodePortNumber := range oldNodePortsNumbers {
		if containsNumber(newNodePortsNumbers, oldNodePortNumber) {
			continue
		}
		nodePortOp.ReleaseDeferred(int(oldNodePortNumber))
	}

	return nil
}
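
// Worked example (assumed values, not from the original file) for
// updateNodePorts: ports carried over from the old spec are not
// re-allocated, new requests are, and abandoned ports are release-deferred.
//
//	old ports: [{Port: 80, NodePort: 30080}]
//	new ports: [{Port: 80, NodePort: 30080}, {Port: 443, NodePort: 0}]
//	==>        30080 is kept without a new Allocate() call, 443 gets a fresh
//	           node port from AllocateNext(), and nothing is released.
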
// updateHealthCheckNodePort handles HealthCheckNodePort allocation/release
// and adjusts HealthCheckNodePort during a service update if needed.
func (al *Allocators) updateHealthCheckNodePort(after After, before Before, nodePortOp *portallocator.PortAllocationOperation) (bool, error) {
	oldService, service := before.Service, after.Service

	neededHealthCheckNodePort := apiservice.NeedsHealthCheck(oldService)
	oldHealthCheckNodePort := oldService.Spec.HealthCheckNodePort

	needsHealthCheckNodePort := apiservice.NeedsHealthCheck(service)

	switch {
	// Case 1: Transition from not needing a HealthCheckNodePort to needing one.
	// Allocate a health check node port, or attempt to reserve the
	// user-specified one if provided. Insert the health check node port into
	// the service's HealthCheckNodePort field if needed.
	case !neededHealthCheckNodePort && needsHealthCheckNodePort:
		if err := al.allocHealthCheckNodePort(service, nodePortOp); err != nil {
			return false, errors.NewInternalError(err)
		}

	// Case 2: Transition from needing a HealthCheckNodePort to not needing one.
	// Free the existing healthCheckNodePort and clear the HealthCheckNodePort field.
	case neededHealthCheckNodePort && !needsHealthCheckNodePort:
		nodePortOp.ReleaseDeferred(int(oldHealthCheckNodePort))
	}
	return true, nil
}

func (al *Allocators) releaseAllocatedResources(svc *api.Service) {
	al.releaseClusterIPs(svc)

	for _, nodePort := range collectServiceNodePorts(svc) {
		err := al.serviceNodePorts.Release(nodePort)
		if err != nil {
			// these should be caught by an eventual reconciliation / restart
			utilruntime.HandleError(fmt.Errorf("Error releasing service %s node port %d: %v", svc.Name, nodePort, err))
		}
	}

	if apiservice.NeedsHealthCheck(svc) {
		nodePort := svc.Spec.HealthCheckNodePort
		if nodePort > 0 {
			err := al.serviceNodePorts.Release(int(nodePort))
			if err != nil {
				// these should be caught by an eventual reconciliation / restart
				utilruntime.HandleError(fmt.Errorf("Error releasing service %s health check node port %d: %v", svc.Name, nodePort, err))
			}
		}
	}
}

// releaseClusterIPs releases the allocated ClusterIPs for a service that is
// about to be deleted.
func (al *Allocators) releaseClusterIPs(service *api.Service) (released map[api.IPFamily]string, err error) {
	// ExternalName services don't get ClusterIPs.
	if service.Spec.Type == api.ServiceTypeExternalName {
		return nil, nil
	}

	// Headless services don't get ClusterIPs.
	if len(service.Spec.ClusterIPs) > 0 && service.Spec.ClusterIPs[0] == api.ClusterIPNone {
		return nil, nil
	}

	toRelease := make(map[api.IPFamily]string)
	for _, ip := range service.Spec.ClusterIPs {
		if netutils.IsIPv6String(ip) {
			toRelease[api.IPv6Protocol] = ip
		} else {
			toRelease[api.IPv4Protocol] = ip
		}
	}
	return al.releaseIPs(toRelease)
}

func (al *Allocators) Destroy() {
	al.serviceNodePorts.Destroy()
	for _, a := range al.serviceIPAllocatorsByFamily {
		a.Destroy()
	}
}

// This is O(N), but we expect the haystack to be small;
// so small that we expect a linear search to be faster.
func containsNumber(haystack []int, needle int) bool {
	for _, v := range haystack {
		if v == needle {
			return true
		}
	}
	return false
}

// This is O(N), but we expect serviceNodePorts to be small;
// so small that we expect a linear search to be faster.
func containsNodePort(serviceNodePorts []ServiceNodePort, serviceNodePort ServiceNodePort) bool {
	for _, snp := range serviceNodePorts {
		if snp == serviceNodePort {
			return true
		}
	}
	return false
}
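
// Note on the comparison above (illustrative values): ServiceNodePort is
// compared as a whole struct, so the same port number under two protocols
// does not count as a duplicate.
//
//	containsNodePort([]ServiceNodePort{{Protocol: "TCP", NodePort: 30080}},
//		ServiceNodePort{Protocol: "UDP", NodePort: 30080}) == false
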
// findRequestedNodePort loops through the service ports list looking for one
// with the same port number and a NodePort specified; it returns that
// NodePort, otherwise 0.
func findRequestedNodePort(port int, servicePorts []api.ServicePort) int {
	for i := range servicePorts {
		servicePort := servicePorts[i]
		if port == int(servicePort.Port) && servicePort.NodePort != 0 {
			return int(servicePort.NodePort)
		}
	}
	return 0
}

func shouldAllocateNodePorts(service *api.Service) bool {
	if service.Spec.Type == api.ServiceTypeNodePort {
		return true
	}
	if service.Spec.Type == api.ServiceTypeLoadBalancer {
		return *service.Spec.AllocateLoadBalancerNodePorts
	}
	return false
}

func collectServiceNodePorts(service *api.Service) []int {
	servicePorts := []int{}
	for i := range service.Spec.Ports {
		servicePort := &service.Spec.Ports[i]
		if servicePort.NodePort != 0 {
			servicePorts = append(servicePorts, int(servicePort.NodePort))
		}
	}
	return servicePorts
}

// isMatchingPreferDualStackClusterIPFields tests if two PreferDualStack
// services have matching ClusterIP-related fields.
// Assumption: the old service is a valid, defaulted service (e.g., loaded
// from the store).
func isMatchingPreferDualStackClusterIPFields(after After, before Before) bool {
	oldService, service := before.Service, after.Service

	if oldService == nil {
		return false
	}

	if service.Spec.IPFamilyPolicy == nil {
		return false
	}

	// If the type mutated, then this is an update that needs to run through
	// the entire process.
	if oldService.Spec.Type != service.Spec.Type {
		return false
	}
	// Both must be a type that gets an IP assigned.
	if service.Spec.Type != api.ServiceTypeClusterIP &&
		service.Spec.Type != api.ServiceTypeNodePort &&
		service.Spec.Type != api.ServiceTypeLoadBalancer {
		return false
	}

	// Both must have IPFamilyPolicy == PreferDualStack.
	if service.Spec.IPFamilyPolicy != nil && *(service.Spec.IPFamilyPolicy) != api.IPFamilyPolicyPreferDualStack {
		return false
	}

	if oldService.Spec.IPFamilyPolicy != nil && *(oldService.Spec.IPFamilyPolicy) != api.IPFamilyPolicyPreferDualStack {
		return false
	}

	if !sameClusterIPs(oldService, service) {
		return false
	}

	if !sameIPFamilies(oldService, service) {
		return false
	}

	// They match on:
	//   Policy: PreferDualStack
	//   ClusterIPs
	//   IPFamilies
	return true
}
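
// Illustrative example (assumed values, not from the original file): the
// short-circuit above keeps an existing PreferDualStack service stable when
// the cluster's stack-ness changes.
//
//	old/new: ipFamilyPolicy: "PreferDualStack",
//	         ClusterIPs ["10.0.0.10", "fd00::10"] and IPFamilies unchanged
//	==>      initIPFamilyFields and updateClusterIPs return early; the
//	         secondary IP is neither re-validated against the configured
//	         allocators nor released.
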
// getIPFamilyPolicy is a helper to avoid nil-checks all over. Callers of this
// need to be checking for an exact value.
func getIPFamilyPolicy(svc *api.Service) api.IPFamilyPolicy {
	if svc.Spec.IPFamilyPolicy == nil {
		return "" // callers need to handle this
	}
	return *svc.Spec.IPFamilyPolicy
}

func sameClusterIPs(lhs, rhs *api.Service) bool {
	if len(rhs.Spec.ClusterIPs) != len(lhs.Spec.ClusterIPs) {
		return false
	}

	for i, ip := range rhs.Spec.ClusterIPs {
		if lhs.Spec.ClusterIPs[i] != ip {
			return false
		}
	}

	return true
}

func reducedClusterIPs(after After, before Before) bool {
	oldSvc, newSvc := before.Service, after.Service

	if len(newSvc.Spec.ClusterIPs) == 0 { // Not specified
		return false
	}
	return len(newSvc.Spec.ClusterIPs) < len(oldSvc.Spec.ClusterIPs)
}

func sameIPFamilies(lhs, rhs *api.Service) bool {
	if len(rhs.Spec.IPFamilies) != len(lhs.Spec.IPFamilies) {
		return false
	}

	for i, family := range rhs.Spec.IPFamilies {
		if lhs.Spec.IPFamilies[i] != family {
			return false
		}
	}

	return true
}

func reducedIPFamilies(after After, before Before) bool {
	oldSvc, newSvc := before.Service, after.Service

	if len(newSvc.Spec.IPFamilies) == 0 { // Not specified
		return false
	}
	return len(newSvc.Spec.IPFamilies) < len(oldSvc.Spec.IPFamilies)
}

// familyOf is a helper to get the IP family of a given IP.
func familyOf(ip string) api.IPFamily {
	if netutils.IsIPv4String(ip) {
		return api.IPv4Protocol
	}
	if netutils.IsIPv6String(ip) {
		return api.IPv6Protocol
	}
	return api.IPFamily("unknown")
}
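
// Worked examples (illustrative, not part of the original file) for familyOf:
//
//	familyOf("10.0.0.1")  == api.IPv4Protocol
//	familyOf("fd00::10")  == api.IPv6Protocol
//	familyOf("not-an-ip") == api.IPFamily("unknown")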