github.com/containers/podman/v4@v4.9.4/pkg/specgen/generate/kube/kube.go (about) 1 //go:build !remote 2 // +build !remote 3 4 package kube 5 6 import ( 7 "context" 8 "encoding/json" 9 "errors" 10 "fmt" 11 "math" 12 "net" 13 "os" 14 "regexp" 15 "runtime" 16 "strconv" 17 "strings" 18 "time" 19 20 "github.com/containers/common/libimage" 21 "github.com/containers/common/libnetwork/types" 22 "github.com/containers/common/pkg/config" 23 "github.com/containers/common/pkg/parse" 24 "github.com/containers/common/pkg/secrets" 25 cutil "github.com/containers/common/pkg/util" 26 "github.com/containers/image/v5/manifest" 27 itypes "github.com/containers/image/v5/types" 28 "github.com/containers/podman/v4/libpod/define" 29 ann "github.com/containers/podman/v4/pkg/annotations" 30 "github.com/containers/podman/v4/pkg/domain/entities" 31 v1 "github.com/containers/podman/v4/pkg/k8s.io/api/core/v1" 32 "github.com/containers/podman/v4/pkg/k8s.io/apimachinery/pkg/api/resource" 33 "github.com/containers/podman/v4/pkg/k8s.io/apimachinery/pkg/util/intstr" 34 "github.com/containers/podman/v4/pkg/specgen" 35 "github.com/containers/podman/v4/pkg/specgen/generate" 36 systemdDefine "github.com/containers/podman/v4/pkg/systemd/define" 37 "github.com/containers/podman/v4/pkg/util" 38 "github.com/docker/docker/pkg/meminfo" 39 "github.com/docker/go-units" 40 spec "github.com/opencontainers/runtime-spec/specs-go" 41 "github.com/sirupsen/logrus" 42 "golang.org/x/exp/slices" 43 "sigs.k8s.io/yaml" 44 ) 45 46 func ToPodOpt(ctx context.Context, podName string, p entities.PodCreateOptions, publishAllPorts bool, podYAML *v1.PodTemplateSpec) (entities.PodCreateOptions, error) { 47 p.Net = &entities.NetOptions{NoHosts: p.Net.NoHosts} 48 49 p.Name = podName 50 p.Labels = podYAML.ObjectMeta.Labels 51 // Kube pods must share {ipc, net, uts} by default 52 p.Share = append(p.Share, "ipc") 53 p.Share = append(p.Share, "net") 54 p.Share = append(p.Share, "uts") 55 // TODO we only configure Process namespace. We also need to account for Host{IPC,Network,PID} 56 // which is not currently possible with pod create 57 if podYAML.Spec.ShareProcessNamespace != nil && *podYAML.Spec.ShareProcessNamespace { 58 p.Share = append(p.Share, "pid") 59 } 60 if podYAML.Spec.HostPID { 61 p.Pid = "host" 62 } 63 if podYAML.Spec.HostIPC { 64 p.Ipc = "host" 65 } 66 p.Hostname = podYAML.Spec.Hostname 67 if p.Hostname == "" { 68 p.Hostname = podName 69 } 70 if podYAML.Spec.HostNetwork { 71 p.Net.Network = specgen.Namespace{NSMode: "host"} 72 nodeHostName, err := os.Hostname() 73 if err != nil { 74 return p, err 75 } 76 p.Hostname = nodeHostName 77 p.Uts = "host" 78 } 79 if podYAML.Spec.HostAliases != nil { 80 if p.Net.NoHosts { 81 return p, errors.New("HostAliases in yaml file will not work with --no-hosts") 82 } 83 hosts := make([]string, 0, len(podYAML.Spec.HostAliases)) 84 for _, hostAlias := range podYAML.Spec.HostAliases { 85 for _, host := range hostAlias.Hostnames { 86 hosts = append(hosts, host+":"+hostAlias.IP) 87 } 88 } 89 p.Net.AddHosts = hosts 90 } 91 podPorts := getPodPorts(podYAML.Spec.Containers, publishAllPorts) 92 p.Net.PublishPorts = podPorts 93 94 if dnsConfig := podYAML.Spec.DNSConfig; dnsConfig != nil { 95 // name servers 96 if dnsServers := dnsConfig.Nameservers; len(dnsServers) > 0 { 97 servers := make([]net.IP, 0) 98 for _, server := range dnsServers { 99 servers = append(servers, net.ParseIP(server)) 100 } 101 p.Net.DNSServers = servers 102 } 103 // search domains 104 if domains := dnsConfig.Searches; len(domains) > 0 { 105 p.Net.DNSSearch = domains 106 } 107 // dns options 108 if options := dnsConfig.Options; len(options) > 0 { 109 dnsOptions := make([]string, 0, len(options)) 110 for _, opts := range options { 111 d := opts.Name 112 if opts.Value != nil { 113 d += ":" + *opts.Value 114 } 115 dnsOptions = append(dnsOptions, d) 116 } 117 p.Net.DNSOptions = dnsOptions 118 } 119 } 120 121 if pscConfig := podYAML.Spec.SecurityContext; pscConfig != nil { 122 // Extract sysctl list from pod security context 123 if options := pscConfig.Sysctls; len(options) > 0 { 124 sysctlOptions := make([]string, 0, len(options)) 125 for _, opts := range options { 126 sysctlOptions = append(sysctlOptions, opts.Name+"="+opts.Value) 127 } 128 p.Sysctl = sysctlOptions 129 } 130 } 131 return p, nil 132 } 133 134 type CtrSpecGenOptions struct { 135 // Annotations from the Pod 136 Annotations map[string]string 137 // Container as read from the pod yaml 138 Container v1.Container 139 // Image available to use (pulled or found local) 140 Image *libimage.Image 141 // IPCNSIsHost tells the container to use the host ipcns 142 IpcNSIsHost bool 143 // Volumes for all containers 144 Volumes map[string]*KubeVolume 145 // PodID of the parent pod 146 PodID string 147 // PodName of the parent pod 148 PodName string 149 // PodInfraID as the infrastructure container id 150 PodInfraID string 151 // ConfigMaps the configuration maps for environment variables 152 ConfigMaps []v1.ConfigMap 153 // SeccompPaths for finding the seccomp profile path 154 SeccompPaths *KubeSeccompPaths 155 // ReadOnly make all containers root file system readonly 156 ReadOnly itypes.OptionalBool 157 // RestartPolicy defines the restart policy of the container 158 RestartPolicy string 159 // NetNSIsHost tells the container to use the host netns 160 NetNSIsHost bool 161 // UserNSIsHost tells the container to use the host userns 162 UserNSIsHost bool 163 // PidNSIsHost tells the container to use the host pidns 164 PidNSIsHost bool 165 // UtsNSIsHost tells the container to use the host utsns 166 UtsNSIsHost bool 167 // SecretManager to access the secrets 168 SecretsManager *secrets.SecretsManager 169 // LogDriver which should be used for the container 170 LogDriver string 171 // LogOptions log options which should be used for the container 172 LogOptions []string 173 // Labels define key-value pairs of metadata 174 Labels map[string]string 175 // 176 IsInfra bool 177 // InitContainerType sets what type the init container is 178 // Note: When playing a kube yaml, the inti container type will be set to "always" only 179 InitContainerType string 180 // PodSecurityContext is the security context specified for the pod 181 PodSecurityContext *v1.PodSecurityContext 182 // TerminationGracePeriodSeconds is the grace period given to a container to stop before being forcefully killed 183 TerminationGracePeriodSeconds *int64 184 } 185 186 func ToSpecGen(ctx context.Context, opts *CtrSpecGenOptions) (*specgen.SpecGenerator, error) { 187 s := specgen.NewSpecGenerator(opts.Container.Image, false) 188 189 rtc, err := config.Default() 190 if err != nil { 191 return nil, err 192 } 193 194 if s.Umask == "" { 195 s.Umask = rtc.Umask() 196 } 197 198 if s.CgroupsMode == "" { 199 s.CgroupsMode = rtc.Cgroups() 200 } 201 if len(s.ImageVolumeMode) == 0 { 202 s.ImageVolumeMode = rtc.Engine.ImageVolumeMode 203 } 204 if s.ImageVolumeMode == define.TypeBind { 205 s.ImageVolumeMode = "anonymous" 206 } 207 208 // pod name should be non-empty for Deployment objects to be able to create 209 // multiple pods having containers with unique names 210 if len(opts.PodName) < 1 { 211 return nil, errors.New("got empty pod name on container creation when playing kube") 212 } 213 214 s.Name = fmt.Sprintf("%s-%s", opts.PodName, opts.Container.Name) 215 216 s.Terminal = opts.Container.TTY 217 218 s.Pod = opts.PodID 219 220 s.LogConfiguration = &specgen.LogConfig{ 221 Driver: opts.LogDriver, 222 } 223 224 s.LogConfiguration.Options = make(map[string]string) 225 for _, o := range opts.LogOptions { 226 split := strings.SplitN(o, "=", 2) 227 if len(split) < 2 { 228 return nil, fmt.Errorf("invalid log option %q", o) 229 } 230 switch strings.ToLower(split[0]) { 231 case "driver": 232 s.LogConfiguration.Driver = split[1] 233 case "path": 234 s.LogConfiguration.Path = split[1] 235 case "max-size": 236 logSize, err := units.FromHumanSize(split[1]) 237 if err != nil { 238 return nil, err 239 } 240 s.LogConfiguration.Size = logSize 241 default: 242 switch len(split[1]) { 243 case 0: 244 return nil, fmt.Errorf("invalid log option: %w", define.ErrInvalidArg) 245 default: 246 // tags for journald only 247 if s.LogConfiguration.Driver == "" || s.LogConfiguration.Driver == define.JournaldLogging { 248 s.LogConfiguration.Options[split[0]] = split[1] 249 } else { 250 logrus.Warnf("Can only set tags with journald log driver but driver is %q", s.LogConfiguration.Driver) 251 } 252 } 253 } 254 } 255 256 s.InitContainerType = opts.InitContainerType 257 258 setupSecurityContext(s, opts.Container.SecurityContext, opts.PodSecurityContext) 259 err = setupLivenessProbe(s, opts.Container, opts.RestartPolicy) 260 if err != nil { 261 return nil, fmt.Errorf("failed to configure livenessProbe: %w", err) 262 } 263 err = setupStartupProbe(s, opts.Container, opts.RestartPolicy) 264 if err != nil { 265 return nil, fmt.Errorf("failed to configure startupProbe: %w", err) 266 } 267 268 // Since we prefix the container name with pod name to work-around the uniqueness requirement, 269 // the seccomp profile should reference the actual container name from the YAML 270 // but apply to the containers with the prefixed name 271 s.SeccompProfilePath = opts.SeccompPaths.FindForContainer(opts.Container.Name) 272 273 s.ResourceLimits = &spec.LinuxResources{} 274 milliCPU := opts.Container.Resources.Limits.Cpu().MilliValue() 275 if milliCPU > 0 { 276 period, quota := util.CoresToPeriodAndQuota(float64(milliCPU) / 1000) 277 s.ResourceLimits.CPU = &spec.LinuxCPU{ 278 Quota: "a, 279 Period: &period, 280 } 281 } 282 283 limit, err := quantityToInt64(opts.Container.Resources.Limits.Memory()) 284 if err != nil { 285 return nil, fmt.Errorf("failed to set memory limit: %w", err) 286 } 287 288 memoryRes, err := quantityToInt64(opts.Container.Resources.Requests.Memory()) 289 if err != nil { 290 return nil, fmt.Errorf("failed to set memory reservation: %w", err) 291 } 292 293 if limit > 0 || memoryRes > 0 { 294 s.ResourceLimits.Memory = &spec.LinuxMemory{} 295 } 296 297 if limit > 0 { 298 s.ResourceLimits.Memory.Limit = &limit 299 } 300 301 if memoryRes > 0 { 302 s.ResourceLimits.Memory.Reservation = &memoryRes 303 } 304 305 ulimitVal, ok := opts.Annotations[define.UlimitAnnotation] 306 if ok { 307 ulimits := strings.Split(ulimitVal, ",") 308 for _, ul := range ulimits { 309 parsed, err := units.ParseUlimit(ul) 310 if err != nil { 311 return nil, err 312 } 313 s.Rlimits = append(s.Rlimits, spec.POSIXRlimit{Type: parsed.Name, Soft: uint64(parsed.Soft), Hard: uint64(parsed.Hard)}) 314 } 315 } 316 317 // TODO: We don't understand why specgen does not take of this, but 318 // integration tests clearly pointed out that it was required. 319 imageData, err := opts.Image.Inspect(ctx, nil) 320 if err != nil { 321 return nil, err 322 } 323 s.WorkDir = "/" 324 // Entrypoint/Command handling is based off of 325 // https://kubernetes.io/docs/tasks/inject-data-application/define-command-argument-container/#notes 326 if imageData != nil && imageData.Config != nil { 327 if imageData.Config.WorkingDir != "" { 328 s.WorkDir = imageData.Config.WorkingDir 329 } 330 if s.User == "" { 331 s.User = imageData.Config.User 332 } 333 334 exposed, err := generate.GenExposedPorts(imageData.Config.ExposedPorts) 335 if err != nil { 336 return nil, err 337 } 338 339 for k, v := range s.Expose { 340 exposed[k] = v 341 } 342 s.Expose = exposed 343 // Pull entrypoint and cmd from image 344 s.Entrypoint = imageData.Config.Entrypoint 345 s.Command = imageData.Config.Cmd 346 s.Labels = imageData.Config.Labels 347 if len(imageData.Config.StopSignal) > 0 { 348 stopSignal, err := util.ParseSignal(imageData.Config.StopSignal) 349 if err != nil { 350 return nil, err 351 } 352 s.StopSignal = &stopSignal 353 } 354 } 355 // If only the yaml.Command is specified, set it as the entrypoint and drop the image Cmd 356 if !opts.IsInfra && len(opts.Container.Command) != 0 { 357 s.Entrypoint = opts.Container.Command 358 s.Command = []string{} 359 } 360 // Only override the cmd field if yaml.Args is specified 361 // Keep the image entrypoint, or the yaml.command if specified 362 if !opts.IsInfra && len(opts.Container.Args) != 0 { 363 s.Command = opts.Container.Args 364 } 365 366 // FIXME, 367 // we are currently ignoring imageData.Config.ExposedPorts 368 if !opts.IsInfra && opts.Container.WorkingDir != "" { 369 s.WorkDir = opts.Container.WorkingDir 370 } 371 372 annotations := make(map[string]string) 373 if opts.Annotations != nil { 374 annotations = opts.Annotations 375 } 376 if opts.PodInfraID != "" { 377 annotations[ann.SandboxID] = opts.PodInfraID 378 } 379 s.Annotations = annotations 380 381 if containerCIDFile, ok := opts.Annotations[define.InspectAnnotationCIDFile+"/"+opts.Container.Name]; ok { 382 s.Annotations[define.InspectAnnotationCIDFile] = containerCIDFile 383 } 384 385 if seccomp, ok := opts.Annotations[define.InspectAnnotationSeccomp+"/"+opts.Container.Name]; ok { 386 s.Annotations[define.InspectAnnotationSeccomp] = seccomp 387 } 388 389 if apparmor, ok := opts.Annotations[define.InspectAnnotationApparmor+"/"+opts.Container.Name]; ok { 390 s.Annotations[define.InspectAnnotationApparmor] = apparmor 391 } 392 393 if label, ok := opts.Annotations[define.InspectAnnotationLabel+"/"+opts.Container.Name]; ok { 394 if label == "nested" { 395 s.ContainerSecurityConfig.LabelNested = true 396 } 397 if !slices.Contains(s.ContainerSecurityConfig.SelinuxOpts, label) { 398 s.ContainerSecurityConfig.SelinuxOpts = append(s.ContainerSecurityConfig.SelinuxOpts, label) 399 } 400 s.Annotations[define.InspectAnnotationLabel] = strings.Join(s.ContainerSecurityConfig.SelinuxOpts, ",label=") 401 } 402 403 if autoremove, ok := opts.Annotations[define.InspectAnnotationAutoremove+"/"+opts.Container.Name]; ok { 404 autoremoveAsBool, err := strconv.ParseBool(autoremove) 405 if err != nil { 406 return nil, err 407 } 408 s.Remove = autoremoveAsBool 409 s.Annotations[define.InspectAnnotationAutoremove] = autoremove 410 } 411 412 if init, ok := opts.Annotations[define.InspectAnnotationInit+"/"+opts.Container.Name]; ok { 413 initAsBool, err := strconv.ParseBool(init) 414 if err != nil { 415 return nil, err 416 } 417 418 s.Init = initAsBool 419 s.Annotations[define.InspectAnnotationInit] = init 420 } 421 422 if publishAll, ok := opts.Annotations[define.InspectAnnotationPublishAll+"/"+opts.Container.Name]; ok { 423 if opts.IsInfra { 424 publishAllAsBool, err := strconv.ParseBool(publishAll) 425 if err != nil { 426 return nil, err 427 } 428 s.PublishExposedPorts = publishAllAsBool 429 } 430 431 s.Annotations[define.InspectAnnotationPublishAll] = publishAll 432 } 433 434 s.Annotations[define.KubeHealthCheckAnnotation] = "true" 435 436 // Environment Variables 437 envs := map[string]string{} 438 for _, env := range imageData.Config.Env { 439 keyval := strings.SplitN(env, "=", 2) 440 envs[keyval[0]] = keyval[1] 441 } 442 443 for _, env := range opts.Container.Env { 444 value, err := envVarValue(env, opts) 445 if err != nil { 446 return nil, err 447 } 448 449 // Only set the env if the value is not nil 450 if value != nil { 451 envs[env.Name] = *value 452 } 453 } 454 for _, envFrom := range opts.Container.EnvFrom { 455 cmEnvs, err := envVarsFrom(envFrom, opts) 456 if err != nil { 457 return nil, err 458 } 459 460 for k, v := range cmEnvs { 461 envs[k] = v 462 } 463 } 464 s.Env = envs 465 466 for _, volume := range opts.Container.VolumeMounts { 467 volumeSource, exists := opts.Volumes[volume.Name] 468 if !exists { 469 return nil, fmt.Errorf("volume mount %s specified for container but not configured in volumes", volume.Name) 470 } 471 // Skip if the volume is optional. This means that a configmap for a configmap volume was not found but it was 472 // optional so we can move on without throwing an error 473 if exists && volumeSource.Optional { 474 continue 475 } 476 477 dest, options, err := parseMountPath(volume.MountPath, volume.ReadOnly, volume.MountPropagation) 478 if err != nil { 479 return nil, err 480 } 481 482 volume.MountPath = dest 483 switch volumeSource.Type { 484 case KubeVolumeTypeBindMount: 485 // If the container has bind mounts, we need to check if 486 // a selinux mount option exists for it 487 for k, v := range opts.Annotations { 488 // Make sure the z/Z option is not already there (from editing the YAML) 489 if k == define.BindMountPrefix { 490 lastIndex := strings.LastIndex(v, ":") 491 if v[:lastIndex] == volumeSource.Source && !cutil.StringInSlice("z", options) && !cutil.StringInSlice("Z", options) { 492 options = append(options, v[lastIndex+1:]) 493 } 494 } 495 } 496 mount := spec.Mount{ 497 Destination: volume.MountPath, 498 Source: volumeSource.Source, 499 Type: define.TypeBind, 500 Options: options, 501 } 502 if len(volume.SubPath) > 0 { 503 mount.Options = append(mount.Options, fmt.Sprintf("subpath=%s", volume.SubPath)) 504 } 505 s.Mounts = append(s.Mounts, mount) 506 case KubeVolumeTypeNamed: 507 namedVolume := specgen.NamedVolume{ 508 Dest: volume.MountPath, 509 Name: volumeSource.Source, 510 Options: options, 511 SubPath: volume.SubPath, 512 } 513 s.Volumes = append(s.Volumes, &namedVolume) 514 case KubeVolumeTypeConfigMap: 515 cmVolume := specgen.NamedVolume{ 516 Dest: volume.MountPath, 517 Name: volumeSource.Source, 518 Options: options, 519 SubPath: volume.SubPath, 520 } 521 s.Volumes = append(s.Volumes, &cmVolume) 522 case KubeVolumeTypeCharDevice: 523 // We are setting the path as hostPath:mountPath to comply with pkg/specgen/generate.DeviceFromPath. 524 // The type is here just to improve readability as it is not taken into account when the actual device is created. 525 device := spec.LinuxDevice{ 526 Path: fmt.Sprintf("%s:%s", volumeSource.Source, volume.MountPath), 527 Type: "c", 528 } 529 s.Devices = append(s.Devices, device) 530 case KubeVolumeTypeBlockDevice: 531 // We are setting the path as hostPath:mountPath to comply with pkg/specgen/generate.DeviceFromPath. 532 // The type is here just to improve readability as it is not taken into account when the actual device is created. 533 device := spec.LinuxDevice{ 534 Path: fmt.Sprintf("%s:%s", volumeSource.Source, volume.MountPath), 535 Type: "b", 536 } 537 s.Devices = append(s.Devices, device) 538 case KubeVolumeTypeSecret: 539 // in podman play kube we need to add these secrets as volumes rather than as 540 // specgen.Secrets. Adding them as volumes allows for all key: value pairs to be mounted 541 secretVolume := specgen.NamedVolume{ 542 Dest: volume.MountPath, 543 Name: volumeSource.Source, 544 Options: options, 545 SubPath: volume.SubPath, 546 } 547 s.Volumes = append(s.Volumes, &secretVolume) 548 case KubeVolumeTypeEmptyDir: 549 emptyDirVolume := specgen.NamedVolume{ 550 Dest: volume.MountPath, 551 Name: volumeSource.Source, 552 Options: options, 553 IsAnonymous: true, 554 SubPath: volume.SubPath, 555 } 556 s.Volumes = append(s.Volumes, &emptyDirVolume) 557 default: 558 return nil, errors.New("unsupported volume source type") 559 } 560 } 561 562 s.RestartPolicy = opts.RestartPolicy 563 564 if opts.NetNSIsHost { 565 s.NetNS.NSMode = specgen.Host 566 } 567 if opts.UserNSIsHost { 568 s.UserNS.NSMode = specgen.Host 569 } 570 if opts.PidNSIsHost { 571 s.PidNS.NSMode = specgen.Host 572 } 573 if opts.IpcNSIsHost { 574 s.IpcNS.NSMode = specgen.Host 575 } 576 if opts.UtsNSIsHost { 577 s.UtsNS.NSMode = specgen.Host 578 } 579 580 // Add labels that come from kube 581 if len(s.Labels) == 0 { 582 // If there are no labels, let's use the map that comes 583 // from kube 584 s.Labels = opts.Labels 585 } else { 586 // If there are already labels in the map, append the ones 587 // obtained from kube 588 for k, v := range opts.Labels { 589 s.Labels[k] = v 590 } 591 } 592 593 if ro := opts.ReadOnly; ro != itypes.OptionalBoolUndefined { 594 s.ReadOnlyFilesystem = ro == itypes.OptionalBoolTrue 595 } 596 // This should default to true for kubernetes yaml 597 s.ReadWriteTmpfs = true 598 599 // Make sure the container runs in a systemd unit which is 600 // stored as a label at container creation. 601 if unit := os.Getenv(systemdDefine.EnvVariable); unit != "" { 602 s.Labels[systemdDefine.EnvVariable] = unit 603 } 604 605 // Set the stopTimeout if terminationGracePeriodSeconds is set in the kube yaml 606 if opts.TerminationGracePeriodSeconds != nil { 607 timeout := uint(*opts.TerminationGracePeriodSeconds) 608 s.StopTimeout = &timeout 609 } 610 611 return s, nil 612 } 613 614 func parseMountPath(mountPath string, readOnly bool, propagationMode *v1.MountPropagationMode) (string, []string, error) { 615 options := []string{} 616 splitVol := strings.Split(mountPath, ":") 617 if len(splitVol) > 2 { 618 return "", options, fmt.Errorf("%q incorrect volume format, should be ctr-dir[:option]", mountPath) 619 } 620 dest := splitVol[0] 621 if len(splitVol) > 1 { 622 options = strings.Split(splitVol[1], ",") 623 } 624 if err := parse.ValidateVolumeCtrDir(dest); err != nil { 625 return "", options, fmt.Errorf("parsing MountPath: %w", err) 626 } 627 if readOnly { 628 options = append(options, "ro") 629 } 630 opts, err := parse.ValidateVolumeOpts(options) 631 if err != nil { 632 return "", opts, fmt.Errorf("parsing MountOptions: %w", err) 633 } 634 if propagationMode != nil { 635 switch *propagationMode { 636 case v1.MountPropagationNone: 637 opts = append(opts, "private") 638 case v1.MountPropagationHostToContainer: 639 opts = append(opts, "rslave") 640 case v1.MountPropagationBidirectional: 641 opts = append(opts, "rshared") 642 default: 643 return "", opts, fmt.Errorf("unknown propagation mode %q", *propagationMode) 644 } 645 } 646 return dest, opts, nil 647 } 648 649 func probeToHealthConfig(probe *v1.Probe, containerPorts []v1.ContainerPort) (*manifest.Schema2HealthConfig, error) { 650 var commandString string 651 failureCmd := "exit 1" 652 probeHandler := probe.Handler 653 host := "localhost" // Kubernetes default is host IP, but with Podman currently we run inside the container 654 655 // configure healthcheck on the basis of Handler Actions. 656 switch { 657 case probeHandler.Exec != nil: 658 // `makeHealthCheck` function can accept a json array as the command. 659 cmd, err := json.Marshal(probeHandler.Exec.Command) 660 if err != nil { 661 return nil, err 662 } 663 commandString = string(cmd) 664 case probeHandler.HTTPGet != nil: 665 // set defaults as in https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#http-probes 666 uriScheme := v1.URISchemeHTTP 667 if probeHandler.HTTPGet.Scheme != "" { 668 uriScheme = probeHandler.HTTPGet.Scheme 669 } 670 if probeHandler.HTTPGet.Host != "" { 671 host = probeHandler.HTTPGet.Host 672 } 673 path := "/" 674 if probeHandler.HTTPGet.Path != "" { 675 path = probeHandler.HTTPGet.Path 676 } 677 portNum, err := getPortNumber(probeHandler.HTTPGet.Port, containerPorts) 678 if err != nil { 679 return nil, err 680 } 681 commandString = fmt.Sprintf("curl -f %s://%s:%d%s || %s", uriScheme, host, portNum, path, failureCmd) 682 case probeHandler.TCPSocket != nil: 683 portNum, err := getPortNumber(probeHandler.TCPSocket.Port, containerPorts) 684 if err != nil { 685 return nil, err 686 } 687 if probeHandler.TCPSocket.Host != "" { 688 host = probeHandler.TCPSocket.Host 689 } 690 commandString = fmt.Sprintf("nc -z -v %s %d || %s", host, portNum, failureCmd) 691 } 692 return makeHealthCheck(commandString, probe.PeriodSeconds, probe.FailureThreshold, probe.TimeoutSeconds, probe.InitialDelaySeconds) 693 } 694 695 func getPortNumber(port intstr.IntOrString, containerPorts []v1.ContainerPort) (int, error) { 696 var portNum int 697 if port.Type == intstr.String && port.IntValue() == 0 { 698 idx := slices.IndexFunc(containerPorts, func(cp v1.ContainerPort) bool { return cp.Name == port.String() }) 699 if idx == -1 { 700 return 0, fmt.Errorf("unknown port: %s", port.String()) 701 } 702 portNum = int(containerPorts[idx].ContainerPort) 703 } else { 704 portNum = port.IntValue() 705 } 706 return portNum, nil 707 } 708 709 func setupLivenessProbe(s *specgen.SpecGenerator, containerYAML v1.Container, restartPolicy string) error { 710 var err error 711 if containerYAML.LivenessProbe == nil { 712 return nil 713 } 714 emptyHandler := v1.Handler{} 715 if containerYAML.LivenessProbe.Handler != emptyHandler { 716 s.HealthConfig, err = probeToHealthConfig(containerYAML.LivenessProbe, containerYAML.Ports) 717 if err != nil { 718 return err 719 } 720 // if restart policy is in place, ensure the health check enforces it 721 if restartPolicy == "always" || restartPolicy == "onfailure" { 722 s.HealthCheckOnFailureAction = define.HealthCheckOnFailureActionRestart 723 } 724 return nil 725 } 726 return nil 727 } 728 729 func setupStartupProbe(s *specgen.SpecGenerator, containerYAML v1.Container, restartPolicy string) error { 730 if containerYAML.StartupProbe == nil { 731 return nil 732 } 733 emptyHandler := v1.Handler{} 734 if containerYAML.StartupProbe.Handler != emptyHandler { 735 healthConfig, err := probeToHealthConfig(containerYAML.StartupProbe, containerYAML.Ports) 736 if err != nil { 737 return err 738 } 739 740 // currently, StartupProbe still an optional feature, and it requires HealthConfig. 741 if s.HealthConfig == nil { 742 probe := containerYAML.StartupProbe 743 s.HealthConfig, err = makeHealthCheck("exit 0", probe.PeriodSeconds, probe.FailureThreshold, probe.TimeoutSeconds, probe.InitialDelaySeconds) 744 if err != nil { 745 return err 746 } 747 } 748 s.StartupHealthConfig = &define.StartupHealthCheck{ 749 Schema2HealthConfig: *healthConfig, 750 Successes: int(containerYAML.StartupProbe.SuccessThreshold), 751 } 752 // if restart policy is in place, ensure the health check enforces it 753 if restartPolicy == "always" || restartPolicy == "onfailure" { 754 s.HealthCheckOnFailureAction = define.HealthCheckOnFailureActionRestart 755 } 756 return nil 757 } 758 return nil 759 } 760 761 func makeHealthCheck(inCmd string, interval int32, retries int32, timeout int32, startPeriod int32) (*manifest.Schema2HealthConfig, error) { 762 // Every healthcheck requires a command 763 if len(inCmd) == 0 { 764 return nil, errors.New("must define a healthcheck command for all healthchecks") 765 } 766 767 // first try to parse option value as JSON array of strings... 768 cmd := []string{} 769 770 if inCmd == "none" { 771 cmd = []string{define.HealthConfigTestNone} 772 } else { 773 err := json.Unmarshal([]byte(inCmd), &cmd) 774 if err != nil { 775 // ...otherwise pass it to "/bin/sh -c" inside the container 776 cmd = []string{define.HealthConfigTestCmdShell} 777 cmd = append(cmd, strings.Split(inCmd, " ")...) 778 } else { 779 cmd = append([]string{define.HealthConfigTestCmd}, cmd...) 780 } 781 } 782 hc := manifest.Schema2HealthConfig{ 783 Test: cmd, 784 } 785 786 if interval < 1 { 787 // kubernetes interval defaults to 10 sec and cannot be less than 1 788 interval = 10 789 } 790 hc.Interval = time.Duration(interval) * time.Second 791 if retries < 1 { 792 // kubernetes retries defaults to 3 793 retries = 3 794 } 795 hc.Retries = int(retries) 796 if timeout < 1 { 797 // kubernetes timeout defaults to 1 798 timeout = 1 799 } 800 timeoutDuration := time.Duration(timeout) * time.Second 801 if timeoutDuration < time.Duration(1) { 802 return nil, errors.New("healthcheck-timeout must be at least 1 second") 803 } 804 hc.Timeout = timeoutDuration 805 806 startPeriodDuration := time.Duration(startPeriod) * time.Second 807 if startPeriodDuration < time.Duration(0) { 808 return nil, errors.New("healthcheck-start-period must be 0 seconds or greater") 809 } 810 hc.StartPeriod = startPeriodDuration 811 812 return &hc, nil 813 } 814 815 func setupSecurityContext(s *specgen.SpecGenerator, securityContext *v1.SecurityContext, podSecurityContext *v1.PodSecurityContext) { 816 if securityContext == nil { 817 securityContext = &v1.SecurityContext{} 818 } 819 if podSecurityContext == nil { 820 podSecurityContext = &v1.PodSecurityContext{} 821 } 822 823 if securityContext.ReadOnlyRootFilesystem != nil { 824 s.ReadOnlyFilesystem = *securityContext.ReadOnlyRootFilesystem 825 } 826 if securityContext.Privileged != nil { 827 s.Privileged = *securityContext.Privileged 828 } 829 830 if securityContext.AllowPrivilegeEscalation != nil { 831 s.NoNewPrivileges = !*securityContext.AllowPrivilegeEscalation 832 } 833 834 if securityContext.ProcMount != nil && *securityContext.ProcMount == v1.UnmaskedProcMount { 835 s.ContainerSecurityConfig.Unmask = append(s.ContainerSecurityConfig.Unmask, []string{"ALL"}...) 836 } 837 838 seopt := securityContext.SELinuxOptions 839 if seopt == nil { 840 seopt = podSecurityContext.SELinuxOptions 841 } 842 if seopt != nil { 843 if seopt.User != "" { 844 s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("user:%s", seopt.User)) 845 } 846 if seopt.Role != "" { 847 s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("role:%s", seopt.Role)) 848 } 849 if seopt.Type != "" { 850 s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("type:%s", seopt.Type)) 851 } 852 if seopt.Level != "" { 853 s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("level:%s", seopt.Level)) 854 } 855 if seopt.FileType != "" { 856 s.SelinuxOpts = append(s.SelinuxOpts, fmt.Sprintf("filetype:%s", seopt.FileType)) 857 } 858 } 859 if caps := securityContext.Capabilities; caps != nil { 860 for _, capability := range caps.Add { 861 s.CapAdd = append(s.CapAdd, string(capability)) 862 } 863 for _, capability := range caps.Drop { 864 s.CapDrop = append(s.CapDrop, string(capability)) 865 } 866 } 867 runAsUser := securityContext.RunAsUser 868 if runAsUser == nil { 869 runAsUser = podSecurityContext.RunAsUser 870 } 871 if runAsUser != nil { 872 s.User = strconv.FormatInt(*runAsUser, 10) 873 } 874 875 runAsGroup := securityContext.RunAsGroup 876 if runAsGroup == nil { 877 runAsGroup = podSecurityContext.RunAsGroup 878 } 879 if runAsGroup != nil { 880 if s.User == "" { 881 s.User = "0" 882 } 883 s.User = fmt.Sprintf("%s:%d", s.User, *runAsGroup) 884 } 885 for _, group := range podSecurityContext.SupplementalGroups { 886 s.Groups = append(s.Groups, strconv.FormatInt(group, 10)) 887 } 888 } 889 890 func quantityToInt64(quantity *resource.Quantity) (int64, error) { 891 if i, ok := quantity.AsInt64(); ok { 892 return i, nil 893 } 894 895 if i, ok := quantity.AsDec().Unscaled(); ok { 896 return i, nil 897 } 898 899 return 0, fmt.Errorf("quantity cannot be represented as int64: %v", quantity) 900 } 901 902 // read a k8s secret in JSON/YAML format from the secret manager 903 // k8s secret is stored as YAML, we have to read data as JSON for backward compatibility 904 func k8sSecretFromSecretManager(name string, secretsManager *secrets.SecretsManager) (map[string][]byte, error) { 905 _, inputSecret, err := secretsManager.LookupSecretData(name) 906 if err != nil { 907 return nil, err 908 } 909 910 var secrets map[string][]byte 911 if err := json.Unmarshal(inputSecret, &secrets); err != nil { 912 secrets = make(map[string][]byte) 913 var secret v1.Secret 914 if err := yaml.Unmarshal(inputSecret, &secret); err != nil { 915 return nil, fmt.Errorf("secret %v is not valid JSON/YAML: %v", name, err) 916 } 917 918 for key, val := range secret.Data { 919 secrets[key] = val 920 } 921 922 for key, val := range secret.StringData { 923 secrets[key] = []byte(val) 924 } 925 } 926 927 return secrets, nil 928 } 929 930 // envVarsFrom returns all key-value pairs as env vars from a configMap or secret that matches the envFrom setting of a container 931 func envVarsFrom(envFrom v1.EnvFromSource, opts *CtrSpecGenOptions) (map[string]string, error) { 932 envs := map[string]string{} 933 934 if envFrom.ConfigMapRef != nil { 935 cmRef := envFrom.ConfigMapRef 936 err := fmt.Errorf("configmap %v not found", cmRef.Name) 937 938 for _, c := range opts.ConfigMaps { 939 if cmRef.Name == c.Name { 940 envs = c.Data 941 err = nil 942 break 943 } 944 } 945 946 if err != nil && (cmRef.Optional == nil || !*cmRef.Optional) { 947 return nil, err 948 } 949 } 950 951 if envFrom.SecretRef != nil { 952 secRef := envFrom.SecretRef 953 secret, err := k8sSecretFromSecretManager(secRef.Name, opts.SecretsManager) 954 if err == nil { 955 for k, v := range secret { 956 envs[k] = string(v) 957 } 958 } else if secRef.Optional == nil || !*secRef.Optional { 959 return nil, err 960 } 961 } 962 963 return envs, nil 964 } 965 966 // envVarValue returns the environment variable value configured within the container's env setting. 967 // It gets the value from a configMap or secret if specified, otherwise returns env.Value 968 func envVarValue(env v1.EnvVar, opts *CtrSpecGenOptions) (*string, error) { 969 if env.ValueFrom != nil { 970 if env.ValueFrom.ConfigMapKeyRef != nil { 971 cmKeyRef := env.ValueFrom.ConfigMapKeyRef 972 err := fmt.Errorf("cannot set env %v: configmap %v not found", env.Name, cmKeyRef.Name) 973 974 for _, c := range opts.ConfigMaps { 975 if cmKeyRef.Name == c.Name { 976 if value, ok := c.Data[cmKeyRef.Key]; ok { 977 return &value, nil 978 } 979 err = fmt.Errorf("cannot set env %v: key %s not found in configmap %v", env.Name, cmKeyRef.Key, cmKeyRef.Name) 980 break 981 } 982 } 983 if cmKeyRef.Optional == nil || !*cmKeyRef.Optional { 984 return nil, err 985 } 986 return nil, nil 987 } 988 989 if env.ValueFrom.SecretKeyRef != nil { 990 secKeyRef := env.ValueFrom.SecretKeyRef 991 secret, err := k8sSecretFromSecretManager(secKeyRef.Name, opts.SecretsManager) 992 if err == nil { 993 if val, ok := secret[secKeyRef.Key]; ok { 994 value := string(val) 995 return &value, nil 996 } 997 err = fmt.Errorf("secret %v has not %v key", secKeyRef.Name, secKeyRef.Key) 998 } 999 if secKeyRef.Optional == nil || !*secKeyRef.Optional { 1000 return nil, fmt.Errorf("cannot set env %v: %v", env.Name, err) 1001 } 1002 return nil, nil 1003 } 1004 1005 if env.ValueFrom.FieldRef != nil { 1006 return envVarValueFieldRef(env, opts) 1007 } 1008 1009 if env.ValueFrom.ResourceFieldRef != nil { 1010 return envVarValueResourceFieldRef(env, opts) 1011 } 1012 } 1013 1014 return &env.Value, nil 1015 } 1016 1017 func envVarValueFieldRef(env v1.EnvVar, opts *CtrSpecGenOptions) (*string, error) { 1018 fieldRef := env.ValueFrom.FieldRef 1019 1020 fieldPathLabelPattern := `^metadata.labels\['(.+)'\]$` 1021 fieldPathLabelRegex := regexp.MustCompile(fieldPathLabelPattern) 1022 fieldPathAnnotationPattern := `^metadata.annotations\['(.+)'\]$` 1023 fieldPathAnnotationRegex := regexp.MustCompile(fieldPathAnnotationPattern) 1024 1025 fieldPath := fieldRef.FieldPath 1026 1027 if fieldPath == "metadata.name" { 1028 return &opts.PodName, nil 1029 } 1030 if fieldPath == "metadata.uid" { 1031 return &opts.PodID, nil 1032 } 1033 fieldPathMatches := fieldPathLabelRegex.FindStringSubmatch(fieldPath) 1034 if len(fieldPathMatches) == 2 { // 1 for entire regex and 1 for subexp 1035 labelValue := opts.Labels[fieldPathMatches[1]] // not existent label is OK 1036 return &labelValue, nil 1037 } 1038 fieldPathMatches = fieldPathAnnotationRegex.FindStringSubmatch(fieldPath) 1039 if len(fieldPathMatches) == 2 { // 1 for entire regex and 1 for subexp 1040 annotationValue := opts.Annotations[fieldPathMatches[1]] // not existent annotation is OK 1041 return &annotationValue, nil 1042 } 1043 1044 return nil, fmt.Errorf( 1045 "can not set env %v. Reason: fieldPath %v is either not valid or not supported", 1046 env.Name, fieldPath, 1047 ) 1048 } 1049 1050 func envVarValueResourceFieldRef(env v1.EnvVar, opts *CtrSpecGenOptions) (*string, error) { 1051 divisor := env.ValueFrom.ResourceFieldRef.Divisor 1052 if divisor.IsZero() { // divisor not set, use default 1053 divisor.Set(1) 1054 } 1055 1056 resources, err := getContainerResources(opts.Container) 1057 if err != nil { 1058 return nil, err 1059 } 1060 1061 var value *resource.Quantity 1062 resourceName := env.ValueFrom.ResourceFieldRef.Resource 1063 var isValidDivisor bool 1064 1065 switch resourceName { 1066 case "limits.memory": 1067 value = resources.Limits.Memory() 1068 isValidDivisor = isMemoryDivisor(divisor) 1069 case "limits.cpu": 1070 value = resources.Limits.Cpu() 1071 isValidDivisor = isCPUDivisor(divisor) 1072 case "requests.memory": 1073 value = resources.Requests.Memory() 1074 isValidDivisor = isMemoryDivisor(divisor) 1075 case "requests.cpu": 1076 value = resources.Requests.Cpu() 1077 isValidDivisor = isCPUDivisor(divisor) 1078 default: 1079 return nil, fmt.Errorf( 1080 "can not set env %v. Reason: resource %v is either not valid or not supported", 1081 env.Name, resourceName, 1082 ) 1083 } 1084 1085 if !isValidDivisor { 1086 return nil, fmt.Errorf( 1087 "can not set env %s. Reason: divisor value %s is not valid", 1088 env.Name, divisor.String(), 1089 ) 1090 } 1091 1092 // k8s rounds up the result to the nearest integer 1093 intValue := int64(math.Ceil(value.AsApproximateFloat64() / divisor.AsApproximateFloat64())) 1094 stringValue := strconv.FormatInt(intValue, 10) 1095 1096 return &stringValue, nil 1097 } 1098 1099 func isMemoryDivisor(divisor resource.Quantity) bool { 1100 switch divisor.String() { 1101 case "1", "1k", "1M", "1G", "1T", "1P", "1E", "1Ki", "1Mi", "1Gi", "1Ti", "1Pi", "1Ei": 1102 return true 1103 default: 1104 return false 1105 } 1106 } 1107 1108 func isCPUDivisor(divisor resource.Quantity) bool { 1109 switch divisor.String() { 1110 case "1", "1m": 1111 return true 1112 default: 1113 return false 1114 } 1115 } 1116 1117 func getContainerResources(container v1.Container) (v1.ResourceRequirements, error) { 1118 result := v1.ResourceRequirements{ 1119 Limits: v1.ResourceList{}, 1120 Requests: v1.ResourceList{}, 1121 } 1122 1123 limits := container.Resources.Limits 1124 requests := container.Resources.Requests 1125 1126 if limits == nil || limits.Memory().IsZero() { 1127 mi, err := meminfo.Read() 1128 if err != nil { 1129 return result, err 1130 } 1131 result.Limits[v1.ResourceMemory] = *resource.NewQuantity(mi.MemTotal, resource.DecimalSI) 1132 } else { 1133 result.Limits[v1.ResourceMemory] = limits[v1.ResourceMemory] 1134 } 1135 1136 if limits == nil || limits.Cpu().IsZero() { 1137 result.Limits[v1.ResourceCPU] = *resource.NewQuantity(int64(runtime.NumCPU()), resource.DecimalSI) 1138 } else { 1139 result.Limits[v1.ResourceCPU] = limits[v1.ResourceCPU] 1140 } 1141 1142 if requests == nil || requests.Memory().IsZero() { 1143 result.Requests[v1.ResourceMemory] = result.Limits[v1.ResourceMemory] 1144 } else { 1145 result.Requests[v1.ResourceMemory] = requests[v1.ResourceMemory] 1146 } 1147 1148 if requests == nil || requests.Cpu().IsZero() { 1149 result.Requests[v1.ResourceCPU] = result.Limits[v1.ResourceCPU] 1150 } else { 1151 result.Requests[v1.ResourceCPU] = requests[v1.ResourceCPU] 1152 } 1153 1154 return result, nil 1155 } 1156 1157 // getPodPorts converts a slice of kube container descriptions to an 1158 // array of portmapping 1159 func getPodPorts(containers []v1.Container, publishAll bool) []types.PortMapping { 1160 var infraPorts []types.PortMapping 1161 for _, container := range containers { 1162 for _, p := range container.Ports { 1163 if p.HostPort != 0 && p.ContainerPort == 0 { 1164 p.ContainerPort = p.HostPort 1165 } 1166 if p.HostPort == 0 && p.ContainerPort != 0 && publishAll { 1167 p.HostPort = p.ContainerPort 1168 } 1169 if p.Protocol == "" { 1170 p.Protocol = "tcp" 1171 } 1172 portBinding := types.PortMapping{ 1173 HostPort: uint16(p.HostPort), 1174 ContainerPort: uint16(p.ContainerPort), 1175 Protocol: strings.ToLower(string(p.Protocol)), 1176 HostIP: p.HostIP, 1177 } 1178 // only hostPort is utilized in podman context, all container ports 1179 // are accessible inside the shared network namespace 1180 if p.HostPort != 0 { 1181 infraPorts = append(infraPorts, portBinding) 1182 } 1183 } 1184 } 1185 return infraPorts 1186 }