github.com/containers/podman/v4@v4.9.4/pkg/specgen/generate/security_linux.go (about) 1 //go:build !remote 2 // +build !remote 3 4 package generate 5 6 import ( 7 "fmt" 8 "strings" 9 10 "github.com/containers/common/libimage" 11 "github.com/containers/common/pkg/apparmor" 12 "github.com/containers/common/pkg/capabilities" 13 "github.com/containers/common/pkg/config" 14 cutil "github.com/containers/common/pkg/util" 15 "github.com/containers/podman/v4/libpod" 16 "github.com/containers/podman/v4/libpod/define" 17 "github.com/containers/podman/v4/pkg/specgen" 18 "github.com/containers/podman/v4/pkg/util" 19 "github.com/opencontainers/runtime-tools/generate" 20 "github.com/opencontainers/selinux/go-selinux/label" 21 "github.com/sirupsen/logrus" 22 ) 23 24 // setLabelOpts sets the label options of the SecurityConfig according to the 25 // input. 26 func setLabelOpts(s *specgen.SpecGenerator, runtime *libpod.Runtime, pidConfig specgen.Namespace, ipcConfig specgen.Namespace) error { 27 if !runtime.EnableLabeling() || s.Privileged { 28 s.SelinuxOpts = label.DisableSecOpt() 29 return nil 30 } 31 32 var labelOpts []string 33 if pidConfig.IsHost() { 34 labelOpts = append(labelOpts, label.DisableSecOpt()...) 35 } else if pidConfig.IsContainer() { 36 ctr, err := runtime.LookupContainer(pidConfig.Value) 37 if err != nil { 38 return fmt.Errorf("container %q not found: %w", pidConfig.Value, err) 39 } 40 secopts, err := label.DupSecOpt(ctr.ProcessLabel()) 41 if err != nil { 42 return fmt.Errorf("failed to duplicate label %q : %w", ctr.ProcessLabel(), err) 43 } 44 labelOpts = append(labelOpts, secopts...) 45 } 46 47 if ipcConfig.IsHost() { 48 labelOpts = append(labelOpts, label.DisableSecOpt()...) 49 } else if ipcConfig.IsContainer() { 50 ctr, err := runtime.LookupContainer(ipcConfig.Value) 51 if err != nil { 52 return fmt.Errorf("container %q not found: %w", ipcConfig.Value, err) 53 } 54 secopts, err := label.DupSecOpt(ctr.ProcessLabel()) 55 if err != nil { 56 return fmt.Errorf("failed to duplicate label %q : %w", ctr.ProcessLabel(), err) 57 } 58 labelOpts = append(labelOpts, secopts...) 59 } 60 61 s.SelinuxOpts = append(s.SelinuxOpts, labelOpts...) 62 return nil 63 } 64 65 func setupApparmor(s *specgen.SpecGenerator, rtc *config.Config, g *generate.Generator) error { 66 hasProfile := len(s.ApparmorProfile) > 0 67 if !apparmor.IsEnabled() { 68 if hasProfile && s.ApparmorProfile != "unconfined" { 69 return fmt.Errorf("apparmor profile %q specified, but Apparmor is not enabled on this system", s.ApparmorProfile) 70 } 71 return nil 72 } 73 // If privileged and caller did not specify apparmor profiles return 74 if s.Privileged && !hasProfile { 75 return nil 76 } 77 if !hasProfile { 78 s.ApparmorProfile = rtc.Containers.ApparmorProfile 79 } 80 if len(s.ApparmorProfile) > 0 { 81 g.SetProcessApparmorProfile(s.ApparmorProfile) 82 } 83 84 return nil 85 } 86 87 func securityConfigureGenerator(s *specgen.SpecGenerator, g *generate.Generator, newImage *libimage.Image, rtc *config.Config) error { 88 var ( 89 caplist []string 90 err error 91 ) 92 // HANDLE CAPABILITIES 93 // NOTE: Must happen before SECCOMP 94 if s.Privileged { 95 g.SetupPrivileged(true) 96 caplist, err = capabilities.BoundingSet() 97 if err != nil { 98 return err 99 } 100 } else { 101 mergedCaps, err := capabilities.MergeCapabilities(rtc.Containers.DefaultCapabilities.Get(), s.CapAdd, s.CapDrop) 102 if err != nil { 103 return err 104 } 105 boundingSet, err := capabilities.BoundingSet() 106 if err != nil { 107 return err 108 } 109 boundingCaps := make(map[string]interface{}) 110 for _, b := range boundingSet { 111 boundingCaps[b] = b 112 } 113 for _, c := range mergedCaps { 114 if _, ok := boundingCaps[c]; ok { 115 caplist = append(caplist, c) 116 } 117 } 118 119 privCapsRequired := []string{} 120 121 // If the container image specifies a label with a 122 // capabilities.ContainerImageLabel then split the comma separated list 123 // of capabilities and record them. This list indicates the only 124 // capabilities, required to run the container. 125 var capsRequiredRequested []string 126 for key, val := range s.Labels { 127 if cutil.StringInSlice(key, capabilities.ContainerImageLabels) { 128 capsRequiredRequested = strings.Split(val, ",") 129 } 130 } 131 if !s.Privileged && len(capsRequiredRequested) == 1 && capsRequiredRequested[0] == "" { 132 caplist = []string{} 133 } else if !s.Privileged && len(capsRequiredRequested) > 0 { 134 // Pass capRequiredRequested in CapAdd field to normalize capabilities names 135 capsRequired, err := capabilities.MergeCapabilities(nil, capsRequiredRequested, nil) 136 if err != nil { 137 return fmt.Errorf("capabilities requested by user or image are not valid: %q: %w", strings.Join(capsRequired, ","), err) 138 } 139 // Verify all capRequired are in the capList 140 for _, cap := range capsRequired { 141 if !cutil.StringInSlice(cap, caplist) { 142 privCapsRequired = append(privCapsRequired, cap) 143 } 144 } 145 if len(privCapsRequired) == 0 { 146 caplist = capsRequired 147 } else { 148 logrus.Errorf("Capabilities requested by user or image are not allowed by default: %q", strings.Join(privCapsRequired, ",")) 149 } 150 } 151 } 152 153 configSpec := g.Config 154 configSpec.Process.Capabilities.Ambient = []string{} 155 156 // Always unset the inheritable capabilities similarly to what the Linux kernel does 157 // They are used only when using capabilities with uid != 0. 158 configSpec.Process.Capabilities.Inheritable = []string{} 159 configSpec.Process.Capabilities.Bounding = caplist 160 161 user := strings.Split(s.User, ":")[0] 162 163 if (user == "" && s.UserNS.NSMode != specgen.KeepID) || user == "root" || user == "0" { 164 configSpec.Process.Capabilities.Effective = caplist 165 configSpec.Process.Capabilities.Permitted = caplist 166 } else { 167 mergedCaps, err := capabilities.MergeCapabilities(nil, s.CapAdd, nil) 168 if err != nil { 169 return fmt.Errorf("capabilities requested by user are not valid: %q: %w", strings.Join(s.CapAdd, ","), err) 170 } 171 boundingSet, err := capabilities.BoundingSet() 172 if err != nil { 173 return err 174 } 175 boundingCaps := make(map[string]interface{}) 176 for _, b := range boundingSet { 177 boundingCaps[b] = b 178 } 179 var userCaps []string 180 for _, c := range mergedCaps { 181 if _, ok := boundingCaps[c]; ok { 182 userCaps = append(userCaps, c) 183 } 184 } 185 configSpec.Process.Capabilities.Effective = userCaps 186 configSpec.Process.Capabilities.Permitted = userCaps 187 188 // Ambient capabilities were added to Linux 4.3. Set ambient 189 // capabilities only when the kernel supports them. 190 if supportAmbientCapabilities() { 191 configSpec.Process.Capabilities.Ambient = userCaps 192 configSpec.Process.Capabilities.Inheritable = userCaps 193 } 194 } 195 196 g.SetProcessNoNewPrivileges(s.NoNewPrivileges) 197 198 if err := setupApparmor(s, rtc, g); err != nil { 199 return err 200 } 201 202 // HANDLE SECCOMP 203 if s.SeccompProfilePath != "unconfined" { 204 seccompConfig, err := getSeccompConfig(s, configSpec, newImage) 205 if err != nil { 206 return err 207 } 208 configSpec.Linux.Seccomp = seccompConfig 209 } 210 211 // Clear default Seccomp profile from Generator for unconfined containers 212 // and privileged containers which do not specify a seccomp profile. 213 if s.SeccompProfilePath == "unconfined" || (s.Privileged && (s.SeccompProfilePath == "" || s.SeccompProfilePath == config.SeccompOverridePath || s.SeccompProfilePath == config.SeccompDefaultPath)) { 214 configSpec.Linux.Seccomp = nil 215 } 216 217 g.SetRootReadonly(s.ReadOnlyFilesystem) 218 219 noUseIPC := s.IpcNS.NSMode == specgen.FromContainer || s.IpcNS.NSMode == specgen.FromPod || s.IpcNS.NSMode == specgen.Host 220 noUseNet := s.NetNS.NSMode == specgen.FromContainer || s.NetNS.NSMode == specgen.FromPod || s.NetNS.NSMode == specgen.Host 221 noUseUTS := s.UtsNS.NSMode == specgen.FromContainer || s.UtsNS.NSMode == specgen.FromPod || s.UtsNS.NSMode == specgen.Host 222 223 // Add default sysctls 224 defaultSysctls, err := util.ValidateSysctls(rtc.Sysctls()) 225 if err != nil { 226 return err 227 } 228 for sysctlKey, sysctlVal := range defaultSysctls { 229 // Ignore mqueue sysctls if --ipc=host 230 if noUseIPC && strings.HasPrefix(sysctlKey, "fs.mqueue.") { 231 logrus.Infof("Sysctl %s=%s ignored in containers.conf, since IPC Namespace set to %q", sysctlKey, sysctlVal, s.IpcNS.NSMode) 232 233 continue 234 } 235 236 // Ignore net sysctls if --net=host 237 if noUseNet && strings.HasPrefix(sysctlKey, "net.") { 238 logrus.Infof("Sysctl %s=%s ignored in containers.conf, since Network Namespace set to host", sysctlKey, sysctlVal) 239 continue 240 } 241 242 // Ignore uts sysctls if --uts=host 243 if noUseUTS && (strings.HasPrefix(sysctlKey, "kernel.domainname") || strings.HasPrefix(sysctlKey, "kernel.hostname")) { 244 logrus.Infof("Sysctl %s=%s ignored in containers.conf, since UTS Namespace set to host", sysctlKey, sysctlVal) 245 continue 246 } 247 248 g.AddLinuxSysctl(sysctlKey, sysctlVal) 249 } 250 251 for sysctlKey, sysctlVal := range s.Sysctl { 252 if s.IpcNS.IsHost() && strings.HasPrefix(sysctlKey, "fs.mqueue.") { 253 return fmt.Errorf("sysctl %s=%s can't be set since IPC Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg) 254 } 255 256 // Ignore net sysctls if --net=host 257 if s.NetNS.IsHost() && strings.HasPrefix(sysctlKey, "net.") { 258 return fmt.Errorf("sysctl %s=%s can't be set since Network Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg) 259 } 260 261 // Ignore uts sysctls if --uts=host 262 if s.UtsNS.IsHost() && (strings.HasPrefix(sysctlKey, "kernel.domainname") || strings.HasPrefix(sysctlKey, "kernel.hostname")) { 263 return fmt.Errorf("sysctl %s=%s can't be set since UTS Namespace set to host: %w", sysctlKey, sysctlVal, define.ErrInvalidArg) 264 } 265 266 g.AddLinuxSysctl(sysctlKey, sysctlVal) 267 } 268 269 return nil 270 }