github.com/rkt/rkt@v1.30.1-0.20200224141603-171c416fac02/stage1/init/common/app.go (about) 1 // Copyright 2016 The rkt Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 //+build linux 16 17 package common 18 19 import ( 20 "errors" 21 "fmt" 22 "os" 23 "path/filepath" 24 25 "github.com/appc/spec/schema" 26 "github.com/appc/spec/schema/types" 27 "github.com/hashicorp/errwrap" 28 29 "github.com/rkt/rkt/common" 30 "github.com/rkt/rkt/common/cgroup" 31 stage1commontypes "github.com/rkt/rkt/stage1/common/types" 32 ) 33 34 // preparedApp contains some internal state needed to actually run an app. 35 // We add this intermediate step to prevent unit file generation from being 36 // totally unwieldy. 37 type preparedApp struct { 38 app *schema.RuntimeApp 39 uid uint32 40 gid uint32 41 env types.Environment 42 resources appResources 43 mounts []Mount 44 noNewPrivileges bool 45 capabilities []string 46 seccomp *seccompFilter 47 48 // Path restrictions 49 roPaths []string 50 hiddenPaths []string 51 hiddenDirs []string 52 } 53 54 type appResources struct { 55 MemoryLimit *uint64 // Memory limit in bytes 56 CPUQuota *uint64 // The hard (absolute) CPU quota as a percent (100 = 1 core) 57 LinuxCPUShares *uint64 // The relative CPU weight in the app's cgroup. 58 LinuxOOMScoreAdjust *int // OOMScoreAdjust knob 59 } 60 61 /* 62 * Paths to protect for non-provileged applications 63 * AKA protectKernelTunables 64 */ 65 var protectKernelROPaths = []string{ 66 "/proc/bus/", 67 "/proc/sys/kernel/core_pattern", 68 "/proc/sys/kernel/modprobe", 69 "/proc/sys/vm/panic_on_oom", 70 "/proc/sysrq-trigger", 71 "/sys/block/", 72 "/sys/bus/", 73 "/sys/class/", 74 "/sys/dev/", 75 "/sys/devices/", 76 "/sys/kernel/", 77 } 78 var protectKernelHiddenDirs = []string{ 79 "/sys/firmware/", 80 "/sys/fs/", 81 "/sys/hypervisor/", 82 "/sys/module/", 83 "/sys/power/", 84 } 85 86 // This is separate because systemd <231 didn't support masking files, 87 // only directories 88 var protectKernelHiddenPaths = []string{ 89 "/proc/config.gz", 90 "/proc/kallsyms", 91 "/proc/sched_debug", 92 "/proc/kcore", 93 "/proc/kmem", 94 "/proc/mem", 95 } 96 97 // prepareApp sets up the internal runtime context for a specific app. 98 func prepareApp(p *stage1commontypes.Pod, ra *schema.RuntimeApp) (*preparedApp, error) { 99 pa := preparedApp{ 100 app: ra, 101 env: ra.App.Environment, 102 noNewPrivileges: getAppNoNewPrivileges(ra.App.Isolators), 103 } 104 var err error 105 106 // Determine numeric uid and gid 107 u, g, err := ParseUserGroup(p, ra) 108 if err != nil { 109 return nil, errwrap.Wrap(errors.New("unable to determine app's uid and gid"), err) 110 } 111 if u < 0 || g < 0 { 112 return nil, errors.New("Invalid uid or gid") 113 } 114 pa.uid = uint32(u) 115 pa.gid = uint32(g) 116 117 // Set some rkt-provided environment variables 118 pa.env.Set("AC_APP_NAME", ra.Name.String()) 119 if p.MetadataServiceURL != "" { 120 pa.env.Set("AC_METADATA_URL", p.MetadataServiceURL) 121 } 122 123 // Determine capability set 124 pa.capabilities, err = getAppCapabilities(ra.App.Isolators) 125 if err != nil { 126 return nil, errwrap.Wrap(errors.New("unable to construct capabilities"), err) 127 } 128 129 // Determine mounts 130 cfd := ConvertedFromDocker(p.Images[ra.Name.String()]) 131 pa.mounts, err = GenerateMounts(ra, p.Manifest.Volumes, cfd) 132 if err != nil { 133 return nil, errwrap.Wrap(errors.New("unable to compute mounts"), err) 134 } 135 136 // Compute resources 137 pa.resources, err = computeAppResources(ra.App.Isolators) 138 if err != nil { 139 return nil, errwrap.Wrap(errors.New("unable to compute resources"), err) 140 } 141 142 // Protect kernel tunables by default 143 if !p.InsecureOptions.DisablePaths { 144 pa.roPaths = append(pa.roPaths, protectKernelROPaths...) 145 pa.hiddenPaths = append(pa.hiddenDirs, protectKernelHiddenPaths...) 146 pa.hiddenDirs = append(pa.hiddenDirs, protectKernelHiddenDirs...) 147 } 148 149 // Seccomp 150 if !p.InsecureOptions.DisableSeccomp { 151 pa.seccomp, err = generateSeccompFilter(p, &pa) 152 if err != nil { 153 return nil, err 154 } 155 if pa.seccomp != nil && pa.seccomp.forceNoNewPrivileges { 156 pa.noNewPrivileges = true 157 } 158 } 159 160 // Write the systemd-sysusers config file 161 if err := generateSysusers(p, pa.app, int(pa.uid), int(pa.gid), &p.UidRange); err != nil { 162 return nil, errwrap.Wrapf("unable to generate sysusers file", err) 163 } 164 165 return &pa, nil 166 } 167 168 // computeAppResources processes any isolators that manipulate cgroups. 169 func computeAppResources(isolators types.Isolators) (appResources, error) { 170 res := appResources{} 171 var err error 172 173 withIsolator := func(name string, f func() error) error { 174 ok, err := cgroup.IsIsolatorSupported(name) 175 if err != nil { 176 return errwrap.Wrapf("could not check for isolator "+name, err) 177 } 178 179 if !ok { 180 fmt.Fprintf(os.Stderr, "warning: resource/%s isolator set but support disabled in the kernel, skipping\n", name) 181 return nil 182 } 183 184 return f() 185 } 186 187 for _, isolator := range isolators { 188 if err != nil { 189 return res, err 190 } 191 192 switch v := isolator.Value().(type) { 193 case *types.ResourceMemory: 194 err = withIsolator("memory", func() error { 195 if v.Limit() == nil { 196 return nil 197 } 198 199 val := uint64(v.Limit().Value()) 200 res.MemoryLimit = &val 201 return nil 202 }) 203 case *types.ResourceCPU: 204 err = withIsolator("cpu", func() error { 205 if v.Limit() == nil { 206 return nil 207 } 208 if v.Limit().Value() > MaxMilliValue { 209 return fmt.Errorf("cpu limit exceeds the maximum millivalue: %v", v.Limit().String()) 210 } 211 212 val := uint64(v.Limit().MilliValue() / 10) 213 res.CPUQuota = &val 214 return nil 215 }) 216 case *types.LinuxCPUShares: 217 err = withIsolator("cpu", func() error { 218 val := uint64(*v) 219 res.LinuxCPUShares = &val 220 return nil 221 }) 222 case *types.LinuxOOMScoreAdj: 223 val := int(*v) 224 res.LinuxOOMScoreAdjust = &val 225 } 226 } 227 228 return res, err 229 } 230 231 // relAppPaths prepends the relative app path (/opt/stage1/rootfs/) to a list 232 // of paths. Useful for systemd unit directives. 233 func (pa *preparedApp) relAppPaths(paths []string) []string { 234 out := make([]string, 0, len(paths)) 235 for _, p := range paths { 236 out = append(out, filepath.Join(common.RelAppRootfsPath(pa.app.Name), p)) 237 } 238 return out 239 }