github.com/kubiko/snapd@v0.0.0-20201013125620-d4f3094d9ddf/sandbox/cgroup/tracking.go (about) 1 package cgroup 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "strings" 8 9 "github.com/godbus/dbus" 10 11 "github.com/snapcore/snapd/dbusutil" 12 "github.com/snapcore/snapd/features" 13 "github.com/snapcore/snapd/logger" 14 "github.com/snapcore/snapd/randutil" 15 ) 16 17 var osGetuid = os.Getuid 18 var osGetpid = os.Getpid 19 var cgroupProcessPathInTrackingCgroup = ProcessPathInTrackingCgroup 20 21 var ErrCannotTrackProcess = errors.New("cannot track application process") 22 23 // TrackingOptions control how tracking, based on systemd transient scope, operates. 24 type TrackingOptions struct { 25 // AllowSessionBus controls if CreateTransientScopeForTracking will 26 // consider using the session bus for making the request. 27 AllowSessionBus bool 28 } 29 30 // CreateTransientScopeForTracking puts the current process in a transient scope. 31 // 32 // To quote systemd documentation about scope units: 33 // 34 // >> Scopes units manage a set of system processes. Unlike service units, 35 // >> scope units manage externally created processes, and do not fork off 36 // >> processes on its own. 37 // 38 // Scope names must be unique, a randomly generated UUID is appended to the 39 // security tag, further suffixed with the string ".scope". 40 func CreateTransientScopeForTracking(securityTag string, opts *TrackingOptions) error { 41 if !features.RefreshAppAwareness.IsEnabled() { 42 return nil 43 } 44 if opts == nil { 45 // Retain original semantics when not explicitly configured otherwise. 46 opts = &TrackingOptions{AllowSessionBus: true} 47 } 48 logger.Debugf("creating transient scope %s", securityTag) 49 50 // Session or system bus might be unavailable. To avoid being fragile 51 // ignore all errors when establishing session bus connection to avoid 52 // breaking user interactions. This is consistent with similar failure 53 // modes below, where other parts of the stack fail. 54 // 55 // Ideally we would check for a distinct error type but this is just an 56 // errors.New() in go-dbus code. 57 uid := osGetuid() 58 // Depending on options, we may use the session bus instead of the system 59 // bus. In addition, when uid == 0 we may fall back from using the session 60 // bus to the system bus. 61 var isSessionBus bool 62 var conn *dbus.Conn 63 var err error 64 if opts.AllowSessionBus { 65 isSessionBus, conn, err = sessionOrMaybeSystemBus(uid) 66 if err != nil { 67 return ErrCannotTrackProcess 68 } 69 } else { 70 isSessionBus = false 71 conn, err = dbusutil.SystemBus() 72 if err != nil { 73 return ErrCannotTrackProcess 74 } 75 } 76 77 // We ask the kernel for a random UUID. We need one because each transient 78 // scope needs a unique name. The unique name is composed of said UUID and 79 // the snap security tag. 80 uuid, err := randomUUID() 81 if err != nil { 82 return err 83 } 84 85 // Enforcing uniqueness is preferred to reusing an existing scope for 86 // simplicity since doing otherwise by joining an existing scope has 87 // limitations: 88 // - the originally started scope must be marked as a delegate, with all 89 // consequences. 90 // - the method AttachProcessesToUnit is unavailable on Ubuntu 16.04 91 unitName := fmt.Sprintf("%s.%s.scope", securityTag, uuid) 92 93 pid := osGetpid() 94 tryAgain: 95 // Create a transient scope by talking to systemd over DBus. 96 if err := doCreateTransientScope(conn, unitName, pid); err != nil { 97 switch err { 98 case errDBusUnknownMethod: 99 return ErrCannotTrackProcess 100 case errDBusSpawnChildExited: 101 fallthrough 102 case errDBusNameHasNoOwner: 103 if isSessionBus && uid == 0 { 104 // We cannot activate systemd --user for root, 105 // try the system bus as a fallback. 106 logger.Debugf("cannot activate systemd --user on session bus, falling back to system bus: %s", err) 107 isSessionBus = false 108 conn, err = dbusutil.SystemBus() 109 if err != nil { 110 logger.Debugf("system bus is not available: %s", err) 111 return ErrCannotTrackProcess 112 } 113 logger.Debugf("using system bus now, session bus could not activate systemd --user") 114 goto tryAgain 115 } 116 return ErrCannotTrackProcess 117 } 118 return err 119 } 120 // We may have created a transient scope but due to the constraints the 121 // kernel puts on process transitions on unprivileged users (and remember 122 // that systemd --user is unprivileged) the actual re-association with the 123 // scope cgroup may have silently failed - unfortunately some versions of 124 // systemd do not report an error in that case. Systemd 238 and newer 125 // detects the error correctly and uses privileged systemd running as pid 1 126 // to assist in the transition. 127 // 128 // For more details about the transition constraints refer to 129 // cgroup_procs_write_permission() as of linux 5.8 and 130 // unit_attach_pids_to_cgroup() as of systemd 245. 131 // 132 // Verify the effective tracking cgroup and check that our scope name is 133 // contained therein. 134 path, err := cgroupProcessPathInTrackingCgroup(pid) 135 if err != nil { 136 return err 137 } 138 if !strings.HasSuffix(path, unitName) { 139 logger.Debugf("systemd could not associate process %d with transient scope %s", pid, unitName) 140 return ErrCannotTrackProcess 141 } 142 return nil 143 } 144 145 // ConfirmSystemdServiceTracking checks if systemd tracks this process as a snap service. 146 // 147 // Systemd is placing started services, both user and system, into appropriate 148 // tracking groups. Given a security tag we can confirm if the current process 149 // belongs to such tracking group and thus could be identified by snapd as 150 // belonging to a particular snap and application. 151 // 152 // If the application process is not tracked then ErrCannotTrackProcess is returned. 153 func ConfirmSystemdServiceTracking(securityTag string) error { 154 pid := osGetpid() 155 path, err := cgroupProcessPathInTrackingCgroup(pid) 156 if err != nil { 157 return err 158 } 159 unitName := fmt.Sprintf("%s.service", securityTag) 160 if !strings.Contains(path, unitName) { 161 return ErrCannotTrackProcess 162 } 163 return nil 164 } 165 166 func sessionOrMaybeSystemBus(uid int) (isSessionBus bool, conn *dbus.Conn, err error) { 167 // The scope is created with a DBus call to systemd running either on 168 // system or session bus. We have a preference for session bus, as this is 169 // where applications normally go to. When a session bus is not available 170 // and the invoking user is root, we use the system bus instead. 171 // 172 // It is worth noting that hooks will not normally have a session bus to 173 // connect to, as they are invoked as descendants of snapd, and snapd is a 174 // service running outside of any session. 175 conn, err = dbusutil.SessionBus() 176 if err == nil { 177 logger.Debugf("using session bus") 178 return true, conn, nil 179 } 180 logger.Debugf("session bus is not available: %s", err) 181 if uid == 0 { 182 logger.Debugf("falling back to system bus") 183 conn, err = dbusutil.SystemBus() 184 if err != nil { 185 logger.Debugf("system bus is not available: %s", err) 186 } else { 187 logger.Debugf("using system bus now, session bus was not available") 188 } 189 } 190 return false, conn, err 191 } 192 193 type handledDBusError struct { 194 msg string 195 dbusError string 196 } 197 198 func (e *handledDBusError) Error() string { 199 return fmt.Sprintf("%s [%s]", e.msg, e.dbusError) 200 } 201 202 var ( 203 errDBusUnknownMethod = &handledDBusError{msg: "unknown dbus object method", dbusError: "org.freedesktop.DBus.Error.UnknownMethod"} 204 errDBusNameHasNoOwner = &handledDBusError{msg: "dbus name has no owner", dbusError: "org.freedesktop.DBus.Error.NameHasNoOwner"} 205 errDBusSpawnChildExited = &handledDBusError{msg: "dbus spawned child process exited", dbusError: "org.freedesktop.DBus.Error.Spawn.ChildExited"} 206 ) 207 208 // doCreateTransientScope creates a systemd transient scope with specified properties. 209 // 210 // The scope is created by asking systemd via the specified DBus connection. 211 // The unit name and the PID to attach are provided as well. The DBus method 212 // call is performed outside confinement established by snap-confine. 213 var doCreateTransientScope = func(conn *dbus.Conn, unitName string, pid int) error { 214 // Documentation of StartTransientUnit is available at 215 // https://www.freedesktop.org/wiki/Software/systemd/dbus/ 216 // 217 // The property and auxUnit types are not well documented but can be traced 218 // from systemd source code. As of systemd 245 it can be found in src/core/dbus-manager.c, 219 // in a declaration containing SD_BUS_METHOD_WITH_NAMES("SD_BUS_METHOD_WITH_NAMES",... 220 // From there one can follow to method_start_transient_unit to understand 221 // how argument parsing is performed. 222 // 223 // Systemd defines the signature of StartTransientUnit as 224 // "ssa(sv)a(sa(sv))". The signature can be decomposed as follows: 225 // 226 // unitName string // name of the unit to start 227 // jobMode string // corresponds to --job-mode= (see systemctl(1) manual page) 228 // properties []struct{ 229 // Name string 230 // Value interface{} 231 // } // properties describe properties of the started unit 232 // auxUnits []struct { 233 // Name string 234 // Properties []struct{ 235 // Name string 236 // Value interface{} 237 // } 238 // } // auxUnits describe any additional units to define. 239 type property struct { 240 Name string 241 Value interface{} 242 } 243 type auxUnit struct { 244 Name string 245 Props []property 246 } 247 248 // The mode string decides how the job is interacting with other systemd 249 // jobs on the system. The documentation of the systemd StartUnit() method 250 // describes the possible values and their properties: 251 // 252 // >> StartUnit() enqeues a start job, and possibly depending jobs. Takes 253 // >> the unit to activate, plus a mode string. The mode needs to be one of 254 // >> replace, fail, isolate, ignore-dependencies, ignore-requirements. If 255 // >> "replace" the call will start the unit and its dependencies, possibly 256 // >> replacing already queued jobs that conflict with this. If "fail" the 257 // >> call will start the unit and its dependencies, but will fail if this 258 // >> would change an already queued job. If "isolate" the call will start 259 // >> the unit in question and terminate all units that aren't dependencies 260 // >> of it. If "ignore-dependencies" it will start a unit but ignore all 261 // >> its dependencies. If "ignore-requirements" it will start a unit but 262 // >> only ignore the requirement dependencies. It is not recommended to 263 // >> make use of the latter two options. Returns the newly created job 264 // >> object. 265 // 266 // Here we choose "fail" to match systemd-run. 267 mode := "fail" 268 properties := []property{{"PIDs", []uint{uint(pid)}}} 269 aux := []auxUnit(nil) 270 systemd := conn.Object("org.freedesktop.systemd1", "/org/freedesktop/systemd1") 271 call := systemd.Call( 272 "org.freedesktop.systemd1.Manager.StartTransientUnit", 273 0, 274 unitName, 275 mode, 276 properties, 277 aux, 278 ) 279 var job dbus.ObjectPath 280 if err := call.Store(&job); err != nil { 281 if dbusErr, ok := err.(dbus.Error); ok { 282 logger.Debugf("StartTransientUnit failed with %q: %v", dbusErr.Name, dbusErr.Body) 283 // Some specific DBus errors have distinct handling. 284 switch dbusErr.Name { 285 case "org.freedesktop.DBus.Error.NameHasNoOwner": 286 // Nothing is providing systemd bus name. This is, most likely, 287 // an Ubuntu 14.04 system with the special deputy systemd. 288 return errDBusNameHasNoOwner 289 case "org.freedesktop.DBus.Error.UnknownMethod": 290 // The DBus API is not supported on this system. This can happen on 291 // very old versions of Systemd, for instance on Ubuntu 14.04. 292 return errDBusUnknownMethod 293 case "org.freedesktop.DBus.Error.Spawn.ChildExited": 294 // We tried to socket-activate dbus-daemon or bus-activate 295 // systemd --user but it failed. 296 return errDBusSpawnChildExited 297 case "org.freedesktop.systemd1.UnitExists": 298 // Starting a scope with a name that already exists is an 299 // error. Normally this should never happen. 300 return fmt.Errorf("cannot create transient scope: scope %q clashed: %s", unitName, err) 301 default: 302 return fmt.Errorf("cannot create transient scope: DBus error %q: %v", dbusErr.Name, dbusErr.Body) 303 } 304 } 305 if err != nil { 306 return fmt.Errorf("cannot create transient scope: %s", err) 307 } 308 } 309 logger.Debugf("created transient scope as object: %s", job) 310 return nil 311 } 312 313 var randomUUID = func() (string, error) { 314 // The source of the bytes generated here is the same as that of 315 // /dev/urandom which doesn't block and is sufficient for our purposes 316 // of avoiding clashing UUIDs that are needed for all of the non-service 317 // commands that are started with the help of this UUID. 318 return randutil.RandomKernelUUID(), nil 319 }