gitee.com/mysnapcore/mysnapd@v0.1.0/sandbox/cgroup/tracking.go (about) 1 package cgroup 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/godbus/dbus" 12 13 "gitee.com/mysnapcore/mysnapd/dbusutil" 14 "gitee.com/mysnapcore/mysnapd/logger" 15 "gitee.com/mysnapcore/mysnapd/randutil" 16 ) 17 18 var osGetuid = os.Getuid 19 var osGetpid = os.Getpid 20 var cgroupProcessPathInTrackingCgroup = ProcessPathInTrackingCgroup 21 22 var ErrCannotTrackProcess = errors.New("cannot track application process") 23 24 // TrackingOptions control how tracking, based on systemd transient scope, operates. 25 type TrackingOptions struct { 26 // AllowSessionBus controls if CreateTransientScopeForTracking will 27 // consider using the session bus for making the request. 28 AllowSessionBus bool 29 } 30 31 // CreateTransientScopeForTracking puts the current process in a transient scope. 32 // 33 // To quote systemd documentation about scope units: 34 // 35 // >> Scopes units manage a set of system processes. Unlike service units, 36 // >> scope units manage externally created processes, and do not fork off 37 // >> processes on its own. 38 // 39 // Scope names must be unique, a randomly generated UUID is appended to the 40 // security tag, further suffixed with the string ".scope". 41 func CreateTransientScopeForTracking(securityTag string, opts *TrackingOptions) error { 42 if opts == nil { 43 // Retain original semantics when not explicitly configured otherwise. 44 opts = &TrackingOptions{AllowSessionBus: true} 45 } 46 logger.Debugf("creating transient scope %s", securityTag) 47 48 // Session or system bus might be unavailable. To avoid being fragile 49 // ignore all errors when establishing session bus connection to avoid 50 // breaking user interactions. This is consistent with similar failure 51 // modes below, where other parts of the stack fail. 52 // 53 // Ideally we would check for a distinct error type but this is just an 54 // errors.New() in go-dbus code. 55 uid := osGetuid() 56 // Depending on options, we may use the session bus instead of the system 57 // bus. In addition, when uid == 0 we may fall back from using the session 58 // bus to the system bus. 59 var isSessionBus bool 60 var conn *dbus.Conn 61 var err error 62 if opts.AllowSessionBus { 63 isSessionBus, conn, err = sessionOrMaybeSystemBus(uid) 64 if err != nil { 65 return ErrCannotTrackProcess 66 } 67 } else { 68 isSessionBus = false 69 conn, err = dbusutil.SystemBus() 70 if err != nil { 71 return ErrCannotTrackProcess 72 } 73 } 74 75 // We ask the kernel for a random UUID. We need one because each transient 76 // scope needs a unique name. The unique name is composed of said UUID and 77 // the snap security tag. 78 uuid, err := randomUUID() 79 if err != nil { 80 return err 81 } 82 83 // Enforcing uniqueness is preferred to reusing an existing scope for 84 // simplicity since doing otherwise by joining an existing scope has 85 // limitations: 86 // - the originally started scope must be marked as a delegate, with all 87 // consequences. 88 // - the method AttachProcessesToUnit is unavailable on Ubuntu 16.04 89 unitName := fmt.Sprintf("%s.%s.scope", securityTag, uuid) 90 91 pid := osGetpid() 92 start := time.Now() 93 tryAgain: 94 // Create a transient scope by talking to systemd over DBus. 95 if err := doCreateTransientScope(conn, unitName, pid); err != nil { 96 switch err { 97 case errDBusUnknownMethod: 98 return ErrCannotTrackProcess 99 case errDBusSpawnChildExited: 100 fallthrough 101 case errDBusNameHasNoOwner: 102 if isSessionBus && uid == 0 { 103 // We cannot activate systemd --user for root, 104 // try the system bus as a fallback. 105 logger.Debugf("cannot activate systemd --user on session bus, falling back to system bus: %s", err) 106 isSessionBus = false 107 conn, err = dbusutil.SystemBus() 108 if err != nil { 109 logger.Debugf("system bus is not available: %s", err) 110 return ErrCannotTrackProcess 111 } 112 logger.Debugf("using system bus now, session bus could not activate systemd --user") 113 goto tryAgain 114 } 115 return ErrCannotTrackProcess 116 } 117 return err 118 } 119 // We may have created a transient scope but due to the constraints the 120 // kernel puts on process transitions on unprivileged users (and remember 121 // that systemd --user is unprivileged) the actual re-association with the 122 // scope cgroup may have silently failed - unfortunately some versions of 123 // systemd do not report an error in that case. Systemd 238 and newer 124 // detects the error correctly and uses privileged systemd running as pid 1 125 // to assist in the transition. 126 // 127 // For more details about the transition constraints refer to 128 // cgroup_procs_write_permission() as of linux 5.8 and 129 // unit_attach_pids_to_cgroup() as of systemd 245. 130 // 131 // Verify the effective tracking cgroup and check that our scope name is 132 // contained therein. 133 hasTracking := false 134 for tries := 0; tries < 100; tries++ { 135 path, err := cgroupProcessPathInTrackingCgroup(pid) 136 if err != nil { 137 return err 138 } 139 if strings.HasSuffix(path, unitName) { 140 hasTracking = true 141 break 142 } 143 time.Sleep(1 * time.Millisecond) 144 } 145 waitForTracking := time.Since(start) 146 logger.Debugf("waited %v for tracking", waitForTracking) 147 if !hasTracking { 148 logger.Debugf("systemd could not associate process %d with transient scope %s", pid, unitName) 149 return ErrCannotTrackProcess 150 } 151 return nil 152 } 153 154 // ConfirmSystemdServiceTracking checks if systemd tracks this process as a snap service. 155 // 156 // Systemd is placing started services, both user and system, into appropriate 157 // tracking groups. Given a security tag we can confirm if the current process 158 // belongs to such tracking group and thus could be identified by snapd as 159 // belonging to a particular snap and application. 160 // 161 // If the application process is not tracked then ErrCannotTrackProcess is returned. 162 func ConfirmSystemdServiceTracking(securityTag string) error { 163 pid := osGetpid() 164 path, err := cgroupProcessPathInTrackingCgroup(pid) 165 if err != nil { 166 return err 167 } 168 unitName := fmt.Sprintf("%s.service", securityTag) 169 if !strings.Contains(path, unitName) { 170 return ErrCannotTrackProcess 171 } 172 return nil 173 } 174 175 func sessionOrMaybeSystemBus(uid int) (isSessionBus bool, conn *dbus.Conn, err error) { 176 // The scope is created with a DBus call to systemd running either on 177 // system or session bus. We have a preference for session bus, as this is 178 // where applications normally go to. When a session bus is not available 179 // and the invoking user is root, we use the system bus instead. 180 // 181 // It is worth noting that hooks will not normally have a session bus to 182 // connect to, as they are invoked as descendants of snapd, and snapd is a 183 // service running outside of any session. 184 conn, err = dbusutil.SessionBus() 185 if err == nil { 186 logger.Debugf("using session bus") 187 return true, conn, nil 188 } 189 logger.Debugf("session bus is not available: %s", err) 190 if uid == 0 { 191 logger.Debugf("falling back to system bus") 192 conn, err = dbusutil.SystemBus() 193 if err != nil { 194 logger.Debugf("system bus is not available: %s", err) 195 } else { 196 logger.Debugf("using system bus now, session bus was not available") 197 } 198 } 199 return false, conn, err 200 } 201 202 type handledDBusError struct { 203 msg string 204 dbusError string 205 } 206 207 func (e *handledDBusError) Error() string { 208 return fmt.Sprintf("%s [%s]", e.msg, e.dbusError) 209 } 210 211 var ( 212 errDBusUnknownMethod = &handledDBusError{msg: "unknown dbus object method", dbusError: "org.freedesktop.DBus.Error.UnknownMethod"} 213 errDBusNameHasNoOwner = &handledDBusError{msg: "dbus name has no owner", dbusError: "org.freedesktop.DBus.Error.NameHasNoOwner"} 214 errDBusSpawnChildExited = &handledDBusError{msg: "dbus spawned child process exited", dbusError: "org.freedesktop.DBus.Error.Spawn.ChildExited"} 215 216 // pick a decent fit-all timeout 217 createScopeJobTimeout = 10 * time.Second 218 ) 219 220 // startTransientScope requests systemd to create a transient unit and returns 221 // the associated systemd job path. 222 // 223 // The scope is created by asking systemd via the specified DBus connection. 224 // The unit name and the PID to attach are provided as well. The DBus method 225 // call is performed outside confinement established by snap-confine. 226 func startTransientScope(conn *dbus.Conn, unitName string, pid int) (job dbus.ObjectPath, err error) { 227 // Documentation of StartTransientUnit is available at 228 // https://www.freedesktop.org/wiki/Software/systemd/dbus/ 229 // 230 // The property and auxUnit types are not well documented but can be traced 231 // from systemd source code. As of systemd 245 it can be found in src/core/dbus-manager.c, 232 // in a declaration containing SD_BUS_METHOD_WITH_NAMES("SD_BUS_METHOD_WITH_NAMES",... 233 // From there one can follow to method_start_transient_unit to understand 234 // how argument parsing is performed. 235 // 236 // Systemd defines the signature of StartTransientUnit as 237 // "ssa(sv)a(sa(sv))". The signature can be decomposed as follows: 238 // 239 // unitName string // name of the unit to start 240 // jobMode string // corresponds to --job-mode= (see systemctl(1) manual page) 241 // properties []struct{ 242 // Name string 243 // Value interface{} 244 // } // properties describe properties of the started unit 245 // auxUnits []struct { 246 // Name string 247 // Properties []struct{ 248 // Name string 249 // Value interface{} 250 // } 251 // } // auxUnits describe any additional units to define. 252 type property struct { 253 Name string 254 Value interface{} 255 } 256 type auxUnit struct { 257 Name string 258 Props []property 259 } 260 261 // The mode string decides how the job is interacting with other systemd 262 // jobs on the system. The documentation of the systemd StartUnit() method 263 // describes the possible values and their properties: 264 // 265 // >> StartUnit() enqeues a start job, and possibly depending jobs. Takes 266 // >> the unit to activate, plus a mode string. The mode needs to be one of 267 // >> replace, fail, isolate, ignore-dependencies, ignore-requirements. If 268 // >> "replace" the call will start the unit and its dependencies, possibly 269 // >> replacing already queued jobs that conflict with this. If "fail" the 270 // >> call will start the unit and its dependencies, but will fail if this 271 // >> would change an already queued job. If "isolate" the call will start 272 // >> the unit in question and terminate all units that aren't dependencies 273 // >> of it. If "ignore-dependencies" it will start a unit but ignore all 274 // >> its dependencies. If "ignore-requirements" it will start a unit but 275 // >> only ignore the requirement dependencies. It is not recommended to 276 // >> make use of the latter two options. Returns the newly created job 277 // >> object. 278 // 279 // Here we choose "fail" to match systemd-run. 280 mode := "fail" 281 properties := []property{{"PIDs", []uint{uint(pid)}}} 282 aux := []auxUnit(nil) 283 systemd := conn.Object("org.freedesktop.systemd1", "/org/freedesktop/systemd1") 284 call := systemd.Call( 285 "org.freedesktop.systemd1.Manager.StartTransientUnit", 286 0, 287 unitName, 288 mode, 289 properties, 290 aux, 291 ) 292 if err := call.Store(&job); err != nil { 293 if dbusErr, ok := err.(dbus.Error); ok { 294 logger.Debugf("StartTransientUnit failed with %q: %v", dbusErr.Name, dbusErr.Body) 295 // Some specific DBus errors have distinct handling. 296 switch dbusErr.Name { 297 case "org.freedesktop.DBus.Error.NameHasNoOwner": 298 // Nothing is providing systemd bus name. This is, most likely, 299 // an Ubuntu 14.04 system with the special deputy systemd. 300 return "", errDBusNameHasNoOwner 301 case "org.freedesktop.DBus.Error.UnknownMethod": 302 // The DBus API is not supported on this system. This can happen on 303 // very old versions of Systemd, for instance on Ubuntu 14.04. 304 return "", errDBusUnknownMethod 305 case "org.freedesktop.DBus.Error.Spawn.ChildExited": 306 // We tried to socket-activate dbus-daemon or bus-activate 307 // systemd --user but it failed. 308 return "", errDBusSpawnChildExited 309 case "org.freedesktop.systemd1.UnitExists": 310 // Starting a scope with a name that already exists is an 311 // error. Normally this should never happen. 312 return "", fmt.Errorf("cannot create transient scope: scope %q clashed: %s", unitName, err) 313 default: 314 return "", fmt.Errorf("cannot create transient scope: DBus error %q: %v", dbusErr.Name, dbusErr.Body) 315 } 316 } 317 return "", fmt.Errorf("cannot create transient scope: %s", err) 318 } 319 logger.Debugf("create transient scope job: %s", job) 320 return job, nil 321 } 322 323 // doCreateTransientScopeOpportunisticSync creates a transient scope with a 324 // given unit name asking systemd to move the provided pid to that scope, does 325 // not wait for the systemd job to complete 326 func doCreateTransientScopeNoSync(conn *dbus.Conn, unitName string, pid int) error { 327 _, err := startTransientScope(conn, unitName, pid) 328 return err 329 } 330 331 // doCreateTransientScopeOpportunisticSync creates a transient scope with a 332 // given unit name asking systemd to move the provided pid to that scope, and 333 // waits for the systemd job to finish 334 func doCreateTransientScopeJobRemovedSync(conn *dbus.Conn, unitName string, pid int) error { 335 // set up a watch for JobRemoved signals, so that we'll know when our 336 // request has completed 337 jobRemoveMatch := []dbus.MatchOption{ 338 dbus.WithMatchInterface("org.freedesktop.systemd1.Manager"), 339 dbus.WithMatchMember("JobRemoved"), 340 } 341 if err := conn.AddMatchSignal(jobRemoveMatch...); err != nil { 342 return fmt.Errorf("cannot subscribe to systemd signals: %v", err) 343 } 344 // signal channel with buffer for some messages 345 signals := make(chan *dbus.Signal, 10) 346 // for receiving job results 347 jobResultChan := make(chan string, 1) 348 // for passing the job we want to observe 349 jobWaitFor := make(chan dbus.ObjectPath, 1) 350 // and start watching for signals, we do this before even sending a 351 // request, so that we won't miss any signals from systemd 352 conn.Signal(signals) 353 354 var wg sync.WaitGroup 355 defer func() { 356 close(jobWaitFor) 357 // wait for the signal handling to finish before returning 358 wg.Wait() 359 }() 360 wg.Add(1) 361 go func() { 362 defer wg.Done() 363 jobResults := make(map[dbus.ObjectPath]string, 10) 364 expectedJob := dbus.ObjectPath("") 365 for { 366 select { 367 case job, ok := <-jobWaitFor: 368 if !ok { 369 // the channel got closed, meaning it's 370 // time to clean up 371 conn.RemoveSignal(signals) 372 conn.RemoveMatchSignal(jobRemoveMatch...) 373 close(jobResultChan) 374 close(signals) 375 return 376 } 377 if result, ok := jobResults[job]; ok { 378 // maybe we already have result for this job 379 jobResultChan <- result 380 } else { 381 expectedJob = job 382 } 383 case sig, ok := <-signals: 384 if !ok { 385 continue 386 } 387 // make sure the signal name is as expected, although the 388 // match selectors should ensure we only receive 389 // JobRemoved signals 390 if sig.Name != "org.freedesktop.systemd1.Manager.JobRemoved" { 391 continue 392 } 393 var id uint32 394 var jobFromSignal dbus.ObjectPath 395 var unit string 396 var result string 397 if err := dbus.Store(sig.Body, &id, &jobFromSignal, &unit, &result); err != nil { 398 continue 399 } 400 if jobFromSignal == expectedJob { 401 // we are already expecting results for this job 402 jobResultChan <- result 403 } else { 404 // or not, just keep result for now, as 405 // a request to track a job may come 406 // later 407 jobResults[jobFromSignal] = result 408 } 409 } 410 } 411 }() 412 job, err := startTransientScope(conn, unitName, pid) 413 if err != nil { 414 return err 415 } 416 jobWaitFor <- job 417 select { 418 case result := <-jobResultChan: 419 logger.Debugf("job result is %q", result) 420 if result != "done" { 421 return fmt.Errorf("transient scope could not be started, job %v finished with result %v", job, result) 422 } 423 case <-time.After(createScopeJobTimeout): 424 return fmt.Errorf("transient scope not created in %v", createScopeJobTimeout) 425 } 426 logger.Debugf("transient scope %v created", unitName) 427 return nil 428 } 429 430 // doCreateTransientScope creates a systemd transient scope with specified properties. 431 // 432 // The scope is created by asking systemd via the specified DBus connection. 433 // The unit name and the PID to attach are provided as well. The DBus method 434 // call is performed outside confinement established by snap-confine. 435 var doCreateTransientScope = func(conn *dbus.Conn, unitName string, pid int) error { 436 // in theory we could use a single implementation that sync with job 437 // removed signal and inspects the result, however some older 438 // distributions sport an unpatched and broken version of systemd, which 439 // prevents the job from being correctly moved to new scope when 440 // creating one on the user systemd instance, and thus we always get an 441 // error. Fortunately, it so happens that distributions that have 442 // switched to a unified cgroup hierarchy, carry a systemd version that 443 // has so far been able to successfully create user scopes in user 444 // sessions 445 if IsUnified() { 446 // when using cgroup v2, we absolutely must be sure that the 447 // tracking group has been created, otherwise we risk 448 // establishing a device cgroup filtering in the wrong group 449 return doCreateTransientScopeJobRemovedSync(conn, unitName, pid) 450 } 451 return doCreateTransientScopeNoSync(conn, unitName, pid) 452 } 453 454 var randomUUID = func() (string, error) { 455 // The source of the bytes generated here is the same as that of 456 // /dev/urandom which doesn't block and is sufficient for our purposes 457 // of avoiding clashing UUIDs that are needed for all of the non-service 458 // commands that are started with the help of this UUID. 459 return randutil.RandomKernelUUID(), nil 460 }