gitee.com/mysnapcore/mysnapd@v0.1.0/sandbox/cgroup/tracking.go (about)

     1  package cgroup
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/godbus/dbus"
    12  
    13  	"gitee.com/mysnapcore/mysnapd/dbusutil"
    14  	"gitee.com/mysnapcore/mysnapd/logger"
    15  	"gitee.com/mysnapcore/mysnapd/randutil"
    16  )
    17  
    18  var osGetuid = os.Getuid
    19  var osGetpid = os.Getpid
    20  var cgroupProcessPathInTrackingCgroup = ProcessPathInTrackingCgroup
    21  
    22  var ErrCannotTrackProcess = errors.New("cannot track application process")
    23  
// TrackingOptions control how tracking, based on systemd transient scope, operates.
//
// Passing a nil *TrackingOptions to CreateTransientScopeForTracking is
// equivalent to passing &TrackingOptions{AllowSessionBus: true}.
type TrackingOptions struct {
	// AllowSessionBus controls if CreateTransientScopeForTracking will
	// consider using the session bus for making the request. When false,
	// only the system bus is used.
	AllowSessionBus bool
}
    30  
    31  // CreateTransientScopeForTracking puts the current process in a transient scope.
    32  //
    33  // To quote systemd documentation about scope units:
    34  //
    35  // >> Scopes units manage a set of system processes. Unlike service units,
    36  // >> scope units manage externally created processes, and do not fork off
    37  // >> processes on its own.
    38  //
    39  // Scope names must be unique, a randomly generated UUID is appended to the
    40  // security tag, further suffixed with the string ".scope".
    41  func CreateTransientScopeForTracking(securityTag string, opts *TrackingOptions) error {
    42  	if opts == nil {
    43  		// Retain original semantics when not explicitly configured otherwise.
    44  		opts = &TrackingOptions{AllowSessionBus: true}
    45  	}
    46  	logger.Debugf("creating transient scope %s", securityTag)
    47  
    48  	// Session or system bus might be unavailable. To avoid being fragile
    49  	// ignore all errors when establishing session bus connection to avoid
    50  	// breaking user interactions. This is consistent with similar failure
    51  	// modes below, where other parts of the stack fail.
    52  	//
    53  	// Ideally we would check for a distinct error type but this is just an
    54  	// errors.New() in go-dbus code.
    55  	uid := osGetuid()
    56  	// Depending on options, we may use the session bus instead of the system
    57  	// bus. In addition, when uid == 0 we may fall back from using the session
    58  	// bus to the system bus.
    59  	var isSessionBus bool
    60  	var conn *dbus.Conn
    61  	var err error
    62  	if opts.AllowSessionBus {
    63  		isSessionBus, conn, err = sessionOrMaybeSystemBus(uid)
    64  		if err != nil {
    65  			return ErrCannotTrackProcess
    66  		}
    67  	} else {
    68  		isSessionBus = false
    69  		conn, err = dbusutil.SystemBus()
    70  		if err != nil {
    71  			return ErrCannotTrackProcess
    72  		}
    73  	}
    74  
    75  	// We ask the kernel for a random UUID. We need one because each transient
    76  	// scope needs a unique name. The unique name is composed of said UUID and
    77  	// the snap security tag.
    78  	uuid, err := randomUUID()
    79  	if err != nil {
    80  		return err
    81  	}
    82  
    83  	// Enforcing uniqueness is preferred to reusing an existing scope for
    84  	// simplicity since doing otherwise by joining an existing scope has
    85  	// limitations:
    86  	// - the originally started scope must be marked as a delegate, with all
    87  	//   consequences.
    88  	// - the method AttachProcessesToUnit is unavailable on Ubuntu 16.04
    89  	unitName := fmt.Sprintf("%s.%s.scope", securityTag, uuid)
    90  
    91  	pid := osGetpid()
    92  	start := time.Now()
    93  tryAgain:
    94  	// Create a transient scope by talking to systemd over DBus.
    95  	if err := doCreateTransientScope(conn, unitName, pid); err != nil {
    96  		switch err {
    97  		case errDBusUnknownMethod:
    98  			return ErrCannotTrackProcess
    99  		case errDBusSpawnChildExited:
   100  			fallthrough
   101  		case errDBusNameHasNoOwner:
   102  			if isSessionBus && uid == 0 {
   103  				// We cannot activate systemd --user for root,
   104  				// try the system bus as a fallback.
   105  				logger.Debugf("cannot activate systemd --user on session bus, falling back to system bus: %s", err)
   106  				isSessionBus = false
   107  				conn, err = dbusutil.SystemBus()
   108  				if err != nil {
   109  					logger.Debugf("system bus is not available: %s", err)
   110  					return ErrCannotTrackProcess
   111  				}
   112  				logger.Debugf("using system bus now, session bus could not activate systemd --user")
   113  				goto tryAgain
   114  			}
   115  			return ErrCannotTrackProcess
   116  		}
   117  		return err
   118  	}
   119  	// We may have created a transient scope but due to the constraints the
   120  	// kernel puts on process transitions on unprivileged users (and remember
   121  	// that systemd --user is unprivileged) the actual re-association with the
   122  	// scope cgroup may have silently failed - unfortunately some versions of
   123  	// systemd do not report an error in that case. Systemd 238 and newer
   124  	// detects the error correctly and uses privileged systemd running as pid 1
   125  	// to assist in the transition.
   126  	//
   127  	// For more details about the transition constraints refer to
   128  	// cgroup_procs_write_permission() as of linux 5.8 and
   129  	// unit_attach_pids_to_cgroup() as of systemd 245.
   130  	//
   131  	// Verify the effective tracking cgroup and check that our scope name is
   132  	// contained therein.
   133  	hasTracking := false
   134  	for tries := 0; tries < 100; tries++ {
   135  		path, err := cgroupProcessPathInTrackingCgroup(pid)
   136  		if err != nil {
   137  			return err
   138  		}
   139  		if strings.HasSuffix(path, unitName) {
   140  			hasTracking = true
   141  			break
   142  		}
   143  		time.Sleep(1 * time.Millisecond)
   144  	}
   145  	waitForTracking := time.Since(start)
   146  	logger.Debugf("waited %v for tracking", waitForTracking)
   147  	if !hasTracking {
   148  		logger.Debugf("systemd could not associate process %d with transient scope %s", pid, unitName)
   149  		return ErrCannotTrackProcess
   150  	}
   151  	return nil
   152  }
   153  
   154  // ConfirmSystemdServiceTracking checks if systemd tracks this process as a snap service.
   155  //
   156  // Systemd is placing started services, both user and system, into appropriate
   157  // tracking groups. Given a security tag we can confirm if the current process
   158  // belongs to such tracking group and thus could be identified by snapd as
   159  // belonging to a particular snap and application.
   160  //
   161  // If the application process is not tracked then ErrCannotTrackProcess is returned.
   162  func ConfirmSystemdServiceTracking(securityTag string) error {
   163  	pid := osGetpid()
   164  	path, err := cgroupProcessPathInTrackingCgroup(pid)
   165  	if err != nil {
   166  		return err
   167  	}
   168  	unitName := fmt.Sprintf("%s.service", securityTag)
   169  	if !strings.Contains(path, unitName) {
   170  		return ErrCannotTrackProcess
   171  	}
   172  	return nil
   173  }
   174  
   175  func sessionOrMaybeSystemBus(uid int) (isSessionBus bool, conn *dbus.Conn, err error) {
   176  	// The scope is created with a DBus call to systemd running either on
   177  	// system or session bus. We have a preference for session bus, as this is
   178  	// where applications normally go to. When a session bus is not available
   179  	// and the invoking user is root, we use the system bus instead.
   180  	//
   181  	// It is worth noting that hooks will not normally have a session bus to
   182  	// connect to, as they are invoked as descendants of snapd, and snapd is a
   183  	// service running outside of any session.
   184  	conn, err = dbusutil.SessionBus()
   185  	if err == nil {
   186  		logger.Debugf("using session bus")
   187  		return true, conn, nil
   188  	}
   189  	logger.Debugf("session bus is not available: %s", err)
   190  	if uid == 0 {
   191  		logger.Debugf("falling back to system bus")
   192  		conn, err = dbusutil.SystemBus()
   193  		if err != nil {
   194  			logger.Debugf("system bus is not available: %s", err)
   195  		} else {
   196  			logger.Debugf("using system bus now, session bus was not available")
   197  		}
   198  	}
   199  	return false, conn, err
   200  }
   201  
   202  type handledDBusError struct {
   203  	msg       string
   204  	dbusError string
   205  }
   206  
   207  func (e *handledDBusError) Error() string {
   208  	return fmt.Sprintf("%s [%s]", e.msg, e.dbusError)
   209  }
   210  
   211  var (
   212  	errDBusUnknownMethod    = &handledDBusError{msg: "unknown dbus object method", dbusError: "org.freedesktop.DBus.Error.UnknownMethod"}
   213  	errDBusNameHasNoOwner   = &handledDBusError{msg: "dbus name has no owner", dbusError: "org.freedesktop.DBus.Error.NameHasNoOwner"}
   214  	errDBusSpawnChildExited = &handledDBusError{msg: "dbus spawned child process exited", dbusError: "org.freedesktop.DBus.Error.Spawn.ChildExited"}
   215  
   216  	// pick a decent fit-all timeout
   217  	createScopeJobTimeout = 10 * time.Second
   218  )
   219  
   220  // startTransientScope requests systemd to create a transient unit and returns
   221  // the associated systemd job path.
   222  //
   223  // The scope is created by asking systemd via the specified DBus connection.
   224  // The unit name and the PID to attach are provided as well. The DBus method
   225  // call is performed outside confinement established by snap-confine.
   226  func startTransientScope(conn *dbus.Conn, unitName string, pid int) (job dbus.ObjectPath, err error) {
   227  	// Documentation of StartTransientUnit is available at
   228  	// https://www.freedesktop.org/wiki/Software/systemd/dbus/
   229  	//
   230  	// The property and auxUnit types are not well documented but can be traced
   231  	// from systemd source code. As of systemd 245 it can be found in src/core/dbus-manager.c,
   232  	// in a declaration containing SD_BUS_METHOD_WITH_NAMES("SD_BUS_METHOD_WITH_NAMES",...
   233  	// From there one can follow to method_start_transient_unit to understand
   234  	// how argument parsing is performed.
   235  	//
   236  	// Systemd defines the signature of StartTransientUnit as
   237  	// "ssa(sv)a(sa(sv))". The signature can be decomposed as follows:
   238  	//
   239  	// unitName string // name of the unit to start
   240  	// jobMode string  // corresponds to --job-mode= (see systemctl(1) manual page)
   241  	// properties []struct{
   242  	//   Name string
   243  	//   Value interface{}
   244  	// } // properties describe properties of the started unit
   245  	// auxUnits []struct {
   246  	//   Name string
   247  	//   Properties []struct{
   248  	//   	Name string
   249  	//   	Value interface{}
   250  	//	 }
   251  	// } // auxUnits describe any additional units to define.
   252  	type property struct {
   253  		Name  string
   254  		Value interface{}
   255  	}
   256  	type auxUnit struct {
   257  		Name  string
   258  		Props []property
   259  	}
   260  
   261  	// The mode string decides how the job is interacting with other systemd
   262  	// jobs on the system. The documentation of the systemd StartUnit() method
   263  	// describes the possible values and their properties:
   264  	//
   265  	// >> StartUnit() enqeues a start job, and possibly depending jobs. Takes
   266  	// >> the unit to activate, plus a mode string. The mode needs to be one of
   267  	// >> replace, fail, isolate, ignore-dependencies, ignore-requirements. If
   268  	// >> "replace" the call will start the unit and its dependencies, possibly
   269  	// >> replacing already queued jobs that conflict with this. If "fail" the
   270  	// >> call will start the unit and its dependencies, but will fail if this
   271  	// >> would change an already queued job. If "isolate" the call will start
   272  	// >> the unit in question and terminate all units that aren't dependencies
   273  	// >> of it. If "ignore-dependencies" it will start a unit but ignore all
   274  	// >> its dependencies. If "ignore-requirements" it will start a unit but
   275  	// >> only ignore the requirement dependencies. It is not recommended to
   276  	// >> make use of the latter two options. Returns the newly created job
   277  	// >> object.
   278  	//
   279  	// Here we choose "fail" to match systemd-run.
   280  	mode := "fail"
   281  	properties := []property{{"PIDs", []uint{uint(pid)}}}
   282  	aux := []auxUnit(nil)
   283  	systemd := conn.Object("org.freedesktop.systemd1", "/org/freedesktop/systemd1")
   284  	call := systemd.Call(
   285  		"org.freedesktop.systemd1.Manager.StartTransientUnit",
   286  		0,
   287  		unitName,
   288  		mode,
   289  		properties,
   290  		aux,
   291  	)
   292  	if err := call.Store(&job); err != nil {
   293  		if dbusErr, ok := err.(dbus.Error); ok {
   294  			logger.Debugf("StartTransientUnit failed with %q: %v", dbusErr.Name, dbusErr.Body)
   295  			// Some specific DBus errors have distinct handling.
   296  			switch dbusErr.Name {
   297  			case "org.freedesktop.DBus.Error.NameHasNoOwner":
   298  				// Nothing is providing systemd bus name. This is, most likely,
   299  				// an Ubuntu 14.04 system with the special deputy systemd.
   300  				return "", errDBusNameHasNoOwner
   301  			case "org.freedesktop.DBus.Error.UnknownMethod":
   302  				// The DBus API is not supported on this system. This can happen on
   303  				// very old versions of Systemd, for instance on Ubuntu 14.04.
   304  				return "", errDBusUnknownMethod
   305  			case "org.freedesktop.DBus.Error.Spawn.ChildExited":
   306  				// We tried to socket-activate dbus-daemon or bus-activate
   307  				// systemd --user but it failed.
   308  				return "", errDBusSpawnChildExited
   309  			case "org.freedesktop.systemd1.UnitExists":
   310  				// Starting a scope with a name that already exists is an
   311  				// error. Normally this should never happen.
   312  				return "", fmt.Errorf("cannot create transient scope: scope %q clashed: %s", unitName, err)
   313  			default:
   314  				return "", fmt.Errorf("cannot create transient scope: DBus error %q: %v", dbusErr.Name, dbusErr.Body)
   315  			}
   316  		}
   317  		return "", fmt.Errorf("cannot create transient scope: %s", err)
   318  	}
   319  	logger.Debugf("create transient scope job: %s", job)
   320  	return job, nil
   321  }
   322  
   323  // doCreateTransientScopeOpportunisticSync creates a transient scope with a
   324  // given unit name asking systemd to move the provided pid to that scope, does
   325  // not wait for the systemd job to complete
   326  func doCreateTransientScopeNoSync(conn *dbus.Conn, unitName string, pid int) error {
   327  	_, err := startTransientScope(conn, unitName, pid)
   328  	return err
   329  }
   330  
// doCreateTransientScopeJobRemovedSync creates a transient scope with a
// given unit name asking systemd to move the provided pid to that scope, and
// waits for the systemd job to finish.
//
// Completion is detected by watching for the JobRemoved signal of
// org.freedesktop.systemd1.Manager that carries the job path returned by
// startTransientScope.
//
// An error is returned when the signal subscription cannot be set up, when
// startTransientScope fails (its error is returned unchanged), when the job
// finishes with a result other than "done", or when no result arrives within
// createScopeJobTimeout.
func doCreateTransientScopeJobRemovedSync(conn *dbus.Conn, unitName string, pid int) error {
	// set up a watch for JobRemoved signals, so that we'll know when our
	// request has completed
	jobRemoveMatch := []dbus.MatchOption{
		dbus.WithMatchInterface("org.freedesktop.systemd1.Manager"),
		dbus.WithMatchMember("JobRemoved"),
	}
	if err := conn.AddMatchSignal(jobRemoveMatch...); err != nil {
		return fmt.Errorf("cannot subscribe to systemd signals: %v", err)
	}
	// signal channel with buffer for some messages
	signals := make(chan *dbus.Signal, 10)
	// for receiving job results; buffered so that the goroutine below can
	// deliver one result even if nobody is receiving anymore (e.g. after a
	// timeout)
	jobResultChan := make(chan string, 1)
	// for passing the job we want to observe; closing it tells the goroutine
	// below to clean up and exit
	jobWaitFor := make(chan dbus.ObjectPath, 1)
	// and start watching for signals, we do this before even sending a
	// request, so that we won't miss any signals from systemd
	conn.Signal(signals)

	var wg sync.WaitGroup
	// On every return path past this point: request the goroutine to stop
	// and wait until it has unregistered the signal channel and match rule.
	defer func() {
		close(jobWaitFor)
		// wait for the signal handling to finish before returning
		wg.Wait()
	}()
	wg.Add(1)
	go func() {
		defer wg.Done()
		// results of jobs that were removed before we learned which job
		// to wait for, keyed by job path
		jobResults := make(map[dbus.ObjectPath]string, 10)
		// the job we were asked to wait for, empty until known
		expectedJob := dbus.ObjectPath("")
		for {
			select {
			case job, ok := <-jobWaitFor:
				if !ok {
					// the channel got closed, meaning it's
					// time to clean up
					conn.RemoveSignal(signals)
					// the error returned here is not checked
					conn.RemoveMatchSignal(jobRemoveMatch...)
					close(jobResultChan)
					close(signals)
					return
				}
				if result, ok := jobResults[job]; ok {
					// maybe we already have result for this job
					jobResultChan <- result
				} else {
					expectedJob = job
				}
			case sig, ok := <-signals:
				if !ok {
					// NOTE(review): a closed channel is always ready
					// to receive, so if the dbus library ever closes
					// signals (e.g. when the connection goes away -
					// confirm) this branch spins until jobWaitFor is
					// closed, and the close(signals) in the cleanup
					// path above would then panic.
					continue
				}
				// make sure the signal name is as expected, although the
				// match selectors should ensure we only receive
				// JobRemoved signals
				if sig.Name != "org.freedesktop.systemd1.Manager.JobRemoved" {
					continue
				}
				// the signal body is decoded as: job id, job object
				// path, unit name and job result
				var id uint32
				var jobFromSignal dbus.ObjectPath
				var unit string
				var result string
				if err := dbus.Store(sig.Body, &id, &jobFromSignal, &unit, &result); err != nil {
					// signals that cannot be decoded are ignored
					continue
				}
				if jobFromSignal == expectedJob {
					// we are already expecting results for this job
					jobResultChan <- result
				} else {
					// or not, just keep result for now, as
					// a request to track a job may come
					// later
					jobResults[jobFromSignal] = result
				}
			}
		}
	}()
	job, err := startTransientScope(conn, unitName, pid)
	if err != nil {
		return err
	}
	// tell the goroutine which job we are interested in
	jobWaitFor <- job
	select {
	case result := <-jobResultChan:
		logger.Debugf("job result is %q", result)
		if result != "done" {
			return fmt.Errorf("transient scope could not be started, job %v finished with result %v", job, result)
		}
	case <-time.After(createScopeJobTimeout):
		return fmt.Errorf("transient scope not created in %v", createScopeJobTimeout)
	}
	logger.Debugf("transient scope %v created", unitName)
	return nil
}
   429  
   430  // doCreateTransientScope creates a systemd transient scope with specified properties.
   431  //
   432  // The scope is created by asking systemd via the specified DBus connection.
   433  // The unit name and the PID to attach are provided as well. The DBus method
   434  // call is performed outside confinement established by snap-confine.
   435  var doCreateTransientScope = func(conn *dbus.Conn, unitName string, pid int) error {
   436  	// in theory we could use a single implementation that sync with job
   437  	// removed signal and inspects the result, however some older
   438  	// distributions sport an unpatched and broken version of systemd, which
   439  	// prevents the job from being correctly moved to new scope when
   440  	// creating one on the user systemd instance, and thus we always get an
   441  	// error. Fortunately, it so happens that distributions that have
   442  	// switched to a unified cgroup hierarchy, carry a systemd version that
   443  	// has so far been able to successfully create user scopes in user
   444  	// sessions
   445  	if IsUnified() {
   446  		// when using cgroup v2, we absolutely must be sure that the
   447  		// tracking group has been created, otherwise we risk
   448  		// establishing a device cgroup filtering in the wrong group
   449  		return doCreateTransientScopeJobRemovedSync(conn, unitName, pid)
   450  	}
   451  	return doCreateTransientScopeNoSync(conn, unitName, pid)
   452  }
   453  
   454  var randomUUID = func() (string, error) {
   455  	// The source of the bytes generated here is the same as that of
   456  	// /dev/urandom which doesn't block and is sufficient for our purposes
   457  	// of avoiding clashing UUIDs that are needed for all of the non-service
   458  	// commands that are started with the help of this UUID.
   459  	return randutil.RandomKernelUUID(), nil
   460  }