go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/acls/acls.go (about)

     1  // Copyright 2023 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package acls implements access control checks for Swarming APIs.
    16  package acls
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  
    22  	"google.golang.org/grpc/codes"
    23  	"google.golang.org/grpc/status"
    24  
    25  	"go.chromium.org/luci/auth/identity"
    26  	"go.chromium.org/luci/common/logging"
    27  	"go.chromium.org/luci/server/auth"
    28  	"go.chromium.org/luci/server/auth/authdb"
    29  	"go.chromium.org/luci/server/auth/realms"
    30  
    31  	"go.chromium.org/luci/swarming/server/cfg"
    32  )
    33  
    34  // Checker knows how to check Swarming ACLs inside a single RPC call.
    35  //
    36  // Its lifetime is scoped to a single request. It caches checks done within this
    37  // request to avoid doing redundant work. This cache never expires, thus it is
    38  // important to **drop** this checker once the request is finishes, to avoid
    39  // using stale cached data.
    40  //
    41  // Resources are organized hierarchically: Server => Pool => Task and Bot.
    42  // Permissions can potentially be granted on any level of this hierarchy, e.g.
    43  // permissions granted on a pool level apply to all tasks and bots the belong
    44  // to this pool.
    45  //
    46  // RPCs concerned with a specific task or bot should just check permission on
    47  // the task/bot layer using CheckTaskPerm/CheckBotPerm.
    48  //
    49  // RPCs that do listing or other operations that touch many tasks and bots may
    50  // use CheckPoolPerm and CheckServerPerm to do "prefiltering". They should also
    51  // be used if RPCs results are an aggregation over a pool, and thus explicitly
    52  // require pool-level permissions.
    53  type Checker struct {
    54  	cfg    *cfg.Config       // swarming config
    55  	db     authdb.DB         // auth DB with groups and permissions
    56  	caller identity.Identity // authenticated identity of the caller
    57  }
    58  
    59  // CheckResult is returned by all Checker methods.
    60  type CheckResult struct {
    61  	// Permitted is true if the permission check passed successfully.
    62  	//
    63  	// It is false if the caller doesn't have the requested permission or the
    64  	// check itself failed. Look at InternalError field to distinguish these cases
    65  	// if necessary.
    66  	//
    67  	// Use ToGrpcErr to convert a failure to a gRPC error. Note that CheckResult
    68  	// explicitly **does not** implement `error` interface to make sure callers
    69  	// are aware they need to return the gRPC error without any additional
    70  	// wrapping via `return nil, res.ToGrpcErr()`.
    71  	Permitted bool
    72  
    73  	// InternalError indicates there were some internal error checking ACLs.
    74  	//
    75  	// An internal error means the check itself failed due to internal errors,
    76  	// such as a timeout contacting the backend. This should abort the request
    77  	// handler ASAP with Internal gRPC error. Use ToGrpcErr to get such error.
    78  	//
    79  	// If both Permitted and InternalError are false, it means the caller has no
    80  	// requested permission. Use ToGrpcErr to get the error that must be returned
    81  	// to the caller in that case.
    82  	InternalError bool
    83  
    84  	// err is a gRPC error to return.
    85  	err error
    86  }
    87  
    88  // ToGrpcErr converts this failure to a gRPC error.
    89  //
    90  // To avoid accidentally leaking private information or implementation details,
    91  // this error should be returned to the gRPC caller as is, without any
    92  // additional wrapping. It is constructed to have all necessary information
    93  // about the call already.
    94  //
    95  // If the check succeeded and the access is permitted, returns nil.
    96  func (res *CheckResult) ToGrpcErr() error {
    97  	switch {
    98  	case res.InternalError:
    99  		return status.Errorf(codes.Internal, "internal error when checking permissions")
   100  	case res.Permitted:
   101  		return nil
   102  	case res.err == nil:
   103  		panic("err is not populated")
   104  	default:
   105  		return res.err
   106  	}
   107  }
   108  
   109  // TaskAuthInfo are properties of a task that affect who can access it.
   110  //
   111  // Extracted either from TaskRequest or from TaskResultSummary.
   112  type TaskAuthInfo struct {
   113  	// TaskID is ID of the task. Only for error messages and logs!
   114  	TaskID string
   115  	// Realm is the realm the task belongs to, as "<project>:<realm>" string.
   116  	Realm string
   117  	// Pool is task's pool extracted from "pool" dimension.
   118  	Pool string
   119  	// BotID is a bot the task is targeting via "id" dimension or "" if none.
   120  	BotID string
   121  	// Submitter is whoever submitted the task.
   122  	Submitter identity.Identity
   123  }
   124  
   125  // NewChecker constructs an ACL checker that uses the given config snapshot.
   126  func NewChecker(ctx context.Context, cfg *cfg.Config) *Checker {
   127  	state := auth.GetState(ctx)
   128  	return &Checker{
   129  		cfg:    cfg,
   130  		db:     state.DB(),
   131  		caller: state.User().Identity,
   132  	}
   133  }
   134  
   135  // CheckServerPerm checks if the caller has a permission on a server level.
   136  //
   137  // Having a permission on a server level means it applies to all pools, tasks
   138  // and bots in this instance of Swarming. Server level permissions are defined
   139  // via "auth { ... }" stanza with group names in the server's settings.cfg.
   140  func (chk *Checker) CheckServerPerm(ctx context.Context, perm realms.Permission) CheckResult {
   141  	serverGroups := chk.cfg.Settings().Auth
   142  
   143  	var allowedGroups []string
   144  
   145  	switch perm {
   146  	case PermTasksGet, PermPoolsListTasks:
   147  		allowedGroups = []string{
   148  			serverGroups.ViewAllTasksGroup,
   149  			serverGroups.PrivilegedUsersGroup,
   150  			serverGroups.AdminsGroup,
   151  		}
   152  
   153  	case PermPoolsListBots:
   154  		allowedGroups = []string{
   155  			serverGroups.ViewAllBotsGroup,
   156  			serverGroups.PrivilegedUsersGroup,
   157  			serverGroups.AdminsGroup,
   158  		}
   159  
   160  	case PermPoolsCreateBot:
   161  		allowedGroups = []string{
   162  			serverGroups.BotBootstrapGroup,
   163  			serverGroups.AdminsGroup,
   164  		}
   165  
   166  	case PermTasksCancel, PermPoolsCancelTask, PermPoolsDeleteBot, PermPoolsTerminateBot:
   167  		allowedGroups = []string{
   168  			serverGroups.AdminsGroup,
   169  		}
   170  	}
   171  
   172  	if len(allowedGroups) != 0 {
   173  		switch yes, err := chk.db.IsMember(ctx, chk.caller, allowedGroups); {
   174  		case err != nil:
   175  			logging.Errorf(ctx, "Error when checking groups: %s", err)
   176  			return CheckResult{InternalError: true}
   177  		case yes:
   178  			return CheckResult{Permitted: true}
   179  		}
   180  	}
   181  
   182  	return CheckResult{
   183  		err: status.Errorf(
   184  			codes.PermissionDenied,
   185  			"the caller %q doesn't have server-level permission %q",
   186  			chk.caller, perm),
   187  	}
   188  }
   189  
   190  // CheckPoolPerm checks if the caller has a permission on a pool level.
   191  //
   192  // Having a permission on a pool level means it applies for all tasks and bots
   193  // in that pool. CheckPoolPerm implicitly calls CheckServerPerm.
   194  func (chk *Checker) CheckPoolPerm(ctx context.Context, pool string, perm realms.Permission) CheckResult {
   195  	// If have a server-level permission, no need to check the pool. Server-level
   196  	// permissions are also the only way to deal with deleted pools.
   197  	if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError {
   198  		return res
   199  	}
   200  
   201  	if cfg := chk.cfg.Pool(pool); cfg != nil {
   202  		switch yes, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil); {
   203  		case err != nil:
   204  			logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err)
   205  			return CheckResult{InternalError: true}
   206  		case yes:
   207  			return CheckResult{Permitted: true}
   208  		}
   209  	}
   210  
   211  	// TODO(vadimsh): Make the error message more informative.
   212  	return CheckResult{
   213  		err: status.Errorf(
   214  			codes.PermissionDenied,
   215  			"the caller %q doesn't have permission %q in the pool %q or the pool doesn't exist",
   216  			chk.caller, perm, pool),
   217  	}
   218  }
   219  
   220  // FilterPoolsByPerm filters the list of pools keeping only ones in which the
   221  // caller has the permission.
   222  //
   223  // If the caller doesn't have the permission in any of the pools, returns nil
   224  // slice and no error. Returns a gRPC status error if the check failed due to
   225  // some internal issues.
   226  func (chk *Checker) FilterPoolsByPerm(ctx context.Context, pools []string, perm realms.Permission) ([]string, error) {
   227  	// If have a server-level permission, no need to check individual pools.
   228  	switch res := chk.CheckServerPerm(ctx, perm); {
   229  	case res.InternalError:
   230  		return nil, res.ToGrpcErr()
   231  	case res.Permitted:
   232  		return pools, nil
   233  	}
   234  
   235  	var filtered []string
   236  
   237  	ok := chk.visitRealms(ctx, pools, perm, func(pool string, allowed bool) bool {
   238  		if allowed {
   239  			filtered = append(filtered, pool)
   240  		}
   241  		return true
   242  	})
   243  
   244  	if !ok {
   245  		return nil, (&CheckResult{InternalError: true}).ToGrpcErr()
   246  	}
   247  	return filtered, nil
   248  }
   249  
   250  // CheckAllPoolsPerm checks if the caller has a permission in *all* given pools.
   251  //
   252  // The list of pools must not be empty. Panics if it is.
   253  func (chk *Checker) CheckAllPoolsPerm(ctx context.Context, pools []string, perm realms.Permission) CheckResult {
   254  	switch len(pools) {
   255  	case 0:
   256  		panic("empty list of pools in CheckAllPoolsPerm")
   257  	case 1:
   258  		// Use a single pool check for better error messages.
   259  		return chk.CheckPoolPerm(ctx, pools[0], perm)
   260  	}
   261  
   262  	// If have a server-level permission, no need to check individual pools.
   263  	if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError {
   264  		return res
   265  	}
   266  
   267  	allAllowed := true
   268  
   269  	ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool {
   270  		allAllowed = allAllowed && allowed
   271  		return allAllowed
   272  	})
   273  
   274  	switch {
   275  	case !ok:
   276  		return CheckResult{InternalError: true}
   277  	case allAllowed:
   278  		return CheckResult{Permitted: true}
   279  	default:
   280  		// TODO(vadimsh): Make the error message more informative.
   281  		return CheckResult{
   282  			err: status.Errorf(
   283  				codes.PermissionDenied,
   284  				"the caller %q doesn't have permission %q in some of the requested pools",
   285  				chk.caller, perm),
   286  		}
   287  	}
   288  }
   289  
   290  // CheckAnyPoolsPerm checks if the caller has a permission in *any* given pool.
   291  //
   292  // The list of pools must not be empty. Panics if it is.
   293  func (chk *Checker) CheckAnyPoolsPerm(ctx context.Context, pools []string, perm realms.Permission) CheckResult {
   294  	switch len(pools) {
   295  	case 0:
   296  		panic("empty list of pools in CheckAnyPoolsPerm")
   297  	case 1:
   298  		// Use a single pool check for better error messages.
   299  		return chk.CheckPoolPerm(ctx, pools[0], perm)
   300  	}
   301  
   302  	// If have a server-level permission, no need to check individual pools.
   303  	if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError {
   304  		return res
   305  	}
   306  
   307  	oneAllowed := false
   308  
   309  	ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool {
   310  		oneAllowed = oneAllowed || allowed
   311  		return !oneAllowed
   312  	})
   313  
   314  	switch {
   315  	case !ok:
   316  		return CheckResult{InternalError: true}
   317  	case oneAllowed:
   318  		return CheckResult{Permitted: true}
   319  	default:
   320  		// TODO(vadimsh): Make the error message more informative.
   321  		return CheckResult{
   322  			err: status.Errorf(
   323  				codes.PermissionDenied,
   324  				"the caller %q doesn't have permission %q in any of the requested pools",
   325  				chk.caller, perm),
   326  		}
   327  	}
   328  }
   329  
   330  // CheckTaskPerm checks if the caller has a permission in a specific task.
   331  //
   332  // Only accepts permissions targeting a single existing task: PermTasksGet and
   333  // PermTasksCancel. Panics if asked to check any other permission.
   334  //
   335  // It checks individual task ACL (based on task realm), as well as task's pool
   336  // ACL. The idea is that the caller can either "own" the task or "own" the bot
   337  // pool it was scheduled to run on. E.g. for a task to be visible, the caller
   338  // either needs PermTasksGet in the task's realm, or PermPoolsListTasks in the
   339  // bot pool realm. This function checks both.
   340  func (chk *Checker) CheckTaskPerm(ctx context.Context, task TaskAuthInfo, perm realms.Permission) CheckResult {
   341  	// Look up a matching pool level permission to check it in the task's pool.
   342  	var poolPerm realms.Permission
   343  	switch perm {
   344  	case PermTasksGet:
   345  		poolPerm = PermPoolsListTasks
   346  	case PermTasksCancel:
   347  		poolPerm = PermPoolsCancelTask
   348  	default:
   349  		panic(fmt.Sprintf("not a task-level permission %q", perm))
   350  	}
   351  
   352  	// Whoever submitted the task has full control over it.
   353  	if task.Submitter == chk.caller {
   354  		return CheckResult{Permitted: true}
   355  	}
   356  
   357  	// If have a server-level permission, no need to check anything else. Note
   358  	// that on the server level task<->pool permission pairs like PermTasksGet and
   359  	// PermPoolsListTasks are treated identically, so it is sufficient to check
   360  	// only `perm` (and skip checking `poolPerm`: the outcome will be the same).
   361  	if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError {
   362  		return res
   363  	}
   364  
   365  	// Check if the caller has the permission in the task's own realm.
   366  	switch yes, err := chk.db.HasPermission(ctx, chk.caller, perm, task.Realm, nil); {
   367  	case err != nil:
   368  		logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, task.Realm, err)
   369  		return CheckResult{InternalError: true}
   370  	case yes:
   371  		return CheckResult{Permitted: true}
   372  	}
   373  
   374  	// Check if the caller has the matching permission in the task's assigned
   375  	// pool. If the task has no pool assigned but instead was scheduled to run on
   376  	// a concrete bot (happens for termination tasks), check if the caller has
   377  	// the permission in this bot's pool.
   378  	//
   379  	// Note that when both Pool and BotID fields are set, Pool should take
   380  	// precedence, since the pool is what we check when submitting tasks (i.e. for
   381  	// a new task with dimensions `{"pool": ..., "bot": ...}` only "pool" is being
   382  	// used in permission checks and "bot" is completely unrestricted). Checking
   383  	// pool here as well results in more consistent behavior.
   384  	//
   385  	// Note that it is forbidden to submit arbitrary tasks without a pool through
   386  	// the public API. They can be submitted only by the Swarming server
   387  	// internally.
   388  	var poolsToCheck []string
   389  	if task.Pool != "" {
   390  		poolsToCheck = []string{task.Pool}
   391  	} else if task.BotID != "" {
   392  		poolsToCheck = chk.cfg.BotGroup(task.BotID).Pools()
   393  	}
   394  	if len(poolsToCheck) != 0 {
   395  		oneAllowed := false
   396  		ok := chk.visitRealms(ctx, poolsToCheck, poolPerm, func(_ string, allowed bool) bool {
   397  			oneAllowed = oneAllowed || allowed
   398  			return !oneAllowed
   399  		})
   400  		switch {
   401  		case !ok:
   402  			return CheckResult{InternalError: true}
   403  		case oneAllowed:
   404  			return CheckResult{Permitted: true}
   405  		}
   406  	}
   407  
   408  	// TODO(vadimsh): Make the error message more informative.
   409  	return CheckResult{
   410  		err: status.Errorf(
   411  			codes.PermissionDenied,
   412  			"the caller %q doesn't have permission %q for the task %q",
   413  			chk.caller, perm, task.TaskID),
   414  	}
   415  }
   416  
   417  // CheckBotPerm checks if the caller has a permission in a specific bot.
   418  //
   419  // It looks up a realm the bot belong to (based on "pool" dimension) and then
   420  // checks the caller has the required permission in this realm.
   421  func (chk *Checker) CheckBotPerm(ctx context.Context, botID string, perm realms.Permission) CheckResult {
   422  	// If have a server-level permission, no need to fetch bot info.
   423  	if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError {
   424  		return res
   425  	}
   426  
   427  	// TODO(vadimsh): Python code used to fetch BotInfo or BotEvent from datastore
   428  	// to look up bot pools. This matters for bots removed from configs. Avoid
   429  	// this for now (fetch the bot info exclusively from the current config) to
   430  	// see if it makes any observable difference for real use cases.
   431  	pools := chk.cfg.BotGroup(botID).Pools()
   432  	if len(pools) == 0 {
   433  		panic("impossible due to the config validation and Pools() logic")
   434  	}
   435  
   436  	// Note: we can't just call CheckAnyPoolsPerm since it can potentially leak
   437  	// pool name in its error message. In CheckBotPerm we don't know if the caller
   438  	// is allowed to see bot => pool association and should not expose the pool
   439  	// name in errors, only bot ID.
   440  
   441  	oneAllowed := false
   442  
   443  	ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool {
   444  		oneAllowed = oneAllowed || allowed
   445  		return !oneAllowed
   446  	})
   447  
   448  	switch {
   449  	case !ok:
   450  		return CheckResult{InternalError: true}
   451  	case oneAllowed:
   452  		return CheckResult{Permitted: true}
   453  	default:
   454  		// TODO(vadimsh): Make the error message more informative.
   455  		return CheckResult{
   456  			err: status.Errorf(
   457  				codes.PermissionDenied,
   458  				"the caller %q doesn't have permission %q in the pool that contains bot %q or this bot doesn't exist",
   459  				chk.caller, perm, botID),
   460  		}
   461  	}
   462  }
   463  
   464  // visitRealms does a permission check for every pool, sequentially.
   465  //
   466  // It calls the callback with the outcome of the check. If the callback returns
   467  // true, the iteration continues. Otherwise it stops and visitRealms returns
   468  // true. Returns false only on internal problems with the check.
   469  func (chk *Checker) visitRealms(ctx context.Context, pools []string, perm realms.Permission, cb func(pool string, allowed bool) bool) (ok bool) {
   470  	// A micro optimization for a very common case of one pool. Skips a map.
   471  	if len(pools) == 1 {
   472  		pool := pools[0]
   473  		cfg := chk.cfg.Pool(pool)
   474  		if cfg == nil {
   475  			// Missing pools assumed to have no permissions in them.
   476  			logging.Warningf(ctx, "Unknown pool when checking ACLs: %s", pool)
   477  			cb(pool, false)
   478  		} else {
   479  			outcome, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil)
   480  			if err != nil {
   481  				logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err)
   482  				return false
   483  			}
   484  			cb(pool, outcome)
   485  		}
   486  		return true
   487  	}
   488  
   489  	// Generic case that makes more memory allocations.
   490  	checkedRealms := make(map[string]bool, 2)
   491  	for _, pool := range pools {
   492  		cfg := chk.cfg.Pool(pool)
   493  		if cfg == nil {
   494  			// Missing pools assumed to have no permissions in them.
   495  			logging.Warningf(ctx, "Unknown pool when checking ACLs: %s", pool)
   496  			if !cb(pool, false) {
   497  				return true
   498  			}
   499  			continue
   500  		}
   501  		if _, checked := checkedRealms[cfg.Realm]; !checked {
   502  			outcome, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil)
   503  			if err != nil {
   504  				logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err)
   505  				return false
   506  			}
   507  			checkedRealms[cfg.Realm] = outcome
   508  		}
   509  		if !cb(pool, checkedRealms[cfg.Realm]) {
   510  			return true
   511  		}
   512  	}
   513  	return true
   514  }