go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/swarming/server/acls/acls.go (about) 1 // Copyright 2023 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package acls implements access control checks for Swarming APIs. 16 package acls 17 18 import ( 19 "context" 20 "fmt" 21 22 "google.golang.org/grpc/codes" 23 "google.golang.org/grpc/status" 24 25 "go.chromium.org/luci/auth/identity" 26 "go.chromium.org/luci/common/logging" 27 "go.chromium.org/luci/server/auth" 28 "go.chromium.org/luci/server/auth/authdb" 29 "go.chromium.org/luci/server/auth/realms" 30 31 "go.chromium.org/luci/swarming/server/cfg" 32 ) 33 34 // Checker knows how to check Swarming ACLs inside a single RPC call. 35 // 36 // Its lifetime is scoped to a single request. It caches checks done within this 37 // request to avoid doing redundant work. This cache never expires, thus it is 38 // important to **drop** this checker once the request is finishes, to avoid 39 // using stale cached data. 40 // 41 // Resources are organized hierarchically: Server => Pool => Task and Bot. 42 // Permissions can potentially be granted on any level of this hierarchy, e.g. 43 // permissions granted on a pool level apply to all tasks and bots the belong 44 // to this pool. 45 // 46 // RPCs concerned with a specific task or bot should just check permission on 47 // the task/bot layer using CheckTaskPerm/CheckBotPerm. 48 // 49 // RPCs that do listing or other operations that touch many tasks and bots may 50 // use CheckPoolPerm and CheckServerPerm to do "prefiltering". They should also 51 // be used if RPCs results are an aggregation over a pool, and thus explicitly 52 // require pool-level permissions. 53 type Checker struct { 54 cfg *cfg.Config // swarming config 55 db authdb.DB // auth DB with groups and permissions 56 caller identity.Identity // authenticated identity of the caller 57 } 58 59 // CheckResult is returned by all Checker methods. 60 type CheckResult struct { 61 // Permitted is true if the permission check passed successfully. 62 // 63 // It is false if the caller doesn't have the requested permission or the 64 // check itself failed. Look at InternalError field to distinguish these cases 65 // if necessary. 66 // 67 // Use ToGrpcErr to convert a failure to a gRPC error. Note that CheckResult 68 // explicitly **does not** implement `error` interface to make sure callers 69 // are aware they need to return the gRPC error without any additional 70 // wrapping via `return nil, res.ToGrpcErr()`. 71 Permitted bool 72 73 // InternalError indicates there were some internal error checking ACLs. 74 // 75 // An internal error means the check itself failed due to internal errors, 76 // such as a timeout contacting the backend. This should abort the request 77 // handler ASAP with Internal gRPC error. Use ToGrpcErr to get such error. 78 // 79 // If both Permitted and InternalError are false, it means the caller has no 80 // requested permission. Use ToGrpcErr to get the error that must be returned 81 // to the caller in that case. 82 InternalError bool 83 84 // err is a gRPC error to return. 85 err error 86 } 87 88 // ToGrpcErr converts this failure to a gRPC error. 89 // 90 // To avoid accidentally leaking private information or implementation details, 91 // this error should be returned to the gRPC caller as is, without any 92 // additional wrapping. It is constructed to have all necessary information 93 // about the call already. 94 // 95 // If the check succeeded and the access is permitted, returns nil. 96 func (res *CheckResult) ToGrpcErr() error { 97 switch { 98 case res.InternalError: 99 return status.Errorf(codes.Internal, "internal error when checking permissions") 100 case res.Permitted: 101 return nil 102 case res.err == nil: 103 panic("err is not populated") 104 default: 105 return res.err 106 } 107 } 108 109 // TaskAuthInfo are properties of a task that affect who can access it. 110 // 111 // Extracted either from TaskRequest or from TaskResultSummary. 112 type TaskAuthInfo struct { 113 // TaskID is ID of the task. Only for error messages and logs! 114 TaskID string 115 // Realm is the realm the task belongs to, as "<project>:<realm>" string. 116 Realm string 117 // Pool is task's pool extracted from "pool" dimension. 118 Pool string 119 // BotID is a bot the task is targeting via "id" dimension or "" if none. 120 BotID string 121 // Submitter is whoever submitted the task. 122 Submitter identity.Identity 123 } 124 125 // NewChecker constructs an ACL checker that uses the given config snapshot. 126 func NewChecker(ctx context.Context, cfg *cfg.Config) *Checker { 127 state := auth.GetState(ctx) 128 return &Checker{ 129 cfg: cfg, 130 db: state.DB(), 131 caller: state.User().Identity, 132 } 133 } 134 135 // CheckServerPerm checks if the caller has a permission on a server level. 136 // 137 // Having a permission on a server level means it applies to all pools, tasks 138 // and bots in this instance of Swarming. Server level permissions are defined 139 // via "auth { ... }" stanza with group names in the server's settings.cfg. 140 func (chk *Checker) CheckServerPerm(ctx context.Context, perm realms.Permission) CheckResult { 141 serverGroups := chk.cfg.Settings().Auth 142 143 var allowedGroups []string 144 145 switch perm { 146 case PermTasksGet, PermPoolsListTasks: 147 allowedGroups = []string{ 148 serverGroups.ViewAllTasksGroup, 149 serverGroups.PrivilegedUsersGroup, 150 serverGroups.AdminsGroup, 151 } 152 153 case PermPoolsListBots: 154 allowedGroups = []string{ 155 serverGroups.ViewAllBotsGroup, 156 serverGroups.PrivilegedUsersGroup, 157 serverGroups.AdminsGroup, 158 } 159 160 case PermPoolsCreateBot: 161 allowedGroups = []string{ 162 serverGroups.BotBootstrapGroup, 163 serverGroups.AdminsGroup, 164 } 165 166 case PermTasksCancel, PermPoolsCancelTask, PermPoolsDeleteBot, PermPoolsTerminateBot: 167 allowedGroups = []string{ 168 serverGroups.AdminsGroup, 169 } 170 } 171 172 if len(allowedGroups) != 0 { 173 switch yes, err := chk.db.IsMember(ctx, chk.caller, allowedGroups); { 174 case err != nil: 175 logging.Errorf(ctx, "Error when checking groups: %s", err) 176 return CheckResult{InternalError: true} 177 case yes: 178 return CheckResult{Permitted: true} 179 } 180 } 181 182 return CheckResult{ 183 err: status.Errorf( 184 codes.PermissionDenied, 185 "the caller %q doesn't have server-level permission %q", 186 chk.caller, perm), 187 } 188 } 189 190 // CheckPoolPerm checks if the caller has a permission on a pool level. 191 // 192 // Having a permission on a pool level means it applies for all tasks and bots 193 // in that pool. CheckPoolPerm implicitly calls CheckServerPerm. 194 func (chk *Checker) CheckPoolPerm(ctx context.Context, pool string, perm realms.Permission) CheckResult { 195 // If have a server-level permission, no need to check the pool. Server-level 196 // permissions are also the only way to deal with deleted pools. 197 if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError { 198 return res 199 } 200 201 if cfg := chk.cfg.Pool(pool); cfg != nil { 202 switch yes, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil); { 203 case err != nil: 204 logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err) 205 return CheckResult{InternalError: true} 206 case yes: 207 return CheckResult{Permitted: true} 208 } 209 } 210 211 // TODO(vadimsh): Make the error message more informative. 212 return CheckResult{ 213 err: status.Errorf( 214 codes.PermissionDenied, 215 "the caller %q doesn't have permission %q in the pool %q or the pool doesn't exist", 216 chk.caller, perm, pool), 217 } 218 } 219 220 // FilterPoolsByPerm filters the list of pools keeping only ones in which the 221 // caller has the permission. 222 // 223 // If the caller doesn't have the permission in any of the pools, returns nil 224 // slice and no error. Returns a gRPC status error if the check failed due to 225 // some internal issues. 226 func (chk *Checker) FilterPoolsByPerm(ctx context.Context, pools []string, perm realms.Permission) ([]string, error) { 227 // If have a server-level permission, no need to check individual pools. 228 switch res := chk.CheckServerPerm(ctx, perm); { 229 case res.InternalError: 230 return nil, res.ToGrpcErr() 231 case res.Permitted: 232 return pools, nil 233 } 234 235 var filtered []string 236 237 ok := chk.visitRealms(ctx, pools, perm, func(pool string, allowed bool) bool { 238 if allowed { 239 filtered = append(filtered, pool) 240 } 241 return true 242 }) 243 244 if !ok { 245 return nil, (&CheckResult{InternalError: true}).ToGrpcErr() 246 } 247 return filtered, nil 248 } 249 250 // CheckAllPoolsPerm checks if the caller has a permission in *all* given pools. 251 // 252 // The list of pools must not be empty. Panics if it is. 253 func (chk *Checker) CheckAllPoolsPerm(ctx context.Context, pools []string, perm realms.Permission) CheckResult { 254 switch len(pools) { 255 case 0: 256 panic("empty list of pools in CheckAllPoolsPerm") 257 case 1: 258 // Use a single pool check for better error messages. 259 return chk.CheckPoolPerm(ctx, pools[0], perm) 260 } 261 262 // If have a server-level permission, no need to check individual pools. 263 if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError { 264 return res 265 } 266 267 allAllowed := true 268 269 ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool { 270 allAllowed = allAllowed && allowed 271 return allAllowed 272 }) 273 274 switch { 275 case !ok: 276 return CheckResult{InternalError: true} 277 case allAllowed: 278 return CheckResult{Permitted: true} 279 default: 280 // TODO(vadimsh): Make the error message more informative. 281 return CheckResult{ 282 err: status.Errorf( 283 codes.PermissionDenied, 284 "the caller %q doesn't have permission %q in some of the requested pools", 285 chk.caller, perm), 286 } 287 } 288 } 289 290 // CheckAnyPoolsPerm checks if the caller has a permission in *any* given pool. 291 // 292 // The list of pools must not be empty. Panics if it is. 293 func (chk *Checker) CheckAnyPoolsPerm(ctx context.Context, pools []string, perm realms.Permission) CheckResult { 294 switch len(pools) { 295 case 0: 296 panic("empty list of pools in CheckAnyPoolsPerm") 297 case 1: 298 // Use a single pool check for better error messages. 299 return chk.CheckPoolPerm(ctx, pools[0], perm) 300 } 301 302 // If have a server-level permission, no need to check individual pools. 303 if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError { 304 return res 305 } 306 307 oneAllowed := false 308 309 ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool { 310 oneAllowed = oneAllowed || allowed 311 return !oneAllowed 312 }) 313 314 switch { 315 case !ok: 316 return CheckResult{InternalError: true} 317 case oneAllowed: 318 return CheckResult{Permitted: true} 319 default: 320 // TODO(vadimsh): Make the error message more informative. 321 return CheckResult{ 322 err: status.Errorf( 323 codes.PermissionDenied, 324 "the caller %q doesn't have permission %q in any of the requested pools", 325 chk.caller, perm), 326 } 327 } 328 } 329 330 // CheckTaskPerm checks if the caller has a permission in a specific task. 331 // 332 // Only accepts permissions targeting a single existing task: PermTasksGet and 333 // PermTasksCancel. Panics if asked to check any other permission. 334 // 335 // It checks individual task ACL (based on task realm), as well as task's pool 336 // ACL. The idea is that the caller can either "own" the task or "own" the bot 337 // pool it was scheduled to run on. E.g. for a task to be visible, the caller 338 // either needs PermTasksGet in the task's realm, or PermPoolsListTasks in the 339 // bot pool realm. This function checks both. 340 func (chk *Checker) CheckTaskPerm(ctx context.Context, task TaskAuthInfo, perm realms.Permission) CheckResult { 341 // Look up a matching pool level permission to check it in the task's pool. 342 var poolPerm realms.Permission 343 switch perm { 344 case PermTasksGet: 345 poolPerm = PermPoolsListTasks 346 case PermTasksCancel: 347 poolPerm = PermPoolsCancelTask 348 default: 349 panic(fmt.Sprintf("not a task-level permission %q", perm)) 350 } 351 352 // Whoever submitted the task has full control over it. 353 if task.Submitter == chk.caller { 354 return CheckResult{Permitted: true} 355 } 356 357 // If have a server-level permission, no need to check anything else. Note 358 // that on the server level task<->pool permission pairs like PermTasksGet and 359 // PermPoolsListTasks are treated identically, so it is sufficient to check 360 // only `perm` (and skip checking `poolPerm`: the outcome will be the same). 361 if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError { 362 return res 363 } 364 365 // Check if the caller has the permission in the task's own realm. 366 switch yes, err := chk.db.HasPermission(ctx, chk.caller, perm, task.Realm, nil); { 367 case err != nil: 368 logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, task.Realm, err) 369 return CheckResult{InternalError: true} 370 case yes: 371 return CheckResult{Permitted: true} 372 } 373 374 // Check if the caller has the matching permission in the task's assigned 375 // pool. If the task has no pool assigned but instead was scheduled to run on 376 // a concrete bot (happens for termination tasks), check if the caller has 377 // the permission in this bot's pool. 378 // 379 // Note that when both Pool and BotID fields are set, Pool should take 380 // precedence, since the pool is what we check when submitting tasks (i.e. for 381 // a new task with dimensions `{"pool": ..., "bot": ...}` only "pool" is being 382 // used in permission checks and "bot" is completely unrestricted). Checking 383 // pool here as well results in more consistent behavior. 384 // 385 // Note that it is forbidden to submit arbitrary tasks without a pool through 386 // the public API. They can be submitted only by the Swarming server 387 // internally. 388 var poolsToCheck []string 389 if task.Pool != "" { 390 poolsToCheck = []string{task.Pool} 391 } else if task.BotID != "" { 392 poolsToCheck = chk.cfg.BotGroup(task.BotID).Pools() 393 } 394 if len(poolsToCheck) != 0 { 395 oneAllowed := false 396 ok := chk.visitRealms(ctx, poolsToCheck, poolPerm, func(_ string, allowed bool) bool { 397 oneAllowed = oneAllowed || allowed 398 return !oneAllowed 399 }) 400 switch { 401 case !ok: 402 return CheckResult{InternalError: true} 403 case oneAllowed: 404 return CheckResult{Permitted: true} 405 } 406 } 407 408 // TODO(vadimsh): Make the error message more informative. 409 return CheckResult{ 410 err: status.Errorf( 411 codes.PermissionDenied, 412 "the caller %q doesn't have permission %q for the task %q", 413 chk.caller, perm, task.TaskID), 414 } 415 } 416 417 // CheckBotPerm checks if the caller has a permission in a specific bot. 418 // 419 // It looks up a realm the bot belong to (based on "pool" dimension) and then 420 // checks the caller has the required permission in this realm. 421 func (chk *Checker) CheckBotPerm(ctx context.Context, botID string, perm realms.Permission) CheckResult { 422 // If have a server-level permission, no need to fetch bot info. 423 if res := chk.CheckServerPerm(ctx, perm); res.Permitted || res.InternalError { 424 return res 425 } 426 427 // TODO(vadimsh): Python code used to fetch BotInfo or BotEvent from datastore 428 // to look up bot pools. This matters for bots removed from configs. Avoid 429 // this for now (fetch the bot info exclusively from the current config) to 430 // see if it makes any observable difference for real use cases. 431 pools := chk.cfg.BotGroup(botID).Pools() 432 if len(pools) == 0 { 433 panic("impossible due to the config validation and Pools() logic") 434 } 435 436 // Note: we can't just call CheckAnyPoolsPerm since it can potentially leak 437 // pool name in its error message. In CheckBotPerm we don't know if the caller 438 // is allowed to see bot => pool association and should not expose the pool 439 // name in errors, only bot ID. 440 441 oneAllowed := false 442 443 ok := chk.visitRealms(ctx, pools, perm, func(_ string, allowed bool) bool { 444 oneAllowed = oneAllowed || allowed 445 return !oneAllowed 446 }) 447 448 switch { 449 case !ok: 450 return CheckResult{InternalError: true} 451 case oneAllowed: 452 return CheckResult{Permitted: true} 453 default: 454 // TODO(vadimsh): Make the error message more informative. 455 return CheckResult{ 456 err: status.Errorf( 457 codes.PermissionDenied, 458 "the caller %q doesn't have permission %q in the pool that contains bot %q or this bot doesn't exist", 459 chk.caller, perm, botID), 460 } 461 } 462 } 463 464 // visitRealms does a permission check for every pool, sequentially. 465 // 466 // It calls the callback with the outcome of the check. If the callback returns 467 // true, the iteration continues. Otherwise it stops and visitRealms returns 468 // true. Returns false only on internal problems with the check. 469 func (chk *Checker) visitRealms(ctx context.Context, pools []string, perm realms.Permission, cb func(pool string, allowed bool) bool) (ok bool) { 470 // A micro optimization for a very common case of one pool. Skips a map. 471 if len(pools) == 1 { 472 pool := pools[0] 473 cfg := chk.cfg.Pool(pool) 474 if cfg == nil { 475 // Missing pools assumed to have no permissions in them. 476 logging.Warningf(ctx, "Unknown pool when checking ACLs: %s", pool) 477 cb(pool, false) 478 } else { 479 outcome, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil) 480 if err != nil { 481 logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err) 482 return false 483 } 484 cb(pool, outcome) 485 } 486 return true 487 } 488 489 // Generic case that makes more memory allocations. 490 checkedRealms := make(map[string]bool, 2) 491 for _, pool := range pools { 492 cfg := chk.cfg.Pool(pool) 493 if cfg == nil { 494 // Missing pools assumed to have no permissions in them. 495 logging.Warningf(ctx, "Unknown pool when checking ACLs: %s", pool) 496 if !cb(pool, false) { 497 return true 498 } 499 continue 500 } 501 if _, checked := checkedRealms[cfg.Realm]; !checked { 502 outcome, err := chk.db.HasPermission(ctx, chk.caller, perm, cfg.Realm, nil) 503 if err != nil { 504 logging.Errorf(ctx, "Error in HasPermission(%q, %q): %s", perm, cfg.Realm, err) 505 return false 506 } 507 checkedRealms[cfg.Realm] = outcome 508 } 509 if !cb(pool, checkedRealms[cfg.Realm]) { 510 return true 511 } 512 } 513 return true 514 }