github.com/hernad/nomad@v1.6.112/nomad/acl.go (about) 1 // Copyright (c) HashiCorp, Inc. 2 // SPDX-License-Identifier: MPL-2.0 3 4 package nomad 5 6 import ( 7 "errors" 8 "fmt" 9 "net" 10 "time" 11 12 metrics "github.com/armon/go-metrics" 13 "github.com/hernad/nomad/acl" 14 "github.com/hernad/nomad/helper" 15 "github.com/hernad/nomad/nomad/state" 16 "github.com/hernad/nomad/nomad/structs" 17 ) 18 19 // Authenticate extracts an AuthenticatedIdentity from the request context or 20 // provided token and sets the identity on the request. The caller can extract 21 // an acl.ACL, WorkloadIdentity, or other identifying tokens to use for 22 // authorization. Keeping these fields independent rather than merging them into 23 // an ephemeral ACLToken makes the original of the credential clear to RPC 24 // handlers, who may have different behavior for internal vs external origins. 25 // 26 // Note: when called on the follower we'll be making stale queries, so it's 27 // possible if the follower is behind that the leader will get a different value 28 // if an ACL token or allocation's WI has just been created. 29 // 30 // This method returns errors that are used for testing diagnostics. RPC callers 31 // should always return ErrPermissionDenied after checking forwarding when one 32 // of these errors is received. 33 func (s *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentity) error { 34 35 // get the user ACLToken or anonymous token 36 secretID := args.GetAuthToken() 37 aclToken, err := s.ResolveSecretToken(secretID) 38 39 switch { 40 case err == nil: 41 // If ACLs are disabled or we have a non-anonymous token, return that. 42 if aclToken == nil || aclToken != structs.AnonymousACLToken { 43 args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken}) 44 return nil 45 } 46 47 case errors.Is(err, structs.ErrTokenExpired): 48 return err 49 50 case errors.Is(err, structs.ErrTokenInvalid): 51 // if it's not a UUID it might be an identity claim 52 claims, err := s.VerifyClaim(secretID) 53 if err != nil { 54 // we already know the token wasn't valid for an ACL in the state 55 // store, so if we get an error at this point we have an invalid 56 // token and there are no other options but to bail out 57 return err 58 } 59 60 args.SetIdentity(&structs.AuthenticatedIdentity{Claims: claims}) 61 return nil 62 63 case errors.Is(err, structs.ErrTokenNotFound): 64 // Check if the secret ID is the leader's secret ID, in which case treat 65 // it as a management token. 66 leaderAcl := s.getLeaderAcl() 67 if leaderAcl != "" && secretID == leaderAcl { 68 aclToken = structs.LeaderACLToken 69 break 70 } else { 71 // Otherwise, see if the secret ID belongs to a node. We should 72 // reach this point only on first connection. 73 node, err := s.State().NodeBySecretID(nil, secretID) 74 if err != nil { 75 // this is a go-memdb error; shouldn't happen 76 return fmt.Errorf("could not resolve node secret: %w", err) 77 } 78 if node != nil { 79 args.SetIdentity(&structs.AuthenticatedIdentity{ClientID: node.ID}) 80 return nil 81 } 82 } 83 84 // we were passed a bogus token so we'll return an error, but we'll also 85 // want to capture the IP for metrics 86 remoteIP, err := s.remoteIPFromRPCContext(ctx) 87 if err != nil { 88 s.logger.Error("could not determine remote address", "error", err) 89 } 90 args.SetIdentity(&structs.AuthenticatedIdentity{RemoteIP: remoteIP}) 91 return structs.ErrPermissionDenied 92 93 default: // any other error 94 return fmt.Errorf("could not resolve user: %w", err) 95 96 } 97 98 // If there's no context we're in a "static" handler which only happens for 99 // cases where the leader is making RPCs internally (volumewatcher and 100 // deploymentwatcher) 101 if ctx == nil { 102 args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken}) 103 return nil 104 } 105 106 // At this point we either have an anonymous token or an invalid one. 107 108 // Unlike clients that provide their Node ID on first connection, server 109 // RPCs don't include an ID for the server so we identify servers by cert 110 // and IP address. 111 identity := &structs.AuthenticatedIdentity{ACLToken: aclToken} 112 if ctx.TLS { 113 identity.TLSName = ctx.Certificate().Subject.CommonName 114 } 115 116 remoteIP, err := s.remoteIPFromRPCContext(ctx) 117 if err != nil { 118 s.logger.Error( 119 "could not authenticate RPC request or determine remote address", "error", err) 120 return err 121 } 122 identity.RemoteIP = remoteIP 123 args.SetIdentity(identity) 124 return nil 125 } 126 127 func (s *Server) remoteIPFromRPCContext(ctx *RPCContext) (net.IP, error) { 128 var remoteAddr *net.TCPAddr 129 var ok bool 130 if ctx == nil { 131 return nil, nil 132 } 133 if ctx.Session != nil { 134 remoteAddr, ok = ctx.Session.RemoteAddr().(*net.TCPAddr) 135 if !ok { 136 return nil, errors.New("session address was not a TCP address") 137 } 138 } 139 if remoteAddr == nil && ctx.Conn != nil { 140 remoteAddr, ok = ctx.Conn.RemoteAddr().(*net.TCPAddr) 141 if !ok { 142 return nil, errors.New("session address was not a TCP address") 143 } 144 } 145 if remoteAddr != nil { 146 return remoteAddr.IP, nil 147 } 148 return nil, structs.ErrPermissionDenied 149 } 150 151 // ResolveACL is an authentication wrapper which handles resolving both ACL 152 // tokens and Workload Identities. If both are provided the ACL token is 153 // preferred, but it is best for the RPC caller to only include the credentials 154 // for the identity they intend the operation to be performed with. 155 func (s *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) { 156 identity := args.GetIdentity() 157 if !s.config.ACLEnabled || identity == nil { 158 return nil, nil 159 } 160 aclToken := identity.GetACLToken() 161 if aclToken != nil { 162 return s.ResolveACLForToken(aclToken) 163 } 164 claims := identity.GetClaims() 165 if claims != nil { 166 return s.ResolveClaims(claims) 167 } 168 return nil, nil 169 } 170 171 // ResolveACLForToken resolves an ACL from a token only. It should be used only 172 // by Variables endpoints, which have additional implicit policies for their 173 // claims so we can't wrap them up in ResolveACL. 174 // 175 // TODO: figure out a way to the Variables endpoint implicit policies baked into 176 // their acl.ACL object so that we can avoid using this method. 177 func (s *Server) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) { 178 if !s.config.ACLEnabled { 179 return nil, nil 180 } 181 snap, err := s.fsm.State().Snapshot() 182 if err != nil { 183 return nil, err 184 } 185 return resolveACLFromToken(snap, s.aclCache, aclToken) 186 } 187 188 // ResolveClientOrACL resolves an ACL if the identity has a token or claim, and 189 // falls back to verifying the client ID if one has been set 190 func (s *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) { 191 identity := args.GetIdentity() 192 if !s.config.ACLEnabled || identity == nil || identity.ClientID != "" { 193 return nil, nil 194 } 195 aclObj, err := s.ResolveACL(args) 196 if err != nil { 197 return nil, err 198 } 199 200 // Returns either the users aclObj, or nil if ACLs are disabled. 201 return aclObj, nil 202 } 203 204 // ResolveToken is used to translate an ACL Token Secret ID into 205 // an ACL object, nil if ACLs are disabled, or an error. 206 func (s *Server) ResolveToken(secretID string) (*acl.ACL, error) { 207 // Fast-path if ACLs are disabled 208 if !s.config.ACLEnabled { 209 return nil, nil 210 } 211 defer metrics.MeasureSince([]string{"nomad", "acl", "resolveToken"}, time.Now()) 212 213 // Check if the secret ID is the leader secret ID, in which case treat it as 214 // a management token. 215 if leaderAcl := s.getLeaderAcl(); leaderAcl != "" && secretID == leaderAcl { 216 return acl.ManagementACL, nil 217 } 218 219 // Snapshot the state 220 snap, err := s.fsm.State().Snapshot() 221 if err != nil { 222 return nil, err 223 } 224 225 // Resolve the ACL 226 return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID) 227 } 228 229 // VerifyClaim asserts that the token is valid and that the resulting 230 // allocation ID belongs to a non-terminal allocation 231 func (s *Server) VerifyClaim(token string) (*structs.IdentityClaims, error) { 232 233 claims, err := s.encrypter.VerifyClaim(token) 234 if err != nil { 235 return nil, err 236 } 237 snap, err := s.fsm.State().Snapshot() 238 if err != nil { 239 return nil, err 240 } 241 alloc, err := snap.AllocByID(nil, claims.AllocationID) 242 if err != nil { 243 return nil, err 244 } 245 if alloc == nil || alloc.Job == nil { 246 return nil, fmt.Errorf("allocation does not exist") 247 } 248 249 // the claims for terminal allocs are always treated as expired 250 if alloc.TerminalStatus() { 251 return nil, fmt.Errorf("allocation is terminal") 252 } 253 254 return claims, nil 255 } 256 257 func (s *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) { 258 259 policies, err := s.resolvePoliciesForClaims(claims) 260 if err != nil { 261 return nil, err 262 } 263 264 // Compile and cache the ACL object. For many claims this will result in an 265 // ACL object with no policies, which can be efficiently cached. 266 aclObj, err := structs.CompileACLObject(s.aclCache, policies) 267 if err != nil { 268 return nil, err 269 } 270 return aclObj, nil 271 } 272 273 // resolveTokenFromSnapshotCache is used to resolve an ACL object from a 274 // snapshot of state, using a cache to avoid parsing and ACL construction when 275 // possible. It is split from resolveToken to simplify testing. 276 func resolveTokenFromSnapshotCache(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], secretID string) (*acl.ACL, error) { 277 // Lookup the ACL Token 278 var token *structs.ACLToken 279 var err error 280 281 // Handle anonymous requests 282 if secretID == "" { 283 token = structs.AnonymousACLToken 284 } else { 285 token, err = snap.ACLTokenBySecretID(nil, secretID) 286 if err != nil { 287 return nil, err 288 } 289 if token == nil { 290 return nil, structs.ErrTokenNotFound 291 } 292 if token.IsExpired(time.Now().UTC()) { 293 return nil, structs.ErrTokenExpired 294 } 295 } 296 297 return resolveACLFromToken(snap, cache, token) 298 299 } 300 301 func resolveACLFromToken(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], token *structs.ACLToken) (*acl.ACL, error) { 302 303 // Check if this is a management token 304 if token.Type == structs.ACLManagementToken { 305 return acl.ManagementACL, nil 306 } 307 308 // Store all policies detailed in the token request, this includes the 309 // named policies and those referenced within the role link. 310 policies := make([]*structs.ACLPolicy, 0, len(token.Policies)+len(token.Roles)) 311 312 // Iterate all the token policies and add these to our policy tracking 313 // array. 314 for _, policyName := range token.Policies { 315 policy, err := snap.ACLPolicyByName(nil, policyName) 316 if err != nil { 317 return nil, err 318 } 319 if policy == nil { 320 // Ignore policies that don't exist, since they don't grant any 321 // more privilege. 322 continue 323 } 324 325 // Add the policy to the tracking array. 326 policies = append(policies, policy) 327 } 328 329 // Iterate all the token role links, so we can unpack these and identify 330 // the ACL policies. 331 for _, roleLink := range token.Roles { 332 333 // Any error reading the role means we cannot move forward. We just 334 // ignore any roles that have been detailed but are not within our 335 // state. 336 role, err := snap.GetACLRoleByID(nil, roleLink.ID) 337 if err != nil { 338 return nil, err 339 } 340 if role == nil { 341 continue 342 } 343 344 // Unpack the policies held within the ACL role to form a single list 345 // of ACL policies that this token has available. 346 for _, policyLink := range role.Policies { 347 policy, err := snap.ACLPolicyByName(nil, policyLink.Name) 348 if err != nil { 349 return nil, err 350 } 351 352 // Ignore policies that don't exist, since they don't grant any 353 // more privilege. 354 if policy == nil { 355 continue 356 } 357 358 // Add the policy to the tracking array. 359 policies = append(policies, policy) 360 } 361 } 362 363 // Compile and cache the ACL object 364 aclObj, err := structs.CompileACLObject(cache, policies) 365 if err != nil { 366 return nil, err 367 } 368 return aclObj, nil 369 } 370 371 // ResolveSecretToken is used to translate an ACL Token Secret ID into 372 // an ACLToken object, nil if ACLs are disabled, or an error. 373 func (s *Server) ResolveSecretToken(secretID string) (*structs.ACLToken, error) { 374 // TODO(Drew) Look into using ACLObject cache or create a separate cache 375 376 // Fast-path if ACLs are disabled 377 if !s.config.ACLEnabled { 378 return nil, nil 379 } 380 defer metrics.MeasureSince([]string{"nomad", "acl", "resolveSecretToken"}, time.Now()) 381 382 if secretID == "" { 383 return structs.AnonymousACLToken, nil 384 } 385 if !helper.IsUUID(secretID) { 386 return nil, structs.ErrTokenInvalid 387 } 388 389 snap, err := s.fsm.State().Snapshot() 390 if err != nil { 391 return nil, err 392 } 393 394 // Lookup the ACL Token 395 token, err := snap.ACLTokenBySecretID(nil, secretID) 396 if err != nil { 397 return nil, err 398 } 399 if token == nil { 400 return nil, structs.ErrTokenNotFound 401 } 402 if token.IsExpired(time.Now().UTC()) { 403 return nil, structs.ErrTokenExpired 404 } 405 406 return token, nil 407 } 408 409 func (s *Server) resolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) { 410 411 snap, err := s.fsm.State().Snapshot() 412 if err != nil { 413 return nil, err 414 } 415 alloc, err := snap.AllocByID(nil, claims.AllocationID) 416 if err != nil { 417 return nil, err 418 } 419 if alloc == nil || alloc.Job == nil { 420 return nil, fmt.Errorf("allocation does not exist") 421 } 422 423 // Find any policies attached to the job 424 jobId := alloc.Job.ID 425 if alloc.Job.ParentID != "" { 426 jobId = alloc.Job.ParentID 427 } 428 iter, err := snap.ACLPolicyByJob(nil, alloc.Namespace, jobId) 429 if err != nil { 430 return nil, err 431 } 432 policies := []*structs.ACLPolicy{} 433 for { 434 raw := iter.Next() 435 if raw == nil { 436 break 437 } 438 policy := raw.(*structs.ACLPolicy) 439 if policy.JobACL == nil { 440 continue 441 } 442 443 switch { 444 case policy.JobACL.Group == "": 445 policies = append(policies, policy) 446 case policy.JobACL.Group != alloc.TaskGroup: 447 continue // don't bother checking task 448 case policy.JobACL.Task == "": 449 policies = append(policies, policy) 450 case policy.JobACL.Task == claims.TaskName: 451 policies = append(policies, policy) 452 } 453 } 454 455 return policies, nil 456 }