github.com/hernad/nomad@v1.6.112/nomad/acl.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package nomad
     5  
     6  import (
     7  	"errors"
     8  	"fmt"
     9  	"net"
    10  	"time"
    11  
    12  	metrics "github.com/armon/go-metrics"
    13  	"github.com/hernad/nomad/acl"
    14  	"github.com/hernad/nomad/helper"
    15  	"github.com/hernad/nomad/nomad/state"
    16  	"github.com/hernad/nomad/nomad/structs"
    17  )
    18  
    19  // Authenticate extracts an AuthenticatedIdentity from the request context or
    20  // provided token and sets the identity on the request. The caller can extract
    21  // an acl.ACL, WorkloadIdentity, or other identifying tokens to use for
    22  // authorization. Keeping these fields independent rather than merging them into
    23  // an ephemeral ACLToken makes the original of the credential clear to RPC
    24  // handlers, who may have different behavior for internal vs external origins.
    25  //
    26  // Note: when called on the follower we'll be making stale queries, so it's
    27  // possible if the follower is behind that the leader will get a different value
    28  // if an ACL token or allocation's WI has just been created.
    29  //
    30  // This method returns errors that are used for testing diagnostics. RPC callers
    31  // should always return ErrPermissionDenied after checking forwarding when one
    32  // of these errors is received.
    33  func (s *Server) Authenticate(ctx *RPCContext, args structs.RequestWithIdentity) error {
    34  
    35  	// get the user ACLToken or anonymous token
    36  	secretID := args.GetAuthToken()
    37  	aclToken, err := s.ResolveSecretToken(secretID)
    38  
    39  	switch {
    40  	case err == nil:
    41  		// If ACLs are disabled or we have a non-anonymous token, return that.
    42  		if aclToken == nil || aclToken != structs.AnonymousACLToken {
    43  			args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
    44  			return nil
    45  		}
    46  
    47  	case errors.Is(err, structs.ErrTokenExpired):
    48  		return err
    49  
    50  	case errors.Is(err, structs.ErrTokenInvalid):
    51  		// if it's not a UUID it might be an identity claim
    52  		claims, err := s.VerifyClaim(secretID)
    53  		if err != nil {
    54  			// we already know the token wasn't valid for an ACL in the state
    55  			// store, so if we get an error at this point we have an invalid
    56  			// token and there are no other options but to bail out
    57  			return err
    58  		}
    59  
    60  		args.SetIdentity(&structs.AuthenticatedIdentity{Claims: claims})
    61  		return nil
    62  
    63  	case errors.Is(err, structs.ErrTokenNotFound):
    64  		// Check if the secret ID is the leader's secret ID, in which case treat
    65  		// it as a management token.
    66  		leaderAcl := s.getLeaderAcl()
    67  		if leaderAcl != "" && secretID == leaderAcl {
    68  			aclToken = structs.LeaderACLToken
    69  			break
    70  		} else {
    71  			// Otherwise, see if the secret ID belongs to a node. We should
    72  			// reach this point only on first connection.
    73  			node, err := s.State().NodeBySecretID(nil, secretID)
    74  			if err != nil {
    75  				// this is a go-memdb error; shouldn't happen
    76  				return fmt.Errorf("could not resolve node secret: %w", err)
    77  			}
    78  			if node != nil {
    79  				args.SetIdentity(&structs.AuthenticatedIdentity{ClientID: node.ID})
    80  				return nil
    81  			}
    82  		}
    83  
    84  		// we were passed a bogus token so we'll return an error, but we'll also
    85  		// want to capture the IP for metrics
    86  		remoteIP, err := s.remoteIPFromRPCContext(ctx)
    87  		if err != nil {
    88  			s.logger.Error("could not determine remote address", "error", err)
    89  		}
    90  		args.SetIdentity(&structs.AuthenticatedIdentity{RemoteIP: remoteIP})
    91  		return structs.ErrPermissionDenied
    92  
    93  	default: // any other error
    94  		return fmt.Errorf("could not resolve user: %w", err)
    95  
    96  	}
    97  
    98  	// If there's no context we're in a "static" handler which only happens for
    99  	// cases where the leader is making RPCs internally (volumewatcher and
   100  	// deploymentwatcher)
   101  	if ctx == nil {
   102  		args.SetIdentity(&structs.AuthenticatedIdentity{ACLToken: aclToken})
   103  		return nil
   104  	}
   105  
   106  	// At this point we either have an anonymous token or an invalid one.
   107  
   108  	// Unlike clients that provide their Node ID on first connection, server
   109  	// RPCs don't include an ID for the server so we identify servers by cert
   110  	// and IP address.
   111  	identity := &structs.AuthenticatedIdentity{ACLToken: aclToken}
   112  	if ctx.TLS {
   113  		identity.TLSName = ctx.Certificate().Subject.CommonName
   114  	}
   115  
   116  	remoteIP, err := s.remoteIPFromRPCContext(ctx)
   117  	if err != nil {
   118  		s.logger.Error(
   119  			"could not authenticate RPC request or determine remote address", "error", err)
   120  		return err
   121  	}
   122  	identity.RemoteIP = remoteIP
   123  	args.SetIdentity(identity)
   124  	return nil
   125  }
   126  
   127  func (s *Server) remoteIPFromRPCContext(ctx *RPCContext) (net.IP, error) {
   128  	var remoteAddr *net.TCPAddr
   129  	var ok bool
   130  	if ctx == nil {
   131  		return nil, nil
   132  	}
   133  	if ctx.Session != nil {
   134  		remoteAddr, ok = ctx.Session.RemoteAddr().(*net.TCPAddr)
   135  		if !ok {
   136  			return nil, errors.New("session address was not a TCP address")
   137  		}
   138  	}
   139  	if remoteAddr == nil && ctx.Conn != nil {
   140  		remoteAddr, ok = ctx.Conn.RemoteAddr().(*net.TCPAddr)
   141  		if !ok {
   142  			return nil, errors.New("session address was not a TCP address")
   143  		}
   144  	}
   145  	if remoteAddr != nil {
   146  		return remoteAddr.IP, nil
   147  	}
   148  	return nil, structs.ErrPermissionDenied
   149  }
   150  
   151  // ResolveACL is an authentication wrapper which handles resolving both ACL
   152  // tokens and Workload Identities. If both are provided the ACL token is
   153  // preferred, but it is best for the RPC caller to only include the credentials
   154  // for the identity they intend the operation to be performed with.
   155  func (s *Server) ResolveACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
   156  	identity := args.GetIdentity()
   157  	if !s.config.ACLEnabled || identity == nil {
   158  		return nil, nil
   159  	}
   160  	aclToken := identity.GetACLToken()
   161  	if aclToken != nil {
   162  		return s.ResolveACLForToken(aclToken)
   163  	}
   164  	claims := identity.GetClaims()
   165  	if claims != nil {
   166  		return s.ResolveClaims(claims)
   167  	}
   168  	return nil, nil
   169  }
   170  
   171  // ResolveACLForToken resolves an ACL from a token only. It should be used only
   172  // by Variables endpoints, which have additional implicit policies for their
   173  // claims so we can't wrap them up in ResolveACL.
   174  //
   175  // TODO: figure out a way to the Variables endpoint implicit policies baked into
   176  // their acl.ACL object so that we can avoid using this method.
   177  func (s *Server) ResolveACLForToken(aclToken *structs.ACLToken) (*acl.ACL, error) {
   178  	if !s.config.ACLEnabled {
   179  		return nil, nil
   180  	}
   181  	snap, err := s.fsm.State().Snapshot()
   182  	if err != nil {
   183  		return nil, err
   184  	}
   185  	return resolveACLFromToken(snap, s.aclCache, aclToken)
   186  }
   187  
   188  // ResolveClientOrACL resolves an ACL if the identity has a token or claim, and
   189  // falls back to verifying the client ID if one has been set
   190  func (s *Server) ResolveClientOrACL(args structs.RequestWithIdentity) (*acl.ACL, error) {
   191  	identity := args.GetIdentity()
   192  	if !s.config.ACLEnabled || identity == nil || identity.ClientID != "" {
   193  		return nil, nil
   194  	}
   195  	aclObj, err := s.ResolveACL(args)
   196  	if err != nil {
   197  		return nil, err
   198  	}
   199  
   200  	// Returns either the users aclObj, or nil if ACLs are disabled.
   201  	return aclObj, nil
   202  }
   203  
   204  // ResolveToken is used to translate an ACL Token Secret ID into
   205  // an ACL object, nil if ACLs are disabled, or an error.
   206  func (s *Server) ResolveToken(secretID string) (*acl.ACL, error) {
   207  	// Fast-path if ACLs are disabled
   208  	if !s.config.ACLEnabled {
   209  		return nil, nil
   210  	}
   211  	defer metrics.MeasureSince([]string{"nomad", "acl", "resolveToken"}, time.Now())
   212  
   213  	// Check if the secret ID is the leader secret ID, in which case treat it as
   214  	// a management token.
   215  	if leaderAcl := s.getLeaderAcl(); leaderAcl != "" && secretID == leaderAcl {
   216  		return acl.ManagementACL, nil
   217  	}
   218  
   219  	// Snapshot the state
   220  	snap, err := s.fsm.State().Snapshot()
   221  	if err != nil {
   222  		return nil, err
   223  	}
   224  
   225  	// Resolve the ACL
   226  	return resolveTokenFromSnapshotCache(snap, s.aclCache, secretID)
   227  }
   228  
   229  // VerifyClaim asserts that the token is valid and that the resulting
   230  // allocation ID belongs to a non-terminal allocation
   231  func (s *Server) VerifyClaim(token string) (*structs.IdentityClaims, error) {
   232  
   233  	claims, err := s.encrypter.VerifyClaim(token)
   234  	if err != nil {
   235  		return nil, err
   236  	}
   237  	snap, err := s.fsm.State().Snapshot()
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  	alloc, err := snap.AllocByID(nil, claims.AllocationID)
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  	if alloc == nil || alloc.Job == nil {
   246  		return nil, fmt.Errorf("allocation does not exist")
   247  	}
   248  
   249  	// the claims for terminal allocs are always treated as expired
   250  	if alloc.TerminalStatus() {
   251  		return nil, fmt.Errorf("allocation is terminal")
   252  	}
   253  
   254  	return claims, nil
   255  }
   256  
   257  func (s *Server) ResolveClaims(claims *structs.IdentityClaims) (*acl.ACL, error) {
   258  
   259  	policies, err := s.resolvePoliciesForClaims(claims)
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  
   264  	// Compile and cache the ACL object. For many claims this will result in an
   265  	// ACL object with no policies, which can be efficiently cached.
   266  	aclObj, err := structs.CompileACLObject(s.aclCache, policies)
   267  	if err != nil {
   268  		return nil, err
   269  	}
   270  	return aclObj, nil
   271  }
   272  
   273  // resolveTokenFromSnapshotCache is used to resolve an ACL object from a
   274  // snapshot of state, using a cache to avoid parsing and ACL construction when
   275  // possible. It is split from resolveToken to simplify testing.
   276  func resolveTokenFromSnapshotCache(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], secretID string) (*acl.ACL, error) {
   277  	// Lookup the ACL Token
   278  	var token *structs.ACLToken
   279  	var err error
   280  
   281  	// Handle anonymous requests
   282  	if secretID == "" {
   283  		token = structs.AnonymousACLToken
   284  	} else {
   285  		token, err = snap.ACLTokenBySecretID(nil, secretID)
   286  		if err != nil {
   287  			return nil, err
   288  		}
   289  		if token == nil {
   290  			return nil, structs.ErrTokenNotFound
   291  		}
   292  		if token.IsExpired(time.Now().UTC()) {
   293  			return nil, structs.ErrTokenExpired
   294  		}
   295  	}
   296  
   297  	return resolveACLFromToken(snap, cache, token)
   298  
   299  }
   300  
   301  func resolveACLFromToken(snap *state.StateSnapshot, cache *structs.ACLCache[*acl.ACL], token *structs.ACLToken) (*acl.ACL, error) {
   302  
   303  	// Check if this is a management token
   304  	if token.Type == structs.ACLManagementToken {
   305  		return acl.ManagementACL, nil
   306  	}
   307  
   308  	// Store all policies detailed in the token request, this includes the
   309  	// named policies and those referenced within the role link.
   310  	policies := make([]*structs.ACLPolicy, 0, len(token.Policies)+len(token.Roles))
   311  
   312  	// Iterate all the token policies and add these to our policy tracking
   313  	// array.
   314  	for _, policyName := range token.Policies {
   315  		policy, err := snap.ACLPolicyByName(nil, policyName)
   316  		if err != nil {
   317  			return nil, err
   318  		}
   319  		if policy == nil {
   320  			// Ignore policies that don't exist, since they don't grant any
   321  			// more privilege.
   322  			continue
   323  		}
   324  
   325  		// Add the policy to the tracking array.
   326  		policies = append(policies, policy)
   327  	}
   328  
   329  	// Iterate all the token role links, so we can unpack these and identify
   330  	// the ACL policies.
   331  	for _, roleLink := range token.Roles {
   332  
   333  		// Any error reading the role means we cannot move forward. We just
   334  		// ignore any roles that have been detailed but are not within our
   335  		// state.
   336  		role, err := snap.GetACLRoleByID(nil, roleLink.ID)
   337  		if err != nil {
   338  			return nil, err
   339  		}
   340  		if role == nil {
   341  			continue
   342  		}
   343  
   344  		// Unpack the policies held within the ACL role to form a single list
   345  		// of ACL policies that this token has available.
   346  		for _, policyLink := range role.Policies {
   347  			policy, err := snap.ACLPolicyByName(nil, policyLink.Name)
   348  			if err != nil {
   349  				return nil, err
   350  			}
   351  
   352  			// Ignore policies that don't exist, since they don't grant any
   353  			// more privilege.
   354  			if policy == nil {
   355  				continue
   356  			}
   357  
   358  			// Add the policy to the tracking array.
   359  			policies = append(policies, policy)
   360  		}
   361  	}
   362  
   363  	// Compile and cache the ACL object
   364  	aclObj, err := structs.CompileACLObject(cache, policies)
   365  	if err != nil {
   366  		return nil, err
   367  	}
   368  	return aclObj, nil
   369  }
   370  
   371  // ResolveSecretToken is used to translate an ACL Token Secret ID into
   372  // an ACLToken object, nil if ACLs are disabled, or an error.
   373  func (s *Server) ResolveSecretToken(secretID string) (*structs.ACLToken, error) {
   374  	// TODO(Drew) Look into using ACLObject cache or create a separate cache
   375  
   376  	// Fast-path if ACLs are disabled
   377  	if !s.config.ACLEnabled {
   378  		return nil, nil
   379  	}
   380  	defer metrics.MeasureSince([]string{"nomad", "acl", "resolveSecretToken"}, time.Now())
   381  
   382  	if secretID == "" {
   383  		return structs.AnonymousACLToken, nil
   384  	}
   385  	if !helper.IsUUID(secretID) {
   386  		return nil, structs.ErrTokenInvalid
   387  	}
   388  
   389  	snap, err := s.fsm.State().Snapshot()
   390  	if err != nil {
   391  		return nil, err
   392  	}
   393  
   394  	// Lookup the ACL Token
   395  	token, err := snap.ACLTokenBySecretID(nil, secretID)
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  	if token == nil {
   400  		return nil, structs.ErrTokenNotFound
   401  	}
   402  	if token.IsExpired(time.Now().UTC()) {
   403  		return nil, structs.ErrTokenExpired
   404  	}
   405  
   406  	return token, nil
   407  }
   408  
   409  func (s *Server) resolvePoliciesForClaims(claims *structs.IdentityClaims) ([]*structs.ACLPolicy, error) {
   410  
   411  	snap, err := s.fsm.State().Snapshot()
   412  	if err != nil {
   413  		return nil, err
   414  	}
   415  	alloc, err := snap.AllocByID(nil, claims.AllocationID)
   416  	if err != nil {
   417  		return nil, err
   418  	}
   419  	if alloc == nil || alloc.Job == nil {
   420  		return nil, fmt.Errorf("allocation does not exist")
   421  	}
   422  
   423  	// Find any policies attached to the job
   424  	jobId := alloc.Job.ID
   425  	if alloc.Job.ParentID != "" {
   426  		jobId = alloc.Job.ParentID
   427  	}
   428  	iter, err := snap.ACLPolicyByJob(nil, alloc.Namespace, jobId)
   429  	if err != nil {
   430  		return nil, err
   431  	}
   432  	policies := []*structs.ACLPolicy{}
   433  	for {
   434  		raw := iter.Next()
   435  		if raw == nil {
   436  			break
   437  		}
   438  		policy := raw.(*structs.ACLPolicy)
   439  		if policy.JobACL == nil {
   440  			continue
   441  		}
   442  
   443  		switch {
   444  		case policy.JobACL.Group == "":
   445  			policies = append(policies, policy)
   446  		case policy.JobACL.Group != alloc.TaskGroup:
   447  			continue // don't bother checking task
   448  		case policy.JobACL.Task == "":
   449  			policies = append(policies, policy)
   450  		case policy.JobACL.Task == claims.TaskName:
   451  			policies = append(policies, policy)
   452  		}
   453  	}
   454  
   455  	return policies, nil
   456  }