github.com/cilium/cilium@v1.16.2/pkg/kvstore/etcdinit/init.go (about)

     1  // SPDX-License-Identifier: Apache-2.0
     2  // Copyright Authors of Cilium
     3  
     4  package init
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"path"
    10  	"slices"
    11  	"strings"
    12  
    13  	"github.com/sirupsen/logrus"
    14  	"go.etcd.io/etcd/api/v3/authpb"
    15  	clientv3 "go.etcd.io/etcd/client/v3"
    16  
    17  	"github.com/cilium/cilium/pkg/kvstore"
    18  	"github.com/cilium/cilium/pkg/node/store"
    19  )
    20  
    21  // ClusterMeshEtcdInit initializes etcd for use by Cilium Clustermesh via the provided client. It creates a number of
    22  // user accounts and roles with permissions, sets a well-known key to indicate that clients should expect a cilium
    23  // config to be present, and enables authentication for the cluster.
    24  //
    25  // This function uses log to perform informational and debug logging about operations. This function does not log errors
    26  // and instead returns an error for handling, as it is assumed that the calling function will log errors. Most errors
    27  // are wrapped with extra context as to the situation in which the error arose.
    28  //
    29  // The ciliumClusterName is used to determine the admin username.
    30  //
    31  // The context provided as ctx can be used to implement a timeout on operations, and is passed to all etcd client
    32  // functions.
    33  //
    34  // Note that this function is **not idempotent**. It expects a completely blank etcd server with no non-default users,
    35  // roles, permissions, or keys.
    36  func ClusterMeshEtcdInit(ctx context.Context, log *logrus.Entry, client *clientv3.Client, ciliumClusterName string) error {
    37  	ic := initClient{
    38  		log:    log,
    39  		client: client,
    40  	}
    41  
    42  	// This function is largely procedural. The various functions on initClient already perform logging and wrap errors
    43  	// with additional context. So this function only performs the relevant operations, and is more or less a 1:1
    44  	// translation of the shell script that this function replaced.
    45  
    46  	// Pre setup
    47  	log.Info("Performing pre-init tasks")
    48  	err := ic.putHasConfigKey(ctx)
    49  	if err != nil {
    50  		return err
    51  	}
    52  
    53  	// Root user
    54  	rootUsername := username("root")
    55  	log.WithField("etcdUsername", rootUsername).
    56  		Info("Configuring root user")
    57  	err = ic.addNoPasswordUser(ctx, rootUsername)
    58  	if err != nil {
    59  		return err
    60  	}
    61  	err = ic.grantRoleToUser(ctx, rootRolename, rootUsername)
    62  	if err != nil {
    63  		return err
    64  	}
    65  
    66  	// Admin user
    67  	adminUsername := usernameForClusterName("admin", ciliumClusterName)
    68  	log.WithField("etcdUsername", adminUsername).
    69  		Info("Configuring admin user")
    70  	err = ic.addNoPasswordUser(ctx, adminUsername)
    71  	if err != nil {
    72  		return err
    73  	}
    74  	err = ic.grantRoleToUser(ctx, rootRolename, adminUsername)
    75  	if err != nil {
    76  		return err
    77  	}
    78  
    79  	// External workload user
    80  	externalWorkloadUsername := username("externalworkload")
    81  	log.WithField("etcdUsername", externalWorkloadUsername).
    82  		Info("Configuring external workload user")
    83  	err = ic.addNoPasswordUser(ctx, externalWorkloadUsername)
    84  	if err != nil {
    85  		return err
    86  	}
    87  	externalWorkloadRolename := rolename("externalworkload")
    88  	err = ic.addRole(ctx, externalWorkloadRolename)
    89  	if err != nil {
    90  		return err
    91  	}
    92  	err = ic.grantRoleToUser(ctx, externalWorkloadRolename, externalWorkloadUsername)
    93  	if err != nil {
    94  		return err
    95  	}
    96  	err = ic.grantPermissionToRole(ctx, readOnly, allKeysRange, externalWorkloadRolename)
    97  	if err != nil {
    98  		return err
    99  	}
   100  	err = ic.grantPermissionToRole(ctx, readWrite, rangeForPrefix(store.NodeRegisterStorePrefix), externalWorkloadRolename)
   101  	if err != nil {
   102  		return err
   103  	}
   104  	err = ic.grantPermissionToRole(ctx, readWrite, rangeForPrefix(kvstore.InitLockPath), externalWorkloadRolename)
   105  	if err != nil {
   106  		return err
   107  	}
   108  
   109  	// Local user (i.e., local agents accessing information cached by KVStoreMesh)
   110  	localUsername := usernameForClusterName("local", ciliumClusterName)
   111  	log.WithField("etcdUsername", localUsername).
   112  		Info("Configuring local user")
   113  	localRolename := rolename("local")
   114  	err = ic.addNoPasswordUser(ctx, localUsername)
   115  	if err != nil {
   116  		return err
   117  	}
   118  	err = ic.addRole(ctx, localRolename)
   119  	if err != nil {
   120  		return err
   121  	}
   122  	err = ic.grantRoleToUser(ctx, localRolename, localUsername)
   123  	if err != nil {
   124  		return err
   125  	}
   126  	for _, keyRange := range rangesForLocalRole() {
   127  		err = ic.grantPermissionToRole(ctx, readOnly, keyRange, localRolename)
   128  		if err != nil {
   129  			return err
   130  		}
   131  	}
   132  
   133  	// Remote user (i.e., remote clusters accessing state information)
   134  	remoteUsername := username("remote")
   135  	log.WithField("etcdUsername", remoteUsername).
   136  		Info("Configuring remote user")
   137  	remoteRolename := rolename("remote")
   138  	err = ic.addNoPasswordUser(ctx, remoteUsername)
   139  	if err != nil {
   140  		return err
   141  	}
   142  	err = ic.addRole(ctx, remoteRolename)
   143  	if err != nil {
   144  		return err
   145  	}
   146  	err = ic.grantRoleToUser(ctx, remoteRolename, remoteUsername)
   147  	if err != nil {
   148  		return err
   149  	}
   150  	for _, keyRange := range rangesForRemoteRole(ciliumClusterName) {
   151  		err = ic.grantPermissionToRole(ctx, readOnly, keyRange, remoteRolename)
   152  		if err != nil {
   153  			return err
   154  		}
   155  	}
   156  
   157  	// Post setup
   158  	log.Info("Performing post-init tasks")
   159  	err = ic.enableAuth(ctx)
   160  	if err != nil {
   161  		return err
   162  	}
   163  
   164  	return nil
   165  }
   166  
   167  // usernameForClusterName generates the account username for a given clusterName. This handles the edge case
   168  // where the clusterName is blank, ensuring we don't have a username with a trailing hyphen.
   169  func usernameForClusterName(base, clusterName string) username {
   170  	if clusterName == "" {
   171  		return username(base)
   172  	}
   173  	return username(fmt.Sprintf("%s-%s", base, clusterName))
   174  }
   175  
   176  // initClient is a thin wrapper around the etcd client library that provides functions with more useful error messages,
   177  // debug logging, and more. It's not intended as an interface for mocking or testing, or to be exposed outside of this
   178  // package. It's entirely an internal implementation detail.
   179  type initClient struct {
   180  	client *clientv3.Client
   181  	log    *logrus.Entry
   182  }
   183  
   184  // The username and rolename types exist to make it harder to mix up usernames and rolenames, which are both strings
   185  // and are often the same, in code. Without this there could be subtle bugs where the code still works so long as
   186  // usernames and role names are the same.
   187  type username string
   188  type rolename string
   189  
   190  // rootRolename refers to a special "root" role that exists by default in etcd.
   191  const rootRolename = rolename("root")
   192  
   193  // put sets a key to a value. It's a wrapper around the etcd client's put method.
   194  func (ic initClient) put(ctx context.Context, key, val string) error {
   195  	ic.log.WithField("etcdKey", key).
   196  		WithField("etcdValue", val).
   197  		Debug("Setting key in etcd")
   198  	_, err := ic.client.Put(ctx, key, val)
   199  	if err != nil {
   200  		return fmt.Errorf("setting path '%s' to '%s': %w", key, val, err)
   201  	}
   202  	return nil
   203  }
   204  
   205  // putHasConfigKey sets the specialised etcd "has config" key to be true.
   206  func (ic initClient) putHasConfigKey(ctx context.Context) error {
   207  	ic.log.Debug("Setting the key to indicate that config has already been set")
   208  	err := ic.put(ctx, kvstore.HasClusterConfigPath, "true")
   209  	if err != nil {
   210  		return fmt.Errorf("setting key to indicate config is already set: %w", err)
   211  	}
   212  	return nil
   213  }
   214  
   215  // addNoPasswordUser adds a new user to etcd with no password. This is expected as later on we'll enable auth which will
   216  // require other forms of authentication. This is a wrapper around the client's UserAddWithOptions method.
   217  func (ic initClient) addNoPasswordUser(ctx context.Context, username username) error {
   218  	ic.log.WithField("etcdUsername", username).
   219  		Debug("Adding etcd user")
   220  	_, err := ic.client.UserAddWithOptions(ctx, string(username), "", &clientv3.UserAddOptions{NoPassword: true})
   221  	if err != nil {
   222  		return fmt.Errorf("adding user '%s': %w", username, err)
   223  	}
   224  	return nil
   225  }
   226  
   227  // addRole adds a new role to etcd. This is a wrapper around the client's RoleAdd method.
   228  func (ic initClient) addRole(ctx context.Context, rolename rolename) error {
   229  	ic.log.WithField("etcdRolename", rolename).
   230  		Debug("Adding etcd role")
   231  	_, err := ic.client.RoleAdd(ctx, string(rolename))
   232  	if err != nil {
   233  		return fmt.Errorf("adding role '%s': %w", rolename, err)
   234  	}
   235  	return nil
   236  }
   237  
   238  // grantRoleToUser grants a role to a user, enabling that user access to the permissions of that role. This is a wrapper
   239  // around the client's UserGrantRole method.
   240  func (ic initClient) grantRoleToUser(ctx context.Context, rolename rolename, username username) error {
   241  	ic.log.WithField("etcdUsername", username).
   242  		WithField("etcdRolename", rolename).
   243  		Debug("Granting role to etcd user")
   244  	_, err := ic.client.UserGrantRole(ctx, string(username), string(rolename))
   245  	if err != nil {
   246  		return fmt.Errorf("granting role '%s' to user '%s': %w", rolename, username, err)
   247  	}
   248  	return nil
   249  }
   250  
   251  // keyRange describes a range of keys
   252  type keyRange struct {
   253  	start string
   254  	end   string
   255  }
   256  
   257  // krOpt represents a keyRange option.
   258  type krOpt int
   259  
   260  const (
   261  	// withoutTrailingSlash disables adding a trailing slash to a prefix.
   262  	withoutTrailingSlash krOpt = iota
   263  )
   264  
   265  // rangeForKey generates a keyRange for a single key.
   266  func rangeForKey(key string) keyRange {
   267  	return keyRange{key, ""}
   268  }
   269  
   270  // rangeForPrefix generates a keyRange for a given prefix. This is a wrapper around the client's GetPrefixRangeEnd
   271  // function.
   272  func rangeForPrefix(prefix string, opts ...krOpt) keyRange {
   273  	// For a **prefix** range, we need a trailing slash. Without it, the behaviour of clientv3.GetPrefixRangeEnd is
   274  	// slightly different. For example on `cilium/.initlock` the given range end is `cilium/.initlocl`, while on
   275  	// `cilium/.initlock/` it's `cilium/.initlock0`.
   276  	if !strings.HasSuffix(prefix, "/") && !slices.Contains(opts, withoutTrailingSlash) {
   277  		prefix += "/"
   278  	}
   279  	return keyRange{prefix, clientv3.GetPrefixRangeEnd(prefix)}
   280  }
   281  
   282  // allKeysRange is the range over all keys in etcd. Granting permissions on this range is the same as granting global
   283  // permissions in etcd.
   284  var allKeysRange = keyRange{"\x00", "\x00"}
   285  
   286  // permission is a thin, internal wrapper around etcd's permission types
   287  type permission clientv3.PermissionType
   288  
   289  var readOnly = permission(clientv3.PermRead)
   290  var readWrite = permission(clientv3.PermReadWrite)
   291  
   292  func (p permission) string() string {
   293  	return authpb.Permission_Type(p).String()
   294  }
   295  
   296  // grantPermissionToRole grants permissions on a range of keys to a role. This is a wrapper around the client's
   297  // RoleGrantPermission method.
   298  func (ic initClient) grantPermissionToRole(ctx context.Context, permission permission, keyRange keyRange, rolename rolename) error {
   299  	ic.log.WithFields(logrus.Fields{
   300  		"etcdRolename":   rolename,
   301  		"etcdPermission": permission.string(),
   302  		"etcdRangeStart": keyRange.start,
   303  		"etcdRangeEnd":   keyRange.end,
   304  	}).
   305  		Debug("Granting permission on a range of keys to an etcd role")
   306  	_, err := ic.client.RoleGrantPermission(ctx, string(rolename), keyRange.start, keyRange.end, clientv3.PermissionType(permission))
   307  	if err != nil {
   308  		return fmt.Errorf("granting role '%s' permission '%s' on range '%s' to '%s': %w", rolename, permission.string(), keyRange.start, keyRange.end, err)
   309  	}
   310  	return nil
   311  }
   312  
   313  // enableAuth enables etcd authentication. This is a wrapper around the client's AuthEnable method.
   314  //
   315  // It should be noted that this command should be run **last**, as we usually don't have authentication, so turning
   316  // this on will instantly lock us out.
   317  func (ic initClient) enableAuth(ctx context.Context) error {
   318  	ic.log.Debug("Enabling authentication on etcd cluster")
   319  	_, err := ic.client.AuthEnable(ctx)
   320  	if err != nil {
   321  		return fmt.Errorf("enabling authentication on etcd: %w", err)
   322  	}
   323  	return nil
   324  }
   325  
   326  // rangesForLocalRole returns the set of etcd key ranges allowed to be accessed by the local user.
   327  func rangesForLocalRole() []keyRange {
   328  	return []keyRange{
   329  		rangeForPrefix(kvstore.HeartbeatPath, withoutTrailingSlash),
   330  		rangeForKey(kvstore.HasClusterConfigPath),
   331  		rangeForPrefix(kvstore.CachePrefix),
   332  		rangeForPrefix(kvstore.ClusterConfigPrefix),
   333  		rangeForPrefix(kvstore.SyncedPrefix),
   334  	}
   335  }
   336  
   337  // rangesForLocalUser returns the set of etcd key ranges allowed to be accessed by the remote user.
   338  func rangesForRemoteRole(clusterName string) []keyRange {
   339  	return []keyRange{
   340  		rangeForPrefix(kvstore.HeartbeatPath, withoutTrailingSlash),
   341  		rangeForKey(kvstore.HasClusterConfigPath),
   342  		rangeForPrefix(kvstore.StatePrefix),
   343  		rangeForKey(path.Join(kvstore.ClusterConfigPrefix, clusterName)),
   344  		rangeForPrefix(path.Join(kvstore.SyncedPrefix, clusterName)),
   345  
   346  		// kvstoremesh-specific prefixes still allowed for backward compatibility
   347  		rangeForPrefix(kvstore.CachePrefix),
   348  		rangeForPrefix(kvstore.ClusterConfigPrefix),
   349  		rangeForPrefix(kvstore.SyncedPrefix),
   350  	}
   351  }