github.com/openshift/installer@v1.4.17/pkg/infrastructure/gcp/clusterapi/iam.go (about)

     1  package clusterapi
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net/http"
     8  	"time"
     9  
    10  	"github.com/sirupsen/logrus"
    11  	resourcemanager "google.golang.org/api/cloudresourcemanager/v1"
    12  	"google.golang.org/api/googleapi"
    13  	iam "google.golang.org/api/iam/v1"
    14  	"google.golang.org/api/option"
    15  	"k8s.io/apimachinery/pkg/util/wait"
    16  
    17  	gcp "github.com/openshift/installer/pkg/asset/installconfig/gcp"
    18  )
    19  
    20  const (
    21  	retryTime  = 10 * time.Second
    22  	retryCount = 6
    23  )
    24  
    25  func defaultServiceAccountID(infraID, projectID, role string) string {
    26  	// The account id is used to generate the service account email address,
    27  	// it should not contain the email suffixi. It is unique within a project,
    28  	// must be 6-30 characters long, and match the regular expression `[a-z]([-a-z0-9]*[a-z0-9])`
    29  	return fmt.Sprintf("%s-%s", infraID, role[0:1])
    30  }
    31  
    32  // GetMasterRoles returns the pre-defined roles for a master node.
    33  // Roles are described here https://cloud.google.com/iam/docs/understanding-roles#predefined_roles.
    34  func GetMasterRoles() []string {
    35  	return []string{
    36  		"roles/compute.instanceAdmin",
    37  		"roles/compute.networkAdmin",
    38  		"roles/compute.securityAdmin",
    39  		"roles/storage.admin",
    40  	}
    41  }
    42  
    43  // GetWorkerRoles returns the pre-defined roles for a worker node.
    44  func GetWorkerRoles() []string {
    45  	return []string{
    46  		"roles/compute.viewer",
    47  		"roles/storage.admin",
    48  	}
    49  }
    50  
    51  // CreateServiceAccount is used to create a service account for a compute instance.
    52  func CreateServiceAccount(ctx context.Context, infraID, projectID, role string) (string, error) {
    53  	ctx, cancel := context.WithTimeout(ctx, time.Minute*1)
    54  	defer cancel()
    55  
    56  	ssn, err := gcp.GetSession(ctx)
    57  	if err != nil {
    58  		return "", fmt.Errorf("failed to get session: %w", err)
    59  	}
    60  	service, err := iam.NewService(ctx, option.WithCredentials(ssn.Credentials))
    61  	if err != nil {
    62  		return "", fmt.Errorf("failed to create IAM service: %w", err)
    63  	}
    64  
    65  	accountID := defaultServiceAccountID(infraID, projectID, role)
    66  	displayName := fmt.Sprintf("%s-%s-node", infraID, role)
    67  
    68  	request := &iam.CreateServiceAccountRequest{
    69  		AccountId: accountID,
    70  		ServiceAccount: &iam.ServiceAccount{
    71  			Description: "The service account used by the instances.",
    72  			DisplayName: displayName,
    73  		},
    74  	}
    75  
    76  	sa, err := service.Projects.ServiceAccounts.Create("projects/"+projectID, request).Do()
    77  	if err != nil {
    78  		return "", fmt.Errorf("Projects.ServiceAccounts.Create: %w", err)
    79  	}
    80  
    81  	// Poll for service account
    82  	for i := 0; i < retryCount; i++ {
    83  		_, err := service.Projects.ServiceAccounts.Get(sa.Name).Do()
    84  		if err == nil {
    85  			logrus.Debugf("Service account created for %s", accountID)
    86  			return sa.Email, nil
    87  		}
    88  		time.Sleep(retryTime)
    89  	}
    90  
    91  	return "", fmt.Errorf("failure creating service account: %w", err)
    92  }
    93  
    94  // AddServiceAccountRoles adds predefined roles for service account.
    95  func AddServiceAccountRoles(ctx context.Context, projectID, serviceAccountID string, roles []string) error {
    96  	// Get cloudresourcemanager service
    97  	ctx, cancel := context.WithTimeout(ctx, time.Minute*1)
    98  	defer cancel()
    99  
   100  	ssn, err := gcp.GetSession(ctx)
   101  	if err != nil {
   102  		return fmt.Errorf("failed to get session: %w", err)
   103  	}
   104  	service, err := resourcemanager.NewService(ctx, option.WithCredentials(ssn.Credentials))
   105  	if err != nil {
   106  		return fmt.Errorf("failed to create resourcemanager service: %w", err)
   107  	}
   108  
   109  	backoff := wait.Backoff{
   110  		Duration: 2 * time.Second,
   111  		Jitter:   1.0,
   112  		Steps:    5,
   113  	}
   114  	// Get and set the policy in a backoff loop.
   115  	// If the policy set fails, the policy must be retrieved again via the get before retrying the set.
   116  	var lastErr error
   117  	if waitErr := wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (bool, error) {
   118  		policy, err := getPolicy(ctx, service, projectID)
   119  		if isQuotaExceededError(err) {
   120  			lastErr = err
   121  			logrus.Debugf("Failed to get IAM policy, retrying after backoff")
   122  			return false, nil
   123  		} else if err != nil {
   124  			return false, fmt.Errorf("failed to get IAM policy, unexpected error: %w", err)
   125  		}
   126  
   127  		member := fmt.Sprintf("serviceAccount:%s", serviceAccountID)
   128  		for _, role := range roles {
   129  			err = addMemberToRole(policy, role, member)
   130  			if err != nil {
   131  				return false, fmt.Errorf("failed to add role %s to %s: %w", role, member, err)
   132  			}
   133  		}
   134  
   135  		err = setPolicy(ctx, service, projectID, policy)
   136  		if err != nil {
   137  			if isConflictError(err) {
   138  				lastErr = err
   139  				logrus.Debugf("Concurrent IAM policy changes, restarting read/modify/write")
   140  				return false, nil
   141  			}
   142  			return false, fmt.Errorf("failed to set IAM policy, unexpected error: %w", err)
   143  		}
   144  		logrus.Debugf("Successfully set IAM policy")
   145  		return true, nil
   146  	}); waitErr != nil {
   147  		if wait.Interrupted(waitErr) {
   148  			return fmt.Errorf("failed to set IAM policy: %w", lastErr)
   149  		}
   150  		return waitErr
   151  	}
   152  	return nil
   153  }
   154  
   155  // getPolicy gets the project's IAM policy.
   156  func getPolicy(ctx context.Context, crmService *resourcemanager.Service, projectID string) (*resourcemanager.Policy, error) {
   157  	logrus.Debugf("Getting policy for %s", projectID)
   158  	request := &resourcemanager.GetIamPolicyRequest{}
   159  	policy, err := crmService.Projects.GetIamPolicy(projectID, request).Context(ctx).Do()
   160  	return policy, err
   161  }
   162  
   163  // setPolicy sets the project's IAM policy.
   164  func setPolicy(ctx context.Context, crmService *resourcemanager.Service, projectID string, policy *resourcemanager.Policy) error {
   165  	request := &resourcemanager.SetIamPolicyRequest{}
   166  	request.Policy = policy
   167  	_, err := crmService.Projects.SetIamPolicy(projectID, request).Context(ctx).Do()
   168  	return err
   169  }
   170  
   171  // addMemberToRole adds a member to a role binding.
   172  func addMemberToRole(policy *resourcemanager.Policy, role, member string) error {
   173  	var policyBinding *resourcemanager.Binding
   174  
   175  	for _, binding := range policy.Bindings {
   176  		if binding.Role == role {
   177  			for _, m := range binding.Members {
   178  				if m == member {
   179  					logrus.Debugf("found %s role, member %s already exists", role, member)
   180  					return nil
   181  				}
   182  			}
   183  			policyBinding = binding
   184  		}
   185  	}
   186  
   187  	if policyBinding == nil {
   188  		policyBinding = &resourcemanager.Binding{
   189  			Role:    role,
   190  			Members: []string{member},
   191  		}
   192  		logrus.Debugf("creating new policy binding for %s role and %s member", role, member)
   193  		policy.Bindings = append(policy.Bindings, policyBinding)
   194  	}
   195  
   196  	policyBinding.Members = append(policyBinding.Members, member)
   197  	logrus.Debugf("adding %s role, added %s member", role, member)
   198  	return nil
   199  }
   200  
   201  // isConflictError returns true if error matches conflict on concurrent policy sets.
   202  func isConflictError(err error) bool {
   203  	var ae *googleapi.Error
   204  	if errors.As(err, &ae) && (ae.Code == http.StatusConflict || ae.Code == http.StatusPreconditionFailed) {
   205  		return true
   206  	}
   207  	return false
   208  }
   209  
   210  // isQuotaExceededError returns true if the error matches quota exceeded.
   211  func isQuotaExceededError(err error) bool {
   212  	var ae *googleapi.Error
   213  	if errors.As(err, &ae) && (ae.Code == http.StatusTooManyRequests) {
   214  		return true
   215  	}
   216  	return false
   217  }