github.com/openshift/installer@v1.4.17/pkg/infrastructure/powervs/clusterapi/powervs.go (about)

     1  package clusterapi
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"math"
     7  	"reflect"
     8  	"regexp"
     9  	"time"
    10  
    11  	"github.com/IBM/vpc-go-sdk/vpcv1"
    12  	"github.com/sirupsen/logrus"
    13  	corev1 "k8s.io/api/core/v1"
    14  	"k8s.io/apimachinery/pkg/util/sets"
    15  	"k8s.io/apimachinery/pkg/util/wait"
    16  	"k8s.io/utils/ptr"
    17  	capibm "sigs.k8s.io/cluster-api-provider-ibmcloud/api/v1beta2"
    18  	crclient "sigs.k8s.io/controller-runtime/pkg/client"
    19  
    20  	powervsconfig "github.com/openshift/installer/pkg/asset/installconfig/powervs"
    21  	"github.com/openshift/installer/pkg/asset/manifests/capiutils"
    22  	"github.com/openshift/installer/pkg/infrastructure/clusterapi"
    23  	"github.com/openshift/installer/pkg/types"
    24  	powervstypes "github.com/openshift/installer/pkg/types/powervs"
    25  )
    26  
// Provider is the PowerVS implementation of the clusterapi InfraProvider.
type Provider struct {
	clusterapi.InfraProvider
}
    31  
    32  var _ clusterapi.Timeouts = (*Provider)(nil)
    33  var _ clusterapi.InfraReadyProvider = (*Provider)(nil)
    34  var _ clusterapi.Provider = (*Provider)(nil)
    35  var _ clusterapi.PostProvider = (*Provider)(nil)
    36  
    37  // Name returns the PowerVS provider name.
    38  func (p Provider) Name() string {
    39  	return powervstypes.Name
    40  }
    41  
    42  // PublicGatherEndpoint indicates that machine ready checks should NOT wait for an ExternalIP
    43  // in the status when declaring machines ready.
    44  func (Provider) PublicGatherEndpoint() clusterapi.GatherEndpoint { return clusterapi.InternalIP }
    45  
    46  func leftInContext(ctx context.Context) time.Duration {
    47  	deadline, ok := ctx.Deadline()
    48  	if !ok {
    49  		return math.MaxInt64
    50  	}
    51  
    52  	duration := time.Until(deadline)
    53  
    54  	return duration
    55  }
    56  
    57  const privatePrefix = "api-int."
    58  const publicPrefix = "api."
    59  
    60  // NetworkTimeout allows platform provider to override the timeout
    61  // when waiting for the network infrastructure to become ready.
    62  func (p Provider) NetworkTimeout() time.Duration {
    63  	return 30 * time.Minute
    64  }
    65  
    66  // ProvisionTimeout allows platform provider to override the timeout
    67  // when waiting for the machines to provision.
    68  func (p Provider) ProvisionTimeout() time.Duration {
    69  	return 15 * time.Minute
    70  }
    71  
    72  // InfraReady is called once cluster.Status.InfrastructureReady
    73  // is true, typically after load balancers have been provisioned. It can be used
    74  // to create DNS records.
    75  func (p Provider) InfraReady(ctx context.Context, in clusterapi.InfraReadyInput) error {
    76  	var (
    77  		err  error
    78  		rule *vpcv1.SecurityGroupRulePrototype
    79  	)
    80  
    81  	logrus.Debugf("InfraReady: in = %+v", in)
    82  	logrus.Debugf("InfraReady: in.InstallConfig.Config = %+v", in.InstallConfig.Config)
    83  	logrus.Debugf("InfraReady: in.InstallConfig.PowerVS = %+v", in.InstallConfig.PowerVS)
    84  
    85  	powerVSCluster := &capibm.IBMPowerVSCluster{}
    86  
    87  	// Get the cluster from the provider
    88  	key := crclient.ObjectKey{
    89  		Name:      in.InfraID,
    90  		Namespace: capiutils.Namespace,
    91  	}
    92  	logrus.Debugf("InfraReady: cluster key = %+v", key)
    93  	if err = in.Client.Get(ctx, key, powerVSCluster); err != nil {
    94  		return fmt.Errorf("failed to get PowerVS cluster in InfraReady: %w", err)
    95  	}
    96  	logrus.Debugf("InfraReady: powerVSCluster = %+v", powerVSCluster)
    97  	logrus.Debugf("InfraReady: powerVSCluster.Status = %+v", powerVSCluster.Status)
    98  	if powerVSCluster.Status.VPC == nil || powerVSCluster.Status.VPC.ID == nil {
    99  		return fmt.Errorf("vpc is empty in InfraReady?")
   100  	}
   101  	logrus.Debugf("InfraReady: powerVSCluster.Status.VPC.ID = %s", *powerVSCluster.Status.VPC.ID)
   102  
   103  	// Get the image from the provider
   104  	key = crclient.ObjectKey{
   105  		Name:      fmt.Sprintf("rhcos-%s", in.InfraID),
   106  		Namespace: capiutils.Namespace,
   107  	}
   108  	logrus.Debugf("InfraReady: image key = %+v", key)
   109  	powerVSImage := &capibm.IBMPowerVSImage{}
   110  	if err = in.Client.Get(ctx, key, powerVSImage); err != nil {
   111  		return fmt.Errorf("failed to get PowerVS image in InfraReady: %w", err)
   112  	}
   113  	logrus.Debugf("InfraReady: image = %+v", powerVSImage)
   114  
   115  	// We need to set the region we will eventually query inside
   116  	vpcRegion := in.InstallConfig.Config.Platform.PowerVS.VPCRegion
   117  	if vpcRegion == "" {
   118  		vpcRegion, err = powervstypes.VPCRegionForPowerVSRegion(in.InstallConfig.Config.Platform.PowerVS.Region)
   119  		if err != nil {
   120  			return fmt.Errorf("failed to get VPC region (%s) in InfraReady: %w", vpcRegion, err)
   121  		}
   122  	}
   123  	logrus.Debugf("InfraReady: vpcRegion = %s", vpcRegion)
   124  	if err = in.InstallConfig.PowerVS.SetVPCServiceURLForRegion(ctx, vpcRegion); err != nil {
   125  		return fmt.Errorf("failed to set the VPC service region (%s) in InfraReady: %w", vpcRegion, err)
   126  	}
   127  
   128  	// Step 1: Create DNS records for the two load balancers
   129  	if err = createLoadBalancerDNSRecords(ctx, in, powerVSCluster.Status.LoadBalancers); err != nil {
   130  		return fmt.Errorf("failed to create DNS records for loadbalancers: %w", err)
   131  	}
   132  
   133  	// Step 2: See which ports are already allowed.
   134  	missingPorts, err := findMissingSecurityGroupRules(ctx, in, *powerVSCluster.Status.VPC.ID)
   135  	if err != nil {
   136  		return fmt.Errorf("failed to find missing security group rules: %w", err)
   137  	}
   138  
   139  	// Step 3: Add to security group rules
   140  	for port := range missingPorts {
   141  		port := port // TODO: remove when using golang 1.22+
   142  		rule = &vpcv1.SecurityGroupRulePrototype{
   143  			Direction: ptr.To("inbound"),
   144  			Protocol:  ptr.To("tcp"),
   145  			PortMin:   ptr.To(port),
   146  			PortMax:   ptr.To(port),
   147  		}
   148  
   149  		logrus.Debugf("InfraReady: Adding port %d to security group rule to %v", port, *powerVSCluster.Status.VPC.ID)
   150  		err := in.InstallConfig.PowerVS.AddSecurityGroupRule(ctx, rule, *powerVSCluster.Status.VPC.ID)
   151  		if err != nil {
   152  			return fmt.Errorf("failed to add security group rule for port %d: %w", port, err)
   153  		}
   154  	}
   155  
   156  	// Also allow ping so we can debug
   157  	rule = &vpcv1.SecurityGroupRulePrototype{
   158  		Direction: ptr.To("inbound"),
   159  		Protocol:  ptr.To("icmp"),
   160  	}
   161  
   162  	err = in.InstallConfig.PowerVS.AddSecurityGroupRule(ctx, rule, *powerVSCluster.Status.VPC.ID)
   163  	if err != nil {
   164  		return fmt.Errorf("failed to add ping security group rule: %w", err)
   165  	}
   166  	return nil
   167  }
   168  
   169  func createLoadBalancerDNSRecords(ctx context.Context, in clusterapi.InfraReadyInput, loadBalancers map[string]capibm.VPCLoadBalancerStatus) error {
   170  	lbExtExp := regexp.MustCompile(`\b-loadbalancer\b$`)
   171  	lbIntExp := regexp.MustCompile(`\b-loadbalancer-int\b$`)
   172  	for lbKey, loadBalancerStatus := range loadBalancers {
   173  		var hostnames []string
   174  
   175  		clusterName := in.InstallConfig.Config.ObjectMeta.Name
   176  
   177  		// Is it external (public) or internal (private)?
   178  		logrus.Debugf("lbKey = %s", lbKey)
   179  		switch {
   180  		case lbExtExp.MatchString(lbKey):
   181  			if in.InstallConfig.Config.Publish == types.ExternalPublishingStrategy {
   182  				hostnames = append(hostnames, fmt.Sprintf("%s%s", publicPrefix, clusterName))
   183  			}
   184  		case lbIntExp.MatchString(lbKey):
   185  			hostnames = append(hostnames, fmt.Sprintf("%s%s", privatePrefix, clusterName))
   186  			// In the private cluster scenario, also point api.* to internal LB
   187  			if in.InstallConfig.Config.Publish == types.InternalPublishingStrategy {
   188  				hostnames = append(hostnames, fmt.Sprintf("%s%s", publicPrefix, clusterName))
   189  			}
   190  		}
   191  
   192  		for _, hostname := range hostnames {
   193  			logrus.Debugf("InfraReady: hostname = %s, cname = %s",
   194  				hostname,
   195  				*loadBalancerStatus.Hostname)
   196  
   197  			err := in.InstallConfig.PowerVS.CreateDNSRecord(ctx,
   198  				hostname,
   199  				*loadBalancerStatus.Hostname)
   200  			if err != nil {
   201  				return fmt.Errorf("InfraReady: Failed to create DNS record: %w", err)
   202  			}
   203  		}
   204  	}
   205  	return nil
   206  }
   207  
   208  func findMissingSecurityGroupRules(ctx context.Context, in clusterapi.InfraReadyInput, vpcID string) (sets.Set[int64], error) {
   209  	foundPorts := sets.Set[int64]{}
   210  	wantedPorts := sets.New[int64](22, 10258, 22623)
   211  
   212  	if in.InstallConfig.Config.Publish == types.InternalPublishingStrategy {
   213  		wantedPorts = wantedPorts.Insert(6443, 443, 5000)
   214  	}
   215  
   216  	existingRules, err := in.InstallConfig.PowerVS.ListSecurityGroupRules(ctx, vpcID)
   217  	if err != nil {
   218  		return nil, fmt.Errorf("failed to list security group rules: %w", err)
   219  	}
   220  
   221  	for _, existingRule := range existingRules.Rules {
   222  		switch reflect.TypeOf(existingRule).String() {
   223  		case "*vpcv1.SecurityGroupRuleSecurityGroupRuleProtocolAll":
   224  		case "*vpcv1.SecurityGroupRuleSecurityGroupRuleProtocolTcpudp":
   225  			securityGroupRule, ok := existingRule.(*vpcv1.SecurityGroupRuleSecurityGroupRuleProtocolTcpudp)
   226  			if !ok {
   227  				return nil, fmt.Errorf("could not convert to ProtocolTcpudp")
   228  			}
   229  			logrus.Debugf("InfraReady: VPC has rule: direction = %s, proto = %s, min = %d, max = %d",
   230  				*securityGroupRule.Direction,
   231  				*securityGroupRule.Protocol,
   232  				*securityGroupRule.PortMin,
   233  				*securityGroupRule.PortMax)
   234  			if *securityGroupRule.Direction == "inbound" &&
   235  				*securityGroupRule.Protocol == "tcp" {
   236  				foundPorts.Insert(*securityGroupRule.PortMin)
   237  			}
   238  		case "*vpcv1.SecurityGroupRuleSecurityGroupRuleProtocolIcmp":
   239  		}
   240  	}
   241  
   242  	missingPorts := wantedPorts.Difference(foundPorts)
   243  
   244  	logrus.Debugf("InfraReady: foundPorts = %+v", foundPorts)
   245  	logrus.Debugf("InfraReady: wantedPorts = %+v", wantedPorts)
   246  	logrus.Debugf("InfraReady: wantedPorts.Difference(foundPorts) = %+v", missingPorts)
   247  
   248  	return missingPorts, nil
   249  }
   250  
   251  func findMachineAddress(ctx context.Context, in clusterapi.PostProvisionInput, key crclient.ObjectKey) (string, error) {
   252  	powerVSMachine := &capibm.IBMPowerVSMachine{}
   253  
   254  	// Get the machine address
   255  	// Unfortunately https://pkg.go.dev/k8s.io/apimachinery/pkg/util/wait#PollUntilContextCancel
   256  	// can only return a bool.  It would be nice if it could return a pointer.
   257  	if err := wait.PollUntilContextCancel(ctx, time.Second*10,
   258  		false,
   259  		func(ctx context.Context) (bool, error) {
   260  			if err := in.Client.Get(ctx, key, powerVSMachine); err != nil {
   261  				return false, fmt.Errorf("failed to get PowerVS machine in PostProvision: %w", err)
   262  			}
   263  
   264  			for _, address := range powerVSMachine.Status.Addresses {
   265  				if address.Type == corev1.NodeInternalIP {
   266  					logrus.Debugf("PostProvision: found %s address %s", key.Name, address.Address)
   267  					return true, nil
   268  				}
   269  			}
   270  
   271  			logrus.Debugf("PostProvision: waiting for %s machine", key.Name)
   272  			return false, nil
   273  		}); err != nil {
   274  		return "", err
   275  	}
   276  
   277  	if err := in.Client.Get(ctx, key, powerVSMachine); err != nil {
   278  		return "", fmt.Errorf("failed to get PowerVS machine in PostProvision: %w", err)
   279  	}
   280  
   281  	for _, address := range powerVSMachine.Status.Addresses {
   282  		if address.Type == corev1.NodeInternalIP {
   283  			return address.Address, nil
   284  		}
   285  	}
   286  
   287  	return "", fmt.Errorf("failed to get machine %s IP address", key.Name)
   288  }
   289  
   290  // PostProvision should be called to add or update PowerVS resources after provisioning has completed.
   291  func (p Provider) PostProvision(ctx context.Context, in clusterapi.PostProvisionInput) error {
   292  	var (
   293  		client             *powervsconfig.Client
   294  		vpcRegion          string
   295  		ipAddr             string
   296  		refServiceInstance *capibm.IBMPowerVSResourceReference
   297  		sshKeyName         string
   298  		err                error
   299  		instanceID         *string
   300  		fieldType          string
   301  	)
   302  
   303  	// SAD: client in the Metadata struct is lowercase and therefore private
   304  	// client = in.InstallConfig.PowerVS.client
   305  	client, err = powervsconfig.NewClient()
   306  	if err != nil {
   307  		return fmt.Errorf("failed to get NewClient in PostProvision: %w", err)
   308  	}
   309  	logrus.Debugf("PostProvision: NewClient returns %+v", client)
   310  
   311  	// We need to set the region we will eventually query inside
   312  	vpcRegion = in.InstallConfig.Config.Platform.PowerVS.VPCRegion
   313  	if vpcRegion == "" {
   314  		vpcRegion, err = powervstypes.VPCRegionForPowerVSRegion(in.InstallConfig.Config.Platform.PowerVS.Region)
   315  		if err != nil {
   316  			return fmt.Errorf("failed to get VPC region (%s) in PostProvision: %w", vpcRegion, err)
   317  		}
   318  	}
   319  	logrus.Debugf("InfraReady: vpcRegion = %s", vpcRegion)
   320  	if err = client.SetVPCServiceURLForRegion(ctx, vpcRegion); err != nil {
   321  		return fmt.Errorf("failed to set the VPC service region (%s) in PostProvision: %w", vpcRegion, err)
   322  	}
   323  
   324  	// Step 1.
   325  	// Wait until bootstrap and master nodes have IP addresses.  This will verify
   326  	// that the Transit Gateway and DHCP server work correctly before continuing on.
   327  
   328  	// Get master IP addresses
   329  	masterCount := int64(1)
   330  	if reps := in.InstallConfig.Config.ControlPlane.Replicas; reps != nil {
   331  		masterCount = *reps
   332  	}
   333  	logrus.Debugf("PostProvision: masterCount = %d", masterCount)
   334  	for i := int64(0); i < masterCount; i++ {
   335  		key := crclient.ObjectKey{
   336  			Name:      fmt.Sprintf("%s-master-%d", in.InfraID, i),
   337  			Namespace: capiutils.Namespace,
   338  		}
   339  		if ipAddr, err = findMachineAddress(ctx, in, key); err != nil {
   340  			return err
   341  		}
   342  		logrus.Debugf("PostProvision: %s ipAddr = %v", key.Name, ipAddr)
   343  	}
   344  
   345  	// Get the bootstrap machine from the provider
   346  	key := crclient.ObjectKey{
   347  		Name:      fmt.Sprintf("%s-bootstrap", in.InfraID),
   348  		Namespace: capiutils.Namespace,
   349  	}
   350  	logrus.Debugf("PostProvision: machine key = %+v", key)
   351  
   352  	// Find its address
   353  	if ipAddr, err = findMachineAddress(ctx, in, key); err != nil {
   354  		return err
   355  	}
   356  	logrus.Debugf("PostProvision: ipAddr = %v", ipAddr)
   357  
   358  	// Get information about it
   359  	powerVSMachine := &capibm.IBMPowerVSMachine{}
   360  	if err := in.Client.Get(ctx, key, powerVSMachine); err != nil {
   361  		return fmt.Errorf("failed to get PowerVS bootstrap machine in PostProvision: %w", err)
   362  	}
   363  	logrus.Debugf("PostProvision: machine = %+v", powerVSMachine)
   364  
   365  	// Specifically the Power Virtual Server (PVS)
   366  	logrus.Debugf("PostProvision: machine.Spec.ServiceInstance = %+v", powerVSMachine.Spec.ServiceInstance)
   367  	refServiceInstance = powerVSMachine.Spec.ServiceInstance
   368  
   369  	// Step 2.
   370  	// Create worker ssh key in the PVS
   371  	if in.InstallConfig.Config.SSHKey == "" {
   372  		return fmt.Errorf("install config's ssh key is empty?")
   373  	}
   374  
   375  	sshKeyName = fmt.Sprintf("%s-key", in.InfraID)
   376  
   377  	switch {
   378  	case refServiceInstance.ID != nil:
   379  		logrus.Debugf("PostProvision: CreateSSHKey: si id = %s, key = %s",
   380  			*refServiceInstance.ID,
   381  			in.InstallConfig.Config.SSHKey)
   382  		instanceID = refServiceInstance.ID
   383  		fieldType = "ID"
   384  	case refServiceInstance.Name != nil:
   385  		logrus.Debugf("PostProvision: CreateSSHKey: si name = %s, key = %s",
   386  			*refServiceInstance.Name,
   387  			in.InstallConfig.Config.SSHKey)
   388  		guid, err := client.ServiceInstanceNameToGUID(ctx, *refServiceInstance.Name)
   389  		if err != nil {
   390  			return fmt.Errorf("failed to find id for ServiceInstance name %s: %w",
   391  				*refServiceInstance.Name,
   392  				err)
   393  		}
   394  		logrus.Debugf("PostProvision: CreateSSHKey: guid = %s", guid)
   395  		instanceID = ptr.To(guid)
   396  		fieldType = "Name"
   397  	default:
   398  		return fmt.Errorf("could not handle powerVSMachine.Spec.ServiceInstance")
   399  	}
   400  
   401  	backoff := wait.Backoff{
   402  		Duration: 15 * time.Second,
   403  		Factor:   1.1,
   404  		Cap:      leftInContext(ctx),
   405  		Steps:    math.MaxInt32,
   406  	}
   407  	err = wait.ExponentialBackoffWithContext(ctx, backoff, func(context.Context) (bool, error) {
   408  		err2 := client.CreateSSHKey(ctx,
   409  			*instanceID,
   410  			*powerVSMachine.Status.Zone,
   411  			sshKeyName,
   412  			in.InstallConfig.Config.SSHKey)
   413  		if err2 == nil {
   414  			return true, nil
   415  		}
   416  		return false, err2
   417  	})
   418  	if err != nil {
   419  		return fmt.Errorf("failed to add SSH key for the workers(%s): %w", fieldType, err)
   420  	}
   421  
   422  	// Step 3.
   423  	// @TODO Remove once https://github.com/kubernetes-sigs/cluster-api-provider-ibmcloud/issues/1679 is fixed
   424  	// Add the bootstrap's IP address to the load balancer pool
   425  	// Get the cluster from the provider so we can have what load balancers are attached
   426  	powerVSCluster := &capibm.IBMPowerVSCluster{}
   427  	key = crclient.ObjectKey{
   428  		Name:      in.InfraID,
   429  		Namespace: capiutils.Namespace,
   430  	}
   431  	logrus.Debugf("PostProvision: cluster key = %+v", key)
   432  	if err = in.Client.Get(ctx, key, powerVSCluster); err != nil {
   433  		return fmt.Errorf("failed to get PowerVS cluster in PostProvision: %w", err)
   434  	}
   435  
   436  	lbIntExp := regexp.MustCompile(`\b-loadbalancer-int\b$`)
   437  
   438  	// Find the internal load balancer
   439  	for lbKey, loadBalancerStatus := range powerVSCluster.Status.LoadBalancers {
   440  		if !lbIntExp.MatchString(lbKey) {
   441  			continue
   442  		}
   443  		logrus.Debugf("PostProvision: Found internal load balancer ID = %s, State = %s, Hostname = %s",
   444  			*loadBalancerStatus.ID,
   445  			loadBalancerStatus.State,
   446  			*loadBalancerStatus.Hostname)
   447  
   448  		if err = client.AddIPToLoadBalancerPool(ctx,
   449  			*loadBalancerStatus.ID,
   450  			"additional-pool-22623",
   451  			22623,
   452  			ipAddr); err != nil {
   453  			return fmt.Errorf("failed to add the bootstrap IP to the load balancer pool: %w", err)
   454  		}
   455  	}
   456  
   457  	return nil
   458  }