github.com/caos/orbos@v1.5.14-0.20221103111702-e6cd0cea7ad4/internal/operator/orbiter/kinds/clusters/kubernetes/upscale.go (about)

     1  package kubernetes
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  
     7  	"github.com/caos/orbos/internal/helpers"
     8  	"github.com/caos/orbos/internal/operator/orbiter/kinds/clusters/core/infra"
     9  	"github.com/caos/orbos/mntr"
    10  	"github.com/caos/orbos/pkg/kubernetes"
    11  	"github.com/caos/orbos/pkg/secret"
    12  )
    13  
    14  func ensureNodes(
    15  	monitor mntr.Monitor,
    16  	clusterID string,
    17  	desired *DesiredV0,
    18  	psf func(mntr.Monitor) error,
    19  	kubeAPI *infra.Address,
    20  	k8sVersion KubernetesVersion,
    21  	k8sClient *kubernetes.Client,
    22  	oneoff bool,
    23  	providerK8sSpec infra.Kubernetes,
    24  	machines []*initializedMachine,
    25  
    26  ) (done bool, err error) {
    27  
    28  	var joinCP *initializedMachine
    29  	var certsCP infra.Machine
    30  	var joinWorkers []*initializedMachine
    31  
    32  nodes:
    33  	for _, machine := range machines {
    34  
    35  		machineMonitor := monitor.WithFields(map[string]interface{}{
    36  			"machine": machine.infra.ID(),
    37  			"tier":    machine.pool.tier,
    38  		})
    39  
    40  		if machine.currentMachine.Unknown {
    41  			machineMonitor.Info("Waiting for kubernetes node to leave unknown state before proceeding")
    42  			return false, nil
    43  		}
    44  
    45  		isJoinedControlPlane := machine.pool.tier == Controlplane && machine.currentMachine.Joined
    46  
    47  		if isJoinedControlPlane && !machine.currentMachine.Updating && !machine.currentMachine.Rebooting {
    48  			certsCP = machine.infra
    49  			continue nodes
    50  		}
    51  
    52  		if isJoinedControlPlane && machine.node != nil && machine.node.Spec.Unschedulable {
    53  			machineMonitor.Info("Awaiting controlplane to become ready")
    54  			return false, nil
    55  		}
    56  
    57  		if machine.node != nil && !machine.node.Spec.Unschedulable {
    58  			continue nodes
    59  		}
    60  
    61  		if machine.currentMachine.Joined {
    62  			machineMonitor.Info("Node is already joining")
    63  			continue nodes
    64  		}
    65  
    66  		if machine.pool.tier == Controlplane && joinCP == nil {
    67  			joinCP = machine
    68  			continue nodes
    69  		}
    70  
    71  		joinWorkers = append(joinWorkers, machine)
    72  	}
    73  
    74  	if joinCP == nil && len(joinWorkers) == 0 {
    75  		monitor.Debug("Scale is ensured")
    76  		return true, nil
    77  	}
    78  
    79  	var jointoken string
    80  
    81  	if certsCP != nil && (joinCP != nil || len(joinWorkers) > 0) {
    82  		runes := []rune("abcdefghijklmnopqrstuvwxyz0123456789")
    83  		jointoken = fmt.Sprintf("%s.%s", helpers.RandomStringRunes(6, runes), helpers.RandomStringRunes(16, runes))
    84  		if _, err := certsCP.Execute(nil, "sudo kubeadm token create "+jointoken); err != nil {
    85  			return false, fmt.Errorf("creating new join token failed: %w", err)
    86  		}
    87  
    88  		defer certsCP.Execute(nil, "sudo kubeadm token delete "+jointoken)
    89  
    90  		if k8sVersion.equals(V1x18x0) {
    91  			if _, err := certsCP.Execute(nil, "sudo kubeadm init phase bootstrap-token"); err != nil {
    92  				return false, fmt.Errorf("working around kubeadm bug failed, see https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/troubleshooting-kubeadm/#not-possible-to-join-a-v1-18-node-to-a-v1-17-cluster-due-to-missing-rbac: %w", err)
    93  			}
    94  		}
    95  	}
    96  
    97  	var certKey []byte
    98  	doKubeadmInit := certsCP == nil
    99  	imageRepository := desired.Spec.CustomImageRegistry
   100  	if imageRepository == "" {
   101  		imageRepository = "k8s.gcr.io"
   102  	}
   103  
   104  	if joinCP != nil {
   105  
   106  		if doKubeadmInit && (desired.Spec.Kubeconfig != nil && desired.Spec.Kubeconfig.Value != "" || !oneoff) {
   107  			return false, errors.New("initializing a cluster is not supported when kubeconfig exists or the flag --recur is passed")
   108  		}
   109  
   110  		if !doKubeadmInit && certKey == nil {
   111  			var err error
   112  			certKey, err = certsCP.Execute(nil, "sudo kubeadm init phase upload-certs --upload-certs | tail -1")
   113  			if err != nil {
   114  				return false, fmt.Errorf("uploading certs failed: %w", err)
   115  			}
   116  			monitor.Info("Refreshed certs")
   117  		}
   118  
   119  		var joinKubeconfig *string
   120  		joinKubeconfig, err = join(
   121  			monitor,
   122  			clusterID,
   123  			joinCP,
   124  			certsCP,
   125  			*desired,
   126  			kubeAPI,
   127  			jointoken,
   128  			k8sVersion,
   129  			string(certKey),
   130  			k8sClient,
   131  			imageRepository,
   132  			providerK8sSpec,
   133  		)
   134  
   135  		if err != nil {
   136  			return false, err
   137  		}
   138  
   139  		if joinKubeconfig == nil || err != nil {
   140  			return false, err
   141  		}
   142  		desired.Spec.Kubeconfig = &secret.Secret{Value: *joinKubeconfig}
   143  		return false, psf(monitor.WithFields(map[string]interface{}{
   144  			"type": "kubeconfig",
   145  		}))
   146  	}
   147  
   148  	if certsCP == nil {
   149  		monitor.Info("Awaiting controlplane initialization")
   150  		return false, nil
   151  	}
   152  
   153  	for _, worker := range joinWorkers {
   154  		if _, err := join(
   155  			monitor,
   156  			clusterID,
   157  			worker,
   158  			certsCP,
   159  			*desired,
   160  			kubeAPI,
   161  			jointoken,
   162  			k8sVersion,
   163  			"",
   164  			k8sClient,
   165  			imageRepository,
   166  			providerK8sSpec,
   167  		); err != nil {
   168  			return false, fmt.Errorf("joining worker %s failed: %w", worker.infra.ID(), err)
   169  		}
   170  	}
   171  
   172  	return false, nil
   173  }