github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasoperator/initializer.go (about)

     1  // Copyright 2019 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package caasoperator
     5  
     6  import (
     7  	"bytes"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/juju/clock"
    14  	"github.com/juju/errors"
    15  	"github.com/juju/names/v5"
    16  	"github.com/juju/retry"
    17  	"github.com/juju/utils/v3"
    18  
    19  	"github.com/juju/juju/agent/tools"
    20  	"github.com/juju/juju/caas"
    21  	caasconstants "github.com/juju/juju/caas/kubernetes/provider/constants"
    22  	"github.com/juju/juju/caas/kubernetes/provider/exec"
    23  	"github.com/juju/juju/cmd/jujud/agent/config"
    24  	"github.com/juju/juju/worker/uniter"
    25  	"github.com/juju/juju/wrench"
    26  )
    27  
// initializeUnitParams contains parameters and dependencies for initializing
// a unit.
type initializeUnitParams struct {
	// UnitTag is the tag of the unit being initialized.
	UnitTag names.UnitTag

	// ProviderID is the pod-name or pod-uid. It is used as the pod name for
	// the exec and copy requests issued while initializing the unit.
	ProviderID string

	// Logger is the logger for the worker.
	Logger Logger

	// Paths provides CAAS operator paths.
	Paths Paths

	// OperatorInfo contains serving information such as Certs and PrivateKeys.
	OperatorInfo caas.OperatorInfo

	// ExecClient is used for initializing units.
	ExecClient exec.Executor

	// WriteFile is used to write files to the local state.
	WriteFile func(string, []byte, os.FileMode) error

	// TempDir is used for creating a temporary directory.
	TempDir func(string, string) (string, error)

	// Clock holds the clock to be used by the runner.
	Clock clock.Clock

	// ReTrier is used to re-run exec requests that failed with a retryable
	// error.
	ReTrier reTrier
}
    61  
    62  // Validate initializeUnitParams
    63  func (p initializeUnitParams) Validate() error {
    64  	if p.Logger == nil {
    65  		return errors.NotValidf("missing Logger")
    66  	}
    67  	if p.ProviderID == "" {
    68  		return errors.NotValidf("missing ProviderID")
    69  	}
    70  	if p.ExecClient == nil {
    71  		return errors.NotValidf("missing ExecClient")
    72  	}
    73  	if p.WriteFile == nil {
    74  		return errors.NotValidf("missing WriteFile")
    75  	}
    76  	if p.TempDir == nil {
    77  		return errors.NotValidf("missing TempDir")
    78  	}
    79  	return nil
    80  }
    81  
// reTrier runs an exec request and re-runs it when it fails with a retryable
// error. As implemented by runnerWithRetry, the arguments are, in order: the
// function to run, a predicate reporting whether an error is fatal, a logger,
// the clock to use, and a cancel channel.
type reTrier func(func() error, func(error) bool, Logger, clock.Clock, <-chan struct{}) error
    84  
    85  // runnerWithRetry retries the exec request for init unit process if it got a retryable error.
    86  func runnerWithRetry(f func() error, fatalChecker func(error) bool, logger Logger, clk clock.Clock, cancel <-chan struct{}) error {
    87  	do := func() error {
    88  		if wrench.IsActive("exec", "retryable-error") {
    89  			fakeErr := errors.New("fake retryable-error")
    90  			logger.Warningf("wrench exec retryable-error enabled, returns %v", fakeErr)
    91  			return exec.NewExecRetryableError(fakeErr)
    92  		}
    93  		return f()
    94  	}
    95  	args := retry.CallArgs{
    96  		Attempts:     5,
    97  		Delay:        2 * time.Second,
    98  		MaxDuration:  30 * time.Second,
    99  		Clock:        clk,
   100  		Stop:         cancel,
   101  		Func:         do,
   102  		IsFatalError: fatalChecker,
   103  		NotifyFunc: func(err error, attempt int) {
   104  			logger.Debugf("retrying exec request, in %d attempt, %v", attempt, err)
   105  		},
   106  	}
   107  	return errors.Trace(retry.Call(args))
   108  }
   109  
   110  // initializeUnit with the charm and configuration.
   111  func initializeUnit(params initializeUnitParams, cancel <-chan struct{}) error {
   112  	if err := params.Validate(); err != nil {
   113  		return errors.Trace(err)
   114  	}
   115  
   116  	params.Logger.Infof("started pod init on %q", params.UnitTag.Id())
   117  	container := caas.InitContainerName
   118  	initArgs := []string{"--unit", params.UnitTag.String()}
   119  
   120  	rootToolsDir := tools.ToolsDir(config.DataDir, "")
   121  	jujudPath := filepath.Join(rootToolsDir, "jujud")
   122  	unitPaths := uniter.NewPaths(config.DataDir, params.UnitTag, nil)
   123  	operatorPaths := params.Paths
   124  	tempDir, err := params.TempDir(os.TempDir(), params.UnitTag.String())
   125  	if err != nil {
   126  		return errors.Annotatef(err, "creating temp directory")
   127  	}
   128  
   129  	stdout := &bytes.Buffer{}
   130  	command := []string{"mkdir", "-p", tempDir}
   131  	err = params.ExecClient.Exec(exec.ExecParams{
   132  		Commands:      command,
   133  		PodName:       params.ProviderID,
   134  		ContainerName: container,
   135  		Stdout:        stdout,
   136  		Stderr:        stdout,
   137  	}, cancel)
   138  	if err != nil {
   139  		return errors.Annotatef(err, "running command: %q failed: %q", strings.Join(command, " "), string(stdout.Bytes()))
   140  	}
   141  
   142  	tempCharmDir := filepath.Join(tempDir, "charm")
   143  	// This heavy exec task might get 137 error, we will retry if it does happen.
   144  	err = params.ReTrier(
   145  		func() error {
   146  			return params.ExecClient.Copy(exec.CopyParams{
   147  				Src: exec.FileResource{
   148  					Path: operatorPaths.State.CharmDir,
   149  				},
   150  				Dest: exec.FileResource{
   151  					Path:          tempDir,
   152  					PodName:       params.ProviderID,
   153  					ContainerName: container,
   154  				},
   155  			}, cancel)
   156  		},
   157  		func(err error) bool {
   158  			return err != nil && !exec.IsExecRetryableError(err)
   159  		}, params.Logger, params.Clock, cancel,
   160  	)
   161  	if err != nil {
   162  		return errors.Trace(err)
   163  	}
   164  	tempOperatorCacheFile, tempCACertFile, err := setupRemoteConfiguration(params, cancel, unitPaths, tempDir, container)
   165  	if err != nil {
   166  		return errors.Trace(err)
   167  	}
   168  	initArgs = append(initArgs,
   169  		"--charm-dir", tempCharmDir,
   170  		"--send", // Init container will wait for us to send the data.
   171  		"--operator-file", tempOperatorCacheFile,
   172  		"--operator-ca-cert-file", tempCACertFile,
   173  	)
   174  
   175  	stdout = &bytes.Buffer{}
   176  	command = append([]string{jujudPath, "caas-unit-init"}, initArgs...)
   177  	err = params.ExecClient.Exec(exec.ExecParams{
   178  		Commands:      command,
   179  		PodName:       params.ProviderID,
   180  		ContainerName: container,
   181  		WorkingDir:    config.DataDir,
   182  		Stdout:        stdout,
   183  		Stderr:        stdout,
   184  	}, cancel)
   185  	if err != nil {
   186  		return errors.Annotatef(err, "caas-unit-init for unit %q with command: %q failed: %s", params.UnitTag.Id(), strings.Join(command, " "), string(stdout.Bytes()))
   187  	}
   188  	return nil
   189  }
   190  
   191  func setupRemoteConfiguration(params initializeUnitParams, cancel <-chan struct{},
   192  	unitPaths uniter.Paths, tempDir string, container string) (string, string, error) {
   193  	tempCACertFile := filepath.Join(tempDir, caas.CACertFile)
   194  	if err := params.WriteFile(tempCACertFile, []byte(params.OperatorInfo.CACert), 0644); err != nil {
   195  		return "", "", errors.Trace(err)
   196  	}
   197  	err := params.ExecClient.Copy(exec.CopyParams{
   198  		Src: exec.FileResource{
   199  			Path: tempCACertFile,
   200  		},
   201  		Dest: exec.FileResource{
   202  			Path:          tempCACertFile,
   203  			PodName:       params.ProviderID,
   204  			ContainerName: container,
   205  		},
   206  	}, cancel)
   207  	if err != nil {
   208  		return "", "", errors.Trace(err)
   209  	}
   210  
   211  	serviceAddress := os.Getenv(caasconstants.OperatorServiceIPEnvName)
   212  	params.Logger.Debugf("operator service address: %v", serviceAddress)
   213  	token, err := utils.RandomPassword()
   214  	if err != nil {
   215  		return "", "", errors.Trace(err)
   216  	}
   217  	clientInfo := caas.OperatorClientInfo{
   218  		ServiceAddress: serviceAddress,
   219  		Token:          token,
   220  	}
   221  	data, err := clientInfo.Marshal()
   222  	if err != nil {
   223  		return "", "", errors.Trace(err)
   224  	}
   225  	operatorCacheFile := filepath.Join(unitPaths.State.BaseDir, caas.OperatorClientInfoCacheFile)
   226  	if err := params.WriteFile(operatorCacheFile, data, 0644); err != nil {
   227  		return "", "", errors.Trace(err)
   228  	}
   229  	tempOperatorCacheFile := filepath.Join(tempDir, caas.OperatorClientInfoCacheFile)
   230  	err = params.ExecClient.Copy(exec.CopyParams{
   231  		Src: exec.FileResource{
   232  			Path: operatorCacheFile,
   233  		},
   234  		Dest: exec.FileResource{
   235  			Path:          tempOperatorCacheFile,
   236  			PodName:       params.ProviderID,
   237  			ContainerName: container,
   238  		},
   239  	}, cancel)
   240  	if err != nil {
   241  		return "", "", errors.Trace(err)
   242  	}
   243  
   244  	return tempOperatorCacheFile, tempCACertFile, nil
   245  }