github.com/juju/juju@v0.0.0-20240327075706-a90865de2538/worker/caasoperator/initializer.go (about) 1 // Copyright 2019 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package caasoperator 5 6 import ( 7 "bytes" 8 "os" 9 "path/filepath" 10 "strings" 11 "time" 12 13 "github.com/juju/clock" 14 "github.com/juju/errors" 15 "github.com/juju/names/v5" 16 "github.com/juju/retry" 17 "github.com/juju/utils/v3" 18 19 "github.com/juju/juju/agent/tools" 20 "github.com/juju/juju/caas" 21 caasconstants "github.com/juju/juju/caas/kubernetes/provider/constants" 22 "github.com/juju/juju/caas/kubernetes/provider/exec" 23 "github.com/juju/juju/cmd/jujud/agent/config" 24 "github.com/juju/juju/worker/uniter" 25 "github.com/juju/juju/wrench" 26 ) 27 28 // initializeUnitParams contains parameters and dependencies for initializing 29 // a unit. 30 type initializeUnitParams struct { 31 // UnitTag of the unit being initialized. 32 UnitTag names.UnitTag 33 34 // ProviderID is the pod-name or pod-uid 35 ProviderID string 36 37 // Logger for the worker. 38 Logger Logger 39 40 // Paths provides CAAS operator paths. 41 Paths Paths 42 43 // OperatorInfo contains serving information such as Certs and PrivateKeys. 44 OperatorInfo caas.OperatorInfo 45 46 // ExecClient is used for initializing units. 47 ExecClient exec.Executor 48 49 // WriteFile is used to write files to the local state. 50 WriteFile func(string, []byte, os.FileMode) error 51 52 // TempDir is used for creating a temporary directory. 53 TempDir func(string, string) (string, error) 54 55 // Clock holds the clock to be used by the runner. 56 Clock clock.Clock 57 58 // reTrier is used for re-running some certain retryable exec request. 59 ReTrier reTrier 60 } 61 62 // Validate initializeUnitParams 63 func (p initializeUnitParams) Validate() error { 64 if p.Logger == nil { 65 return errors.NotValidf("missing Logger") 66 } 67 if p.ProviderID == "" { 68 return errors.NotValidf("missing ProviderID") 69 } 70 if p.ExecClient == nil { 71 return errors.NotValidf("missing ExecClient") 72 } 73 if p.WriteFile == nil { 74 return errors.NotValidf("missing WriteFile") 75 } 76 if p.TempDir == nil { 77 return errors.NotValidf("missing TempDir") 78 } 79 return nil 80 } 81 82 // reTrier is used for re-running some certain retryable exec request. 83 type reTrier func(func() error, func(error) bool, Logger, clock.Clock, <-chan struct{}) error 84 85 // runnerWithRetry retries the exec request for init unit process if it got a retryable error. 86 func runnerWithRetry(f func() error, fatalChecker func(error) bool, logger Logger, clk clock.Clock, cancel <-chan struct{}) error { 87 do := func() error { 88 if wrench.IsActive("exec", "retryable-error") { 89 fakeErr := errors.New("fake retryable-error") 90 logger.Warningf("wrench exec retryable-error enabled, returns %v", fakeErr) 91 return exec.NewExecRetryableError(fakeErr) 92 } 93 return f() 94 } 95 args := retry.CallArgs{ 96 Attempts: 5, 97 Delay: 2 * time.Second, 98 MaxDuration: 30 * time.Second, 99 Clock: clk, 100 Stop: cancel, 101 Func: do, 102 IsFatalError: fatalChecker, 103 NotifyFunc: func(err error, attempt int) { 104 logger.Debugf("retrying exec request, in %d attempt, %v", attempt, err) 105 }, 106 } 107 return errors.Trace(retry.Call(args)) 108 } 109 110 // initializeUnit with the charm and configuration. 111 func initializeUnit(params initializeUnitParams, cancel <-chan struct{}) error { 112 if err := params.Validate(); err != nil { 113 return errors.Trace(err) 114 } 115 116 params.Logger.Infof("started pod init on %q", params.UnitTag.Id()) 117 container := caas.InitContainerName 118 initArgs := []string{"--unit", params.UnitTag.String()} 119 120 rootToolsDir := tools.ToolsDir(config.DataDir, "") 121 jujudPath := filepath.Join(rootToolsDir, "jujud") 122 unitPaths := uniter.NewPaths(config.DataDir, params.UnitTag, nil) 123 operatorPaths := params.Paths 124 tempDir, err := params.TempDir(os.TempDir(), params.UnitTag.String()) 125 if err != nil { 126 return errors.Annotatef(err, "creating temp directory") 127 } 128 129 stdout := &bytes.Buffer{} 130 command := []string{"mkdir", "-p", tempDir} 131 err = params.ExecClient.Exec(exec.ExecParams{ 132 Commands: command, 133 PodName: params.ProviderID, 134 ContainerName: container, 135 Stdout: stdout, 136 Stderr: stdout, 137 }, cancel) 138 if err != nil { 139 return errors.Annotatef(err, "running command: %q failed: %q", strings.Join(command, " "), string(stdout.Bytes())) 140 } 141 142 tempCharmDir := filepath.Join(tempDir, "charm") 143 // This heavy exec task might get 137 error, we will retry if it does happen. 144 err = params.ReTrier( 145 func() error { 146 return params.ExecClient.Copy(exec.CopyParams{ 147 Src: exec.FileResource{ 148 Path: operatorPaths.State.CharmDir, 149 }, 150 Dest: exec.FileResource{ 151 Path: tempDir, 152 PodName: params.ProviderID, 153 ContainerName: container, 154 }, 155 }, cancel) 156 }, 157 func(err error) bool { 158 return err != nil && !exec.IsExecRetryableError(err) 159 }, params.Logger, params.Clock, cancel, 160 ) 161 if err != nil { 162 return errors.Trace(err) 163 } 164 tempOperatorCacheFile, tempCACertFile, err := setupRemoteConfiguration(params, cancel, unitPaths, tempDir, container) 165 if err != nil { 166 return errors.Trace(err) 167 } 168 initArgs = append(initArgs, 169 "--charm-dir", tempCharmDir, 170 "--send", // Init container will wait for us to send the data. 171 "--operator-file", tempOperatorCacheFile, 172 "--operator-ca-cert-file", tempCACertFile, 173 ) 174 175 stdout = &bytes.Buffer{} 176 command = append([]string{jujudPath, "caas-unit-init"}, initArgs...) 177 err = params.ExecClient.Exec(exec.ExecParams{ 178 Commands: command, 179 PodName: params.ProviderID, 180 ContainerName: container, 181 WorkingDir: config.DataDir, 182 Stdout: stdout, 183 Stderr: stdout, 184 }, cancel) 185 if err != nil { 186 return errors.Annotatef(err, "caas-unit-init for unit %q with command: %q failed: %s", params.UnitTag.Id(), strings.Join(command, " "), string(stdout.Bytes())) 187 } 188 return nil 189 } 190 191 func setupRemoteConfiguration(params initializeUnitParams, cancel <-chan struct{}, 192 unitPaths uniter.Paths, tempDir string, container string) (string, string, error) { 193 tempCACertFile := filepath.Join(tempDir, caas.CACertFile) 194 if err := params.WriteFile(tempCACertFile, []byte(params.OperatorInfo.CACert), 0644); err != nil { 195 return "", "", errors.Trace(err) 196 } 197 err := params.ExecClient.Copy(exec.CopyParams{ 198 Src: exec.FileResource{ 199 Path: tempCACertFile, 200 }, 201 Dest: exec.FileResource{ 202 Path: tempCACertFile, 203 PodName: params.ProviderID, 204 ContainerName: container, 205 }, 206 }, cancel) 207 if err != nil { 208 return "", "", errors.Trace(err) 209 } 210 211 serviceAddress := os.Getenv(caasconstants.OperatorServiceIPEnvName) 212 params.Logger.Debugf("operator service address: %v", serviceAddress) 213 token, err := utils.RandomPassword() 214 if err != nil { 215 return "", "", errors.Trace(err) 216 } 217 clientInfo := caas.OperatorClientInfo{ 218 ServiceAddress: serviceAddress, 219 Token: token, 220 } 221 data, err := clientInfo.Marshal() 222 if err != nil { 223 return "", "", errors.Trace(err) 224 } 225 operatorCacheFile := filepath.Join(unitPaths.State.BaseDir, caas.OperatorClientInfoCacheFile) 226 if err := params.WriteFile(operatorCacheFile, data, 0644); err != nil { 227 return "", "", errors.Trace(err) 228 } 229 tempOperatorCacheFile := filepath.Join(tempDir, caas.OperatorClientInfoCacheFile) 230 err = params.ExecClient.Copy(exec.CopyParams{ 231 Src: exec.FileResource{ 232 Path: operatorCacheFile, 233 }, 234 Dest: exec.FileResource{ 235 Path: tempOperatorCacheFile, 236 PodName: params.ProviderID, 237 ContainerName: container, 238 }, 239 }, cancel) 240 if err != nil { 241 return "", "", errors.Trace(err) 242 } 243 244 return tempOperatorCacheFile, tempCACertFile, nil 245 }