// Source listing: github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/worker/upgrader/upgrader.go

// Copyright 2012, 2013 Canonical Ltd.
// Licensed under the AGPLv3, see LICENCE file for details.

package upgrader

import (
	"fmt"
	"net/http"
	"os"
	"time"

	"github.com/juju/errors"
	"github.com/juju/loggo"
	"github.com/juju/os/series"
	"github.com/juju/utils"
	"github.com/juju/utils/arch"
	"github.com/juju/version"
	"gopkg.in/juju/names.v2"
	"gopkg.in/juju/worker.v1/catacomb"

	"github.com/juju/juju/agent"
	agenttools "github.com/juju/juju/agent/tools"
	"github.com/juju/juju/api/upgrader"
	coretools "github.com/juju/juju/tools"
	"github.com/juju/juju/upgrades"
	jujuversion "github.com/juju/juju/version"
	"github.com/juju/juju/worker/gate"
)

const (
	// shortDelay is how long the main loop normally waits before trying
	// again after it failed to obtain the agent binaries for the desired
	// version.
	shortDelay = 5 * time.Second

	// notEnoughSpaceDelay is how long we sleep when there's a new
	// version of the agent that we need to download but there isn't
	// enough available space to download and unpack it. Sleeping
	// longer in that situation means we don't spam the log with disk
	// space errors every few seconds (every shortDelay), but still
	// bring the message up regularly.
	notEnoughSpaceDelay = time.Minute
)

// retryAfter returns a channel that receives a value once the given
// duration has elapsed; the main loop uses it to schedule the retry of
// a failed download.
var retryAfter = func(duration time.Duration) <-chan time.Time {
	// TODO(fwereade): 2016-03-17 lp:1558657
	return time.After(duration)
}

// logger is the package-wide logger for the upgrader worker.
var logger = loggo.GetLogger("juju.worker.upgrader")

// Upgrader represents a worker that watches the state for upgrade
// requests.
type Upgrader struct {
	// catacomb owns the worker's lifetime: it runs loop, and Kill and
	// Wait delegate to it.
	catacomb catacomb.Catacomb
	// st is the upgrader API facade (copied from Config.State) used to
	// report the running version, watch for version changes and look up
	// the desired version and its agent binaries.
	st *upgrader.State
	// dataDir is the agent's data directory (Config.AgentConfig.DataDir());
	// agent binaries are looked up in and unpacked into it.
	dataDir string
	// tag identifies the agent (Config.AgentConfig.Tag()) in API calls.
	tag names.Tag
	// config is the full configuration the worker was started with.
	config Config
}

// Config contains the items the worker needs to start.
64 type Config struct { 65 State *upgrader.State 66 AgentConfig agent.Config 67 OrigAgentVersion version.Number 68 UpgradeStepsWaiter gate.Waiter 69 InitialUpgradeCheckComplete gate.Unlocker 70 CheckDiskSpace func(string, uint64) error 71 } 72 73 // NewAgentUpgrader returns a new upgrader worker. It watches changes to the 74 // current version of the current agent (with the given tag) and tries to 75 // download the tools for any new version into the given data directory. If 76 // an upgrade is needed, the worker will exit with an UpgradeReadyError 77 // holding details of the requested upgrade. The tools will have been 78 // downloaded and unpacked. 79 func NewAgentUpgrader(config Config) (*Upgrader, error) { 80 u := &Upgrader{ 81 st: config.State, 82 dataDir: config.AgentConfig.DataDir(), 83 tag: config.AgentConfig.Tag(), 84 config: config, 85 } 86 err := catacomb.Invoke(catacomb.Plan{ 87 Site: &u.catacomb, 88 Work: u.loop, 89 }) 90 if err != nil { 91 return nil, errors.Trace(err) 92 } 93 return u, nil 94 } 95 96 // Kill implements worker.Worker.Kill. 97 func (u *Upgrader) Kill() { 98 u.catacomb.Kill(nil) 99 } 100 101 // Wait implements worker.Worker.Wait. 102 func (u *Upgrader) Wait() error { 103 return u.catacomb.Wait() 104 } 105 106 // Stop stops the upgrader and returns any 107 // error it encountered when running. 108 func (u *Upgrader) Stop() error { 109 u.Kill() 110 return u.Wait() 111 } 112 113 // allowedTargetVersion checks if targetVersion is too different from 114 // curVersion to allow a downgrade. 
115 func allowedTargetVersion( 116 origAgentVersion version.Number, 117 curVersion version.Number, 118 upgradeStepsRunning bool, 119 targetVersion version.Number, 120 ) bool { 121 if upgradeStepsRunning && targetVersion == origAgentVersion { 122 return true 123 } 124 if targetVersion.Major < curVersion.Major { 125 return false 126 } 127 if targetVersion.Major == curVersion.Major && targetVersion.Minor < curVersion.Minor { 128 return false 129 } 130 return true 131 } 132 133 func (u *Upgrader) loop() error { 134 // Start by reporting current tools (which includes arch/series, and is 135 // used by the controller in communicating the desired version below). 136 if err := u.st.SetVersion(u.tag.String(), toBinaryVersion(jujuversion.Current)); err != nil { 137 return errors.Annotate(err, "cannot set agent version") 138 } 139 140 // We don't read on the dying channel until we have received the 141 // initial event from the API version watcher, thus ensuring 142 // that we attempt an upgrade even if other workers are dying 143 // all around us. Similarly, we don't want to bind the watcher 144 // to the catacomb's lifetime (yet!) lest we wait forever for a 145 // stopped watcher. 146 // 147 // However, that absolutely depends on versionWatcher's guaranteed 148 // initial event, and we should assume that it'll break its contract 149 // sometime. So we allow the watcher to wait patiently for the event 150 // for a full minute; but after that we proceed regardless. 
151 versionWatcher, err := u.st.WatchAPIVersion(u.tag.String()) 152 if err != nil { 153 return errors.Trace(err) 154 } 155 logger.Infof("abort check blocked until version event received") 156 // TODO(fwereade): 2016-03-17 lp:1558657 157 mustProceed := time.After(time.Minute) 158 var dying <-chan struct{} 159 allowDying := func() { 160 if dying == nil { 161 logger.Infof("unblocking abort check") 162 mustProceed = nil 163 dying = u.catacomb.Dying() 164 if err := u.catacomb.Add(versionWatcher); err != nil { 165 u.catacomb.Kill(err) 166 } 167 } 168 } 169 170 var retry <-chan time.Time 171 for { 172 select { 173 // NOTE: retry and dying both start out nil, so they can't be chosen 174 // first time round the loop. However... 175 case <-retry: 176 case <-dying: 177 return u.catacomb.ErrDying() 178 // ...*every* other case *must* allowDying(), before doing anything 179 // else, lest an error cause us to leak versionWatcher. 180 case <-mustProceed: 181 logger.Infof("version event not received after one minute") 182 allowDying() 183 case _, ok := <-versionWatcher.Changes(): 184 allowDying() 185 if !ok { 186 return errors.New("version watcher closed") 187 } 188 } 189 190 wantVersion, err := u.st.DesiredVersion(u.tag.String()) 191 if err != nil { 192 return err 193 } 194 logger.Infof("desired agent binary version: %v", wantVersion) 195 196 if wantVersion == jujuversion.Current { 197 u.config.InitialUpgradeCheckComplete.Unlock() 198 continue 199 } else if !allowedTargetVersion( 200 u.config.OrigAgentVersion, 201 jujuversion.Current, 202 !u.config.UpgradeStepsWaiter.IsUnlocked(), 203 wantVersion, 204 ) { 205 // See also bug #1299802 where when upgrading from 206 // 1.16 to 1.18 there is a race condition that can 207 // cause the unit agent to upgrade, and then want to 208 // downgrade when its associate machine agent has not 209 // finished upgrading. 
210 logger.Infof("desired agent binary version: %s is older than current %s, refusing to downgrade", 211 wantVersion, jujuversion.Current) 212 u.config.InitialUpgradeCheckComplete.Unlock() 213 continue 214 } 215 logger.Infof("upgrade requested from %v to %v", jujuversion.Current, wantVersion) 216 217 // Check if tools have already been downloaded. 218 wantVersionBinary := toBinaryVersion(wantVersion) 219 if u.toolsAlreadyDownloaded(wantVersionBinary) { 220 return u.newUpgradeReadyError(wantVersionBinary) 221 } 222 223 // Check if tools are available for download. 224 wantToolsList, err := u.st.Tools(u.tag.String()) 225 if err != nil { 226 // Not being able to lookup Tools is considered fatal 227 return err 228 } 229 // The worker cannot be stopped while we're downloading 230 // the tools - this means that even if the API is going down 231 // repeatedly (causing the agent to be stopped), as long 232 // as we have got as far as this, we will still be able to 233 // upgrade the agent. 234 delay := shortDelay 235 for _, wantTools := range wantToolsList { 236 if err := u.checkForSpace(); err != nil { 237 logger.Errorf("%s", err.Error()) 238 delay = notEnoughSpaceDelay 239 break 240 } 241 err = u.ensureTools(wantTools) 242 if err == nil { 243 return u.newUpgradeReadyError(wantTools.Version) 244 } 245 logger.Errorf("failed to fetch agent binaries from %q: %v", wantTools.URL, err) 246 } 247 retry = retryAfter(delay) 248 } 249 } 250 251 func toBinaryVersion(vers version.Number) version.Binary { 252 outVers := version.Binary{ 253 Number: vers, 254 Arch: arch.HostArch(), 255 Series: series.MustHostSeries(), 256 } 257 return outVers 258 } 259 260 func (u *Upgrader) toolsAlreadyDownloaded(wantVersion version.Binary) bool { 261 _, err := agenttools.ReadTools(u.dataDir, wantVersion) 262 return err == nil 263 } 264 265 func (u *Upgrader) newUpgradeReadyError(newVersion version.Binary) *UpgradeReadyError { 266 return &UpgradeReadyError{ 267 OldTools: 
toBinaryVersion(jujuversion.Current), 268 NewTools: newVersion, 269 AgentName: u.tag.String(), 270 DataDir: u.dataDir, 271 } 272 } 273 274 func (u *Upgrader) ensureTools(agentTools *coretools.Tools) error { 275 logger.Infof("fetching agent binaries from %q", agentTools.URL) 276 // The reader MUST verify the tools' hash, so there is no 277 // need to validate the peer. We cannot anyway: see http://pad.lv/1261780. 278 resp, err := utils.GetNonValidatingHTTPClient().Get(agentTools.URL) 279 if err != nil { 280 return err 281 } 282 defer resp.Body.Close() 283 if resp.StatusCode != http.StatusOK { 284 return fmt.Errorf("bad HTTP response: %v", resp.Status) 285 } 286 err = agenttools.UnpackTools(u.dataDir, agentTools, resp.Body) 287 if err != nil { 288 return fmt.Errorf("cannot unpack agent binaries: %v", err) 289 } 290 logger.Infof("unpacked agent binaries %s to %s", agentTools.Version, u.dataDir) 291 return nil 292 } 293 294 func (u *Upgrader) checkForSpace() error { 295 logger.Debugf("checking available space before downloading") 296 err := u.config.CheckDiskSpace(u.dataDir, upgrades.MinDiskSpaceMib) 297 if err != nil { 298 return errors.Trace(err) 299 } 300 err = u.config.CheckDiskSpace(os.TempDir(), upgrades.MinDiskSpaceMib) 301 if err != nil { 302 return errors.Trace(err) 303 } 304 return nil 305 }