github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/backups/restore.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build !windows 5 6 package backups 7 8 import ( 9 "bytes" 10 "net" 11 "os" 12 "strconv" 13 "sync" 14 "text/template" 15 "time" 16 17 "github.com/juju/clock" 18 "github.com/juju/errors" 19 "github.com/juju/utils" 20 "github.com/juju/utils/ssh" 21 "gopkg.in/juju/names.v2" 22 "gopkg.in/mgo.v2" 23 "gopkg.in/mgo.v2/bson" 24 25 "github.com/juju/juju/agent" 26 "github.com/juju/juju/core/instance" 27 "github.com/juju/juju/mongo" 28 "github.com/juju/juju/network" 29 "github.com/juju/juju/state" 30 "github.com/juju/juju/state/stateenvirons" 31 "github.com/juju/juju/worker/peergrouper" 32 ) 33 34 // resetReplicaSet re-initiates replica-set using the new controller 35 // values, this is required after a mongo restore. 36 // In case of failure returns error. 37 func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error { 38 params := peergrouper.InitiateMongoParams{ 39 DialInfo: dialInfo, 40 MemberHostPort: memberHostPort, 41 User: dialInfo.Username, 42 Password: dialInfo.Password, 43 } 44 return peergrouper.InitiateMongoServer(params) 45 } 46 47 var filesystemRoot = getFilesystemRoot 48 49 func getFilesystemRoot() string { 50 return string(os.PathSeparator) 51 } 52 53 // tagUserCredentials is a convenience function that extracts the 54 // tag user and apipassword, required to access mongodb. 55 func tagUserCredentials(conf agent.Config) (string, string, error) { 56 username := conf.Tag().String() 57 var password string 58 // TODO(perrito) we might need an accessor for the actual state password 59 // just in case it ever changes from the same as api password. 60 apiInfo, ok := conf.APIInfo() 61 if ok { 62 password = apiInfo.Password 63 } else { 64 // There seems to be no way to reach this inconsistence other than making a 65 // backup on a machine where these fields are corrupted and even so I find 66 // no reasonable way to reach this state, yet since APIInfo has it as a 67 // possibility I prefer to handle it, we cannot recover from this since 68 // it would mean that the agent.conf is corrupted. 69 return "", "", errors.New("cannot obtain password to access the controller") 70 } 71 return username, password, nil 72 } 73 74 // newDialInfo returns mgo.DialInfo with the given address using the minimal 75 // possible setup. 76 func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) { 77 dialOpts := mongo.DialOpts{Direct: true} 78 ssi, ok := conf.StateServingInfo() 79 if !ok { 80 return nil, errors.Errorf("cannot get state serving info to dial") 81 } 82 info := mongo.Info{ 83 Addrs: []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))}, 84 CACert: conf.CACert(), 85 } 86 dialInfo, err := mongo.DialInfo(info, dialOpts) 87 if err != nil { 88 return nil, errors.Annotate(err, "cannot produce a dial info") 89 } 90 oldPassword := conf.OldPassword() 91 if oldPassword != "" { 92 dialInfo.Username = "admin" 93 dialInfo.Password = conf.OldPassword() 94 } else { 95 dialInfo.Username, dialInfo.Password, err = tagUserCredentials(conf) 96 if err != nil { 97 return nil, errors.Trace(err) 98 } 99 } 100 return dialInfo, nil 101 } 102 103 // updateMongoEntries will update the machine entries in the restored mongo to 104 // reflect the real machine instanceid in case it changed (a newly bootstraped 105 // server). 106 func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error { 107 session, err := mgo.DialWithInfo(dialInfo) 108 if err != nil { 109 return errors.Annotate(err, "cannot connect to mongo to update") 110 } 111 defer session.Close() 112 // TODO(perrito666): Take the Machine id from an autoritative source 113 err = session.DB("juju").C("machines").Update( 114 bson.M{"machineid": oldMachineId}, 115 bson.M{"$set": bson.M{"instanceid": string(newInstId)}}, 116 ) 117 if err != nil { 118 return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId) 119 } 120 return nil 121 } 122 123 // updateMachineAddresses will update the machine doc to the current addresses 124 func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error { 125 privateAddressAddress := network.Address{ 126 Value: privateAddress, 127 Type: network.DeriveAddressType(privateAddress), 128 } 129 publicAddressAddress := network.Address{ 130 Value: publicAddress, 131 Type: network.DeriveAddressType(publicAddress), 132 } 133 if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil { 134 return errors.Trace(err) 135 } 136 return nil 137 } 138 139 // assign to variables for testing purposes. 140 var mongoDefaultDialOpts = mongo.DefaultDialOpts 141 var environsGetNewPolicyFunc = stateenvirons.GetNewPolicyFunc 142 143 // connectToDB tries to connect to the newly restored controller. 144 func connectToDB(controllerTag names.ControllerTag, modelTag names.ModelTag, info *mongo.MongoInfo) (*state.StatePool, error) { 145 // We need to retry here to allow mongo to come up on the restored controller. 146 // The connection might succeed due to the mongo dial retries but there may still 147 // be a problem issuing database commands. 148 var ( 149 pool *state.StatePool 150 err error 151 ) 152 const ( 153 newStateConnDelay = 15 * time.Second 154 newStateConnMinAttempts = 8 155 ) 156 // TODO(katco): 2016-08-09: lp:1611427 157 attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts} 158 159 session, err := mongo.DialWithInfo(*info, mongoDefaultDialOpts()) 160 if err != nil { 161 return nil, errors.Trace(err) 162 } 163 defer session.Close() 164 165 for a := attempt.Start(); a.Next(); { 166 pool, err = state.OpenStatePool(state.OpenParams{ 167 Clock: clock.WallClock, 168 ControllerTag: controllerTag, 169 ControllerModelTag: modelTag, 170 MongoSession: session, 171 NewPolicy: environsGetNewPolicyFunc(), 172 }) 173 if err == nil { 174 return pool, nil 175 } 176 logger.Errorf("cannot open state, retrying: %v", err) 177 } 178 return nil, errors.Annotate(err, "cannot open state") 179 } 180 181 type machineModel struct { 182 machine *state.Machine 183 model *state.Model 184 } 185 186 // updateAllMachines finds all machines and resets the stored state address 187 // in each of them. The address does not include the port. 188 // It is too late to go back and errors in a couple of agents have 189 // better chance of being fixed by the user, if we were to fail 190 // we risk an inconsistent controller because of one unresponsive 191 // agent, we should nevertheless return the err info to the user. 192 func updateAllMachines(privateAddress, publicAddress string, machines []machineModel) error { 193 var machineUpdating sync.WaitGroup 194 for _, item := range machines { 195 machine := item.machine 196 // A newly resumed controller requires no updating, and more 197 // than one controller is not yet supported by this code. 198 if machine.IsManager() || machine.Life() == state.Dead { 199 continue 200 } 201 machineUpdating.Add(1) 202 go func(machine *state.Machine, model *state.Model) { 203 defer machineUpdating.Done() 204 logger.Debugf("updating addresses for machine %s in model %s/%s", machine.Tag().Id(), model.Owner().Id(), model.Name()) 205 // TODO: thumper 2016-09-20 206 // runMachineUpdate only handles linux machines, what about windows? 207 err := runMachineUpdate(machine, setAgentAddressScript(privateAddress, publicAddress)) 208 if err != nil { 209 logger.Errorf("failed updating machine: %v", err) 210 } 211 }(machine, item.model) 212 } 213 machineUpdating.Wait() 214 215 // We should return errors encapsulated in a digest. 216 return nil 217 } 218 219 // agentAddressAndRelationsTemplate is the template used to replace the api server data 220 // in the agents for the new ones if the machine has been rebootstraped it will also reset 221 // the relations so hooks will re-fire. 222 var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(` 223 set -xu 224 cd /var/lib/juju/agents 225 for agent in * 226 do 227 service jujud-$agent stop > /dev/null 228 229 # The below statement will work in cases where there 230 # is a private address for the api server only 231 # or where there are a private and a public, which are 232 # the two common cases. 233 sed -i.old -r "/^(stateaddresses|apiaddresses):/{ 234 n 235 s/- .*(:[0-9]+)/- {{.Address}}\1/ 236 n 237 s/- .*(:[0-9]+)/- {{.PubAddress}}\1/ 238 }" $agent/agent.conf 239 240 # If we're processing a unit agent's directly 241 # and it has some relations, reset 242 # the stored version of all of them to 243 # ensure that any relation hooks will 244 # fire. 245 if [[ $agent = unit-* ]] 246 then 247 find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \; 248 fi 249 service jujud-$agent start > /dev/null 250 done 251 `)) 252 253 // setAgentAddressScript generates an ssh script argument to update state addresses. 254 func setAgentAddressScript(stateAddr, statePubAddr string) string { 255 var buf bytes.Buffer 256 err := agentAddressAndRelationsTemplate.Execute(&buf, struct { 257 Address string 258 PubAddress string 259 }{stateAddr, statePubAddr}) 260 if err != nil { 261 panic(errors.Annotate(err, "template error")) 262 } 263 return buf.String() 264 } 265 266 // runMachineUpdate connects via ssh to the machine and runs the update script. 267 func runMachineUpdate(machine *state.Machine, sshArg string) error { 268 addr, err := machine.PublicAddress() 269 if err != nil { 270 if network.IsNoAddressError(err) { 271 return errors.Annotatef(err, "no appropriate public address found") 272 } 273 return errors.Trace(err) 274 } 275 return runViaSSH(addr.Value, sshArg) 276 } 277 278 // sshCommand hods ssh.Command type for testing purposes. 279 var sshCommand = ssh.Command 280 281 // runViaSSH runs script in the remote machine with address addr. 282 func runViaSSH(addr string, script string) error { 283 sshOptions := ssh.Options{} 284 sshOptions.SetIdentities("/var/lib/juju/system-identity") 285 // Disable host key checking. We're not pushing across anything 286 // sensitive, and there's no guarantee that the machine would 287 // have published up-to-date host key information. 288 sshOptions.SetStrictHostKeyChecking(ssh.StrictHostChecksNo) 289 sshOptions.SetKnownHostsFile(os.DevNull) 290 291 userAddr := "ubuntu@" + addr 292 userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions) 293 var stdoutBuf bytes.Buffer 294 var stderrBuf bytes.Buffer 295 userCmd.Stdout = &stdoutBuf 296 userCmd.Stderr = &stderrBuf 297 logger.Debugf("updating %s, script:\n%s", addr, script) 298 if err := userCmd.Run(); err != nil { 299 return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String()) 300 } 301 logger.Debugf("result %s\nstdout: \n%s\nstderr: %s", addr, stdoutBuf.String(), stderrBuf.String()) 302 return nil 303 }