github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/backups/restore.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build !windows 5 6 package backups 7 8 import ( 9 "bytes" 10 "net" 11 "os" 12 "strconv" 13 "sync" 14 "text/template" 15 "time" 16 17 "github.com/juju/errors" 18 "github.com/juju/utils" 19 "github.com/juju/utils/ssh" 20 "gopkg.in/juju/names.v2" 21 "gopkg.in/mgo.v2" 22 "gopkg.in/mgo.v2/bson" 23 24 "github.com/juju/juju/agent" 25 "github.com/juju/juju/environs" 26 "github.com/juju/juju/instance" 27 "github.com/juju/juju/mongo" 28 "github.com/juju/juju/network" 29 "github.com/juju/juju/state" 30 "github.com/juju/juju/state/stateenvirons" 31 "github.com/juju/juju/worker/peergrouper" 32 ) 33 34 // resetReplicaSet re-initiates replica-set using the new controller 35 // values, this is required after a mongo restore. 36 // In case of failure returns error. 37 func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error { 38 params := peergrouper.InitiateMongoParams{ 39 DialInfo: dialInfo, 40 MemberHostPort: memberHostPort, 41 User: dialInfo.Username, 42 Password: dialInfo.Password, 43 } 44 return peergrouper.InitiateMongoServer(params) 45 } 46 47 var filesystemRoot = getFilesystemRoot 48 49 func getFilesystemRoot() string { 50 return string(os.PathSeparator) 51 } 52 53 // tagUserCredentials is a convenience function that extracts the 54 // tag user and apipassword, required to access mongodb. 55 func tagUserCredentials(conf agent.Config) (string, string, error) { 56 username := conf.Tag().String() 57 var password string 58 // TODO(perrito) we might need an accessor for the actual state password 59 // just in case it ever changes from the same as api password. 60 apiInfo, ok := conf.APIInfo() 61 if ok { 62 password = apiInfo.Password 63 } else { 64 // There seems to be no way to reach this inconsistence other than making a 65 // backup on a machine where these fields are corrupted and even so I find 66 // no reasonable way to reach this state, yet since APIInfo has it as a 67 // possibility I prefer to handle it, we cannot recover from this since 68 // it would mean that the agent.conf is corrupted. 69 return "", "", errors.New("cannot obtain password to access the controller") 70 } 71 return username, password, nil 72 } 73 74 // newDialInfo returns mgo.DialInfo with the given address using the minimal 75 // possible setup. 76 func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) { 77 dialOpts := mongo.DialOpts{Direct: true} 78 ssi, ok := conf.StateServingInfo() 79 if !ok { 80 return nil, errors.Errorf("cannot get state serving info to dial") 81 } 82 info := mongo.Info{ 83 Addrs: []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))}, 84 CACert: conf.CACert(), 85 } 86 dialInfo, err := mongo.DialInfo(info, dialOpts) 87 if err != nil { 88 return nil, errors.Annotate(err, "cannot produce a dial info") 89 } 90 oldPassword := conf.OldPassword() 91 if oldPassword != "" { 92 dialInfo.Username = "admin" 93 dialInfo.Password = conf.OldPassword() 94 } else { 95 dialInfo.Username, dialInfo.Password, err = tagUserCredentials(conf) 96 if err != nil { 97 return nil, errors.Trace(err) 98 } 99 } 100 return dialInfo, nil 101 } 102 103 // updateMongoEntries will update the machine entries in the restored mongo to 104 // reflect the real machine instanceid in case it changed (a newly bootstraped 105 // server). 106 func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error { 107 session, err := mgo.DialWithInfo(dialInfo) 108 if err != nil { 109 return errors.Annotate(err, "cannot connect to mongo to update") 110 } 111 defer session.Close() 112 // TODO(perrito666): Take the Machine id from an autoritative source 113 err = session.DB("juju").C("machines").Update( 114 bson.M{"machineid": oldMachineId}, 115 bson.M{"$set": bson.M{"instanceid": string(newInstId)}}, 116 ) 117 if err != nil { 118 return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId) 119 } 120 return nil 121 } 122 123 // updateMachineAddresses will update the machine doc to the current addresses 124 func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error { 125 privateAddressAddress := network.Address{ 126 Value: privateAddress, 127 Type: network.DeriveAddressType(privateAddress), 128 } 129 publicAddressAddress := network.Address{ 130 Value: publicAddress, 131 Type: network.DeriveAddressType(publicAddress), 132 } 133 if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil { 134 return errors.Trace(err) 135 } 136 return nil 137 } 138 139 // assign to variables for testing purposes. 140 var mongoDefaultDialOpts = mongo.DefaultDialOpts 141 var environsGetNewPolicyFunc = stateenvirons.GetNewPolicyFunc 142 143 // newStateConnection tries to connect to the newly restored controller. 144 func newStateConnection(controllerTag names.ControllerTag, modelTag names.ModelTag, info *mongo.MongoInfo) (*state.State, error) { 145 // We need to retry here to allow mongo to come up on the restored controller. 146 // The connection might succeed due to the mongo dial retries but there may still 147 // be a problem issuing database commands. 148 var ( 149 st *state.State 150 err error 151 ) 152 const ( 153 newStateConnDelay = 15 * time.Second 154 newStateConnMinAttempts = 8 155 ) 156 // TODO(katco): 2016-08-09: lp:1611427 157 attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts} 158 getEnviron := stateenvirons.GetNewEnvironFunc(environs.New) 159 for a := attempt.Start(); a.Next(); { 160 st, err = state.Open(modelTag, controllerTag, info, mongoDefaultDialOpts(), environsGetNewPolicyFunc(getEnviron)) 161 if err == nil { 162 return st, nil 163 } 164 logger.Errorf("cannot open state, retrying: %v", err) 165 } 166 return st, errors.Annotate(err, "cannot open state") 167 } 168 169 type machineModel struct { 170 machine *state.Machine 171 model *state.Model 172 } 173 174 // updateAllMachines finds all machines and resets the stored state address 175 // in each of them. The address does not include the port. 176 // It is too late to go back and errors in a couple of agents have 177 // better chance of being fixed by the user, if we were to fail 178 // we risk an inconsistent controller because of one unresponsive 179 // agent, we should nevertheless return the err info to the user. 180 func updateAllMachines(privateAddress, publicAddress string, machines []machineModel) error { 181 var machineUpdating sync.WaitGroup 182 for _, item := range machines { 183 machine := item.machine 184 // A newly resumed controller requires no updating, and more 185 // than one controller is not yet supported by this code. 186 if machine.IsManager() || machine.Life() == state.Dead { 187 continue 188 } 189 machineUpdating.Add(1) 190 go func(machine *state.Machine, model *state.Model) { 191 defer machineUpdating.Done() 192 logger.Debugf("updating addresses for machine %s in model %s/%s", machine.Tag().Id(), model.Owner().Canonical(), model.Name()) 193 // TODO: thumper 2016-09-20 194 // runMachineUpdate only handles linux machines, what about windows? 195 err := runMachineUpdate(machine, setAgentAddressScript(privateAddress, publicAddress)) 196 if err != nil { 197 logger.Errorf("failed updating machine: %v", err) 198 } 199 }(machine, item.model) 200 } 201 machineUpdating.Wait() 202 203 // We should return errors encapsulated in a digest. 204 return nil 205 } 206 207 // agentAddressAndRelationsTemplate is the template used to replace the api server data 208 // in the agents for the new ones if the machine has been rebootstraped it will also reset 209 // the relations so hooks will re-fire. 210 var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(` 211 set -xu 212 cd /var/lib/juju/agents 213 for agent in * 214 do 215 service jujud-$agent stop > /dev/null 216 217 # The below statement will work in cases where there 218 # is a private address for the api server only 219 # or where there are a private and a public, which are 220 # the two common cases. 221 sed -i.old -r "/^(stateaddresses|apiaddresses):/{ 222 n 223 s/- .*(:[0-9]+)/- {{.Address}}\1/ 224 n 225 s/- .*(:[0-9]+)/- {{.PubAddress}}\1/ 226 }" $agent/agent.conf 227 228 # If we're processing a unit agent's directly 229 # and it has some relations, reset 230 # the stored version of all of them to 231 # ensure that any relation hooks will 232 # fire. 233 if [[ $agent = unit-* ]] 234 then 235 find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \; 236 fi 237 service jujud-$agent start > /dev/null 238 done 239 `)) 240 241 // setAgentAddressScript generates an ssh script argument to update state addresses. 242 func setAgentAddressScript(stateAddr, statePubAddr string) string { 243 var buf bytes.Buffer 244 err := agentAddressAndRelationsTemplate.Execute(&buf, struct { 245 Address string 246 PubAddress string 247 }{stateAddr, statePubAddr}) 248 if err != nil { 249 panic(errors.Annotate(err, "template error")) 250 } 251 return buf.String() 252 } 253 254 // runMachineUpdate connects via ssh to the machine and runs the update script. 255 func runMachineUpdate(machine *state.Machine, sshArg string) error { 256 addr, err := machine.PublicAddress() 257 if err != nil { 258 if network.IsNoAddressError(err) { 259 return errors.Annotatef(err, "no appropriate public address found") 260 } 261 return errors.Trace(err) 262 } 263 return runViaSSH(addr.Value, sshArg) 264 } 265 266 // sshCommand hods ssh.Command type for testing purposes. 267 var sshCommand = ssh.Command 268 269 // runViaSSH runs script in the remote machine with address addr. 270 func runViaSSH(addr string, script string) error { 271 // This is taken from cmd/juju/ssh.go there is no other clear way to set user 272 userAddr := "ubuntu@" + addr 273 sshOptions := ssh.Options{} 274 sshOptions.SetIdentities("/var/lib/juju/system-identity") 275 userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions) 276 var stdoutBuf bytes.Buffer 277 var stderrBuf bytes.Buffer 278 userCmd.Stdout = &stdoutBuf 279 userCmd.Stderr = &stderrBuf 280 logger.Debugf("updating %s, script:\n%s", addr, script) 281 if err := userCmd.Run(); err != nil { 282 return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String()) 283 } 284 logger.Debugf("result %s\nstdout: \n%s\nstderr: %s", addr, stdoutBuf.String(), stderrBuf.String()) 285 return nil 286 }