github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/backups/restore.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build !windows 5 6 package backups 7 8 import ( 9 "bytes" 10 "net" 11 "os" 12 "strconv" 13 "sync" 14 "text/template" 15 "time" 16 17 "github.com/juju/errors" 18 "github.com/juju/names" 19 "github.com/juju/utils" 20 "github.com/juju/utils/ssh" 21 "gopkg.in/mgo.v2" 22 "gopkg.in/mgo.v2/bson" 23 24 "github.com/juju/juju/agent" 25 "github.com/juju/juju/environs" 26 "github.com/juju/juju/instance" 27 "github.com/juju/juju/mongo" 28 "github.com/juju/juju/network" 29 "github.com/juju/juju/state" 30 "github.com/juju/juju/worker/peergrouper" 31 ) 32 33 // resetReplicaSet re-initiates replica-set using the new controller 34 // values, this is required after a mongo restore. 35 // In case of failure returns error. 36 func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error { 37 params := peergrouper.InitiateMongoParams{ 38 DialInfo: dialInfo, 39 MemberHostPort: memberHostPort, 40 User: dialInfo.Username, 41 Password: dialInfo.Password, 42 } 43 return peergrouper.InitiateMongoServer(params) 44 } 45 46 var filesystemRoot = getFilesystemRoot 47 48 func getFilesystemRoot() string { 49 return string(os.PathSeparator) 50 } 51 52 // newDialInfo returns mgo.DialInfo with the given address using the minimal 53 // possible setup. 54 func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) { 55 dialOpts := mongo.DialOpts{Direct: true} 56 ssi, ok := conf.StateServingInfo() 57 if !ok { 58 return nil, errors.Errorf("cannot get state serving info to dial") 59 } 60 info := mongo.Info{ 61 Addrs: []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))}, 62 CACert: conf.CACert(), 63 } 64 dialInfo, err := mongo.DialInfo(info, dialOpts) 65 if err != nil { 66 return nil, errors.Annotate(err, "cannot produce a dial info") 67 } 68 oldPassword := conf.OldPassword() 69 if oldPassword != "" { 70 dialInfo.Username = "admin" 71 dialInfo.Password = conf.OldPassword() 72 } else { 73 dialInfo.Username = conf.Tag().String() 74 // TODO(perrito) we might need an accessor for the actual state password 75 // just in case it ever changes from the same as api password. 76 apiInfo, ok := conf.APIInfo() 77 if ok { 78 dialInfo.Password = apiInfo.Password 79 logger.Infof("using API password to access controller.") 80 } else { 81 // There seems to be no way to reach this inconsistence other than making a 82 // backup on a machine where these fields are corrupted and even so I find 83 // no reasonable way to reach this state, yet since APIInfo has it as a 84 // possibility I prefer to handle it, we cannot recover from this since 85 // it would mean that the agent.conf is corrupted. 86 return nil, errors.New("cannot obtain password to access the controller") 87 } 88 } 89 return dialInfo, nil 90 } 91 92 // updateMongoEntries will update the machine entries in the restored mongo to 93 // reflect the real machine instanceid in case it changed (a newly bootstraped 94 // server). 95 func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error { 96 session, err := mgo.DialWithInfo(dialInfo) 97 if err != nil { 98 return errors.Annotate(err, "cannot connect to mongo to update") 99 } 100 defer session.Close() 101 // TODO(perrito666): Take the Machine id from an autoritative source 102 err = session.DB("juju").C("machines").Update( 103 bson.M{"machineid": oldMachineId}, 104 bson.M{"$set": bson.M{"instanceid": string(newInstId)}}, 105 ) 106 if err != nil { 107 return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId) 108 } 109 return nil 110 } 111 112 // updateMachineAddresses will update the machine doc to the current addresses 113 func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error { 114 privateAddressAddress := network.Address{ 115 Value: privateAddress, 116 Type: network.DeriveAddressType(privateAddress), 117 } 118 publicAddressAddress := network.Address{ 119 Value: publicAddress, 120 Type: network.DeriveAddressType(publicAddress), 121 } 122 if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil { 123 return errors.Trace(err) 124 } 125 return nil 126 } 127 128 // assign to variables for testing purposes. 129 var mongoDefaultDialOpts = mongo.DefaultDialOpts 130 var environsNewStatePolicy = environs.NewStatePolicy 131 132 // newStateConnection tries to connect to the newly restored controller. 133 func newStateConnection(modelTag names.ModelTag, info *mongo.MongoInfo) (*state.State, error) { 134 // We need to retry here to allow mongo to come up on the restored controller. 135 // The connection might succeed due to the mongo dial retries but there may still 136 // be a problem issuing database commands. 137 var ( 138 st *state.State 139 err error 140 ) 141 const ( 142 newStateConnDelay = 15 * time.Second 143 newStateConnMinAttempts = 8 144 ) 145 attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts} 146 for a := attempt.Start(); a.Next(); { 147 st, err = state.Open(modelTag, info, mongoDefaultDialOpts(), environsNewStatePolicy()) 148 if err == nil { 149 return st, nil 150 } 151 logger.Errorf("cannot open state, retrying: %v", err) 152 } 153 return st, errors.Annotate(err, "cannot open state") 154 } 155 156 // updateAllMachines finds all machines and resets the stored state address 157 // in each of them. The address does not include the port. 158 // It is too late to go back and errors in a couple of agents have 159 // better chance of being fixed by the user, if we were to fail 160 // we risk an inconsistent controller because of one unresponsive 161 // agent, we should nevertheless return the err info to the user. 162 func updateAllMachines(privateAddress string, machines []*state.Machine) error { 163 var machineUpdating sync.WaitGroup 164 for key := range machines { 165 // key is used to have machine be scope bound to the loop iteration. 166 machine := machines[key] 167 // A newly resumed controller requires no updating, and more 168 // than one controller is not yet supported by this code. 169 if machine.IsManager() || machine.Life() == state.Dead { 170 continue 171 } 172 machineUpdating.Add(1) 173 go func() { 174 defer machineUpdating.Done() 175 err := runMachineUpdate(machine, setAgentAddressScript(privateAddress)) 176 logger.Errorf("failed updating machine: %v", err) 177 }() 178 } 179 machineUpdating.Wait() 180 181 // We should return errors encapsulated in a digest. 182 return nil 183 } 184 185 // agentAddressAndRelationsTemplate is the template used to replace the api server data 186 // in the agents for the new ones if the machine has been rebootstraped it will also reset 187 // the relations so hooks will re-fire. 188 var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(` 189 set -xu 190 cd /var/lib/juju/agents 191 for agent in * 192 do 193 status jujud-$agent| grep -q "^jujud-$agent start" > /dev/null 194 if [ $? -eq 0 ]; then 195 initctl stop jujud-$agent 196 fi 197 sed -i.old -r "/^(stateaddresses|apiaddresses):/{ 198 n 199 s/- .*(:[0-9]+)/- {{.Address}}\1/ 200 }" $agent/agent.conf 201 202 # If we're processing a unit agent's directly 203 # and it has some relations, reset 204 # the stored version of all of them to 205 # ensure that any relation hooks will 206 # fire. 207 if [[ $agent = unit-* ]] 208 then 209 find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \; 210 fi 211 # Just in case is a stale unit 212 status jujud-$agent| grep -q "^jujud-$agent stop" > /dev/null 213 if [ $? -eq 0 ]; then 214 initctl start jujud-$agent 215 systemctl stop jujud-$agent 216 systemctl start jujud-$agent 217 fi 218 done 219 `)) 220 221 // setAgentAddressScript generates an ssh script argument to update state addresses. 222 func setAgentAddressScript(stateAddr string) string { 223 var buf bytes.Buffer 224 err := agentAddressAndRelationsTemplate.Execute(&buf, struct { 225 Address string 226 }{stateAddr}) 227 if err != nil { 228 panic(errors.Annotate(err, "template error")) 229 } 230 return buf.String() 231 } 232 233 // runMachineUpdate connects via ssh to the machine and runs the update script. 234 func runMachineUpdate(machine *state.Machine, sshArg string) error { 235 addr, err := machine.PublicAddress() 236 if err != nil { 237 if network.IsNoAddress(err) { 238 return errors.Annotatef(err, "no appropriate public address found") 239 } 240 return errors.Trace(err) 241 } 242 return runViaSSH(addr.Value, sshArg) 243 } 244 245 // sshCommand hods ssh.Command type for testing purposes. 246 var sshCommand = ssh.Command 247 248 // runViaSSH runs script in the remote machine with address addr. 249 func runViaSSH(addr string, script string) error { 250 // This is taken from cmd/juju/ssh.go there is no other clear way to set user 251 userAddr := "ubuntu@" + addr 252 sshOptions := ssh.Options{} 253 sshOptions.SetIdentities("/var/lib/juju/system-identity") 254 userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions) 255 var stderrBuf bytes.Buffer 256 userCmd.Stderr = &stderrBuf 257 if err := userCmd.Run(); err != nil { 258 return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String()) 259 } 260 return nil 261 }