github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/state/backups/restore.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build !windows 5 6 package backups 7 8 import ( 9 "bytes" 10 "fmt" 11 "os" 12 "path" 13 "path/filepath" 14 "strings" 15 "sync" 16 "text/template" 17 "time" 18 19 "github.com/juju/errors" 20 "github.com/juju/names" 21 "github.com/juju/utils" 22 "github.com/juju/utils/symlink" 23 "gopkg.in/mgo.v2" 24 "gopkg.in/mgo.v2/bson" 25 26 "github.com/juju/juju/agent" 27 "github.com/juju/juju/agent/tools" 28 "github.com/juju/juju/environs" 29 "github.com/juju/juju/instance" 30 "github.com/juju/juju/mongo" 31 "github.com/juju/juju/network" 32 "github.com/juju/juju/state" 33 "github.com/juju/juju/utils/ssh" 34 "github.com/juju/juju/worker/peergrouper" 35 ) 36 37 // TODO(perrito666) create an authoritative source for all possible 38 // uses of this const, not only here but all around juju 39 const restoreUserHome = "/home/ubuntu/" 40 41 // resetReplicaSet re-initiates replica-set using the new state server 42 // values, this is required after a mongo restore. 43 // In case of failure returns error. 44 func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error { 45 params := peergrouper.InitiateMongoParams{dialInfo, 46 memberHostPort, 47 dialInfo.Username, 48 dialInfo.Password, 49 } 50 return peergrouper.InitiateMongoServer(params, true) 51 } 52 53 var filesystemRoot = getFilesystemRoot 54 55 func getFilesystemRoot() string { 56 return string(os.PathSeparator) 57 } 58 59 // newDialInfo returns mgo.DialInfo with the given address using the minimal 60 // possible setup. 61 func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) { 62 dialOpts := mongo.DialOpts{Direct: true} 63 ssi, ok := conf.StateServingInfo() 64 if !ok { 65 return nil, errors.Errorf("cannot get state serving info to dial") 66 } 67 info := mongo.Info{ 68 Addrs: []string{fmt.Sprintf("%s:%d", privateAddr, ssi.StatePort)}, 69 CACert: conf.CACert(), 70 } 71 dialInfo, err := mongo.DialInfo(info, dialOpts) 72 if err != nil { 73 return nil, errors.Annotate(err, "cannot produce a dial info") 74 } 75 dialInfo.Username = "admin" 76 dialInfo.Password = conf.OldPassword() 77 return dialInfo, nil 78 } 79 80 // updateMongoEntries will update the machine entries in the restored mongo to 81 // reflect the real machine instanceid in case it changed (a newly bootstraped 82 // server). 83 func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error { 84 session, err := mgo.DialWithInfo(dialInfo) 85 if err != nil { 86 return errors.Annotate(err, "cannot connect to mongo to update") 87 } 88 defer session.Close() 89 // TODO(perrito666): Take the Machine id from an autoritative source 90 err = session.DB("juju").C("machines").Update( 91 bson.M{"machineid": oldMachineId}, 92 bson.M{"$set": bson.M{"instanceid": string(newInstId), 93 "machineid": newMachineId}}, 94 ) 95 if err != nil { 96 return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId) 97 } 98 return nil 99 } 100 101 // updateMachineAddresses will update the machine doc to the current addresses 102 func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error { 103 privateAddressAddress := network.Address{ 104 Value: privateAddress, 105 Type: network.DeriveAddressType(privateAddress), 106 } 107 publicAddressAddress := network.Address{ 108 Value: publicAddress, 109 Type: network.DeriveAddressType(publicAddress), 110 } 111 if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil { 112 return errors.Trace(err) 113 } 114 return nil 115 } 116 117 // assign to variables for testing purposes. 118 var mongoDefaultDialOpts = mongo.DefaultDialOpts 119 var environsNewStatePolicy = environs.NewStatePolicy 120 121 // newStateConnection tries to connect to the newly restored state server. 122 func newStateConnection(environTag names.EnvironTag, info *mongo.MongoInfo) (*state.State, error) { 123 // We need to retry here to allow mongo to come up on the restored state server. 124 // The connection might succeed due to the mongo dial retries but there may still 125 // be a problem issuing database commands. 126 var ( 127 st *state.State 128 err error 129 ) 130 const ( 131 newStateConnDelay = 15 * time.Second 132 newStateConnMinAttempts = 8 133 ) 134 attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts} 135 for a := attempt.Start(); a.Next(); { 136 st, err = state.Open(environTag, info, mongoDefaultDialOpts(), environsNewStatePolicy()) 137 if err == nil { 138 return st, nil 139 } 140 logger.Errorf("cannot open state, retrying: %v", err) 141 } 142 return st, errors.Annotate(err, "cannot open state") 143 } 144 145 // updateAllMachines finds all machines and resets the stored state address 146 // in each of them. The address does not include the port. 147 // It is too late to go back and errors in a couple of agents have 148 // better chance of being fixed by the user, if we were to fail 149 // we risk an inconsistent state server because of one unresponsive 150 // agent, we should nevertheless return the err info to the user. 151 func updateAllMachines(privateAddress string, machines []*state.Machine) error { 152 var machineUpdating sync.WaitGroup 153 for key := range machines { 154 // key is used to have machine be scope bound to the loop iteration. 155 machine := machines[key] 156 // A newly resumed state server requires no updating, and more 157 // than one state server is not yet supported by this code. 158 if machine.IsManager() || machine.Life() == state.Dead { 159 continue 160 } 161 machineUpdating.Add(1) 162 go func() { 163 defer machineUpdating.Done() 164 err := runMachineUpdate(machine.Addresses(), setAgentAddressScript(privateAddress)) 165 logger.Errorf("failed updating machine: %v", err) 166 }() 167 } 168 machineUpdating.Wait() 169 170 // We should return errors encapsulated in a digest. 171 return nil 172 } 173 174 // agentAddressAndRelationsTemplate is the template used to replace the api server data 175 // in the agents for the new ones if the machine has been rebootstraped it will also reset 176 // the relations so hooks will re-fire. 177 var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(` 178 set -xu 179 cd /var/lib/juju/agents 180 for agent in * 181 do 182 status jujud-$agent| grep -q "^jujud-$agent start" > /dev/null 183 if [ $? -eq 0 ]; then 184 initctl stop jujud-$agent 185 fi 186 sed -i.old -r "/^(stateaddresses|apiaddresses):/{ 187 n 188 s/- .*(:[0-9]+)/- {{.Address}}\1/ 189 }" $agent/agent.conf 190 191 # If we're processing a unit agent's directly 192 # and it has some relations, reset 193 # the stored version of all of them to 194 # ensure that any relation hooks will 195 # fire. 196 if [[ $agent = unit-* ]] 197 then 198 find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \; 199 fi 200 # Just in case is a stale unit 201 status jujud-$agent| grep -q "^jujud-$agent stop" > /dev/null 202 if [ $? -eq 0 ]; then 203 initctl start jujud-$agent 204 fi 205 done 206 `)) 207 208 // setAgentAddressScript generates an ssh script argument to update state addresses. 209 func setAgentAddressScript(stateAddr string) string { 210 var buf bytes.Buffer 211 err := agentAddressAndRelationsTemplate.Execute(&buf, struct { 212 Address string 213 }{stateAddr}) 214 if err != nil { 215 panic(errors.Annotate(err, "template error")) 216 } 217 return buf.String() 218 } 219 220 // runMachineUpdate connects via ssh to the machine and runs the update script. 221 func runMachineUpdate(allAddr []network.Address, sshArg string) error { 222 addr := network.SelectPublicAddress(allAddr) 223 if addr == "" { 224 return errors.Errorf("no appropriate public address found") 225 } 226 return runViaSSH(addr, sshArg) 227 } 228 229 // sshCommand hods ssh.Command type for testing purposes. 230 var sshCommand = ssh.Command 231 232 // runViaSSH runs script in the remote machine with address addr. 233 func runViaSSH(addr string, script string) error { 234 // This is taken from cmd/juju/ssh.go there is no other clear way to set user 235 userAddr := "ubuntu@" + addr 236 sshOptions := ssh.Options{} 237 sshOptions.SetIdentities("/var/lib/juju/system-identity") 238 userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions) 239 var stderrBuf bytes.Buffer 240 userCmd.Stderr = &stderrBuf 241 if err := userCmd.Run(); err != nil { 242 return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String()) 243 } 244 return nil 245 } 246 247 // updateBackupMachineTag updates the paths that are stored in the backup 248 // to the current machine. This path is tied, among other factors, to the 249 // machine tag. 250 // Eventually this will change: when backups hold relative paths. 251 func updateBackupMachineTag(oldTag, newTag names.Tag) error { 252 oldTagString := oldTag.String() 253 newTagString := newTag.String() 254 255 if oldTagString == newTagString { 256 return nil 257 } 258 oldTagPath := path.Join(agent.DefaultDataDir, oldTagString) 259 newTagPath := path.Join(agent.DefaultDataDir, newTagString) 260 261 oldToolsDir := tools.ToolsDir(agent.DefaultDataDir, oldTagString) 262 oldLink, err := filepath.EvalSymlinks(oldToolsDir) 263 264 os.Rename(oldTagPath, newTagPath) 265 newToolsDir := tools.ToolsDir(agent.DefaultDataDir, newTagString) 266 newToolsPath := strings.Replace(oldLink, oldTagPath, newTagPath, -1) 267 err = symlink.Replace(newToolsDir, newToolsPath) 268 return errors.Annotatef(err, "cannot set the new tools path") 269 }