github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/cmd/plugins/juju-restore/restore.go (about) 1 // Copyright 2013 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package main 5 6 import ( 7 "archive/tar" 8 "bytes" 9 "compress/gzip" 10 "fmt" 11 "io" 12 "io/ioutil" 13 "os" 14 "path" 15 "strconv" 16 "text/template" 17 18 "github.com/juju/loggo" 19 "github.com/juju/utils" 20 "launchpad.net/gnuflag" 21 "launchpad.net/goyaml" 22 23 "github.com/juju/juju/cmd" 24 "github.com/juju/juju/cmd/envcmd" 25 "github.com/juju/juju/constraints" 26 "github.com/juju/juju/environs" 27 "github.com/juju/juju/environs/bootstrap" 28 "github.com/juju/juju/environs/config" 29 "github.com/juju/juju/environs/configstore" 30 "github.com/juju/juju/instance" 31 "github.com/juju/juju/juju" 32 _ "github.com/juju/juju/provider/all" 33 "github.com/juju/juju/state" 34 "github.com/juju/juju/state/api" 35 "github.com/juju/juju/utils/ssh" 36 ) 37 38 func main() { 39 Main(os.Args) 40 } 41 42 func Main(args []string) { 43 ctx, err := cmd.DefaultContext() 44 if err != nil { 45 fmt.Fprintf(os.Stderr, "error: %v\n", err) 46 os.Exit(2) 47 } 48 if err := juju.InitJujuHome(); err != nil { 49 fmt.Fprintf(os.Stderr, "error: %s\n", err) 50 os.Exit(2) 51 } 52 os.Exit(cmd.Main(envcmd.Wrap(&restoreCommand{}), ctx, args[1:])) 53 } 54 55 var logger = loggo.GetLogger("juju.plugins.restore") 56 57 const restoreDoc = ` 58 Restore restores a backup created with juju backup 59 by creating a new juju bootstrap instance and arranging 60 it so that the existing instances in the environment 61 talk to it. 62 63 It verifies that the existing bootstrap instance is 64 not running. The given constraints will be used 65 to choose the new instance. 66 ` 67 68 type restoreCommand struct { 69 envcmd.EnvCommandBase 70 Log cmd.Log 71 Constraints constraints.Value 72 backupFile string 73 showDescription bool 74 } 75 76 func (c *restoreCommand) Info() *cmd.Info { 77 return &cmd.Info{ 78 Name: "juju-restore", 79 Purpose: "Restore a backup made with juju backup", 80 Args: "<backupfile.tar.gz>", 81 Doc: restoreDoc, 82 } 83 } 84 85 func (c *restoreCommand) SetFlags(f *gnuflag.FlagSet) { 86 f.Var(constraints.ConstraintsValue{Target: &c.Constraints}, "constraints", "set environment constraints") 87 f.BoolVar(&c.showDescription, "description", false, "show the purpose of this plugin") 88 c.Log.AddFlags(f) 89 } 90 91 func (c *restoreCommand) Init(args []string) error { 92 if c.showDescription { 93 return cmd.CheckEmpty(args) 94 } 95 if len(args) == 0 { 96 return fmt.Errorf("no backup file specified") 97 } 98 c.backupFile = args[0] 99 return cmd.CheckEmpty(args[1:]) 100 } 101 102 var updateBootstrapMachineTemplate = mustParseTemplate(` 103 set -exu 104 105 export LC_ALL=C 106 tar xzf juju-backup.tgz 107 test -d juju-backup 108 apt-get --option=Dpkg::Options::=--force-confold --option=Dpkg::options::=--force-unsafe-io --assume-yes --quiet install mongodb-clients 109 110 initctl stop jujud-machine-0 111 112 initctl stop juju-db 113 rm -r /var/lib/juju 114 rm -r /var/log/juju 115 116 tar -C / -xvp -f juju-backup/root.tar 117 mkdir -p /var/lib/juju/db 118 119 # Prefer jujud-mongodb binaries if available 120 export MONGORESTORE=mongorestore 121 if [ -f /usr/lib/juju/bin/mongorestore ]; then 122 export MONGORESTORE=/usr/lib/juju/bin/mongorestore; 123 fi 124 $MONGORESTORE --drop --dbpath /var/lib/juju/db juju-backup/dump 125 126 initctl start juju-db 127 128 mongoAdminEval() { 129 mongo --ssl -u admin -p {{.AgentConfig.Credentials.OldPassword | shquote}} localhost:{{.AgentConfig.StatePort}}/admin --eval "$1" 130 } 131 132 133 mongoEval() { 134 mongo --ssl -u {{.AgentConfig.Credentials.Tag}} -p {{.AgentConfig.Credentials.Password | shquote}} localhost:{{.AgentConfig.StatePort}}/juju --eval "$1" 135 } 136 137 # wait for mongo to come up after starting the juju-db upstart service. 138 for i in $(seq 1 100) 139 do 140 mongoEval ' ' && break 141 sleep 5 142 done 143 144 # Create a new replicaSet conf and re initiate it 145 mongoAdminEval ' 146 conf = { "_id" : "juju", "version" : 1, "members" : [ { "_id" : 1, "host" : "{{ .PrivateAddress | printf "%s:"}}{{.AgentConfig.StatePort}}" , "tags" : { "juju-machine-id" : "0" } }]} 147 rs.initiate(conf) 148 ' 149 150 sleep 60 151 152 # Remove all state machines but 0, to restore HA 153 mongoEval ' 154 db = db.getSiblingDB("juju") 155 db.machines.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } }) 156 db.instanceData.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } }) 157 db.machines.remove({_id: {$ne:"0"}, hasvote: true}) 158 db.stateServers.update({"_id":"e"}, {$set:{"machineids" : [0]}}) 159 db.stateServers.update({"_id":"e"}, {$set:{"votingmachineids" : [0]}}) 160 ' 161 162 163 164 # Give time to replset to initiate 165 for i in $(seq 1 20) 166 do 167 mongoEval ' ' && break 168 sleep 5 169 done 170 171 initctl stop juju-db 172 173 # Update the agent.conf for machine-0 with the new addresses 174 cd /var/lib/juju/agents 175 176 # Remove extra state machines from conf 177 REMOVECOUNT=$(grep -Ec "^-.*{{.AgentConfig.ApiPort}}$" /var/lib/juju/agents/machine-0/agent.conf ) 178 awk '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i<1' machine-0/agent.conf > machine-0/agent.conf.new 179 awk -v removecount=$REMOVECOUNT '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i==removecount' machine-0/agent.conf >> machine-0/agent.conf.new 180 mv machine-0/agent.conf.new machine-0/agent.conf 181 182 sed -i.old -r -e "/^(stateaddresses):/{ 183 n 184 s/- .*(:[0-9]+)/- {{.Address}}\1/ 185 }" -e "/^(apiaddresses):/{ 186 n 187 s/- .*(:[0-9]+)/- {{.PrivateAddress}}\1/ 188 }" machine-0/agent.conf 189 190 191 initctl start juju-db 192 initctl start jujud-machine-0 193 `) 194 195 func updateBootstrapMachineScript(instanceId instance.Id, agentConf agentConfig, addr, paddr string) string { 196 return execTemplate(updateBootstrapMachineTemplate, struct { 197 NewInstanceId instance.Id 198 AgentConfig agentConfig 199 Address string 200 PrivateAddress string 201 }{instanceId, agentConf, addr, paddr}) 202 } 203 204 func (c *restoreCommand) Run(ctx *cmd.Context) error { 205 if c.showDescription { 206 fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose) 207 return nil 208 } 209 if err := c.Log.Start(ctx); err != nil { 210 return err 211 } 212 agentConf, err := extractConfig(c.backupFile) 213 if err != nil { 214 return fmt.Errorf("cannot extract configuration from backup file: %v", err) 215 } 216 progress("extracted credentials from backup file") 217 store, err := configstore.Default() 218 if err != nil { 219 return err 220 } 221 cfg, _, err := environs.ConfigForName(c.EnvName, store) 222 if err != nil { 223 return err 224 } 225 env, err := rebootstrap(cfg, ctx, c.Constraints) 226 if err != nil { 227 return fmt.Errorf("cannot re-bootstrap environment: %v", err) 228 } 229 progress("connecting to newly bootstrapped instance") 230 conn, err := juju.NewAPIConn(env, api.DefaultDialOpts()) 231 if err != nil { 232 return fmt.Errorf("cannot connect to bootstrap instance: %v", err) 233 } 234 progress("restoring bootstrap machine") 235 newInstId, machine0Addr, err := restoreBootstrapMachine(conn, c.backupFile, agentConf) 236 if err != nil { 237 return fmt.Errorf("cannot restore bootstrap machine: %v", err) 238 } 239 progress("restored bootstrap machine") 240 // Update the environ state to point to the new instance. 241 if err := bootstrap.SaveState(env.Storage(), &bootstrap.BootstrapState{ 242 StateInstances: []instance.Id{newInstId}, 243 }); err != nil { 244 return fmt.Errorf("cannot update environ bootstrap state storage: %v", err) 245 } 246 // Construct our own state info rather than using juju.NewConn so 247 // that we can avoid storage eventual-consistency issues 248 // (and it's faster too). 249 caCert, ok := cfg.CACert() 250 if !ok { 251 return fmt.Errorf("configuration has no CA certificate") 252 } 253 progress("opening state") 254 st, err := state.Open(&state.Info{ 255 Addrs: []string{fmt.Sprintf("%s:%d", machine0Addr, cfg.StatePort())}, 256 CACert: caCert, 257 Tag: agentConf.Credentials.Tag, 258 Password: agentConf.Credentials.Password, 259 }, state.DefaultDialOpts(), environs.NewStatePolicy()) 260 if err != nil { 261 return fmt.Errorf("cannot open state: %v", err) 262 } 263 progress("updating all machines") 264 if err := updateAllMachines(st, machine0Addr); err != nil { 265 return fmt.Errorf("cannot update machines: %v", err) 266 } 267 return nil 268 } 269 270 func progress(f string, a ...interface{}) { 271 fmt.Printf("%s\n", fmt.Sprintf(f, a...)) 272 } 273 274 func rebootstrap(cfg *config.Config, ctx *cmd.Context, cons constraints.Value) (environs.Environ, error) { 275 progress("re-bootstrapping environment") 276 // Turn on safe mode so that the newly bootstrapped instance 277 // will not destroy all the instances it does not know about. 278 cfg, err := cfg.Apply(map[string]interface{}{ 279 "provisioner-safe-mode": true, 280 }) 281 if err != nil { 282 return nil, fmt.Errorf("cannot enable provisioner-safe-mode: %v", err) 283 } 284 env, err := environs.New(cfg) 285 if err != nil { 286 return nil, err 287 } 288 state, err := bootstrap.LoadState(env.Storage()) 289 if err != nil { 290 return nil, fmt.Errorf("cannot retrieve environment storage; perhaps the environment was not bootstrapped: %v", err) 291 } 292 if len(state.StateInstances) == 0 { 293 return nil, fmt.Errorf("no instances found on bootstrap state; perhaps the environment was not bootstrapped") 294 } 295 if len(state.StateInstances) > 1 { 296 return nil, fmt.Errorf("restore does not support HA juju configurations yet") 297 } 298 inst, err := env.Instances(state.StateInstances) 299 if err == nil { 300 return nil, fmt.Errorf("old bootstrap instance %q still seems to exist; will not replace", inst) 301 } 302 if err != environs.ErrNoInstances { 303 return nil, fmt.Errorf("cannot detect whether old instance is still running: %v", err) 304 } 305 // Remove the storage so that we can bootstrap without the provider complaining. 306 if err := env.Storage().Remove(bootstrap.StateFile); err != nil { 307 return nil, fmt.Errorf("cannot remove %q from storage: %v", bootstrap.StateFile, err) 308 } 309 310 // TODO If we fail beyond here, then we won't have a state file and 311 // we won't be able to re-run this script because it fails without it. 312 // We could either try to recreate the file if we fail (which is itself 313 // error-prone) or we could provide a --no-check flag to make 314 // it go ahead anyway without the check. 315 316 args := environs.BootstrapParams{Constraints: cons} 317 if err := bootstrap.Bootstrap(ctx, env, args); err != nil { 318 return nil, fmt.Errorf("cannot bootstrap new instance: %v", err) 319 } 320 return env, nil 321 } 322 323 func restoreBootstrapMachine(conn *juju.APIConn, backupFile string, agentConf agentConfig) (newInstId instance.Id, addr string, err error) { 324 client := conn.State.Client() 325 addr, err = client.PublicAddress("0") 326 if err != nil { 327 return "", "", fmt.Errorf("cannot get public address of bootstrap machine: %v", err) 328 } 329 paddr, err := client.PrivateAddress("0") 330 if err != nil { 331 return "", "", fmt.Errorf("cannot get private address of bootstrap machine: %v", err) 332 } 333 status, err := client.Status(nil) 334 if err != nil { 335 return "", "", fmt.Errorf("cannot get environment status: %v", err) 336 } 337 info, ok := status.Machines["0"] 338 if !ok { 339 return "", "", fmt.Errorf("cannot find bootstrap machine in status") 340 } 341 newInstId = instance.Id(info.InstanceId) 342 343 progress("copying backup file to bootstrap host") 344 if err := sendViaScp(backupFile, addr, "~/juju-backup.tgz"); err != nil { 345 return "", "", fmt.Errorf("cannot copy backup file to bootstrap instance: %v", err) 346 } 347 progress("updating bootstrap machine") 348 if err := runViaSsh(addr, updateBootstrapMachineScript(newInstId, agentConf, addr, paddr)); err != nil { 349 return "", "", fmt.Errorf("update script failed: %v", err) 350 } 351 return newInstId, addr, nil 352 } 353 354 type credentials struct { 355 Tag string 356 Password string 357 OldPassword string 358 } 359 360 type agentConfig struct { 361 Credentials credentials 362 ApiPort string 363 StatePort string 364 } 365 366 func extractConfig(backupFile string) (agentConfig, error) { 367 f, err := os.Open(backupFile) 368 if err != nil { 369 return agentConfig{}, err 370 } 371 defer f.Close() 372 gzr, err := gzip.NewReader(f) 373 if err != nil { 374 return agentConfig{}, fmt.Errorf("cannot unzip %q: %v", backupFile, err) 375 } 376 defer gzr.Close() 377 outerTar, err := findFileInTar(gzr, "juju-backup/root.tar") 378 if err != nil { 379 return agentConfig{}, err 380 } 381 agentConf, err := findFileInTar(outerTar, "var/lib/juju/agents/machine-0/agent.conf") 382 if err != nil { 383 return agentConfig{}, err 384 } 385 data, err := ioutil.ReadAll(agentConf) 386 if err != nil { 387 return agentConfig{}, fmt.Errorf("failed to read agent config file: %v", err) 388 } 389 var conf interface{} 390 if err := goyaml.Unmarshal(data, &conf); err != nil { 391 return agentConfig{}, fmt.Errorf("cannot unmarshal agent config file: %v", err) 392 } 393 m, ok := conf.(map[interface{}]interface{}) 394 if !ok { 395 return agentConfig{}, fmt.Errorf("config file unmarshalled to %T not %T", conf, m) 396 } 397 password, ok := m["statepassword"].(string) 398 if !ok || password == "" { 399 return agentConfig{}, fmt.Errorf("agent password not found in configuration") 400 } 401 oldPassword, ok := m["oldpassword"].(string) 402 if !ok || oldPassword == "" { 403 return agentConfig{}, fmt.Errorf("agent old password not found in configuration") 404 } 405 statePortNum, ok := m["stateport"].(int) 406 if !ok { 407 return agentConfig{}, fmt.Errorf("state port not found in configuration") 408 } 409 410 statePort := strconv.Itoa(statePortNum) 411 apiPortNum, ok := m["apiport"].(int) 412 if !ok { 413 return agentConfig{}, fmt.Errorf("api port not found in configuration") 414 } 415 apiPort := strconv.Itoa(apiPortNum) 416 417 return agentConfig{ 418 Credentials: credentials{ 419 Tag: "machine-0", 420 Password: password, 421 OldPassword: oldPassword, 422 }, 423 StatePort: statePort, 424 ApiPort: apiPort, 425 }, nil 426 } 427 428 func findFileInTar(r io.Reader, name string) (io.Reader, error) { 429 tarr := tar.NewReader(r) 430 for { 431 hdr, err := tarr.Next() 432 if err != nil { 433 return nil, fmt.Errorf("%q not found: %v", name, err) 434 } 435 if path.Clean(hdr.Name) == name { 436 return tarr, nil 437 } 438 } 439 } 440 441 var agentAddressTemplate = mustParseTemplate(` 442 set -exu 443 cd /var/lib/juju/agents 444 for agent in * 445 do 446 initctl stop jujud-$agent 447 sed -i.old -r "/^(stateaddresses|apiaddresses):/{ 448 n 449 s/- .*(:[0-9]+)/- {{.Address}}\1/ 450 }" $agent/agent.conf 451 452 # If we're processing a unit agent's directly 453 # and it has some relations, reset 454 # the stored version of all of them to 455 # ensure that any relation hooks will 456 # fire. 457 if [[ $agent = unit-* ]] 458 then 459 find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \; 460 fi 461 initctl start jujud-$agent 462 done 463 `) 464 465 // setAgentAddressScript generates an ssh script argument to update state addresses 466 func setAgentAddressScript(stateAddr string) string { 467 return execTemplate(agentAddressTemplate, struct { 468 Address string 469 }{stateAddr}) 470 } 471 472 // updateAllMachines finds all machines and resets the stored state address 473 // in each of them. The address does not include the port. 474 func updateAllMachines(st *state.State, stateAddr string) error { 475 machines, err := st.AllMachines() 476 if err != nil { 477 return err 478 } 479 pendingMachineCount := 0 480 done := make(chan error) 481 for _, machine := range machines { 482 // A newly resumed state server requires no updating, and more 483 // than one state server is not yet support by this plugin. 484 if machine.IsManager() || machine.Life() == state.Dead { 485 continue 486 } 487 pendingMachineCount++ 488 machine := machine 489 go func() { 490 err := runMachineUpdate(machine, setAgentAddressScript(stateAddr)) 491 if err != nil { 492 logger.Errorf("failed to update machine %s: %v", machine, err) 493 } else { 494 progress("updated machine %s", machine) 495 } 496 done <- err 497 }() 498 } 499 err = nil 500 for ; pendingMachineCount > 0; pendingMachineCount-- { 501 if updateErr := <-done; updateErr != nil && err == nil { 502 err = fmt.Errorf("machine update failed") 503 } 504 } 505 return err 506 } 507 508 // runMachineUpdate connects via ssh to the machine and runs the update script 509 func runMachineUpdate(m *state.Machine, sshArg string) error { 510 progress("updating machine: %v\n", m) 511 addr := instance.SelectPublicAddress(m.Addresses()) 512 if addr == "" { 513 return fmt.Errorf("no appropriate public address found") 514 } 515 return runViaSsh(addr, sshArg) 516 } 517 518 func runViaSsh(addr string, script string) error { 519 // This is taken from cmd/juju/ssh.go there is no other clear way to set user 520 userAddr := "ubuntu@" + addr 521 cmd := ssh.Command(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, nil) 522 var stderrBuf bytes.Buffer 523 var stdoutBuf bytes.Buffer 524 cmd.Stderr = &stderrBuf 525 cmd.Stdout = &stdoutBuf 526 err := cmd.Run() 527 if err != nil { 528 return fmt.Errorf("ssh command failed: %v (%q)", err, stderrBuf.String()) 529 } 530 progress("ssh command succedded: %q", stdoutBuf.String()) 531 return nil 532 } 533 534 func sendViaScp(file, host, destFile string) error { 535 err := ssh.Copy([]string{file, "ubuntu@" + host + ":" + destFile}, nil) 536 if err != nil { 537 return fmt.Errorf("scp command failed: %v", err) 538 } 539 return nil 540 } 541 542 func mustParseTemplate(templ string) *template.Template { 543 t := template.New("").Funcs(template.FuncMap{ 544 "shquote": utils.ShQuote, 545 }) 546 return template.Must(t.Parse(templ)) 547 } 548 549 func execTemplate(tmpl *template.Template, data interface{}) string { 550 var buf bytes.Buffer 551 err := tmpl.Execute(&buf, data) 552 if err != nil { 553 panic(fmt.Errorf("template error: %v", err)) 554 } 555 return buf.String() 556 }