github.com/rogpeppe/juju@v0.0.0-20140613142852-6337964b789e/cmd/plugins/juju-restore/restore.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package main
     5  
     6  import (
     7  	"archive/tar"
     8  	"bytes"
     9  	"compress/gzip"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"os"
    14  	"path"
    15  	"strconv"
    16  	"text/template"
    17  
    18  	"github.com/juju/cmd"
    19  	"github.com/juju/loggo"
    20  	"github.com/juju/utils"
    21  	"launchpad.net/gnuflag"
    22  	"launchpad.net/goyaml"
    23  
    24  	"github.com/juju/juju/cmd/envcmd"
    25  	"github.com/juju/juju/constraints"
    26  	"github.com/juju/juju/environs"
    27  	"github.com/juju/juju/environs/bootstrap"
    28  	"github.com/juju/juju/environs/config"
    29  	"github.com/juju/juju/environs/configstore"
    30  	"github.com/juju/juju/instance"
    31  	"github.com/juju/juju/juju"
    32  	"github.com/juju/juju/mongo"
    33  	"github.com/juju/juju/network"
    34  	_ "github.com/juju/juju/provider/all"
    35  	"github.com/juju/juju/state"
    36  	"github.com/juju/juju/state/api"
    37  	"github.com/juju/juju/utils/ssh"
    38  )
    39  
// main is the process entry point of the juju-restore plugin; it hands
// the raw command line (including the program name) to Main.
func main() {
	Main(os.Args)
}
    43  
    44  func Main(args []string) {
    45  	ctx, err := cmd.DefaultContext()
    46  	if err != nil {
    47  		fmt.Fprintf(os.Stderr, "error: %v\n", err)
    48  		os.Exit(2)
    49  	}
    50  	if err := juju.InitJujuHome(); err != nil {
    51  		fmt.Fprintf(os.Stderr, "error: %s\n", err)
    52  		os.Exit(2)
    53  	}
    54  	os.Exit(cmd.Main(envcmd.Wrap(&restoreCommand{}), ctx, args[1:]))
    55  }
    56  
// logger is the loggo logger used by this plugin, obtained under the
// name "juju.plugins.restore".
var logger = loggo.GetLogger("juju.plugins.restore")
    58  
// restoreDoc is the long help text of the command; Info returns it as
// the Doc field of the command's cmd.Info.
const restoreDoc = `
Restore restores a backup created with juju backup
by creating a new juju bootstrap instance and arranging
it so that the existing instances in the environment
talk to it.

It verifies that the existing bootstrap instance is
not running. The given constraints will be used
to choose the new instance.
`
    69  
// restoreCommand is the juju-restore plugin command.
//
// Log holds the logging flags registered by SetFlags and is started by
// Run. Constraints is filled from the --constraints flag and is passed
// to rebootstrap when the new bootstrap instance is created. backupFile
// is the single positional argument recorded by Init. showDescription
// is set by the --description flag and makes Run print the command's
// purpose instead of performing a restore.
type restoreCommand struct {
	envcmd.EnvCommandBase
	Log             cmd.Log
	Constraints     constraints.Value
	backupFile      string
	showDescription bool
}
    77  
    78  func (c *restoreCommand) Info() *cmd.Info {
    79  	return &cmd.Info{
    80  		Name:    "juju-restore",
    81  		Purpose: "Restore a backup made with juju backup",
    82  		Args:    "<backupfile.tar.gz>",
    83  		Doc:     restoreDoc,
    84  	}
    85  }
    86  
    87  func (c *restoreCommand) SetFlags(f *gnuflag.FlagSet) {
    88  	f.Var(constraints.ConstraintsValue{Target: &c.Constraints}, "constraints", "set environment constraints")
    89  	f.BoolVar(&c.showDescription, "description", false, "show the purpose of this plugin")
    90  	c.Log.AddFlags(f)
    91  }
    92  
    93  func (c *restoreCommand) Init(args []string) error {
    94  	if c.showDescription {
    95  		return cmd.CheckEmpty(args)
    96  	}
    97  	if len(args) == 0 {
    98  		return fmt.Errorf("no backup file specified")
    99  	}
   100  	c.backupFile = args[0]
   101  	return cmd.CheckEmpty(args[1:])
   102  }
   103  
// updateBootstrapMachineTemplate is the shell script, as a text
// template, that updateBootstrapMachineScript renders and that is run
// on the newly bootstrapped machine 0 to restore the backup onto it.
//
// The template reads the fields NewInstanceId, AgentConfig (Credentials,
// StatePort, ApiPort), Address and PrivateAddress from its data.
//
// In order, the script:
//   - unpacks juju-backup.tgz and installs mongodb-clients;
//   - stops jujud-machine-0 and juju-db and removes /var/lib/juju and
//     /var/log/juju;
//   - extracts juju-backup/root.tar over / and restores the mongo dump
//     with mongorestore (the copy in /usr/lib/juju/bin if present);
//   - starts juju-db, waits for mongo to answer, and initiates a new
//     replica set whose only member is PrivateAddress:StatePort;
//   - records NewInstanceId for machine "0", removes the other machines
//     that have a vote, and resets the stateServers machine id lists to
//     [0];
//   - stops juju-db, trims extra ApiPort address entries from
//     machine-0/agent.conf and rewrites its stateaddresses (Address) and
//     apiaddresses (PrivateAddress) entries;
//   - starts juju-db and jujud-machine-0 again.
//
// The script runs with "set -exu", so it aborts at the first failing
// command outside the retry loops.
var updateBootstrapMachineTemplate = mustParseTemplate(`
	set -exu

	export LC_ALL=C
	tar xzf juju-backup.tgz
	test -d juju-backup
	apt-get --option=Dpkg::Options::=--force-confold --option=Dpkg::options::=--force-unsafe-io --assume-yes --quiet install mongodb-clients
	
	initctl stop jujud-machine-0

	initctl stop juju-db
	rm -r /var/lib/juju
	rm -r /var/log/juju

	tar -C / -xvp -f juju-backup/root.tar
	mkdir -p /var/lib/juju/db

	# Prefer jujud-mongodb binaries if available 
	export MONGORESTORE=mongorestore
	if [ -f /usr/lib/juju/bin/mongorestore ]; then
		export MONGORESTORE=/usr/lib/juju/bin/mongorestore;
	fi	
	$MONGORESTORE --drop --dbpath /var/lib/juju/db juju-backup/dump

	initctl start juju-db

	mongoAdminEval() {
		mongo --ssl -u admin -p {{.AgentConfig.Credentials.OldPassword | shquote}} localhost:{{.AgentConfig.StatePort}}/admin --eval "$1"
	}


	mongoEval() {
		mongo --ssl -u {{.AgentConfig.Credentials.Tag}} -p {{.AgentConfig.Credentials.Password | shquote}} localhost:{{.AgentConfig.StatePort}}/juju --eval "$1"
	}

	# wait for mongo to come up after starting the juju-db upstart service.
	for i in $(seq 1 100)
	do
		mongoEval ' ' && break
		sleep 5
	done

	# Create a new replicaSet conf and re initiate it
	mongoAdminEval '
		conf = { "_id" : "juju", "version" : 1, "members" : [ { "_id" : 1, "host" : "{{ .PrivateAddress | printf "%s:"}}{{.AgentConfig.StatePort}}" , "tags" : { "juju-machine-id" : "0" } }]}
		rs.initiate(conf)
	'

	sleep 60

	# Remove all state machines but 0, to restore HA
	mongoEval '
		db = db.getSiblingDB("juju")
		db.machines.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } })
		db.instanceData.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } })
		db.machines.remove({_id: {$ne:"0"}, hasvote: true})
		db.stateServers.update({"_id":"e"}, {$set:{"machineids" : [0]}})
		db.stateServers.update({"_id":"e"}, {$set:{"votingmachineids" : [0]}})
	'
	


	# Give time to replset to initiate
	for i in $(seq 1 20)
	do
		mongoEval ' ' && break
		sleep 5
	done

	initctl stop juju-db

	# Update the agent.conf for machine-0 with the new addresses
	cd /var/lib/juju/agents

	# Remove extra state machines from conf
	REMOVECOUNT=$(grep -Ec "^-.*{{.AgentConfig.ApiPort}}$" /var/lib/juju/agents/machine-0/agent.conf )
	awk '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i<1' machine-0/agent.conf > machine-0/agent.conf.new
	awk -v removecount=$REMOVECOUNT '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i==removecount' machine-0/agent.conf >> machine-0/agent.conf.new
	mv machine-0/agent.conf.new  machine-0/agent.conf

	sed -i.old -r -e "/^(stateaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" -e "/^(apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.PrivateAddress}}\1/
	}"  machine-0/agent.conf
	

	initctl start juju-db
	initctl start jujud-machine-0
`)
   196  
   197  func updateBootstrapMachineScript(instanceId instance.Id, agentConf agentConfig, addr, paddr string) string {
   198  	return execTemplate(updateBootstrapMachineTemplate, struct {
   199  		NewInstanceId  instance.Id
   200  		AgentConfig    agentConfig
   201  		Address        string
   202  		PrivateAddress string
   203  	}{instanceId, agentConf, addr, paddr})
   204  }
   205  
   206  func (c *restoreCommand) Run(ctx *cmd.Context) error {
   207  	if c.showDescription {
   208  		fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose)
   209  		return nil
   210  	}
   211  	if err := c.Log.Start(ctx); err != nil {
   212  		return err
   213  	}
   214  	agentConf, err := extractConfig(c.backupFile)
   215  	if err != nil {
   216  		return fmt.Errorf("cannot extract configuration from backup file: %v", err)
   217  	}
   218  	progress("extracted credentials from backup file")
   219  	store, err := configstore.Default()
   220  	if err != nil {
   221  		return err
   222  	}
   223  	cfg, _, err := environs.ConfigForName(c.EnvName, store)
   224  	if err != nil {
   225  		return err
   226  	}
   227  	env, err := rebootstrap(cfg, ctx, c.Constraints)
   228  	if err != nil {
   229  		return fmt.Errorf("cannot re-bootstrap environment: %v", err)
   230  	}
   231  	progress("connecting to newly bootstrapped instance")
   232  	conn, err := juju.NewAPIConn(env, api.DefaultDialOpts())
   233  	if err != nil {
   234  		return fmt.Errorf("cannot connect to bootstrap instance: %v", err)
   235  	}
   236  	progress("restoring bootstrap machine")
   237  	newInstId, machine0Addr, err := restoreBootstrapMachine(conn, c.backupFile, agentConf)
   238  	if err != nil {
   239  		return fmt.Errorf("cannot restore bootstrap machine: %v", err)
   240  	}
   241  	progress("restored bootstrap machine")
   242  	// Update the environ state to point to the new instance.
   243  	if err := bootstrap.SaveState(env.Storage(), &bootstrap.BootstrapState{
   244  		StateInstances: []instance.Id{newInstId},
   245  	}); err != nil {
   246  		return fmt.Errorf("cannot update environ bootstrap state storage: %v", err)
   247  	}
   248  	// Construct our own state info rather than using juju.NewConn so
   249  	// that we can avoid storage eventual-consistency issues
   250  	// (and it's faster too).
   251  	caCert, ok := cfg.CACert()
   252  	if !ok {
   253  		return fmt.Errorf("configuration has no CA certificate")
   254  	}
   255  	progress("opening state")
   256  	st, err := state.Open(&state.Info{
   257  		Info: mongo.Info{
   258  			Addrs:  []string{fmt.Sprintf("%s:%d", machine0Addr, cfg.StatePort())},
   259  			CACert: caCert,
   260  		},
   261  		Tag:      agentConf.Credentials.Tag,
   262  		Password: agentConf.Credentials.Password,
   263  	}, mongo.DefaultDialOpts(), environs.NewStatePolicy())
   264  	if err != nil {
   265  		return fmt.Errorf("cannot open state: %v", err)
   266  	}
   267  	progress("updating all machines")
   268  	if err := updateAllMachines(st, machine0Addr); err != nil {
   269  		return fmt.Errorf("cannot update machines: %v", err)
   270  	}
   271  	return nil
   272  }
   273  
   274  func progress(f string, a ...interface{}) {
   275  	fmt.Printf("%s\n", fmt.Sprintf(f, a...))
   276  }
   277  
   278  func rebootstrap(cfg *config.Config, ctx *cmd.Context, cons constraints.Value) (environs.Environ, error) {
   279  	progress("re-bootstrapping environment")
   280  	// Turn on safe mode so that the newly bootstrapped instance
   281  	// will not destroy all the instances it does not know about.
   282  	cfg, err := cfg.Apply(map[string]interface{}{
   283  		"provisioner-safe-mode": true,
   284  	})
   285  	if err != nil {
   286  		return nil, fmt.Errorf("cannot enable provisioner-safe-mode: %v", err)
   287  	}
   288  	env, err := environs.New(cfg)
   289  	if err != nil {
   290  		return nil, err
   291  	}
   292  	state, err := bootstrap.LoadState(env.Storage())
   293  	if err != nil {
   294  		return nil, fmt.Errorf("cannot retrieve environment storage; perhaps the environment was not bootstrapped: %v", err)
   295  	}
   296  	if len(state.StateInstances) == 0 {
   297  		return nil, fmt.Errorf("no instances found on bootstrap state; perhaps the environment was not bootstrapped")
   298  	}
   299  	if len(state.StateInstances) > 1 {
   300  		return nil, fmt.Errorf("restore does not support HA juju configurations yet")
   301  	}
   302  	inst, err := env.Instances(state.StateInstances)
   303  	if err == nil {
   304  		return nil, fmt.Errorf("old bootstrap instance %q still seems to exist; will not replace", inst)
   305  	}
   306  	if err != environs.ErrNoInstances {
   307  		return nil, fmt.Errorf("cannot detect whether old instance is still running: %v", err)
   308  	}
   309  	// Remove the storage so that we can bootstrap without the provider complaining.
   310  	if err := env.Storage().Remove(bootstrap.StateFile); err != nil {
   311  		return nil, fmt.Errorf("cannot remove %q from storage: %v", bootstrap.StateFile, err)
   312  	}
   313  
   314  	// TODO If we fail beyond here, then we won't have a state file and
   315  	// we won't be able to re-run this script because it fails without it.
   316  	// We could either try to recreate the file if we fail (which is itself
   317  	// error-prone) or we could provide a --no-check flag to make
   318  	// it go ahead anyway without the check.
   319  
   320  	args := environs.BootstrapParams{Constraints: cons}
   321  	if err := bootstrap.Bootstrap(ctx, env, args); err != nil {
   322  		return nil, fmt.Errorf("cannot bootstrap new instance: %v", err)
   323  	}
   324  	return env, nil
   325  }
   326  
   327  func restoreBootstrapMachine(conn *juju.APIConn, backupFile string, agentConf agentConfig) (newInstId instance.Id, addr string, err error) {
   328  	client := conn.State.Client()
   329  	addr, err = client.PublicAddress("0")
   330  	if err != nil {
   331  		return "", "", fmt.Errorf("cannot get public address of bootstrap machine: %v", err)
   332  	}
   333  	paddr, err := client.PrivateAddress("0")
   334  	if err != nil {
   335  		return "", "", fmt.Errorf("cannot get private address of bootstrap machine: %v", err)
   336  	}
   337  	status, err := client.Status(nil)
   338  	if err != nil {
   339  		return "", "", fmt.Errorf("cannot get environment status: %v", err)
   340  	}
   341  	info, ok := status.Machines["0"]
   342  	if !ok {
   343  		return "", "", fmt.Errorf("cannot find bootstrap machine in status")
   344  	}
   345  	newInstId = instance.Id(info.InstanceId)
   346  
   347  	progress("copying backup file to bootstrap host")
   348  	if err := sendViaScp(backupFile, addr, "~/juju-backup.tgz"); err != nil {
   349  		return "", "", fmt.Errorf("cannot copy backup file to bootstrap instance: %v", err)
   350  	}
   351  	progress("updating bootstrap machine")
   352  	if err := runViaSsh(addr, updateBootstrapMachineScript(newInstId, agentConf, addr, paddr)); err != nil {
   353  		return "", "", fmt.Errorf("update script failed: %v", err)
   354  	}
   355  	return newInstId, addr, nil
   356  }
   357  
// credentials holds the machine agent's login details taken from a
// backed-up agent.conf. extractConfig sets Tag to "machine-0", Password
// to the "statepassword" entry and OldPassword to the "oldpassword"
// entry. The restore script logs in to mongo as Tag with Password, and
// as the admin user with OldPassword.
type credentials struct {
	Tag         string
	Password    string
	OldPassword string
}
   363  
// agentConfig is the subset of the backed-up machine-0 agent.conf that
// the restore needs. ApiPort and StatePort are the decimal string forms
// of the "apiport" and "stateport" entries, as produced by
// extractConfig; the field names are referenced by the script templates.
type agentConfig struct {
	Credentials credentials
	ApiPort     string
	StatePort   string
}
   369  
   370  func extractConfig(backupFile string) (agentConfig, error) {
   371  	f, err := os.Open(backupFile)
   372  	if err != nil {
   373  		return agentConfig{}, err
   374  	}
   375  	defer f.Close()
   376  	gzr, err := gzip.NewReader(f)
   377  	if err != nil {
   378  		return agentConfig{}, fmt.Errorf("cannot unzip %q: %v", backupFile, err)
   379  	}
   380  	defer gzr.Close()
   381  	outerTar, err := findFileInTar(gzr, "juju-backup/root.tar")
   382  	if err != nil {
   383  		return agentConfig{}, err
   384  	}
   385  	agentConf, err := findFileInTar(outerTar, "var/lib/juju/agents/machine-0/agent.conf")
   386  	if err != nil {
   387  		return agentConfig{}, err
   388  	}
   389  	data, err := ioutil.ReadAll(agentConf)
   390  	if err != nil {
   391  		return agentConfig{}, fmt.Errorf("failed to read agent config file: %v", err)
   392  	}
   393  	var conf interface{}
   394  	if err := goyaml.Unmarshal(data, &conf); err != nil {
   395  		return agentConfig{}, fmt.Errorf("cannot unmarshal agent config file: %v", err)
   396  	}
   397  	m, ok := conf.(map[interface{}]interface{})
   398  	if !ok {
   399  		return agentConfig{}, fmt.Errorf("config file unmarshalled to %T not %T", conf, m)
   400  	}
   401  	password, ok := m["statepassword"].(string)
   402  	if !ok || password == "" {
   403  		return agentConfig{}, fmt.Errorf("agent password not found in configuration")
   404  	}
   405  	oldPassword, ok := m["oldpassword"].(string)
   406  	if !ok || oldPassword == "" {
   407  		return agentConfig{}, fmt.Errorf("agent old password not found in configuration")
   408  	}
   409  	statePortNum, ok := m["stateport"].(int)
   410  	if !ok {
   411  		return agentConfig{}, fmt.Errorf("state port not found in configuration")
   412  	}
   413  
   414  	statePort := strconv.Itoa(statePortNum)
   415  	apiPortNum, ok := m["apiport"].(int)
   416  	if !ok {
   417  		return agentConfig{}, fmt.Errorf("api port not found in configuration")
   418  	}
   419  	apiPort := strconv.Itoa(apiPortNum)
   420  
   421  	return agentConfig{
   422  		Credentials: credentials{
   423  			Tag:         "machine-0",
   424  			Password:    password,
   425  			OldPassword: oldPassword,
   426  		},
   427  		StatePort: statePort,
   428  		ApiPort:   apiPort,
   429  	}, nil
   430  }
   431  
   432  func findFileInTar(r io.Reader, name string) (io.Reader, error) {
   433  	tarr := tar.NewReader(r)
   434  	for {
   435  		hdr, err := tarr.Next()
   436  		if err != nil {
   437  			return nil, fmt.Errorf("%q not found: %v", name, err)
   438  		}
   439  		if path.Clean(hdr.Name) == name {
   440  			return tarr, nil
   441  		}
   442  	}
   443  }
   444  
// agentAddressTemplate is the shell script, as a text template, that
// setAgentAddressScript renders; the only template field is Address.
//
// For every entry in /var/lib/juju/agents the script stops the
// corresponding jujud-<agent> service, rewrites the host part of the
// entry following the stateaddresses and apiaddresses keys in the
// agent's agent.conf to Address (keeping the port), and starts the
// service again. For unit agents it also resets the stored
// change-version of every relation state file to 0.
var agentAddressTemplate = mustParseTemplate(`
set -exu
cd /var/lib/juju/agents
for agent in *
do
	initctl stop jujud-$agent
	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" $agent/agent.conf

	# If we're processing a unit agent's directly
	# and it has some relations, reset
	# the stored version of all of them to
	# ensure that any relation hooks will
	# fire.
	if [[ $agent = unit-* ]]
	then
		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
	fi
	initctl start jujud-$agent
done
`)
   468  
   469  // setAgentAddressScript generates an ssh script argument to update state addresses
   470  func setAgentAddressScript(stateAddr string) string {
   471  	return execTemplate(agentAddressTemplate, struct {
   472  		Address string
   473  	}{stateAddr})
   474  }
   475  
   476  // updateAllMachines finds all machines and resets the stored state address
   477  // in each of them. The address does not include the port.
   478  func updateAllMachines(st *state.State, stateAddr string) error {
   479  	machines, err := st.AllMachines()
   480  	if err != nil {
   481  		return err
   482  	}
   483  	pendingMachineCount := 0
   484  	done := make(chan error)
   485  	for _, machine := range machines {
   486  		// A newly resumed state server requires no updating, and more
   487  		// than one state server is not yet support by this plugin.
   488  		if machine.IsManager() || machine.Life() == state.Dead {
   489  			continue
   490  		}
   491  		pendingMachineCount++
   492  		machine := machine
   493  		go func() {
   494  			err := runMachineUpdate(machine, setAgentAddressScript(stateAddr))
   495  			if err != nil {
   496  				logger.Errorf("failed to update machine %s: %v", machine, err)
   497  			} else {
   498  				progress("updated machine %s", machine)
   499  			}
   500  			done <- err
   501  		}()
   502  	}
   503  	err = nil
   504  	for ; pendingMachineCount > 0; pendingMachineCount-- {
   505  		if updateErr := <-done; updateErr != nil && err == nil {
   506  			err = fmt.Errorf("machine update failed")
   507  		}
   508  	}
   509  	return err
   510  }
   511  
   512  // runMachineUpdate connects via ssh to the machine and runs the update script
   513  func runMachineUpdate(m *state.Machine, sshArg string) error {
   514  	progress("updating machine: %v\n", m)
   515  	addr := network.SelectPublicAddress(m.Addresses())
   516  	if addr == "" {
   517  		return fmt.Errorf("no appropriate public address found")
   518  	}
   519  	return runViaSsh(addr, sshArg)
   520  }
   521  
   522  func runViaSsh(addr string, script string) error {
   523  	// This is taken from cmd/juju/ssh.go there is no other clear way to set user
   524  	userAddr := "ubuntu@" + addr
   525  	cmd := ssh.Command(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, nil)
   526  	var stderrBuf bytes.Buffer
   527  	var stdoutBuf bytes.Buffer
   528  	cmd.Stderr = &stderrBuf
   529  	cmd.Stdout = &stdoutBuf
   530  	err := cmd.Run()
   531  	if err != nil {
   532  		return fmt.Errorf("ssh command failed: %v (%q)", err, stderrBuf.String())
   533  	}
   534  	progress("ssh command succedded: %q", stdoutBuf.String())
   535  	return nil
   536  }
   537  
   538  func sendViaScp(file, host, destFile string) error {
   539  	err := ssh.Copy([]string{file, "ubuntu@" + host + ":" + destFile}, nil)
   540  	if err != nil {
   541  		return fmt.Errorf("scp command failed: %v", err)
   542  	}
   543  	return nil
   544  }
   545  
   546  func mustParseTemplate(templ string) *template.Template {
   547  	t := template.New("").Funcs(template.FuncMap{
   548  		"shquote": utils.ShQuote,
   549  	})
   550  	return template.Must(t.Parse(templ))
   551  }
   552  
   553  func execTemplate(tmpl *template.Template, data interface{}) string {
   554  	var buf bytes.Buffer
   555  	err := tmpl.Execute(&buf, data)
   556  	if err != nil {
   557  		panic(fmt.Errorf("template error: %v", err))
   558  	}
   559  	return buf.String()
   560  }