github.com/mattyw/juju@v0.0.0-20140610034352-732aecd63861/cmd/plugins/juju-restore/restore.go (about)

     1  // Copyright 2013 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  package main
     5  
     6  import (
     7  	"archive/tar"
     8  	"bytes"
     9  	"compress/gzip"
    10  	"fmt"
    11  	"io"
    12  	"io/ioutil"
    13  	"os"
    14  	"path"
    15  	"strconv"
    16  	"text/template"
    17  
    18  	"github.com/juju/loggo"
    19  	"github.com/juju/utils"
    20  	"launchpad.net/gnuflag"
    21  	"launchpad.net/goyaml"
    22  
    23  	"github.com/juju/juju/cmd"
    24  	"github.com/juju/juju/cmd/envcmd"
    25  	"github.com/juju/juju/constraints"
    26  	"github.com/juju/juju/environs"
    27  	"github.com/juju/juju/environs/bootstrap"
    28  	"github.com/juju/juju/environs/config"
    29  	"github.com/juju/juju/environs/configstore"
    30  	"github.com/juju/juju/instance"
    31  	"github.com/juju/juju/juju"
    32  	_ "github.com/juju/juju/provider/all"
    33  	"github.com/juju/juju/state"
    34  	"github.com/juju/juju/state/api"
    35  	"github.com/juju/juju/utils/ssh"
    36  )
    37  
// main is the process entry point; it hands the raw command-line
// arguments to Main.
func main() {
	Main(os.Args)
}
    41  
    42  func Main(args []string) {
    43  	ctx, err := cmd.DefaultContext()
    44  	if err != nil {
    45  		fmt.Fprintf(os.Stderr, "error: %v\n", err)
    46  		os.Exit(2)
    47  	}
    48  	if err := juju.InitJujuHome(); err != nil {
    49  		fmt.Fprintf(os.Stderr, "error: %s\n", err)
    50  		os.Exit(2)
    51  	}
    52  	os.Exit(cmd.Main(envcmd.Wrap(&restoreCommand{}), ctx, args[1:]))
    53  }
    54  
// logger is the loggo logger used by this plugin, registered under the
// name "juju.plugins.restore".
var logger = loggo.GetLogger("juju.plugins.restore")
    56  
// restoreDoc is the long help text for the command; it is returned as
// the Doc field of the cmd.Info built by restoreCommand.Info.
const restoreDoc = `
Restore restores a backup created with juju backup
by creating a new juju bootstrap instance and arranging
it so that the existing instances in the environment
talk to it.

It verifies that the existing bootstrap instance is
not running. The given constraints will be used
to choose the new instance.
`
    67  
// restoreCommand implements the juju-restore plugin command.
type restoreCommand struct {
	envcmd.EnvCommandBase
	// Log holds the logging options; its flags are registered in
	// SetFlags and it is started at the beginning of Run.
	Log cmd.Log
	// Constraints is filled from the --constraints flag and passed to
	// rebootstrap when the new bootstrap instance is created.
	Constraints constraints.Value
	// backupFile is the first positional argument: the path of the
	// backup archive to restore from.
	backupFile string
	// showDescription is set by the --description flag; when true, Run
	// only prints the command's purpose and returns.
	showDescription bool
}
    75  
    76  func (c *restoreCommand) Info() *cmd.Info {
    77  	return &cmd.Info{
    78  		Name:    "juju-restore",
    79  		Purpose: "Restore a backup made with juju backup",
    80  		Args:    "<backupfile.tar.gz>",
    81  		Doc:     restoreDoc,
    82  	}
    83  }
    84  
    85  func (c *restoreCommand) SetFlags(f *gnuflag.FlagSet) {
    86  	f.Var(constraints.ConstraintsValue{Target: &c.Constraints}, "constraints", "set environment constraints")
    87  	f.BoolVar(&c.showDescription, "description", false, "show the purpose of this plugin")
    88  	c.Log.AddFlags(f)
    89  }
    90  
    91  func (c *restoreCommand) Init(args []string) error {
    92  	if c.showDescription {
    93  		return cmd.CheckEmpty(args)
    94  	}
    95  	if len(args) == 0 {
    96  		return fmt.Errorf("no backup file specified")
    97  	}
    98  	c.backupFile = args[0]
    99  	return cmd.CheckEmpty(args[1:])
   100  }
   101  
// updateBootstrapMachineTemplate is the shell script template that is run
// on the newly bootstrapped machine 0 (see updateBootstrapMachineScript
// and restoreBootstrapMachine). The rendered script:
//
//   - unpacks juju-backup.tgz from the current directory and installs
//     mongodb-clients;
//   - stops jujud-machine-0 and juju-db, removes /var/lib/juju and
//     /var/log/juju, and extracts juju-backup/root.tar over /;
//   - runs mongorestore on juju-backup/dump and restarts juju-db;
//   - initiates a replica set containing only this machine and points
//     machine 0's instance id records at the new instance;
//   - rewrites the state and API addresses in machine-0/agent.conf and
//     restarts juju-db and jujud-machine-0.
//
// The template reads the fields NewInstanceId, AgentConfig, Address and
// PrivateAddress from the value it is executed with.
var updateBootstrapMachineTemplate = mustParseTemplate(`
	set -exu

	export LC_ALL=C
	tar xzf juju-backup.tgz
	test -d juju-backup
	apt-get --option=Dpkg::Options::=--force-confold --option=Dpkg::options::=--force-unsafe-io --assume-yes --quiet install mongodb-clients
	
	initctl stop jujud-machine-0

	initctl stop juju-db
	rm -r /var/lib/juju
	rm -r /var/log/juju

	tar -C / -xvp -f juju-backup/root.tar
	mkdir -p /var/lib/juju/db

	# Prefer jujud-mongodb binaries if available 
	export MONGORESTORE=mongorestore
	if [ -f /usr/lib/juju/bin/mongorestore ]; then
		export MONGORESTORE=/usr/lib/juju/bin/mongorestore;
	fi	
	$MONGORESTORE --drop --dbpath /var/lib/juju/db juju-backup/dump

	initctl start juju-db

	mongoAdminEval() {
		mongo --ssl -u admin -p {{.AgentConfig.Credentials.OldPassword | shquote}} localhost:{{.AgentConfig.StatePort}}/admin --eval "$1"
	}


	mongoEval() {
		mongo --ssl -u {{.AgentConfig.Credentials.Tag}} -p {{.AgentConfig.Credentials.Password | shquote}} localhost:{{.AgentConfig.StatePort}}/juju --eval "$1"
	}

	# wait for mongo to come up after starting the juju-db upstart service.
	for i in $(seq 1 100)
	do
		mongoEval ' ' && break
		sleep 5
	done

	# Create a new replicaSet conf and re initiate it
	mongoAdminEval '
		conf = { "_id" : "juju", "version" : 1, "members" : [ { "_id" : 1, "host" : "{{ .PrivateAddress | printf "%s:"}}{{.AgentConfig.StatePort}}" , "tags" : { "juju-machine-id" : "0" } }]}
		rs.initiate(conf)
	'

	sleep 60

	# Remove all state machines but 0, to restore HA
	mongoEval '
		db = db.getSiblingDB("juju")
		db.machines.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } })
		db.instanceData.update({_id: "0"}, {$set: {instanceid: {{.NewInstanceId | printf "%q" }} } })
		db.machines.remove({_id: {$ne:"0"}, hasvote: true})
		db.stateServers.update({"_id":"e"}, {$set:{"machineids" : [0]}})
		db.stateServers.update({"_id":"e"}, {$set:{"votingmachineids" : [0]}})
	'
	


	# Give time to replset to initiate
	for i in $(seq 1 20)
	do
		mongoEval ' ' && break
		sleep 5
	done

	initctl stop juju-db

	# Update the agent.conf for machine-0 with the new addresses
	cd /var/lib/juju/agents

	# Remove extra state machines from conf
	REMOVECOUNT=$(grep -Ec "^-.*{{.AgentConfig.ApiPort}}$" /var/lib/juju/agents/machine-0/agent.conf )
	awk '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i<1' machine-0/agent.conf > machine-0/agent.conf.new
	awk -v removecount=$REMOVECOUNT '/\-.*{{.AgentConfig.ApiPort}}$/{i++}i==removecount' machine-0/agent.conf >> machine-0/agent.conf.new
	mv machine-0/agent.conf.new  machine-0/agent.conf

	sed -i.old -r -e "/^(stateaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" -e "/^(apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.PrivateAddress}}\1/
	}"  machine-0/agent.conf
	

	initctl start juju-db
	initctl start jujud-machine-0
`)
   194  
   195  func updateBootstrapMachineScript(instanceId instance.Id, agentConf agentConfig, addr, paddr string) string {
   196  	return execTemplate(updateBootstrapMachineTemplate, struct {
   197  		NewInstanceId  instance.Id
   198  		AgentConfig    agentConfig
   199  		Address        string
   200  		PrivateAddress string
   201  	}{instanceId, agentConf, addr, paddr})
   202  }
   203  
   204  func (c *restoreCommand) Run(ctx *cmd.Context) error {
   205  	if c.showDescription {
   206  		fmt.Fprintf(ctx.Stdout, "%s\n", c.Info().Purpose)
   207  		return nil
   208  	}
   209  	if err := c.Log.Start(ctx); err != nil {
   210  		return err
   211  	}
   212  	agentConf, err := extractConfig(c.backupFile)
   213  	if err != nil {
   214  		return fmt.Errorf("cannot extract configuration from backup file: %v", err)
   215  	}
   216  	progress("extracted credentials from backup file")
   217  	store, err := configstore.Default()
   218  	if err != nil {
   219  		return err
   220  	}
   221  	cfg, _, err := environs.ConfigForName(c.EnvName, store)
   222  	if err != nil {
   223  		return err
   224  	}
   225  	env, err := rebootstrap(cfg, ctx, c.Constraints)
   226  	if err != nil {
   227  		return fmt.Errorf("cannot re-bootstrap environment: %v", err)
   228  	}
   229  	progress("connecting to newly bootstrapped instance")
   230  	conn, err := juju.NewAPIConn(env, api.DefaultDialOpts())
   231  	if err != nil {
   232  		return fmt.Errorf("cannot connect to bootstrap instance: %v", err)
   233  	}
   234  	progress("restoring bootstrap machine")
   235  	newInstId, machine0Addr, err := restoreBootstrapMachine(conn, c.backupFile, agentConf)
   236  	if err != nil {
   237  		return fmt.Errorf("cannot restore bootstrap machine: %v", err)
   238  	}
   239  	progress("restored bootstrap machine")
   240  	// Update the environ state to point to the new instance.
   241  	if err := bootstrap.SaveState(env.Storage(), &bootstrap.BootstrapState{
   242  		StateInstances: []instance.Id{newInstId},
   243  	}); err != nil {
   244  		return fmt.Errorf("cannot update environ bootstrap state storage: %v", err)
   245  	}
   246  	// Construct our own state info rather than using juju.NewConn so
   247  	// that we can avoid storage eventual-consistency issues
   248  	// (and it's faster too).
   249  	caCert, ok := cfg.CACert()
   250  	if !ok {
   251  		return fmt.Errorf("configuration has no CA certificate")
   252  	}
   253  	progress("opening state")
   254  	st, err := state.Open(&state.Info{
   255  		Addrs:    []string{fmt.Sprintf("%s:%d", machine0Addr, cfg.StatePort())},
   256  		CACert:   caCert,
   257  		Tag:      agentConf.Credentials.Tag,
   258  		Password: agentConf.Credentials.Password,
   259  	}, state.DefaultDialOpts(), environs.NewStatePolicy())
   260  	if err != nil {
   261  		return fmt.Errorf("cannot open state: %v", err)
   262  	}
   263  	progress("updating all machines")
   264  	if err := updateAllMachines(st, machine0Addr); err != nil {
   265  		return fmt.Errorf("cannot update machines: %v", err)
   266  	}
   267  	return nil
   268  }
   269  
   270  func progress(f string, a ...interface{}) {
   271  	fmt.Printf("%s\n", fmt.Sprintf(f, a...))
   272  }
   273  
   274  func rebootstrap(cfg *config.Config, ctx *cmd.Context, cons constraints.Value) (environs.Environ, error) {
   275  	progress("re-bootstrapping environment")
   276  	// Turn on safe mode so that the newly bootstrapped instance
   277  	// will not destroy all the instances it does not know about.
   278  	cfg, err := cfg.Apply(map[string]interface{}{
   279  		"provisioner-safe-mode": true,
   280  	})
   281  	if err != nil {
   282  		return nil, fmt.Errorf("cannot enable provisioner-safe-mode: %v", err)
   283  	}
   284  	env, err := environs.New(cfg)
   285  	if err != nil {
   286  		return nil, err
   287  	}
   288  	state, err := bootstrap.LoadState(env.Storage())
   289  	if err != nil {
   290  		return nil, fmt.Errorf("cannot retrieve environment storage; perhaps the environment was not bootstrapped: %v", err)
   291  	}
   292  	if len(state.StateInstances) == 0 {
   293  		return nil, fmt.Errorf("no instances found on bootstrap state; perhaps the environment was not bootstrapped")
   294  	}
   295  	if len(state.StateInstances) > 1 {
   296  		return nil, fmt.Errorf("restore does not support HA juju configurations yet")
   297  	}
   298  	inst, err := env.Instances(state.StateInstances)
   299  	if err == nil {
   300  		return nil, fmt.Errorf("old bootstrap instance %q still seems to exist; will not replace", inst)
   301  	}
   302  	if err != environs.ErrNoInstances {
   303  		return nil, fmt.Errorf("cannot detect whether old instance is still running: %v", err)
   304  	}
   305  	// Remove the storage so that we can bootstrap without the provider complaining.
   306  	if err := env.Storage().Remove(bootstrap.StateFile); err != nil {
   307  		return nil, fmt.Errorf("cannot remove %q from storage: %v", bootstrap.StateFile, err)
   308  	}
   309  
   310  	// TODO If we fail beyond here, then we won't have a state file and
   311  	// we won't be able to re-run this script because it fails without it.
   312  	// We could either try to recreate the file if we fail (which is itself
   313  	// error-prone) or we could provide a --no-check flag to make
   314  	// it go ahead anyway without the check.
   315  
   316  	args := environs.BootstrapParams{Constraints: cons}
   317  	if err := bootstrap.Bootstrap(ctx, env, args); err != nil {
   318  		return nil, fmt.Errorf("cannot bootstrap new instance: %v", err)
   319  	}
   320  	return env, nil
   321  }
   322  
   323  func restoreBootstrapMachine(conn *juju.APIConn, backupFile string, agentConf agentConfig) (newInstId instance.Id, addr string, err error) {
   324  	client := conn.State.Client()
   325  	addr, err = client.PublicAddress("0")
   326  	if err != nil {
   327  		return "", "", fmt.Errorf("cannot get public address of bootstrap machine: %v", err)
   328  	}
   329  	paddr, err := client.PrivateAddress("0")
   330  	if err != nil {
   331  		return "", "", fmt.Errorf("cannot get private address of bootstrap machine: %v", err)
   332  	}
   333  	status, err := client.Status(nil)
   334  	if err != nil {
   335  		return "", "", fmt.Errorf("cannot get environment status: %v", err)
   336  	}
   337  	info, ok := status.Machines["0"]
   338  	if !ok {
   339  		return "", "", fmt.Errorf("cannot find bootstrap machine in status")
   340  	}
   341  	newInstId = instance.Id(info.InstanceId)
   342  
   343  	progress("copying backup file to bootstrap host")
   344  	if err := sendViaScp(backupFile, addr, "~/juju-backup.tgz"); err != nil {
   345  		return "", "", fmt.Errorf("cannot copy backup file to bootstrap instance: %v", err)
   346  	}
   347  	progress("updating bootstrap machine")
   348  	if err := runViaSsh(addr, updateBootstrapMachineScript(newInstId, agentConf, addr, paddr)); err != nil {
   349  		return "", "", fmt.Errorf("update script failed: %v", err)
   350  	}
   351  	return newInstId, addr, nil
   352  }
   353  
// credentials holds the login details recovered from the backed-up
// machine-0 agent configuration (see extractConfig).
type credentials struct {
	// Tag is the entity tag used to log in; extractConfig always sets
	// it to "machine-0".
	Tag string
	// Password is the value of "statepassword" in agent.conf.
	Password string
	// OldPassword is the value of "oldpassword" in agent.conf.
	OldPassword string
}
   359  
// agentConfig is the subset of the backed-up machine-0 agent
// configuration that the restore scripts need.
type agentConfig struct {
	Credentials credentials
	// ApiPort is the "apiport" setting rendered as a decimal string.
	ApiPort string
	// StatePort is the "stateport" setting rendered as a decimal string.
	StatePort string
}
   365  
   366  func extractConfig(backupFile string) (agentConfig, error) {
   367  	f, err := os.Open(backupFile)
   368  	if err != nil {
   369  		return agentConfig{}, err
   370  	}
   371  	defer f.Close()
   372  	gzr, err := gzip.NewReader(f)
   373  	if err != nil {
   374  		return agentConfig{}, fmt.Errorf("cannot unzip %q: %v", backupFile, err)
   375  	}
   376  	defer gzr.Close()
   377  	outerTar, err := findFileInTar(gzr, "juju-backup/root.tar")
   378  	if err != nil {
   379  		return agentConfig{}, err
   380  	}
   381  	agentConf, err := findFileInTar(outerTar, "var/lib/juju/agents/machine-0/agent.conf")
   382  	if err != nil {
   383  		return agentConfig{}, err
   384  	}
   385  	data, err := ioutil.ReadAll(agentConf)
   386  	if err != nil {
   387  		return agentConfig{}, fmt.Errorf("failed to read agent config file: %v", err)
   388  	}
   389  	var conf interface{}
   390  	if err := goyaml.Unmarshal(data, &conf); err != nil {
   391  		return agentConfig{}, fmt.Errorf("cannot unmarshal agent config file: %v", err)
   392  	}
   393  	m, ok := conf.(map[interface{}]interface{})
   394  	if !ok {
   395  		return agentConfig{}, fmt.Errorf("config file unmarshalled to %T not %T", conf, m)
   396  	}
   397  	password, ok := m["statepassword"].(string)
   398  	if !ok || password == "" {
   399  		return agentConfig{}, fmt.Errorf("agent password not found in configuration")
   400  	}
   401  	oldPassword, ok := m["oldpassword"].(string)
   402  	if !ok || oldPassword == "" {
   403  		return agentConfig{}, fmt.Errorf("agent old password not found in configuration")
   404  	}
   405  	statePortNum, ok := m["stateport"].(int)
   406  	if !ok {
   407  		return agentConfig{}, fmt.Errorf("state port not found in configuration")
   408  	}
   409  
   410  	statePort := strconv.Itoa(statePortNum)
   411  	apiPortNum, ok := m["apiport"].(int)
   412  	if !ok {
   413  		return agentConfig{}, fmt.Errorf("api port not found in configuration")
   414  	}
   415  	apiPort := strconv.Itoa(apiPortNum)
   416  
   417  	return agentConfig{
   418  		Credentials: credentials{
   419  			Tag:         "machine-0",
   420  			Password:    password,
   421  			OldPassword: oldPassword,
   422  		},
   423  		StatePort: statePort,
   424  		ApiPort:   apiPort,
   425  	}, nil
   426  }
   427  
   428  func findFileInTar(r io.Reader, name string) (io.Reader, error) {
   429  	tarr := tar.NewReader(r)
   430  	for {
   431  		hdr, err := tarr.Next()
   432  		if err != nil {
   433  			return nil, fmt.Errorf("%q not found: %v", name, err)
   434  		}
   435  		if path.Clean(hdr.Name) == name {
   436  			return tarr, nil
   437  		}
   438  	}
   439  }
   440  
// agentAddressTemplate is the shell script template run on every
// non-state-server machine (see setAgentAddressScript). For each entry
// under /var/lib/juju/agents the rendered script stops the agent's
// jujud service, rewrites the host part of the first stateaddresses and
// apiaddresses entries in agent.conf to {{.Address}} (keeping the port),
// resets the stored relation change-version of unit agents to 0, and
// starts the service again.
var agentAddressTemplate = mustParseTemplate(`
set -exu
cd /var/lib/juju/agents
for agent in *
do
	initctl stop jujud-$agent
	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" $agent/agent.conf

	# If we're processing a unit agent's directly
	# and it has some relations, reset
	# the stored version of all of them to
	# ensure that any relation hooks will
	# fire.
	if [[ $agent = unit-* ]]
	then
		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
	fi
	initctl start jujud-$agent
done
`)
   464  
   465  // setAgentAddressScript generates an ssh script argument to update state addresses
   466  func setAgentAddressScript(stateAddr string) string {
   467  	return execTemplate(agentAddressTemplate, struct {
   468  		Address string
   469  	}{stateAddr})
   470  }
   471  
   472  // updateAllMachines finds all machines and resets the stored state address
   473  // in each of them. The address does not include the port.
   474  func updateAllMachines(st *state.State, stateAddr string) error {
   475  	machines, err := st.AllMachines()
   476  	if err != nil {
   477  		return err
   478  	}
   479  	pendingMachineCount := 0
   480  	done := make(chan error)
   481  	for _, machine := range machines {
   482  		// A newly resumed state server requires no updating, and more
   483  		// than one state server is not yet support by this plugin.
   484  		if machine.IsManager() || machine.Life() == state.Dead {
   485  			continue
   486  		}
   487  		pendingMachineCount++
   488  		machine := machine
   489  		go func() {
   490  			err := runMachineUpdate(machine, setAgentAddressScript(stateAddr))
   491  			if err != nil {
   492  				logger.Errorf("failed to update machine %s: %v", machine, err)
   493  			} else {
   494  				progress("updated machine %s", machine)
   495  			}
   496  			done <- err
   497  		}()
   498  	}
   499  	err = nil
   500  	for ; pendingMachineCount > 0; pendingMachineCount-- {
   501  		if updateErr := <-done; updateErr != nil && err == nil {
   502  			err = fmt.Errorf("machine update failed")
   503  		}
   504  	}
   505  	return err
   506  }
   507  
   508  // runMachineUpdate connects via ssh to the machine and runs the update script
   509  func runMachineUpdate(m *state.Machine, sshArg string) error {
   510  	progress("updating machine: %v\n", m)
   511  	addr := instance.SelectPublicAddress(m.Addresses())
   512  	if addr == "" {
   513  		return fmt.Errorf("no appropriate public address found")
   514  	}
   515  	return runViaSsh(addr, sshArg)
   516  }
   517  
   518  func runViaSsh(addr string, script string) error {
   519  	// This is taken from cmd/juju/ssh.go there is no other clear way to set user
   520  	userAddr := "ubuntu@" + addr
   521  	cmd := ssh.Command(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, nil)
   522  	var stderrBuf bytes.Buffer
   523  	var stdoutBuf bytes.Buffer
   524  	cmd.Stderr = &stderrBuf
   525  	cmd.Stdout = &stdoutBuf
   526  	err := cmd.Run()
   527  	if err != nil {
   528  		return fmt.Errorf("ssh command failed: %v (%q)", err, stderrBuf.String())
   529  	}
   530  	progress("ssh command succedded: %q", stdoutBuf.String())
   531  	return nil
   532  }
   533  
   534  func sendViaScp(file, host, destFile string) error {
   535  	err := ssh.Copy([]string{file, "ubuntu@" + host + ":" + destFile}, nil)
   536  	if err != nil {
   537  		return fmt.Errorf("scp command failed: %v", err)
   538  	}
   539  	return nil
   540  }
   541  
   542  func mustParseTemplate(templ string) *template.Template {
   543  	t := template.New("").Funcs(template.FuncMap{
   544  		"shquote": utils.ShQuote,
   545  	})
   546  	return template.Must(t.Parse(templ))
   547  }
   548  
   549  func execTemplate(tmpl *template.Template, data interface{}) string {
   550  	var buf bytes.Buffer
   551  	err := tmpl.Execute(&buf, data)
   552  	if err != nil {
   553  		panic(fmt.Errorf("template error: %v", err))
   554  	}
   555  	return buf.String()
   556  }