github.com/mhilton/juju-juju@v0.0.0-20150901100907-a94dd2c73455/state/backups/restore.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build !windows
     5  
     6  package backups
     7  
     8  import (
     9  	"bytes"
    10  	"fmt"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"strings"
    15  	"sync"
    16  	"text/template"
    17  	"time"
    18  
    19  	"github.com/juju/errors"
    20  	"github.com/juju/names"
    21  	"github.com/juju/utils"
    22  	"github.com/juju/utils/symlink"
    23  	"gopkg.in/mgo.v2"
    24  	"gopkg.in/mgo.v2/bson"
    25  
    26  	"github.com/juju/juju/agent"
    27  	"github.com/juju/juju/agent/tools"
    28  	"github.com/juju/juju/environs"
    29  	"github.com/juju/juju/instance"
    30  	"github.com/juju/juju/mongo"
    31  	"github.com/juju/juju/network"
    32  	"github.com/juju/juju/state"
    33  	"github.com/juju/juju/utils/ssh"
    34  	"github.com/juju/juju/worker/peergrouper"
    35  )
    36  
// restoreUserHome is the path "/home/ubuntu/" (note the trailing slash).
// NOTE(review): presumably the home directory of the user that restore
// operates as; it is not referenced in the part of the file visible here.
// TODO(perrito666) create an authoritative source for all possible
// uses of this const, not only here but all around juju
const restoreUserHome = "/home/ubuntu/"
    40  
    41  // resetReplicaSet re-initiates replica-set using the new state server
    42  // values, this is required after a mongo restore.
    43  // In case of failure returns error.
    44  func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error {
    45  	params := peergrouper.InitiateMongoParams{dialInfo,
    46  		memberHostPort,
    47  		dialInfo.Username,
    48  		dialInfo.Password,
    49  	}
    50  	return peergrouper.InitiateMongoServer(params, true)
    51  }
    52  
    53  var filesystemRoot = getFilesystemRoot
    54  
    55  func getFilesystemRoot() string {
    56  	return string(os.PathSeparator)
    57  }
    58  
    59  // newDialInfo returns mgo.DialInfo with the given address using the minimal
    60  // possible setup.
    61  func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) {
    62  	dialOpts := mongo.DialOpts{Direct: true}
    63  	ssi, ok := conf.StateServingInfo()
    64  	if !ok {
    65  		return nil, errors.Errorf("cannot get state serving info to dial")
    66  	}
    67  	info := mongo.Info{
    68  		Addrs:  []string{fmt.Sprintf("%s:%d", privateAddr, ssi.StatePort)},
    69  		CACert: conf.CACert(),
    70  	}
    71  	dialInfo, err := mongo.DialInfo(info, dialOpts)
    72  	if err != nil {
    73  		return nil, errors.Annotate(err, "cannot produce a dial info")
    74  	}
    75  	dialInfo.Username = "admin"
    76  	dialInfo.Password = conf.OldPassword()
    77  	return dialInfo, nil
    78  }
    79  
    80  // updateMongoEntries will update the machine entries in the restored mongo to
    81  // reflect the real machine instanceid in case it changed (a newly bootstraped
    82  // server).
    83  func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error {
    84  	session, err := mgo.DialWithInfo(dialInfo)
    85  	if err != nil {
    86  		return errors.Annotate(err, "cannot connect to mongo to update")
    87  	}
    88  	defer session.Close()
    89  	// TODO(perrito666): Take the Machine id from an autoritative source
    90  	err = session.DB("juju").C("machines").Update(
    91  		bson.M{"machineid": oldMachineId},
    92  		bson.M{"$set": bson.M{"instanceid": string(newInstId),
    93  			"machineid": newMachineId}},
    94  	)
    95  	if err != nil {
    96  		return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId)
    97  	}
    98  	return nil
    99  }
   100  
   101  // updateMachineAddresses will update the machine doc to the current addresses
   102  func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error {
   103  	privateAddressAddress := network.Address{
   104  		Value: privateAddress,
   105  		Type:  network.DeriveAddressType(privateAddress),
   106  	}
   107  	publicAddressAddress := network.Address{
   108  		Value: publicAddress,
   109  		Type:  network.DeriveAddressType(publicAddress),
   110  	}
   111  	if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil {
   112  		return errors.Trace(err)
   113  	}
   114  	return nil
   115  }
   116  
// mongoDefaultDialOpts and environsNewStatePolicy hold
// mongo.DefaultDialOpts and environs.NewStatePolicy respectively.
// They are assigned to variables for testing purposes.
var mongoDefaultDialOpts = mongo.DefaultDialOpts
var environsNewStatePolicy = environs.NewStatePolicy
   120  
   121  // newStateConnection tries to connect to the newly restored state server.
   122  func newStateConnection(environTag names.EnvironTag, info *mongo.MongoInfo) (*state.State, error) {
   123  	// We need to retry here to allow mongo to come up on the restored state server.
   124  	// The connection might succeed due to the mongo dial retries but there may still
   125  	// be a problem issuing database commands.
   126  	var (
   127  		st  *state.State
   128  		err error
   129  	)
   130  	const (
   131  		newStateConnDelay       = 15 * time.Second
   132  		newStateConnMinAttempts = 8
   133  	)
   134  	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
   135  	for a := attempt.Start(); a.Next(); {
   136  		st, err = state.Open(environTag, info, mongoDefaultDialOpts(), environsNewStatePolicy())
   137  		if err == nil {
   138  			return st, nil
   139  		}
   140  		logger.Errorf("cannot open state, retrying: %v", err)
   141  	}
   142  	return st, errors.Annotate(err, "cannot open state")
   143  }
   144  
   145  // updateAllMachines finds all machines and resets the stored state address
   146  // in each of them. The address does not include the port.
   147  // It is too late to go back and errors in a couple of agents have
   148  // better chance of being fixed by the user, if we were to fail
   149  // we risk an inconsistent state server because of one unresponsive
   150  // agent, we should nevertheless return the err info to the user.
   151  func updateAllMachines(privateAddress string, machines []*state.Machine) error {
   152  	var machineUpdating sync.WaitGroup
   153  	for key := range machines {
   154  		// key is used to have machine be scope bound to the loop iteration.
   155  		machine := machines[key]
   156  		// A newly resumed state server requires no updating, and more
   157  		// than one state server is not yet supported by this code.
   158  		if machine.IsManager() || machine.Life() == state.Dead {
   159  			continue
   160  		}
   161  		machineUpdating.Add(1)
   162  		go func() {
   163  			defer machineUpdating.Done()
   164  			err := runMachineUpdate(machine.Addresses(), setAgentAddressScript(privateAddress))
   165  			logger.Errorf("failed updating machine: %v", err)
   166  		}()
   167  	}
   168  	machineUpdating.Wait()
   169  
   170  	// We should return errors encapsulated in a digest.
   171  	return nil
   172  }
   173  
// agentAddressAndRelationsTemplate is the template of the shell script used
// to replace the API server data in the agents with the new one. If the
// machine has been re-bootstrapped it also resets the relations so that
// hooks will re-fire.
//
// The template takes a single field, Address. For every entry found in
// /var/lib/juju/agents the script:
//   - stops the jujud-<agent> job when status reports it as started;
//   - edits <agent>/agent.conf in place (keeping a .old copy), replacing the
//     host in the line immediately after a "stateaddresses:" or
//     "apiaddresses:" key with Address while preserving the port;
//   - for unit-* agents, sets change-version to 0 in the files under
//     <agent>/state/relations;
//   - starts the jujud-<agent> job when status reports it as stopped.
var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(`
set -xu
cd /var/lib/juju/agents
for agent in *
do
	status  jujud-$agent| grep -q "^jujud-$agent start" > /dev/null
	if [ $? -eq 0 ]; then
		initctl stop jujud-$agent 
	fi
	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" $agent/agent.conf

	# If we're processing a unit agent's directly
	# and it has some relations, reset
	# the stored version of all of them to
	# ensure that any relation hooks will
	# fire.
	if [[ $agent = unit-* ]]
	then
		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
	fi
	# Just in case is a stale unit
	status  jujud-$agent| grep -q "^jujud-$agent stop" > /dev/null
	if [ $? -eq 0 ]; then
		initctl start jujud-$agent
	fi
done
`))
   207  
   208  // setAgentAddressScript generates an ssh script argument to update state addresses.
   209  func setAgentAddressScript(stateAddr string) string {
   210  	var buf bytes.Buffer
   211  	err := agentAddressAndRelationsTemplate.Execute(&buf, struct {
   212  		Address string
   213  	}{stateAddr})
   214  	if err != nil {
   215  		panic(errors.Annotate(err, "template error"))
   216  	}
   217  	return buf.String()
   218  }
   219  
   220  // runMachineUpdate connects via ssh to the machine and runs the update script.
   221  func runMachineUpdate(allAddr []network.Address, sshArg string) error {
   222  	addr := network.SelectPublicAddress(allAddr)
   223  	if addr == "" {
   224  		return errors.Errorf("no appropriate public address found")
   225  	}
   226  	return runViaSSH(addr, sshArg)
   227  }
   228  
// sshCommand holds ssh.Command in a variable for testing purposes.
var sshCommand = ssh.Command
   231  
   232  // runViaSSH runs script in the remote machine with address addr.
   233  func runViaSSH(addr string, script string) error {
   234  	// This is taken from cmd/juju/ssh.go there is no other clear way to set user
   235  	userAddr := "ubuntu@" + addr
   236  	sshOptions := ssh.Options{}
   237  	sshOptions.SetIdentities("/var/lib/juju/system-identity")
   238  	userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions)
   239  	var stderrBuf bytes.Buffer
   240  	userCmd.Stderr = &stderrBuf
   241  	if err := userCmd.Run(); err != nil {
   242  		return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String())
   243  	}
   244  	return nil
   245  }
   246  
   247  // updateBackupMachineTag updates the paths that are stored in the backup
   248  // to the current machine. This path is tied, among other factors, to the
   249  // machine tag.
   250  // Eventually this will change: when backups hold relative paths.
   251  func updateBackupMachineTag(oldTag, newTag names.Tag) error {
   252  	oldTagString := oldTag.String()
   253  	newTagString := newTag.String()
   254  
   255  	if oldTagString == newTagString {
   256  		return nil
   257  	}
   258  	oldTagPath := path.Join(agent.DefaultDataDir, oldTagString)
   259  	newTagPath := path.Join(agent.DefaultDataDir, newTagString)
   260  
   261  	oldToolsDir := tools.ToolsDir(agent.DefaultDataDir, oldTagString)
   262  	oldLink, err := filepath.EvalSymlinks(oldToolsDir)
   263  
   264  	os.Rename(oldTagPath, newTagPath)
   265  	newToolsDir := tools.ToolsDir(agent.DefaultDataDir, newTagString)
   266  	newToolsPath := strings.Replace(oldLink, oldTagPath, newTagPath, -1)
   267  	err = symlink.Replace(newToolsDir, newToolsPath)
   268  	return errors.Annotatef(err, "cannot set the new tools path")
   269  }