github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/backups/restore.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build !windows
     5  
     6  package backups
     7  
     8  import (
     9  	"bytes"
    10  	"net"
    11  	"os"
    12  	"strconv"
    13  	"sync"
    14  	"text/template"
    15  	"time"
    16  
    17  	"github.com/juju/errors"
    18  	"github.com/juju/utils"
    19  	"github.com/juju/utils/ssh"
    20  	"gopkg.in/juju/names.v2"
    21  	"gopkg.in/mgo.v2"
    22  	"gopkg.in/mgo.v2/bson"
    23  
    24  	"github.com/juju/juju/agent"
    25  	"github.com/juju/juju/environs"
    26  	"github.com/juju/juju/instance"
    27  	"github.com/juju/juju/mongo"
    28  	"github.com/juju/juju/network"
    29  	"github.com/juju/juju/state"
    30  	"github.com/juju/juju/state/stateenvirons"
    31  	"github.com/juju/juju/worker/peergrouper"
    32  )
    33  
    34  // resetReplicaSet re-initiates replica-set using the new controller
    35  // values, this is required after a mongo restore.
    36  // In case of failure returns error.
    37  func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error {
    38  	params := peergrouper.InitiateMongoParams{
    39  		DialInfo:       dialInfo,
    40  		MemberHostPort: memberHostPort,
    41  		User:           dialInfo.Username,
    42  		Password:       dialInfo.Password,
    43  	}
    44  	return peergrouper.InitiateMongoServer(params)
    45  }
    46  
    47  var filesystemRoot = getFilesystemRoot
    48  
    49  func getFilesystemRoot() string {
    50  	return string(os.PathSeparator)
    51  }
    52  
    53  // tagUserCredentials is a convenience function that extracts the
    54  // tag user and apipassword, required to access mongodb.
    55  func tagUserCredentials(conf agent.Config) (string, string, error) {
    56  	username := conf.Tag().String()
    57  	var password string
    58  	// TODO(perrito) we might need an accessor for the actual state password
    59  	// just in case it ever changes from the same as api password.
    60  	apiInfo, ok := conf.APIInfo()
    61  	if ok {
    62  		password = apiInfo.Password
    63  	} else {
    64  		// There seems to be no way to reach this inconsistence other than making a
    65  		// backup on a machine where these fields are corrupted and even so I find
    66  		// no reasonable way to reach this state, yet since APIInfo has it as a
    67  		// possibility I prefer to handle it, we cannot recover from this since
    68  		// it would mean that the agent.conf is corrupted.
    69  		return "", "", errors.New("cannot obtain password to access the controller")
    70  	}
    71  	return username, password, nil
    72  }
    73  
    74  // newDialInfo returns mgo.DialInfo with the given address using the minimal
    75  // possible setup.
    76  func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) {
    77  	dialOpts := mongo.DialOpts{Direct: true}
    78  	ssi, ok := conf.StateServingInfo()
    79  	if !ok {
    80  		return nil, errors.Errorf("cannot get state serving info to dial")
    81  	}
    82  	info := mongo.Info{
    83  		Addrs:  []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))},
    84  		CACert: conf.CACert(),
    85  	}
    86  	dialInfo, err := mongo.DialInfo(info, dialOpts)
    87  	if err != nil {
    88  		return nil, errors.Annotate(err, "cannot produce a dial info")
    89  	}
    90  	oldPassword := conf.OldPassword()
    91  	if oldPassword != "" {
    92  		dialInfo.Username = "admin"
    93  		dialInfo.Password = conf.OldPassword()
    94  	} else {
    95  		dialInfo.Username, dialInfo.Password, err = tagUserCredentials(conf)
    96  		if err != nil {
    97  			return nil, errors.Trace(err)
    98  		}
    99  	}
   100  	return dialInfo, nil
   101  }
   102  
   103  // updateMongoEntries will update the machine entries in the restored mongo to
   104  // reflect the real machine instanceid in case it changed (a newly bootstraped
   105  // server).
   106  func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error {
   107  	session, err := mgo.DialWithInfo(dialInfo)
   108  	if err != nil {
   109  		return errors.Annotate(err, "cannot connect to mongo to update")
   110  	}
   111  	defer session.Close()
   112  	// TODO(perrito666): Take the Machine id from an autoritative source
   113  	err = session.DB("juju").C("machines").Update(
   114  		bson.M{"machineid": oldMachineId},
   115  		bson.M{"$set": bson.M{"instanceid": string(newInstId)}},
   116  	)
   117  	if err != nil {
   118  		return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId)
   119  	}
   120  	return nil
   121  }
   122  
   123  // updateMachineAddresses will update the machine doc to the current addresses
   124  func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error {
   125  	privateAddressAddress := network.Address{
   126  		Value: privateAddress,
   127  		Type:  network.DeriveAddressType(privateAddress),
   128  	}
   129  	publicAddressAddress := network.Address{
   130  		Value: publicAddress,
   131  		Type:  network.DeriveAddressType(publicAddress),
   132  	}
   133  	if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil {
   134  		return errors.Trace(err)
   135  	}
   136  	return nil
   137  }
   138  
   139  // assign to variables for testing purposes.
   140  var mongoDefaultDialOpts = mongo.DefaultDialOpts
   141  var environsGetNewPolicyFunc = stateenvirons.GetNewPolicyFunc
   142  
   143  // newStateConnection tries to connect to the newly restored controller.
   144  func newStateConnection(controllerTag names.ControllerTag, modelTag names.ModelTag, info *mongo.MongoInfo) (*state.State, error) {
   145  	// We need to retry here to allow mongo to come up on the restored controller.
   146  	// The connection might succeed due to the mongo dial retries but there may still
   147  	// be a problem issuing database commands.
   148  	var (
   149  		st  *state.State
   150  		err error
   151  	)
   152  	const (
   153  		newStateConnDelay       = 15 * time.Second
   154  		newStateConnMinAttempts = 8
   155  	)
   156  	// TODO(katco): 2016-08-09: lp:1611427
   157  	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
   158  	getEnviron := stateenvirons.GetNewEnvironFunc(environs.New)
   159  	for a := attempt.Start(); a.Next(); {
   160  		st, err = state.Open(modelTag, controllerTag, info, mongoDefaultDialOpts(), environsGetNewPolicyFunc(getEnviron))
   161  		if err == nil {
   162  			return st, nil
   163  		}
   164  		logger.Errorf("cannot open state, retrying: %v", err)
   165  	}
   166  	return st, errors.Annotate(err, "cannot open state")
   167  }
   168  
   169  type machineModel struct {
   170  	machine *state.Machine
   171  	model   *state.Model
   172  }
   173  
   174  // updateAllMachines finds all machines and resets the stored state address
   175  // in each of them. The address does not include the port.
   176  // It is too late to go back and errors in a couple of agents have
   177  // better chance of being fixed by the user, if we were to fail
   178  // we risk an inconsistent controller because of one unresponsive
   179  // agent, we should nevertheless return the err info to the user.
   180  func updateAllMachines(privateAddress, publicAddress string, machines []machineModel) error {
   181  	var machineUpdating sync.WaitGroup
   182  	for _, item := range machines {
   183  		machine := item.machine
   184  		// A newly resumed controller requires no updating, and more
   185  		// than one controller is not yet supported by this code.
   186  		if machine.IsManager() || machine.Life() == state.Dead {
   187  			continue
   188  		}
   189  		machineUpdating.Add(1)
   190  		go func(machine *state.Machine, model *state.Model) {
   191  			defer machineUpdating.Done()
   192  			logger.Debugf("updating addresses for machine %s in model %s/%s", machine.Tag().Id(), model.Owner().Canonical(), model.Name())
   193  			// TODO: thumper 2016-09-20
   194  			// runMachineUpdate only handles linux machines, what about windows?
   195  			err := runMachineUpdate(machine, setAgentAddressScript(privateAddress, publicAddress))
   196  			if err != nil {
   197  				logger.Errorf("failed updating machine: %v", err)
   198  			}
   199  		}(machine, item.model)
   200  	}
   201  	machineUpdating.Wait()
   202  
   203  	// We should return errors encapsulated in a digest.
   204  	return nil
   205  }
   206  
   207  // agentAddressAndRelationsTemplate is the template used to replace the api server data
   208  // in the agents for the new ones if the machine has been rebootstraped it will also reset
   209  // the relations so hooks will re-fire.
   210  var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(`
   211  set -xu
   212  cd /var/lib/juju/agents
   213  for agent in *
   214  do
   215  	service jujud-$agent stop > /dev/null
   216  
   217  	# The below statement will work in cases where there
   218  	# is a private address for the api server only
   219  	# or where there are a private and a public, which are
   220  	# the two common cases.
   221  	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
   222  		n
   223  		s/- .*(:[0-9]+)/- {{.Address}}\1/
   224  		n
   225  		s/- .*(:[0-9]+)/- {{.PubAddress}}\1/
   226  	}" $agent/agent.conf
   227  
   228  	# If we're processing a unit agent's directly
   229  	# and it has some relations, reset
   230  	# the stored version of all of them to
   231  	# ensure that any relation hooks will
   232  	# fire.
   233  	if [[ $agent = unit-* ]]
   234  	then
   235  		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
   236  	fi
   237  	service jujud-$agent start > /dev/null
   238  done
   239  `))
   240  
   241  // setAgentAddressScript generates an ssh script argument to update state addresses.
   242  func setAgentAddressScript(stateAddr, statePubAddr string) string {
   243  	var buf bytes.Buffer
   244  	err := agentAddressAndRelationsTemplate.Execute(&buf, struct {
   245  		Address    string
   246  		PubAddress string
   247  	}{stateAddr, statePubAddr})
   248  	if err != nil {
   249  		panic(errors.Annotate(err, "template error"))
   250  	}
   251  	return buf.String()
   252  }
   253  
   254  // runMachineUpdate connects via ssh to the machine and runs the update script.
   255  func runMachineUpdate(machine *state.Machine, sshArg string) error {
   256  	addr, err := machine.PublicAddress()
   257  	if err != nil {
   258  		if network.IsNoAddressError(err) {
   259  			return errors.Annotatef(err, "no appropriate public address found")
   260  		}
   261  		return errors.Trace(err)
   262  	}
   263  	return runViaSSH(addr.Value, sshArg)
   264  }
   265  
   266  // sshCommand hods ssh.Command type for testing purposes.
   267  var sshCommand = ssh.Command
   268  
   269  // runViaSSH runs script in the remote machine with address addr.
   270  func runViaSSH(addr string, script string) error {
   271  	// This is taken from cmd/juju/ssh.go there is no other clear way to set user
   272  	userAddr := "ubuntu@" + addr
   273  	sshOptions := ssh.Options{}
   274  	sshOptions.SetIdentities("/var/lib/juju/system-identity")
   275  	userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions)
   276  	var stdoutBuf bytes.Buffer
   277  	var stderrBuf bytes.Buffer
   278  	userCmd.Stdout = &stdoutBuf
   279  	userCmd.Stderr = &stderrBuf
   280  	logger.Debugf("updating %s, script:\n%s", addr, script)
   281  	if err := userCmd.Run(); err != nil {
   282  		return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String())
   283  	}
   284  	logger.Debugf("result %s\nstdout: \n%s\nstderr: %s", addr, stdoutBuf.String(), stderrBuf.String())
   285  	return nil
   286  }