github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/backups/restore.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build !windows
     5  
     6  package backups
     7  
     8  import (
     9  	"bytes"
    10  	"net"
    11  	"os"
    12  	"strconv"
    13  	"sync"
    14  	"text/template"
    15  	"time"
    16  
    17  	"github.com/juju/clock"
    18  	"github.com/juju/errors"
    19  	"github.com/juju/utils"
    20  	"github.com/juju/utils/ssh"
    21  	"gopkg.in/juju/names.v2"
    22  	"gopkg.in/mgo.v2"
    23  	"gopkg.in/mgo.v2/bson"
    24  
    25  	"github.com/juju/juju/agent"
    26  	"github.com/juju/juju/core/instance"
    27  	"github.com/juju/juju/mongo"
    28  	"github.com/juju/juju/network"
    29  	"github.com/juju/juju/state"
    30  	"github.com/juju/juju/state/stateenvirons"
    31  	"github.com/juju/juju/worker/peergrouper"
    32  )
    33  
    34  // resetReplicaSet re-initiates replica-set using the new controller
    35  // values, this is required after a mongo restore.
    36  // In case of failure returns error.
    37  func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error {
    38  	params := peergrouper.InitiateMongoParams{
    39  		DialInfo:       dialInfo,
    40  		MemberHostPort: memberHostPort,
    41  		User:           dialInfo.Username,
    42  		Password:       dialInfo.Password,
    43  	}
    44  	return peergrouper.InitiateMongoServer(params)
    45  }
    46  
    47  var filesystemRoot = getFilesystemRoot
    48  
    49  func getFilesystemRoot() string {
    50  	return string(os.PathSeparator)
    51  }
    52  
    53  // tagUserCredentials is a convenience function that extracts the
    54  // tag user and apipassword, required to access mongodb.
    55  func tagUserCredentials(conf agent.Config) (string, string, error) {
    56  	username := conf.Tag().String()
    57  	var password string
    58  	// TODO(perrito) we might need an accessor for the actual state password
    59  	// just in case it ever changes from the same as api password.
    60  	apiInfo, ok := conf.APIInfo()
    61  	if ok {
    62  		password = apiInfo.Password
    63  	} else {
    64  		// There seems to be no way to reach this inconsistence other than making a
    65  		// backup on a machine where these fields are corrupted and even so I find
    66  		// no reasonable way to reach this state, yet since APIInfo has it as a
    67  		// possibility I prefer to handle it, we cannot recover from this since
    68  		// it would mean that the agent.conf is corrupted.
    69  		return "", "", errors.New("cannot obtain password to access the controller")
    70  	}
    71  	return username, password, nil
    72  }
    73  
    74  // newDialInfo returns mgo.DialInfo with the given address using the minimal
    75  // possible setup.
    76  func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) {
    77  	dialOpts := mongo.DialOpts{Direct: true}
    78  	ssi, ok := conf.StateServingInfo()
    79  	if !ok {
    80  		return nil, errors.Errorf("cannot get state serving info to dial")
    81  	}
    82  	info := mongo.Info{
    83  		Addrs:  []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))},
    84  		CACert: conf.CACert(),
    85  	}
    86  	dialInfo, err := mongo.DialInfo(info, dialOpts)
    87  	if err != nil {
    88  		return nil, errors.Annotate(err, "cannot produce a dial info")
    89  	}
    90  	oldPassword := conf.OldPassword()
    91  	if oldPassword != "" {
    92  		dialInfo.Username = "admin"
    93  		dialInfo.Password = conf.OldPassword()
    94  	} else {
    95  		dialInfo.Username, dialInfo.Password, err = tagUserCredentials(conf)
    96  		if err != nil {
    97  			return nil, errors.Trace(err)
    98  		}
    99  	}
   100  	return dialInfo, nil
   101  }
   102  
   103  // updateMongoEntries will update the machine entries in the restored mongo to
   104  // reflect the real machine instanceid in case it changed (a newly bootstraped
   105  // server).
   106  func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error {
   107  	session, err := mgo.DialWithInfo(dialInfo)
   108  	if err != nil {
   109  		return errors.Annotate(err, "cannot connect to mongo to update")
   110  	}
   111  	defer session.Close()
   112  	// TODO(perrito666): Take the Machine id from an autoritative source
   113  	err = session.DB("juju").C("machines").Update(
   114  		bson.M{"machineid": oldMachineId},
   115  		bson.M{"$set": bson.M{"instanceid": string(newInstId)}},
   116  	)
   117  	if err != nil {
   118  		return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId)
   119  	}
   120  	return nil
   121  }
   122  
   123  // updateMachineAddresses will update the machine doc to the current addresses
   124  func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error {
   125  	privateAddressAddress := network.Address{
   126  		Value: privateAddress,
   127  		Type:  network.DeriveAddressType(privateAddress),
   128  	}
   129  	publicAddressAddress := network.Address{
   130  		Value: publicAddress,
   131  		Type:  network.DeriveAddressType(publicAddress),
   132  	}
   133  	if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil {
   134  		return errors.Trace(err)
   135  	}
   136  	return nil
   137  }
   138  
   139  // assign to variables for testing purposes.
   140  var mongoDefaultDialOpts = mongo.DefaultDialOpts
   141  var environsGetNewPolicyFunc = stateenvirons.GetNewPolicyFunc
   142  
   143  // connectToDB tries to connect to the newly restored controller.
   144  func connectToDB(controllerTag names.ControllerTag, modelTag names.ModelTag, info *mongo.MongoInfo) (*state.StatePool, error) {
   145  	// We need to retry here to allow mongo to come up on the restored controller.
   146  	// The connection might succeed due to the mongo dial retries but there may still
   147  	// be a problem issuing database commands.
   148  	var (
   149  		pool *state.StatePool
   150  		err  error
   151  	)
   152  	const (
   153  		newStateConnDelay       = 15 * time.Second
   154  		newStateConnMinAttempts = 8
   155  	)
   156  	// TODO(katco): 2016-08-09: lp:1611427
   157  	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
   158  
   159  	session, err := mongo.DialWithInfo(*info, mongoDefaultDialOpts())
   160  	if err != nil {
   161  		return nil, errors.Trace(err)
   162  	}
   163  	defer session.Close()
   164  
   165  	for a := attempt.Start(); a.Next(); {
   166  		pool, err = state.OpenStatePool(state.OpenParams{
   167  			Clock:              clock.WallClock,
   168  			ControllerTag:      controllerTag,
   169  			ControllerModelTag: modelTag,
   170  			MongoSession:       session,
   171  			NewPolicy:          environsGetNewPolicyFunc(),
   172  		})
   173  		if err == nil {
   174  			return pool, nil
   175  		}
   176  		logger.Errorf("cannot open state, retrying: %v", err)
   177  	}
   178  	return nil, errors.Annotate(err, "cannot open state")
   179  }
   180  
// machineModel pairs a machine with a model, as consumed by
// updateAllMachines, which logs the pair as "machine ... in model ...".
// NOTE(review): presumably model is the model the machine belongs to;
// confirm against the code that builds these values.
type machineModel struct {
	machine *state.Machine
	model   *state.Model
}
   185  
   186  // updateAllMachines finds all machines and resets the stored state address
   187  // in each of them. The address does not include the port.
   188  // It is too late to go back and errors in a couple of agents have
   189  // better chance of being fixed by the user, if we were to fail
   190  // we risk an inconsistent controller because of one unresponsive
   191  // agent, we should nevertheless return the err info to the user.
   192  func updateAllMachines(privateAddress, publicAddress string, machines []machineModel) error {
   193  	var machineUpdating sync.WaitGroup
   194  	for _, item := range machines {
   195  		machine := item.machine
   196  		// A newly resumed controller requires no updating, and more
   197  		// than one controller is not yet supported by this code.
   198  		if machine.IsManager() || machine.Life() == state.Dead {
   199  			continue
   200  		}
   201  		machineUpdating.Add(1)
   202  		go func(machine *state.Machine, model *state.Model) {
   203  			defer machineUpdating.Done()
   204  			logger.Debugf("updating addresses for machine %s in model %s/%s", machine.Tag().Id(), model.Owner().Id(), model.Name())
   205  			// TODO: thumper 2016-09-20
   206  			// runMachineUpdate only handles linux machines, what about windows?
   207  			err := runMachineUpdate(machine, setAgentAddressScript(privateAddress, publicAddress))
   208  			if err != nil {
   209  				logger.Errorf("failed updating machine: %v", err)
   210  			}
   211  		}(machine, item.model)
   212  	}
   213  	machineUpdating.Wait()
   214  
   215  	// We should return errors encapsulated in a digest.
   216  	return nil
   217  }
   218  
// agentAddressAndRelationsTemplate is the template of the shell script used
// to replace the API server addresses stored in the agents with the new ones.
// For every agent directory under /var/lib/juju/agents the script stops the
// agent's jujud service, rewrites the addresses listed under the
// stateaddresses/apiaddresses keys of its agent.conf (keeping the existing
// ports), and starts the service again. For unit agents it also resets the
// stored relation change-version values to 0 so that relation hooks re-fire.
//
// The template takes two fields: Address and PubAddress.
var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(`
set -xu
cd /var/lib/juju/agents
for agent in *
do
	service jujud-$agent stop > /dev/null

	# The below statement will work in cases where there
	# is a private address for the api server only
	# or where there are a private and a public, which are
	# the two common cases.
	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
		n
		s/- .*(:[0-9]+)/- {{.PubAddress}}\1/
	}" $agent/agent.conf

	# If we're processing a unit agent's directly
	# and it has some relations, reset
	# the stored version of all of them to
	# ensure that any relation hooks will
	# fire.
	if [[ $agent = unit-* ]]
	then
		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
	fi
	service jujud-$agent start > /dev/null
done
`))
   252  
   253  // setAgentAddressScript generates an ssh script argument to update state addresses.
   254  func setAgentAddressScript(stateAddr, statePubAddr string) string {
   255  	var buf bytes.Buffer
   256  	err := agentAddressAndRelationsTemplate.Execute(&buf, struct {
   257  		Address    string
   258  		PubAddress string
   259  	}{stateAddr, statePubAddr})
   260  	if err != nil {
   261  		panic(errors.Annotate(err, "template error"))
   262  	}
   263  	return buf.String()
   264  }
   265  
   266  // runMachineUpdate connects via ssh to the machine and runs the update script.
   267  func runMachineUpdate(machine *state.Machine, sshArg string) error {
   268  	addr, err := machine.PublicAddress()
   269  	if err != nil {
   270  		if network.IsNoAddressError(err) {
   271  			return errors.Annotatef(err, "no appropriate public address found")
   272  		}
   273  		return errors.Trace(err)
   274  	}
   275  	return runViaSSH(addr.Value, sshArg)
   276  }
   277  
// sshCommand holds ssh.Command in a variable for testing purposes.
var sshCommand = ssh.Command
   280  
   281  // runViaSSH runs script in the remote machine with address addr.
   282  func runViaSSH(addr string, script string) error {
   283  	sshOptions := ssh.Options{}
   284  	sshOptions.SetIdentities("/var/lib/juju/system-identity")
   285  	// Disable host key checking. We're not pushing across anything
   286  	// sensitive, and there's no guarantee that the machine would
   287  	// have published up-to-date host key information.
   288  	sshOptions.SetStrictHostKeyChecking(ssh.StrictHostChecksNo)
   289  	sshOptions.SetKnownHostsFile(os.DevNull)
   290  
   291  	userAddr := "ubuntu@" + addr
   292  	userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions)
   293  	var stdoutBuf bytes.Buffer
   294  	var stderrBuf bytes.Buffer
   295  	userCmd.Stdout = &stdoutBuf
   296  	userCmd.Stderr = &stderrBuf
   297  	logger.Debugf("updating %s, script:\n%s", addr, script)
   298  	if err := userCmd.Run(); err != nil {
   299  		return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String())
   300  	}
   301  	logger.Debugf("result %s\nstdout: \n%s\nstderr: %s", addr, stdoutBuf.String(), stderrBuf.String())
   302  	return nil
   303  }