github.com/makyo/juju@v0.0.0-20160425123129-2608902037e9/state/backups/restore.go (about)

     1  // Copyright 2014 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build !windows
     5  
     6  package backups
     7  
     8  import (
     9  	"bytes"
    10  	"net"
    11  	"os"
    12  	"strconv"
    13  	"sync"
    14  	"text/template"
    15  	"time"
    16  
    17  	"github.com/juju/errors"
    18  	"github.com/juju/names"
    19  	"github.com/juju/utils"
    20  	"github.com/juju/utils/ssh"
    21  	"gopkg.in/mgo.v2"
    22  	"gopkg.in/mgo.v2/bson"
    23  
    24  	"github.com/juju/juju/agent"
    25  	"github.com/juju/juju/environs"
    26  	"github.com/juju/juju/instance"
    27  	"github.com/juju/juju/mongo"
    28  	"github.com/juju/juju/network"
    29  	"github.com/juju/juju/state"
    30  	"github.com/juju/juju/worker/peergrouper"
    31  )
    32  
    33  // resetReplicaSet re-initiates replica-set using the new controller
    34  // values, this is required after a mongo restore.
    35  // In case of failure returns error.
    36  func resetReplicaSet(dialInfo *mgo.DialInfo, memberHostPort string) error {
    37  	params := peergrouper.InitiateMongoParams{
    38  		DialInfo:       dialInfo,
    39  		MemberHostPort: memberHostPort,
    40  		User:           dialInfo.Username,
    41  		Password:       dialInfo.Password,
    42  	}
    43  	return peergrouper.InitiateMongoServer(params)
    44  }
    45  
    46  var filesystemRoot = getFilesystemRoot
    47  
    48  func getFilesystemRoot() string {
    49  	return string(os.PathSeparator)
    50  }
    51  
    52  // newDialInfo returns mgo.DialInfo with the given address using the minimal
    53  // possible setup.
    54  func newDialInfo(privateAddr string, conf agent.Config) (*mgo.DialInfo, error) {
    55  	dialOpts := mongo.DialOpts{Direct: true}
    56  	ssi, ok := conf.StateServingInfo()
    57  	if !ok {
    58  		return nil, errors.Errorf("cannot get state serving info to dial")
    59  	}
    60  	info := mongo.Info{
    61  		Addrs:  []string{net.JoinHostPort(privateAddr, strconv.Itoa(ssi.StatePort))},
    62  		CACert: conf.CACert(),
    63  	}
    64  	dialInfo, err := mongo.DialInfo(info, dialOpts)
    65  	if err != nil {
    66  		return nil, errors.Annotate(err, "cannot produce a dial info")
    67  	}
    68  	oldPassword := conf.OldPassword()
    69  	if oldPassword != "" {
    70  		dialInfo.Username = "admin"
    71  		dialInfo.Password = conf.OldPassword()
    72  	} else {
    73  		dialInfo.Username = conf.Tag().String()
    74  		// TODO(perrito) we might need an accessor for the actual state password
    75  		// just in case it ever changes from the same as api password.
    76  		apiInfo, ok := conf.APIInfo()
    77  		if ok {
    78  			dialInfo.Password = apiInfo.Password
    79  			logger.Infof("using API password to access controller.")
    80  		} else {
    81  			// There seems to be no way to reach this inconsistence other than making a
    82  			// backup on a machine where these fields are corrupted and even so I find
    83  			// no reasonable way to reach this state, yet since APIInfo has it as a
    84  			// possibility I prefer to handle it, we cannot recover from this since
    85  			// it would mean that the agent.conf is corrupted.
    86  			return nil, errors.New("cannot obtain password to access the controller")
    87  		}
    88  	}
    89  	return dialInfo, nil
    90  }
    91  
    92  // updateMongoEntries will update the machine entries in the restored mongo to
    93  // reflect the real machine instanceid in case it changed (a newly bootstraped
    94  // server).
    95  func updateMongoEntries(newInstId instance.Id, newMachineId, oldMachineId string, dialInfo *mgo.DialInfo) error {
    96  	session, err := mgo.DialWithInfo(dialInfo)
    97  	if err != nil {
    98  		return errors.Annotate(err, "cannot connect to mongo to update")
    99  	}
   100  	defer session.Close()
   101  	// TODO(perrito666): Take the Machine id from an autoritative source
   102  	err = session.DB("juju").C("machines").Update(
   103  		bson.M{"machineid": oldMachineId},
   104  		bson.M{"$set": bson.M{"instanceid": string(newInstId)}},
   105  	)
   106  	if err != nil {
   107  		return errors.Annotatef(err, "cannot update machine %s instance information", newMachineId)
   108  	}
   109  	return nil
   110  }
   111  
   112  // updateMachineAddresses will update the machine doc to the current addresses
   113  func updateMachineAddresses(machine *state.Machine, privateAddress, publicAddress string) error {
   114  	privateAddressAddress := network.Address{
   115  		Value: privateAddress,
   116  		Type:  network.DeriveAddressType(privateAddress),
   117  	}
   118  	publicAddressAddress := network.Address{
   119  		Value: publicAddress,
   120  		Type:  network.DeriveAddressType(publicAddress),
   121  	}
   122  	if err := machine.SetProviderAddresses(publicAddressAddress, privateAddressAddress); err != nil {
   123  		return errors.Trace(err)
   124  	}
   125  	return nil
   126  }
   127  
   128  // assign to variables for testing purposes.
   129  var mongoDefaultDialOpts = mongo.DefaultDialOpts
   130  var environsNewStatePolicy = environs.NewStatePolicy
   131  
   132  // newStateConnection tries to connect to the newly restored controller.
   133  func newStateConnection(modelTag names.ModelTag, info *mongo.MongoInfo) (*state.State, error) {
   134  	// We need to retry here to allow mongo to come up on the restored controller.
   135  	// The connection might succeed due to the mongo dial retries but there may still
   136  	// be a problem issuing database commands.
   137  	var (
   138  		st  *state.State
   139  		err error
   140  	)
   141  	const (
   142  		newStateConnDelay       = 15 * time.Second
   143  		newStateConnMinAttempts = 8
   144  	)
   145  	attempt := utils.AttemptStrategy{Delay: newStateConnDelay, Min: newStateConnMinAttempts}
   146  	for a := attempt.Start(); a.Next(); {
   147  		st, err = state.Open(modelTag, info, mongoDefaultDialOpts(), environsNewStatePolicy())
   148  		if err == nil {
   149  			return st, nil
   150  		}
   151  		logger.Errorf("cannot open state, retrying: %v", err)
   152  	}
   153  	return st, errors.Annotate(err, "cannot open state")
   154  }
   155  
   156  // updateAllMachines finds all machines and resets the stored state address
   157  // in each of them. The address does not include the port.
   158  // It is too late to go back and errors in a couple of agents have
   159  // better chance of being fixed by the user, if we were to fail
   160  // we risk an inconsistent controller because of one unresponsive
   161  // agent, we should nevertheless return the err info to the user.
   162  func updateAllMachines(privateAddress string, machines []*state.Machine) error {
   163  	var machineUpdating sync.WaitGroup
   164  	for key := range machines {
   165  		// key is used to have machine be scope bound to the loop iteration.
   166  		machine := machines[key]
   167  		// A newly resumed controller requires no updating, and more
   168  		// than one controller is not yet supported by this code.
   169  		if machine.IsManager() || machine.Life() == state.Dead {
   170  			continue
   171  		}
   172  		machineUpdating.Add(1)
   173  		go func() {
   174  			defer machineUpdating.Done()
   175  			err := runMachineUpdate(machine, setAgentAddressScript(privateAddress))
   176  			logger.Errorf("failed updating machine: %v", err)
   177  		}()
   178  	}
   179  	machineUpdating.Wait()
   180  
   181  	// We should return errors encapsulated in a digest.
   182  	return nil
   183  }
   184  
// agentAddressAndRelationsTemplate is the template for the shell script used
// to replace the API server data in the agents with the new values. The only
// template parameter is .Address, the new server address without a port.
//
// For every entry under /var/lib/juju/agents the script:
//   - stops the jujud-<agent> job if status reports it as started;
//   - edits <agent>/agent.conf in place (sed keeps a copy with the .old
//     suffix), rewriting the line that immediately follows each
//     stateaddresses/apiaddresses key so it points at .Address while keeping
//     the original port;
//   - for unit agents, resets change-version to 0 in every file under
//     state/relations so that relation hooks will re-fire;
//   - starts the job again if status reports it as stopped.
var agentAddressAndRelationsTemplate = template.Must(template.New("").Parse(`
set -xu
cd /var/lib/juju/agents
for agent in *
do
	status  jujud-$agent| grep -q "^jujud-$agent start" > /dev/null
	if [ $? -eq 0 ]; then
		initctl stop jujud-$agent
	fi
	sed -i.old -r "/^(stateaddresses|apiaddresses):/{
		n
		s/- .*(:[0-9]+)/- {{.Address}}\1/
	}" $agent/agent.conf

	# If we're processing a unit agent's directly
	# and it has some relations, reset
	# the stored version of all of them to
	# ensure that any relation hooks will
	# fire.
	if [[ $agent = unit-* ]]
	then
		find $agent/state/relations -type f -exec sed -i -r 's/change-version: [0-9]+$/change-version: 0/' {} \;
	fi
	# Just in case is a stale unit
	status  jujud-$agent| grep -q "^jujud-$agent stop" > /dev/null
	if [ $? -eq 0 ]; then
		initctl start jujud-$agent
                systemctl stop jujud-$agent
                systemctl start jujud-$agent
	fi
done
`))
   220  
   221  // setAgentAddressScript generates an ssh script argument to update state addresses.
   222  func setAgentAddressScript(stateAddr string) string {
   223  	var buf bytes.Buffer
   224  	err := agentAddressAndRelationsTemplate.Execute(&buf, struct {
   225  		Address string
   226  	}{stateAddr})
   227  	if err != nil {
   228  		panic(errors.Annotate(err, "template error"))
   229  	}
   230  	return buf.String()
   231  }
   232  
   233  // runMachineUpdate connects via ssh to the machine and runs the update script.
   234  func runMachineUpdate(machine *state.Machine, sshArg string) error {
   235  	addr, err := machine.PublicAddress()
   236  	if err != nil {
   237  		if network.IsNoAddress(err) {
   238  			return errors.Annotatef(err, "no appropriate public address found")
   239  		}
   240  		return errors.Trace(err)
   241  	}
   242  	return runViaSSH(addr.Value, sshArg)
   243  }
   244  
// sshCommand holds the ssh.Command function in a variable for testing purposes.
var sshCommand = ssh.Command
   247  
   248  // runViaSSH runs script in the remote machine with address addr.
   249  func runViaSSH(addr string, script string) error {
   250  	// This is taken from cmd/juju/ssh.go there is no other clear way to set user
   251  	userAddr := "ubuntu@" + addr
   252  	sshOptions := ssh.Options{}
   253  	sshOptions.SetIdentities("/var/lib/juju/system-identity")
   254  	userCmd := sshCommand(userAddr, []string{"sudo", "-n", "bash", "-c " + utils.ShQuote(script)}, &sshOptions)
   255  	var stderrBuf bytes.Buffer
   256  	userCmd.Stderr = &stderrBuf
   257  	if err := userCmd.Run(); err != nil {
   258  		return errors.Annotatef(err, "ssh command failed: %q", stderrBuf.String())
   259  	}
   260  	return nil
   261  }