github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/backups/backups_linux.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build linux
     5  
     6  package backups
     7  
     8  import (
     9  	"net"
    10  	"strconv"
    11  
    12  	"github.com/juju/errors"
    13  	"github.com/juju/utils/shell"
    14  	"gopkg.in/juju/names.v2"
    15  
    16  	"github.com/juju/juju/agent"
    17  	"github.com/juju/juju/juju/paths"
    18  	"github.com/juju/juju/mongo"
    19  	"github.com/juju/juju/network"
    20  	"github.com/juju/juju/service"
    21  	"github.com/juju/juju/state"
    22  	"github.com/juju/juju/version"
    23  )
    24  
    25  func ensureMongoService(agentConfig agent.Config) error {
    26  	var oplogSize int
    27  	if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" {
    28  		var err error
    29  		if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil {
    30  			return errors.Annotatef(err, "invalid oplog size: %q", oplogSizeString)
    31  		}
    32  	}
    33  
    34  	var numaCtlPolicy bool
    35  	if numaCtlString := agentConfig.Value(agent.NUMACtlPreference); numaCtlString != "" {
    36  		var err error
    37  		if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil {
    38  			return errors.Annotatef(err, "invalid numactl preference: %q", numaCtlString)
    39  		}
    40  	}
    41  
    42  	si, ok := agentConfig.StateServingInfo()
    43  	if !ok {
    44  		return errors.Errorf("agent config has no state serving info")
    45  	}
    46  
    47  	if err := mongo.EnsureServiceInstalled(agentConfig.DataDir(),
    48  		si.StatePort,
    49  		oplogSize,
    50  		numaCtlPolicy,
    51  		agentConfig.MongoVersion(),
    52  		true,
    53  	); err != nil {
    54  		return errors.Annotate(err, "cannot ensure that mongo service start/stop scripts are in place")
    55  	}
    56  	// Installing a service will not automatically restart it.
    57  	if err := mongo.StartService(); err != nil {
    58  		return errors.Annotate(err, "failed to start mongo")
    59  	}
    60  	return nil
    61  }
    62  
    63  // Restore handles either returning or creating a controller to a backed up status:
    64  // * extracts the content of the given backup file and:
    65  // * runs mongorestore with the backed up mongo dump
    66  // * updates and writes configuration files
    67  // * updates existing db entries to make sure they hold no references to
    68  // old instances
    69  // * updates config in all agents.
    70  func (b *backups) Restore(backupId string, dbInfo *DBInfo, args RestoreArgs) (names.Tag, error) {
    71  	meta, backupReader, err := b.Get(backupId)
    72  	if err != nil {
    73  		return nil, errors.Annotatef(err, "could not fetch backup %q", backupId)
    74  	}
    75  
    76  	defer backupReader.Close()
    77  
    78  	workspace, err := NewArchiveWorkspaceReader(backupReader)
    79  	if err != nil {
    80  		return nil, errors.Annotate(err, "cannot unpack backup file")
    81  	}
    82  	defer workspace.Close()
    83  
    84  	// This might actually work, but we don't have a guarantee so we don't allow it.
    85  	if meta.Origin.Series != args.NewInstSeries {
    86  		return nil, errors.Errorf("cannot restore a backup made in a machine with series %q into a machine with series %q, %#v", meta.Origin.Series, args.NewInstSeries, meta)
    87  	}
    88  
    89  	// TODO(perrito666) Create a compatibility table of sorts.
    90  	vers := meta.Origin.Version
    91  	if vers.Major != 2 {
    92  		return nil, errors.Errorf("Juju version %v cannot restore backups made using Juju version %v", version.Current.Minor, vers)
    93  	}
    94  	backupMachine := names.NewMachineTag(meta.Origin.Machine)
    95  
    96  	// The path for the config file might change if the tag changed
    97  	// and also the rest of the path, so we assume as little as possible.
    98  	oldDatadir, err := paths.DataDir(args.NewInstSeries)
    99  	if err != nil {
   100  		return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine")
   101  	}
   102  
   103  	var oldAgentConfig agent.ConfigSetterWriter
   104  	oldAgentConfigFile := agent.ConfigPath(oldDatadir, args.NewInstTag)
   105  	if oldAgentConfig, err = agent.ReadConfig(oldAgentConfigFile); err != nil {
   106  		return nil, errors.Annotate(err, "cannot load old agent config from disk")
   107  	}
   108  
   109  	logger.Infof("stopping juju-db")
   110  	if err = mongo.StopService(); err != nil {
   111  		return nil, errors.Annotate(err, "failed to stop mongo")
   112  	}
   113  
   114  	// delete all the files to be replaced
   115  	if err := PrepareMachineForRestore(oldAgentConfig.MongoVersion()); err != nil {
   116  		return nil, errors.Annotate(err, "cannot delete existing files")
   117  	}
   118  	logger.Infof("deleted old files to place new")
   119  
   120  	if err := workspace.UnpackFilesBundle(filesystemRoot()); err != nil {
   121  		return nil, errors.Annotate(err, "cannot obtain system files from backup")
   122  	}
   123  	logger.Infof("placed new restore files")
   124  
   125  	var agentConfig agent.ConfigSetterWriter
   126  	// The path for the config file might change if the tag changed
   127  	// and also the rest of the path, so we assume as little as possible.
   128  	datadir, err := paths.DataDir(args.NewInstSeries)
   129  	if err != nil {
   130  		return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine")
   131  	}
   132  	agentConfigFile := agent.ConfigPath(datadir, backupMachine)
   133  	if agentConfig, err = agent.ReadConfig(agentConfigFile); err != nil {
   134  		return nil, errors.Annotate(err, "cannot load agent config from disk")
   135  	}
   136  	ssi, ok := agentConfig.StateServingInfo()
   137  	if !ok {
   138  		return nil, errors.Errorf("cannot determine state serving info")
   139  	}
   140  	APIHostPorts := network.NewHostPorts(ssi.APIPort, args.PrivateAddress, args.PublicAddress)
   141  	agentConfig.SetAPIHostPorts([][]network.HostPort{APIHostPorts})
   142  	if err := agentConfig.Write(); err != nil {
   143  		return nil, errors.Annotate(err, "cannot write new agent configuration")
   144  	}
   145  	logger.Infof("wrote new agent config for restore")
   146  
   147  	if backupMachine.Id() != "0" {
   148  		logger.Infof("extra work needed backup belongs to %q machine", backupMachine.String())
   149  		serviceName := "jujud-" + agentConfig.Tag().String()
   150  		aInfo := service.NewMachineAgentInfo(
   151  			agentConfig.Tag().Id(),
   152  			dataDir,
   153  			paths.MustSucceed(paths.LogDir(args.NewInstSeries)),
   154  		)
   155  
   156  		// TODO(perrito666) renderer should have a RendererForSeries, for the moment
   157  		// restore only works on linuxes.
   158  		renderer, _ := shell.NewRenderer("bash")
   159  		serviceAgentConf := service.AgentConf(aInfo, renderer)
   160  		svc, err := service.NewService(serviceName, serviceAgentConf, args.NewInstSeries)
   161  		if err != nil {
   162  			return nil, errors.Annotate(err, "cannot generate service for the restored agent.")
   163  		}
   164  		if err := svc.Install(); err != nil {
   165  			return nil, errors.Annotate(err, "cannot install service for the restored agent.")
   166  		}
   167  		logger.Infof("new machine service")
   168  	}
   169  
   170  	logger.Infof("mongo service will be reinstalled to ensure its presence")
   171  	if err := ensureMongoService(agentConfig); err != nil {
   172  		return nil, errors.Annotate(err, "failed to reinstall service for juju-db")
   173  	}
   174  
   175  	dialInfo, err := newDialInfo(args.PrivateAddress, agentConfig)
   176  	if err != nil {
   177  		return nil, errors.Annotate(err, "cannot produce dial information")
   178  	}
   179  
   180  	oldDialInfo, err := newDialInfo(args.PrivateAddress, oldAgentConfig)
   181  	if err != nil {
   182  		return nil, errors.Annotate(err, "cannot produce dial information for existing mongo")
   183  	}
   184  
   185  	logger.Infof("new mongo will be restored")
   186  	mgoVer := agentConfig.MongoVersion()
   187  
   188  	tagUser, tagUserPassword, err := tagUserCredentials(agentConfig)
   189  	if err != nil {
   190  		return nil, errors.Trace(err)
   191  	}
   192  	rArgs := RestorerArgs{
   193  		DialInfo:        dialInfo,
   194  		Version:         mgoVer,
   195  		TagUser:         tagUser,
   196  		TagUserPassword: tagUserPassword,
   197  		RunCommandFn:    runCommand,
   198  		StartMongo:      mongo.StartService,
   199  		StopMongo:       mongo.StopService,
   200  		NewMongoSession: NewMongoSession,
   201  		GetDB:           GetDB,
   202  	}
   203  
   204  	// Restore mongodb from backup
   205  	restorer, err := NewDBRestorer(rArgs)
   206  	if err != nil {
   207  		return nil, errors.Annotate(err, "error preparing for restore")
   208  	}
   209  	if err := restorer.Restore(workspace.DBDumpDir, oldDialInfo); err != nil {
   210  		return nil, errors.Annotate(err, "error restoring state from backup")
   211  	}
   212  
   213  	// Re-start replicaset with the new value for server address
   214  	logger.Infof("restarting replicaset")
   215  	memberHostPort := net.JoinHostPort(args.PrivateAddress, strconv.Itoa(ssi.StatePort))
   216  	err = resetReplicaSet(dialInfo, memberHostPort)
   217  	if err != nil {
   218  		return nil, errors.Annotate(err, "cannot reset replicaSet")
   219  	}
   220  
   221  	err = updateMongoEntries(args.NewInstId, args.NewInstTag.Id(), backupMachine.Id(), dialInfo)
   222  	if err != nil {
   223  		return nil, errors.Annotate(err, "cannot update mongo entries")
   224  	}
   225  
   226  	// From here we work with the restored controller
   227  	mgoInfo, ok := agentConfig.MongoInfo()
   228  	if !ok {
   229  		return nil, errors.Errorf("cannot retrieve info to connect to mongo")
   230  	}
   231  
   232  	st, err := newStateConnection(agentConfig.Controller(), agentConfig.Model(), mgoInfo)
   233  	if err != nil {
   234  		return nil, errors.Trace(err)
   235  	}
   236  	defer st.Close()
   237  
   238  	machine, err := st.Machine(backupMachine.Id())
   239  	if err != nil {
   240  		return nil, errors.Trace(err)
   241  	}
   242  
   243  	logger.Infof("updating local machine addresses")
   244  	err = updateMachineAddresses(machine, args.PrivateAddress, args.PublicAddress)
   245  	if err != nil {
   246  		return nil, errors.Annotate(err, "cannot update api server machine addresses")
   247  	}
   248  	// Update the APIHostPorts as well. Under normal circumstances the API
   249  	// Host Ports are only set during bootstrap and by the peergrouper worker.
   250  	// Unfortunately right now, the peer grouper is busy restarting and isn't
   251  	// guaranteed to set the host ports before the remote machines we are
   252  	// about to tell about us. If it doesn't, the remote machine gets its
   253  	// agent.conf file updated with this new machine's IP address, it then
   254  	// starts, and the "api-address-updater" worker asks for the api host
   255  	// ports, and gets told the old IP address of the machine that was backed
   256  	// up. It then writes this incorrect file to its agent.conf file, which
   257  	// causes it to attempt to reconnect to the api server. Unfortunately it
   258  	// now has the wrong address and can never get the  correct one.
   259  	// So, we set it explicitly here.
   260  	if err := st.SetAPIHostPorts([][]network.HostPort{APIHostPorts}); err != nil {
   261  		return nil, errors.Annotate(err, "cannot update api server host ports")
   262  	}
   263  
   264  	// update all agents known to the new controller.
   265  	// TODO(perrito666): We should never stop process because of this.
   266  	// updateAllMachines will not return errors for individual
   267  	// agent update failures
   268  	models, err := st.AllModels()
   269  	if err != nil {
   270  		return nil, errors.Trace(err)
   271  	}
   272  	machines := []machineModel{}
   273  	for _, model := range models {
   274  		machinesForModel, err := st.AllMachinesFor(model.UUID())
   275  		if err != nil {
   276  			return nil, errors.Trace(err)
   277  		}
   278  		for _, machine := range machinesForModel {
   279  			machines = append(machines, machineModel{machine: machine, model: model})
   280  		}
   281  	}
   282  	logger.Infof("updating other machine addresses")
   283  	if err := updateAllMachines(args.PrivateAddress, args.PublicAddress, machines); err != nil {
   284  		return nil, errors.Annotate(err, "cannot update agents")
   285  	}
   286  
   287  	// Mark restoreInfo as Finished so upon restart of the apiserver
   288  	// the client can reconnect and determine if we where succesful.
   289  	info := st.RestoreInfo()
   290  	// In mongo 3.2, even though the backup is made with --oplog, there
   291  	// are stale transactions in this collection.
   292  	if err := info.PurgeTxn(); err != nil {
   293  		return nil, errors.Annotate(err, "cannot purge stale transactions")
   294  	}
   295  	if err = info.SetStatus(state.RestoreFinished); err != nil {
   296  		return nil, errors.Annotate(err, "failed to set status to finished")
   297  	}
   298  
   299  	return backupMachine, nil
   300  }