github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/backups/backups_linux.go (about)

     1  // Copyright 2015 Canonical Ltd.
     2  // Licensed under the AGPLv3, see LICENCE file for details.
     3  
     4  // +build linux
     5  
     6  package backups
     7  
     8  import (
     9  	"net"
    10  	"strconv"
    11  
    12  	"github.com/juju/errors"
    13  	"github.com/juju/utils/shell"
    14  	"gopkg.in/juju/names.v2"
    15  
    16  	"github.com/juju/juju/agent"
    17  	"github.com/juju/juju/juju/paths"
    18  	"github.com/juju/juju/mongo"
    19  	"github.com/juju/juju/network"
    20  	"github.com/juju/juju/service"
    21  	"github.com/juju/juju/state"
    22  	"github.com/juju/juju/version"
    23  )
    24  
    25  func ensureMongoService(agentConfig agent.Config) error {
    26  	var oplogSize int
    27  	if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" {
    28  		var err error
    29  		if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil {
    30  			return errors.Annotatef(err, "invalid oplog size: %q", oplogSizeString)
    31  		}
    32  	}
    33  
    34  	var numaCtlPolicy bool
    35  	if numaCtlString := agentConfig.Value(agent.NUMACtlPreference); numaCtlString != "" {
    36  		var err error
    37  		if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil {
    38  			return errors.Annotatef(err, "invalid numactl preference: %q", numaCtlString)
    39  		}
    40  	}
    41  
    42  	si, ok := agentConfig.StateServingInfo()
    43  	if !ok {
    44  		return errors.Errorf("agent config has no state serving info")
    45  	}
    46  
    47  	if err := mongo.EnsureServiceInstalled(agentConfig.DataDir(),
    48  		si.StatePort,
    49  		oplogSize,
    50  		numaCtlPolicy,
    51  		agentConfig.MongoVersion(),
    52  		true,
    53  		mongo.MemoryProfileDefault,
    54  	); err != nil {
    55  		return errors.Annotate(err, "cannot ensure that mongo service start/stop scripts are in place")
    56  	}
    57  	// Installing a service will not automatically restart it.
    58  	if err := mongo.StartService(); err != nil {
    59  		return errors.Annotate(err, "failed to start mongo")
    60  	}
    61  	return nil
    62  }
    63  
    64  // Restore handles either returning or creating a controller to a backed up status:
    65  // * extracts the content of the given backup file and:
    66  // * runs mongorestore with the backed up mongo dump
    67  // * updates and writes configuration files
    68  // * updates existing db entries to make sure they hold no references to
    69  // old instances
    70  // * updates config in all agents.
    71  func (b *backups) Restore(backupId string, args RestoreArgs) (names.Tag, error) {
    72  	meta, backupReader, err := b.Get(backupId)
    73  	if err != nil {
    74  		return nil, errors.Annotatef(err, "could not fetch backup %q", backupId)
    75  	}
    76  
    77  	defer backupReader.Close()
    78  
    79  	workspace, err := NewArchiveWorkspaceReader(backupReader)
    80  	if err != nil {
    81  		return nil, errors.Annotate(err, "cannot unpack backup file")
    82  	}
    83  	defer workspace.Close()
    84  
    85  	// This might actually work, but we don't have a guarantee so we don't allow it.
    86  	if meta.Origin.Series != args.NewInstSeries {
    87  		return nil, errors.Errorf("cannot restore a backup made in a machine with series %q into a machine with series %q, %#v", meta.Origin.Series, args.NewInstSeries, meta)
    88  	}
    89  
    90  	// TODO(perrito666) Create a compatibility table of sorts.
    91  	vers := meta.Origin.Version
    92  	if vers.Major != 2 {
    93  		return nil, errors.Errorf("Juju version %v cannot restore backups made using Juju version %v", version.Current.Minor, vers)
    94  	}
    95  	backupMachine := names.NewMachineTag(meta.Origin.Machine)
    96  
    97  	// The path for the config file might change if the tag changed
    98  	// and also the rest of the path, so we assume as little as possible.
    99  	oldDatadir, err := paths.DataDir(args.NewInstSeries)
   100  	if err != nil {
   101  		return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine")
   102  	}
   103  
   104  	var oldAgentConfig agent.ConfigSetterWriter
   105  	oldAgentConfigFile := agent.ConfigPath(oldDatadir, args.NewInstTag)
   106  	if oldAgentConfig, err = agent.ReadConfig(oldAgentConfigFile); err != nil {
   107  		return nil, errors.Annotate(err, "cannot load old agent config from disk")
   108  	}
   109  
   110  	logger.Infof("stopping juju-db")
   111  	if err = mongo.StopService(); err != nil {
   112  		return nil, errors.Annotate(err, "failed to stop mongo")
   113  	}
   114  
   115  	// delete all the files to be replaced
   116  	if err := PrepareMachineForRestore(oldAgentConfig.MongoVersion()); err != nil {
   117  		return nil, errors.Annotate(err, "cannot delete existing files")
   118  	}
   119  	logger.Infof("deleted old files to place new")
   120  
   121  	if err := workspace.UnpackFilesBundle(filesystemRoot()); err != nil {
   122  		return nil, errors.Annotate(err, "cannot obtain system files from backup")
   123  	}
   124  	logger.Infof("placed new restore files")
   125  
   126  	var agentConfig agent.ConfigSetterWriter
   127  	// The path for the config file might change if the tag changed
   128  	// and also the rest of the path, so we assume as little as possible.
   129  	datadir, err := paths.DataDir(args.NewInstSeries)
   130  	if err != nil {
   131  		return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine")
   132  	}
   133  	agentConfigFile := agent.ConfigPath(datadir, backupMachine)
   134  	if agentConfig, err = agent.ReadConfig(agentConfigFile); err != nil {
   135  		return nil, errors.Annotate(err, "cannot load agent config from disk")
   136  	}
   137  	ssi, ok := agentConfig.StateServingInfo()
   138  	if !ok {
   139  		return nil, errors.Errorf("cannot determine state serving info")
   140  	}
   141  	APIHostPorts := network.NewHostPorts(ssi.APIPort, args.PrivateAddress, args.PublicAddress)
   142  	agentConfig.SetAPIHostPorts([][]network.HostPort{APIHostPorts})
   143  	if err := agentConfig.Write(); err != nil {
   144  		return nil, errors.Annotate(err, "cannot write new agent configuration")
   145  	}
   146  	logger.Infof("wrote new agent config for restore")
   147  
   148  	if backupMachine.Id() != "0" {
   149  		logger.Infof("extra work needed backup belongs to %q machine", backupMachine.String())
   150  		serviceName := "jujud-" + agentConfig.Tag().String()
   151  		aInfo := service.NewMachineAgentInfo(
   152  			agentConfig.Tag().Id(),
   153  			dataDir,
   154  			paths.MustSucceed(paths.LogDir(args.NewInstSeries)),
   155  		)
   156  
   157  		// TODO(perrito666) renderer should have a RendererForSeries, for the moment
   158  		// restore only works on linuxes.
   159  		renderer, _ := shell.NewRenderer("bash")
   160  		serviceAgentConf := service.AgentConf(aInfo, renderer)
   161  		svc, err := service.NewService(serviceName, serviceAgentConf, args.NewInstSeries)
   162  		if err != nil {
   163  			return nil, errors.Annotate(err, "cannot generate service for the restored agent.")
   164  		}
   165  		if err := svc.Install(); err != nil {
   166  			return nil, errors.Annotate(err, "cannot install service for the restored agent.")
   167  		}
   168  		logger.Infof("new machine service")
   169  	}
   170  
   171  	logger.Infof("mongo service will be reinstalled to ensure its presence")
   172  	if err := ensureMongoService(agentConfig); err != nil {
   173  		return nil, errors.Annotate(err, "failed to reinstall service for juju-db")
   174  	}
   175  
   176  	dialInfo, err := newDialInfo(args.PrivateAddress, agentConfig)
   177  	if err != nil {
   178  		return nil, errors.Annotate(err, "cannot produce dial information")
   179  	}
   180  
   181  	// For the unresponsive controller case the oldAgentConfig and agentConfig
   182  	// have different certificates. MongoDB has been already started with a
   183  	// new certificate. Therefore all clients that would like to communicate
   184  	// with mongo should use the new certificate otherwise the
   185  	// "TLS handshake error" occurs. To avoid this error the old certificate
   186  	// should be replaced by the new one.
   187  	oldAgentConfig.SetCACert(agentConfig.CACert())
   188  	oldDialInfo, err := newDialInfo(args.PrivateAddress, oldAgentConfig)
   189  	if err != nil {
   190  		return nil, errors.Annotate(err, "cannot produce dial information for existing mongo")
   191  	}
   192  
   193  	logger.Infof("new mongo will be restored")
   194  	mgoVer := agentConfig.MongoVersion()
   195  
   196  	tagUser, tagUserPassword, err := tagUserCredentials(agentConfig)
   197  	if err != nil {
   198  		return nil, errors.Trace(err)
   199  	}
   200  	rArgs := RestorerArgs{
   201  		DialInfo:        dialInfo,
   202  		Version:         mgoVer,
   203  		TagUser:         tagUser,
   204  		TagUserPassword: tagUserPassword,
   205  		RunCommandFn:    runCommand,
   206  		StartMongo:      mongo.StartService,
   207  		StopMongo:       mongo.StopService,
   208  		NewMongoSession: NewMongoSession,
   209  		GetDB:           GetDB,
   210  	}
   211  
   212  	// Restore mongodb from backup
   213  	restorer, err := NewDBRestorer(rArgs)
   214  	if err != nil {
   215  		return nil, errors.Annotate(err, "error preparing for restore")
   216  	}
   217  	if err := restorer.Restore(workspace.DBDumpDir, oldDialInfo); err != nil {
   218  		return nil, errors.Annotate(err, "error restoring state from backup")
   219  	}
   220  
   221  	// Re-start replicaset with the new value for server address
   222  	logger.Infof("restarting replicaset")
   223  	memberHostPort := net.JoinHostPort(args.PrivateAddress, strconv.Itoa(ssi.StatePort))
   224  	err = resetReplicaSet(dialInfo, memberHostPort)
   225  	if err != nil {
   226  		return nil, errors.Annotate(err, "cannot reset replicaSet")
   227  	}
   228  
   229  	err = updateMongoEntries(args.NewInstId, args.NewInstTag.Id(), backupMachine.Id(), dialInfo)
   230  	if err != nil {
   231  		return nil, errors.Annotate(err, "cannot update mongo entries")
   232  	}
   233  
   234  	// From here we work with the restored controller
   235  	mgoInfo, ok := agentConfig.MongoInfo()
   236  	if !ok {
   237  		return nil, errors.Errorf("cannot retrieve info to connect to mongo")
   238  	}
   239  
   240  	pool, err := connectToDB(agentConfig.Controller(), agentConfig.Model(), mgoInfo)
   241  	if err != nil {
   242  		return nil, errors.Trace(err)
   243  	}
   244  	defer pool.Close()
   245  	st := pool.SystemState()
   246  
   247  	machine, err := st.Machine(backupMachine.Id())
   248  	if err != nil {
   249  		return nil, errors.Trace(err)
   250  	}
   251  
   252  	logger.Infof("updating local machine addresses")
   253  	err = updateMachineAddresses(machine, args.PrivateAddress, args.PublicAddress)
   254  	if err != nil {
   255  		return nil, errors.Annotate(err, "cannot update api server machine addresses")
   256  	}
   257  	// Update the APIHostPorts as well. Under normal circumstances the API
   258  	// Host Ports are only set during bootstrap and by the peergrouper worker.
   259  	// Unfortunately right now, the peer grouper is busy restarting and isn't
   260  	// guaranteed to set the host ports before the remote machines we are
   261  	// about to tell about us. If it doesn't, the remote machine gets its
   262  	// agent.conf file updated with this new machine's IP address, it then
   263  	// starts, and the "api-address-updater" worker asks for the api host
   264  	// ports, and gets told the old IP address of the machine that was backed
   265  	// up. It then writes this incorrect file to its agent.conf file, which
   266  	// causes it to attempt to reconnect to the api server. Unfortunately it
   267  	// now has the wrong address and can never get the  correct one.
   268  	// So, we set it explicitly here.
   269  	if err := st.SetAPIHostPorts([][]network.HostPort{APIHostPorts}); err != nil {
   270  		return nil, errors.Annotate(err, "cannot update api server host ports")
   271  	}
   272  
   273  	// update all agents known to the new controller.
   274  	// TODO(perrito666): We should never stop process because of this.
   275  	// updateAllMachines will not return errors for individual
   276  	// agent update failures
   277  
   278  	modelUUIDs, err := st.AllModelUUIDs()
   279  	if err != nil {
   280  		return nil, errors.Trace(err)
   281  	}
   282  	var machines []machineModel
   283  	for _, modelUUID := range modelUUIDs {
   284  		st, err := pool.Get(modelUUID)
   285  		if err != nil {
   286  			return nil, errors.Trace(err)
   287  		}
   288  		defer func() {
   289  			st.Release()
   290  		}()
   291  
   292  		model, err := st.Model()
   293  		if err != nil {
   294  			return nil, errors.Trace(err)
   295  		}
   296  
   297  		machinesForModel, err := st.AllMachines()
   298  		if err != nil {
   299  			return nil, errors.Trace(err)
   300  		}
   301  		for _, machine := range machinesForModel {
   302  			machines = append(machines, machineModel{machine: machine, model: model})
   303  		}
   304  	}
   305  	logger.Infof("updating other machine addresses")
   306  	if err := updateAllMachines(args.PrivateAddress, args.PublicAddress, machines); err != nil {
   307  		return nil, errors.Annotate(err, "cannot update agents")
   308  	}
   309  
   310  	// Mark restoreInfo as Finished so upon restart of the apiserver
   311  	// the client can reconnect and determine if we where successful.
   312  	info := st.RestoreInfo()
   313  	// In mongo 3.2, even though the backup is made with --oplog, there
   314  	// are stale transactions in this collection.
   315  	if err := info.PurgeTxn(); err != nil {
   316  		return nil, errors.Annotate(err, "cannot purge stale transactions")
   317  	}
   318  	if err = info.SetStatus(state.RestoreFinished); err != nil {
   319  		return nil, errors.Annotate(err, "failed to set status to finished")
   320  	}
   321  
   322  	return backupMachine, nil
   323  }