github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/backups/backups_linux.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build linux 5 6 package backups 7 8 import ( 9 "net" 10 "strconv" 11 12 "github.com/juju/errors" 13 "github.com/juju/utils/shell" 14 "gopkg.in/juju/names.v2" 15 16 "github.com/juju/juju/agent" 17 "github.com/juju/juju/juju/paths" 18 "github.com/juju/juju/mongo" 19 "github.com/juju/juju/network" 20 "github.com/juju/juju/service" 21 "github.com/juju/juju/state" 22 "github.com/juju/juju/version" 23 ) 24 25 func ensureMongoService(agentConfig agent.Config) error { 26 var oplogSize int 27 if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" { 28 var err error 29 if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil { 30 return errors.Annotatef(err, "invalid oplog size: %q", oplogSizeString) 31 } 32 } 33 34 var numaCtlPolicy bool 35 if numaCtlString := agentConfig.Value(agent.NUMACtlPreference); numaCtlString != "" { 36 var err error 37 if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil { 38 return errors.Annotatef(err, "invalid numactl preference: %q", numaCtlString) 39 } 40 } 41 42 si, ok := agentConfig.StateServingInfo() 43 if !ok { 44 return errors.Errorf("agent config has no state serving info") 45 } 46 47 if err := mongo.EnsureServiceInstalled(agentConfig.DataDir(), 48 si.StatePort, 49 oplogSize, 50 numaCtlPolicy, 51 agentConfig.MongoVersion(), 52 true, 53 ); err != nil { 54 return errors.Annotate(err, "cannot ensure that mongo service start/stop scripts are in place") 55 } 56 // Installing a service will not automatically restart it. 57 if err := mongo.StartService(); err != nil { 58 return errors.Annotate(err, "failed to start mongo") 59 } 60 return nil 61 } 62 63 // Restore handles either returning or creating a controller to a backed up status: 64 // * extracts the content of the given backup file and: 65 // * runs mongorestore with the backed up mongo dump 66 // * updates and writes configuration files 67 // * updates existing db entries to make sure they hold no references to 68 // old instances 69 // * updates config in all agents. 70 func (b *backups) Restore(backupId string, dbInfo *DBInfo, args RestoreArgs) (names.Tag, error) { 71 meta, backupReader, err := b.Get(backupId) 72 if err != nil { 73 return nil, errors.Annotatef(err, "could not fetch backup %q", backupId) 74 } 75 76 defer backupReader.Close() 77 78 workspace, err := NewArchiveWorkspaceReader(backupReader) 79 if err != nil { 80 return nil, errors.Annotate(err, "cannot unpack backup file") 81 } 82 defer workspace.Close() 83 84 // This might actually work, but we don't have a guarantee so we don't allow it. 85 if meta.Origin.Series != args.NewInstSeries { 86 return nil, errors.Errorf("cannot restore a backup made in a machine with series %q into a machine with series %q, %#v", meta.Origin.Series, args.NewInstSeries, meta) 87 } 88 89 // TODO(perrito666) Create a compatibility table of sorts. 90 vers := meta.Origin.Version 91 if vers.Major != 2 { 92 return nil, errors.Errorf("Juju version %v cannot restore backups made using Juju version %v", version.Current.Minor, vers) 93 } 94 backupMachine := names.NewMachineTag(meta.Origin.Machine) 95 96 // The path for the config file might change if the tag changed 97 // and also the rest of the path, so we assume as little as possible. 98 oldDatadir, err := paths.DataDir(args.NewInstSeries) 99 if err != nil { 100 return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine") 101 } 102 103 var oldAgentConfig agent.ConfigSetterWriter 104 oldAgentConfigFile := agent.ConfigPath(oldDatadir, args.NewInstTag) 105 if oldAgentConfig, err = agent.ReadConfig(oldAgentConfigFile); err != nil { 106 return nil, errors.Annotate(err, "cannot load old agent config from disk") 107 } 108 109 logger.Infof("stopping juju-db") 110 if err = mongo.StopService(); err != nil { 111 return nil, errors.Annotate(err, "failed to stop mongo") 112 } 113 114 // delete all the files to be replaced 115 if err := PrepareMachineForRestore(oldAgentConfig.MongoVersion()); err != nil { 116 return nil, errors.Annotate(err, "cannot delete existing files") 117 } 118 logger.Infof("deleted old files to place new") 119 120 if err := workspace.UnpackFilesBundle(filesystemRoot()); err != nil { 121 return nil, errors.Annotate(err, "cannot obtain system files from backup") 122 } 123 logger.Infof("placed new restore files") 124 125 var agentConfig agent.ConfigSetterWriter 126 // The path for the config file might change if the tag changed 127 // and also the rest of the path, so we assume as little as possible. 128 datadir, err := paths.DataDir(args.NewInstSeries) 129 if err != nil { 130 return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine") 131 } 132 agentConfigFile := agent.ConfigPath(datadir, backupMachine) 133 if agentConfig, err = agent.ReadConfig(agentConfigFile); err != nil { 134 return nil, errors.Annotate(err, "cannot load agent config from disk") 135 } 136 ssi, ok := agentConfig.StateServingInfo() 137 if !ok { 138 return nil, errors.Errorf("cannot determine state serving info") 139 } 140 APIHostPorts := network.NewHostPorts(ssi.APIPort, args.PrivateAddress, args.PublicAddress) 141 agentConfig.SetAPIHostPorts([][]network.HostPort{APIHostPorts}) 142 if err := agentConfig.Write(); err != nil { 143 return nil, errors.Annotate(err, "cannot write new agent configuration") 144 } 145 logger.Infof("wrote new agent config for restore") 146 147 if backupMachine.Id() != "0" { 148 logger.Infof("extra work needed backup belongs to %q machine", backupMachine.String()) 149 serviceName := "jujud-" + agentConfig.Tag().String() 150 aInfo := service.NewMachineAgentInfo( 151 agentConfig.Tag().Id(), 152 dataDir, 153 paths.MustSucceed(paths.LogDir(args.NewInstSeries)), 154 ) 155 156 // TODO(perrito666) renderer should have a RendererForSeries, for the moment 157 // restore only works on linuxes. 158 renderer, _ := shell.NewRenderer("bash") 159 serviceAgentConf := service.AgentConf(aInfo, renderer) 160 svc, err := service.NewService(serviceName, serviceAgentConf, args.NewInstSeries) 161 if err != nil { 162 return nil, errors.Annotate(err, "cannot generate service for the restored agent.") 163 } 164 if err := svc.Install(); err != nil { 165 return nil, errors.Annotate(err, "cannot install service for the restored agent.") 166 } 167 logger.Infof("new machine service") 168 } 169 170 logger.Infof("mongo service will be reinstalled to ensure its presence") 171 if err := ensureMongoService(agentConfig); err != nil { 172 return nil, errors.Annotate(err, "failed to reinstall service for juju-db") 173 } 174 175 dialInfo, err := newDialInfo(args.PrivateAddress, agentConfig) 176 if err != nil { 177 return nil, errors.Annotate(err, "cannot produce dial information") 178 } 179 180 oldDialInfo, err := newDialInfo(args.PrivateAddress, oldAgentConfig) 181 if err != nil { 182 return nil, errors.Annotate(err, "cannot produce dial information for existing mongo") 183 } 184 185 logger.Infof("new mongo will be restored") 186 mgoVer := agentConfig.MongoVersion() 187 188 tagUser, tagUserPassword, err := tagUserCredentials(agentConfig) 189 if err != nil { 190 return nil, errors.Trace(err) 191 } 192 rArgs := RestorerArgs{ 193 DialInfo: dialInfo, 194 Version: mgoVer, 195 TagUser: tagUser, 196 TagUserPassword: tagUserPassword, 197 RunCommandFn: runCommand, 198 StartMongo: mongo.StartService, 199 StopMongo: mongo.StopService, 200 NewMongoSession: NewMongoSession, 201 GetDB: GetDB, 202 } 203 204 // Restore mongodb from backup 205 restorer, err := NewDBRestorer(rArgs) 206 if err != nil { 207 return nil, errors.Annotate(err, "error preparing for restore") 208 } 209 if err := restorer.Restore(workspace.DBDumpDir, oldDialInfo); err != nil { 210 return nil, errors.Annotate(err, "error restoring state from backup") 211 } 212 213 // Re-start replicaset with the new value for server address 214 logger.Infof("restarting replicaset") 215 memberHostPort := net.JoinHostPort(args.PrivateAddress, strconv.Itoa(ssi.StatePort)) 216 err = resetReplicaSet(dialInfo, memberHostPort) 217 if err != nil { 218 return nil, errors.Annotate(err, "cannot reset replicaSet") 219 } 220 221 err = updateMongoEntries(args.NewInstId, args.NewInstTag.Id(), backupMachine.Id(), dialInfo) 222 if err != nil { 223 return nil, errors.Annotate(err, "cannot update mongo entries") 224 } 225 226 // From here we work with the restored controller 227 mgoInfo, ok := agentConfig.MongoInfo() 228 if !ok { 229 return nil, errors.Errorf("cannot retrieve info to connect to mongo") 230 } 231 232 st, err := newStateConnection(agentConfig.Controller(), agentConfig.Model(), mgoInfo) 233 if err != nil { 234 return nil, errors.Trace(err) 235 } 236 defer st.Close() 237 238 machine, err := st.Machine(backupMachine.Id()) 239 if err != nil { 240 return nil, errors.Trace(err) 241 } 242 243 logger.Infof("updating local machine addresses") 244 err = updateMachineAddresses(machine, args.PrivateAddress, args.PublicAddress) 245 if err != nil { 246 return nil, errors.Annotate(err, "cannot update api server machine addresses") 247 } 248 // Update the APIHostPorts as well. Under normal circumstances the API 249 // Host Ports are only set during bootstrap and by the peergrouper worker. 250 // Unfortunately right now, the peer grouper is busy restarting and isn't 251 // guaranteed to set the host ports before the remote machines we are 252 // about to tell about us. If it doesn't, the remote machine gets its 253 // agent.conf file updated with this new machine's IP address, it then 254 // starts, and the "api-address-updater" worker asks for the api host 255 // ports, and gets told the old IP address of the machine that was backed 256 // up. It then writes this incorrect file to its agent.conf file, which 257 // causes it to attempt to reconnect to the api server. Unfortunately it 258 // now has the wrong address and can never get the correct one. 259 // So, we set it explicitly here. 260 if err := st.SetAPIHostPorts([][]network.HostPort{APIHostPorts}); err != nil { 261 return nil, errors.Annotate(err, "cannot update api server host ports") 262 } 263 264 // update all agents known to the new controller. 265 // TODO(perrito666): We should never stop process because of this. 266 // updateAllMachines will not return errors for individual 267 // agent update failures 268 models, err := st.AllModels() 269 if err != nil { 270 return nil, errors.Trace(err) 271 } 272 machines := []machineModel{} 273 for _, model := range models { 274 machinesForModel, err := st.AllMachinesFor(model.UUID()) 275 if err != nil { 276 return nil, errors.Trace(err) 277 } 278 for _, machine := range machinesForModel { 279 machines = append(machines, machineModel{machine: machine, model: model}) 280 } 281 } 282 logger.Infof("updating other machine addresses") 283 if err := updateAllMachines(args.PrivateAddress, args.PublicAddress, machines); err != nil { 284 return nil, errors.Annotate(err, "cannot update agents") 285 } 286 287 // Mark restoreInfo as Finished so upon restart of the apiserver 288 // the client can reconnect and determine if we where succesful. 289 info := st.RestoreInfo() 290 // In mongo 3.2, even though the backup is made with --oplog, there 291 // are stale transactions in this collection. 292 if err := info.PurgeTxn(); err != nil { 293 return nil, errors.Annotate(err, "cannot purge stale transactions") 294 } 295 if err = info.SetStatus(state.RestoreFinished); err != nil { 296 return nil, errors.Annotate(err, "failed to set status to finished") 297 } 298 299 return backupMachine, nil 300 }