github.com/niedbalski/juju@v0.0.0-20190215020005-8ff100488e47/state/backups/backups_linux.go (about) 1 // Copyright 2015 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 // +build linux 5 6 package backups 7 8 import ( 9 "net" 10 "strconv" 11 12 "github.com/juju/errors" 13 "github.com/juju/utils/shell" 14 "gopkg.in/juju/names.v2" 15 16 "github.com/juju/juju/agent" 17 "github.com/juju/juju/juju/paths" 18 "github.com/juju/juju/mongo" 19 "github.com/juju/juju/network" 20 "github.com/juju/juju/service" 21 "github.com/juju/juju/state" 22 "github.com/juju/juju/version" 23 ) 24 25 func ensureMongoService(agentConfig agent.Config) error { 26 var oplogSize int 27 if oplogSizeString := agentConfig.Value(agent.MongoOplogSize); oplogSizeString != "" { 28 var err error 29 if oplogSize, err = strconv.Atoi(oplogSizeString); err != nil { 30 return errors.Annotatef(err, "invalid oplog size: %q", oplogSizeString) 31 } 32 } 33 34 var numaCtlPolicy bool 35 if numaCtlString := agentConfig.Value(agent.NUMACtlPreference); numaCtlString != "" { 36 var err error 37 if numaCtlPolicy, err = strconv.ParseBool(numaCtlString); err != nil { 38 return errors.Annotatef(err, "invalid numactl preference: %q", numaCtlString) 39 } 40 } 41 42 si, ok := agentConfig.StateServingInfo() 43 if !ok { 44 return errors.Errorf("agent config has no state serving info") 45 } 46 47 if err := mongo.EnsureServiceInstalled(agentConfig.DataDir(), 48 si.StatePort, 49 oplogSize, 50 numaCtlPolicy, 51 agentConfig.MongoVersion(), 52 true, 53 mongo.MemoryProfileDefault, 54 ); err != nil { 55 return errors.Annotate(err, "cannot ensure that mongo service start/stop scripts are in place") 56 } 57 // Installing a service will not automatically restart it. 58 if err := mongo.StartService(); err != nil { 59 return errors.Annotate(err, "failed to start mongo") 60 } 61 return nil 62 } 63 64 // Restore handles either returning or creating a controller to a backed up status: 65 // * extracts the content of the given backup file and: 66 // * runs mongorestore with the backed up mongo dump 67 // * updates and writes configuration files 68 // * updates existing db entries to make sure they hold no references to 69 // old instances 70 // * updates config in all agents. 71 func (b *backups) Restore(backupId string, args RestoreArgs) (names.Tag, error) { 72 meta, backupReader, err := b.Get(backupId) 73 if err != nil { 74 return nil, errors.Annotatef(err, "could not fetch backup %q", backupId) 75 } 76 77 defer backupReader.Close() 78 79 workspace, err := NewArchiveWorkspaceReader(backupReader) 80 if err != nil { 81 return nil, errors.Annotate(err, "cannot unpack backup file") 82 } 83 defer workspace.Close() 84 85 // This might actually work, but we don't have a guarantee so we don't allow it. 86 if meta.Origin.Series != args.NewInstSeries { 87 return nil, errors.Errorf("cannot restore a backup made in a machine with series %q into a machine with series %q, %#v", meta.Origin.Series, args.NewInstSeries, meta) 88 } 89 90 // TODO(perrito666) Create a compatibility table of sorts. 91 vers := meta.Origin.Version 92 if vers.Major != 2 { 93 return nil, errors.Errorf("Juju version %v cannot restore backups made using Juju version %v", version.Current.Minor, vers) 94 } 95 backupMachine := names.NewMachineTag(meta.Origin.Machine) 96 97 // The path for the config file might change if the tag changed 98 // and also the rest of the path, so we assume as little as possible. 99 oldDatadir, err := paths.DataDir(args.NewInstSeries) 100 if err != nil { 101 return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine") 102 } 103 104 var oldAgentConfig agent.ConfigSetterWriter 105 oldAgentConfigFile := agent.ConfigPath(oldDatadir, args.NewInstTag) 106 if oldAgentConfig, err = agent.ReadConfig(oldAgentConfigFile); err != nil { 107 return nil, errors.Annotate(err, "cannot load old agent config from disk") 108 } 109 110 logger.Infof("stopping juju-db") 111 if err = mongo.StopService(); err != nil { 112 return nil, errors.Annotate(err, "failed to stop mongo") 113 } 114 115 // delete all the files to be replaced 116 if err := PrepareMachineForRestore(oldAgentConfig.MongoVersion()); err != nil { 117 return nil, errors.Annotate(err, "cannot delete existing files") 118 } 119 logger.Infof("deleted old files to place new") 120 121 if err := workspace.UnpackFilesBundle(filesystemRoot()); err != nil { 122 return nil, errors.Annotate(err, "cannot obtain system files from backup") 123 } 124 logger.Infof("placed new restore files") 125 126 var agentConfig agent.ConfigSetterWriter 127 // The path for the config file might change if the tag changed 128 // and also the rest of the path, so we assume as little as possible. 129 datadir, err := paths.DataDir(args.NewInstSeries) 130 if err != nil { 131 return nil, errors.Annotate(err, "cannot determine DataDir for the restored machine") 132 } 133 agentConfigFile := agent.ConfigPath(datadir, backupMachine) 134 if agentConfig, err = agent.ReadConfig(agentConfigFile); err != nil { 135 return nil, errors.Annotate(err, "cannot load agent config from disk") 136 } 137 ssi, ok := agentConfig.StateServingInfo() 138 if !ok { 139 return nil, errors.Errorf("cannot determine state serving info") 140 } 141 APIHostPorts := network.NewHostPorts(ssi.APIPort, args.PrivateAddress, args.PublicAddress) 142 agentConfig.SetAPIHostPorts([][]network.HostPort{APIHostPorts}) 143 if err := agentConfig.Write(); err != nil { 144 return nil, errors.Annotate(err, "cannot write new agent configuration") 145 } 146 logger.Infof("wrote new agent config for restore") 147 148 if backupMachine.Id() != "0" { 149 logger.Infof("extra work needed backup belongs to %q machine", backupMachine.String()) 150 serviceName := "jujud-" + agentConfig.Tag().String() 151 aInfo := service.NewMachineAgentInfo( 152 agentConfig.Tag().Id(), 153 dataDir, 154 paths.MustSucceed(paths.LogDir(args.NewInstSeries)), 155 ) 156 157 // TODO(perrito666) renderer should have a RendererForSeries, for the moment 158 // restore only works on linuxes. 159 renderer, _ := shell.NewRenderer("bash") 160 serviceAgentConf := service.AgentConf(aInfo, renderer) 161 svc, err := service.NewService(serviceName, serviceAgentConf, args.NewInstSeries) 162 if err != nil { 163 return nil, errors.Annotate(err, "cannot generate service for the restored agent.") 164 } 165 if err := svc.Install(); err != nil { 166 return nil, errors.Annotate(err, "cannot install service for the restored agent.") 167 } 168 logger.Infof("new machine service") 169 } 170 171 logger.Infof("mongo service will be reinstalled to ensure its presence") 172 if err := ensureMongoService(agentConfig); err != nil { 173 return nil, errors.Annotate(err, "failed to reinstall service for juju-db") 174 } 175 176 dialInfo, err := newDialInfo(args.PrivateAddress, agentConfig) 177 if err != nil { 178 return nil, errors.Annotate(err, "cannot produce dial information") 179 } 180 181 // For the unresponsive controller case the oldAgentConfig and agentConfig 182 // have different certificates. MongoDB has been already started with a 183 // new certificate. Therefore all clients that would like to communicate 184 // with mongo should use the new certificate otherwise the 185 // "TLS handshake error" occurs. To avoid this error the old certificate 186 // should be replaced by the new one. 187 oldAgentConfig.SetCACert(agentConfig.CACert()) 188 oldDialInfo, err := newDialInfo(args.PrivateAddress, oldAgentConfig) 189 if err != nil { 190 return nil, errors.Annotate(err, "cannot produce dial information for existing mongo") 191 } 192 193 logger.Infof("new mongo will be restored") 194 mgoVer := agentConfig.MongoVersion() 195 196 tagUser, tagUserPassword, err := tagUserCredentials(agentConfig) 197 if err != nil { 198 return nil, errors.Trace(err) 199 } 200 rArgs := RestorerArgs{ 201 DialInfo: dialInfo, 202 Version: mgoVer, 203 TagUser: tagUser, 204 TagUserPassword: tagUserPassword, 205 RunCommandFn: runCommand, 206 StartMongo: mongo.StartService, 207 StopMongo: mongo.StopService, 208 NewMongoSession: NewMongoSession, 209 GetDB: GetDB, 210 } 211 212 // Restore mongodb from backup 213 restorer, err := NewDBRestorer(rArgs) 214 if err != nil { 215 return nil, errors.Annotate(err, "error preparing for restore") 216 } 217 if err := restorer.Restore(workspace.DBDumpDir, oldDialInfo); err != nil { 218 return nil, errors.Annotate(err, "error restoring state from backup") 219 } 220 221 // Re-start replicaset with the new value for server address 222 logger.Infof("restarting replicaset") 223 memberHostPort := net.JoinHostPort(args.PrivateAddress, strconv.Itoa(ssi.StatePort)) 224 err = resetReplicaSet(dialInfo, memberHostPort) 225 if err != nil { 226 return nil, errors.Annotate(err, "cannot reset replicaSet") 227 } 228 229 err = updateMongoEntries(args.NewInstId, args.NewInstTag.Id(), backupMachine.Id(), dialInfo) 230 if err != nil { 231 return nil, errors.Annotate(err, "cannot update mongo entries") 232 } 233 234 // From here we work with the restored controller 235 mgoInfo, ok := agentConfig.MongoInfo() 236 if !ok { 237 return nil, errors.Errorf("cannot retrieve info to connect to mongo") 238 } 239 240 pool, err := connectToDB(agentConfig.Controller(), agentConfig.Model(), mgoInfo) 241 if err != nil { 242 return nil, errors.Trace(err) 243 } 244 defer pool.Close() 245 st := pool.SystemState() 246 247 machine, err := st.Machine(backupMachine.Id()) 248 if err != nil { 249 return nil, errors.Trace(err) 250 } 251 252 logger.Infof("updating local machine addresses") 253 err = updateMachineAddresses(machine, args.PrivateAddress, args.PublicAddress) 254 if err != nil { 255 return nil, errors.Annotate(err, "cannot update api server machine addresses") 256 } 257 // Update the APIHostPorts as well. Under normal circumstances the API 258 // Host Ports are only set during bootstrap and by the peergrouper worker. 259 // Unfortunately right now, the peer grouper is busy restarting and isn't 260 // guaranteed to set the host ports before the remote machines we are 261 // about to tell about us. If it doesn't, the remote machine gets its 262 // agent.conf file updated with this new machine's IP address, it then 263 // starts, and the "api-address-updater" worker asks for the api host 264 // ports, and gets told the old IP address of the machine that was backed 265 // up. It then writes this incorrect file to its agent.conf file, which 266 // causes it to attempt to reconnect to the api server. Unfortunately it 267 // now has the wrong address and can never get the correct one. 268 // So, we set it explicitly here. 269 if err := st.SetAPIHostPorts([][]network.HostPort{APIHostPorts}); err != nil { 270 return nil, errors.Annotate(err, "cannot update api server host ports") 271 } 272 273 // update all agents known to the new controller. 274 // TODO(perrito666): We should never stop process because of this. 275 // updateAllMachines will not return errors for individual 276 // agent update failures 277 278 modelUUIDs, err := st.AllModelUUIDs() 279 if err != nil { 280 return nil, errors.Trace(err) 281 } 282 var machines []machineModel 283 for _, modelUUID := range modelUUIDs { 284 st, err := pool.Get(modelUUID) 285 if err != nil { 286 return nil, errors.Trace(err) 287 } 288 defer func() { 289 st.Release() 290 }() 291 292 model, err := st.Model() 293 if err != nil { 294 return nil, errors.Trace(err) 295 } 296 297 machinesForModel, err := st.AllMachines() 298 if err != nil { 299 return nil, errors.Trace(err) 300 } 301 for _, machine := range machinesForModel { 302 machines = append(machines, machineModel{machine: machine, model: model}) 303 } 304 } 305 logger.Infof("updating other machine addresses") 306 if err := updateAllMachines(args.PrivateAddress, args.PublicAddress, machines); err != nil { 307 return nil, errors.Annotate(err, "cannot update agents") 308 } 309 310 // Mark restoreInfo as Finished so upon restart of the apiserver 311 // the client can reconnect and determine if we where successful. 312 info := st.RestoreInfo() 313 // In mongo 3.2, even though the backup is made with --oplog, there 314 // are stale transactions in this collection. 315 if err := info.PurgeTxn(); err != nil { 316 return nil, errors.Annotate(err, "cannot purge stale transactions") 317 } 318 if err = info.SetStatus(state.RestoreFinished); err != nil { 319 return nil, errors.Annotate(err, "failed to set status to finished") 320 } 321 322 return backupMachine, nil 323 }