github.com/axw/juju@v0.0.0-20161005053422-4bd6544d08d4/state/cleanup.go (about) 1 // Copyright 2014 Canonical Ltd. 2 // Licensed under the AGPLv3, see LICENCE file for details. 3 4 package state 5 6 import ( 7 "fmt" 8 9 "github.com/juju/errors" 10 "gopkg.in/juju/charm.v6-unstable" 11 "gopkg.in/juju/names.v2" 12 "gopkg.in/mgo.v2" 13 "gopkg.in/mgo.v2/bson" 14 "gopkg.in/mgo.v2/txn" 15 ) 16 17 type cleanupKind string 18 19 const ( 20 // SCHEMACHANGE: the names are expressive, the values not so much. 21 cleanupRelationSettings cleanupKind = "settings" 22 cleanupUnitsForDyingService cleanupKind = "units" 23 cleanupCharm cleanupKind = "charm" 24 cleanupDyingUnit cleanupKind = "dyingUnit" 25 cleanupRemovedUnit cleanupKind = "removedUnit" 26 cleanupServicesForDyingModel cleanupKind = "applications" 27 cleanupDyingMachine cleanupKind = "dyingMachine" 28 cleanupForceDestroyedMachine cleanupKind = "machine" 29 cleanupAttachmentsForDyingStorage cleanupKind = "storageAttachments" 30 cleanupAttachmentsForDyingVolume cleanupKind = "volumeAttachments" 31 cleanupAttachmentsForDyingFilesystem cleanupKind = "filesystemAttachments" 32 cleanupModelsForDyingController cleanupKind = "models" 33 cleanupMachinesForDyingModel cleanupKind = "modelMachines" 34 ) 35 36 // cleanupDoc originally represented a set of documents that should be 37 // removed, but the Prefix field no longer means anything more than 38 // "what will be passed to the cleanup func". 39 type cleanupDoc struct { 40 DocID string `bson:"_id"` 41 Kind cleanupKind `bson:"kind"` 42 Prefix string `bson:"prefix"` 43 } 44 45 // newCleanupOp returns a txn.Op that creates a cleanup document with a unique 46 // id and the supplied kind and prefix. 47 func newCleanupOp(kind cleanupKind, prefix string) txn.Op { 48 doc := &cleanupDoc{ 49 DocID: fmt.Sprint(bson.NewObjectId()), 50 Kind: kind, 51 Prefix: prefix, 52 } 53 return txn.Op{ 54 C: cleanupsC, 55 Id: doc.DocID, 56 Insert: doc, 57 } 58 } 59 60 // NeedsCleanup returns true if documents previously marked for removal exist. 61 func (st *State) NeedsCleanup() (bool, error) { 62 cleanups, closer := st.getCollection(cleanupsC) 63 defer closer() 64 count, err := cleanups.Count() 65 if err != nil { 66 return false, err 67 } 68 return count > 0, nil 69 } 70 71 // Cleanup removes all documents that were previously marked for removal, if 72 // any such exist. It should be called periodically by at least one element 73 // of the system. 74 func (st *State) Cleanup() (err error) { 75 var doc cleanupDoc 76 cleanups, closer := st.getCollection(cleanupsC) 77 defer closer() 78 iter := cleanups.Find(nil).Iter() 79 defer closeIter(iter, &err, "reading cleanup document") 80 for iter.Next(&doc) { 81 var err error 82 logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix) 83 switch doc.Kind { 84 case cleanupRelationSettings: 85 err = st.cleanupRelationSettings(doc.Prefix) 86 case cleanupCharm: 87 err = st.cleanupCharm(doc.Prefix) 88 case cleanupUnitsForDyingService: 89 err = st.cleanupUnitsForDyingService(doc.Prefix) 90 case cleanupDyingUnit: 91 err = st.cleanupDyingUnit(doc.Prefix) 92 case cleanupRemovedUnit: 93 err = st.cleanupRemovedUnit(doc.Prefix) 94 case cleanupServicesForDyingModel: 95 err = st.cleanupServicesForDyingModel() 96 case cleanupDyingMachine: 97 err = st.cleanupDyingMachine(doc.Prefix) 98 case cleanupForceDestroyedMachine: 99 err = st.cleanupForceDestroyedMachine(doc.Prefix) 100 case cleanupAttachmentsForDyingStorage: 101 err = st.cleanupAttachmentsForDyingStorage(doc.Prefix) 102 case cleanupAttachmentsForDyingVolume: 103 err = st.cleanupAttachmentsForDyingVolume(doc.Prefix) 104 case cleanupAttachmentsForDyingFilesystem: 105 err = st.cleanupAttachmentsForDyingFilesystem(doc.Prefix) 106 case cleanupModelsForDyingController: 107 err = st.cleanupModelsForDyingController() 108 case cleanupMachinesForDyingModel: 109 err = st.cleanupMachinesForDyingModel() 110 default: 111 handler, ok := cleanupHandlers[doc.Kind] 112 if !ok { 113 err = errors.Errorf("unknown cleanup kind %q", doc.Kind) 114 } else { 115 persist := st.newPersistence() 116 err = handler(st, persist, doc.Prefix) 117 } 118 } 119 if err != nil { 120 logger.Errorf("cleanup failed for %v(%q): %v", doc.Kind, doc.Prefix, err) 121 continue 122 } 123 ops := []txn.Op{{ 124 C: cleanupsC, 125 Id: doc.DocID, 126 Remove: true, 127 }} 128 if err := st.runTransaction(ops); err != nil { 129 return errors.Annotate(err, "cannot remove empty cleanup document") 130 } 131 } 132 return nil 133 } 134 135 // CleanupHandler is a function that state may call during cleanup 136 // to perform cleanup actions for some cleanup type. 137 type CleanupHandler func(st *State, persist Persistence, prefix string) error 138 139 var cleanupHandlers = map[cleanupKind]CleanupHandler{} 140 141 // RegisterCleanupHandler identifies the handler to use a given 142 // cleanup kind. 143 func RegisterCleanupHandler(kindStr string, handler CleanupHandler) error { 144 kind := cleanupKind(kindStr) 145 if _, ok := cleanupHandlers[kind]; ok { 146 return errors.NewAlreadyExists(nil, fmt.Sprintf("cleanup handler for %q already registered", kindStr)) 147 } 148 cleanupHandlers[kind] = handler 149 return nil 150 } 151 152 func (st *State) cleanupRelationSettings(prefix string) error { 153 change := relationSettingsCleanupChange{Prefix: st.docID(prefix)} 154 if err := Apply(st.database, change); err != nil { 155 return errors.Trace(err) 156 } 157 return nil 158 } 159 160 // cleanupModelsForDyingController sets all models to dying, if 161 // they are not already Dying or Dead. It's expected to be used when a 162 // controller is destroyed. 163 func (st *State) cleanupModelsForDyingController() (err error) { 164 models, err := st.AllModels() 165 if err != nil { 166 return errors.Trace(err) 167 } 168 for _, model := range models { 169 if err := model.Destroy(); err != nil { 170 return errors.Trace(err) 171 } 172 } 173 return nil 174 } 175 176 // cleanupMachinesForDyingModel sets all non-manager machines to Dying, 177 // if they are not already Dying or Dead. It's expected to be used when 178 // a model is destroyed. 179 func (st *State) cleanupMachinesForDyingModel() (err error) { 180 // This won't miss machines, because a Dying model cannot have 181 // machines added to it. But we do have to remove the machines themselves 182 // via individual transactions, because they could be in any state at all. 183 machines, err := st.AllMachines() 184 if err != nil { 185 return errors.Trace(err) 186 } 187 for _, m := range machines { 188 if m.IsManager() { 189 continue 190 } 191 if _, isContainer := m.ParentId(); isContainer { 192 continue 193 } 194 manual, err := m.IsManual() 195 if err != nil { 196 return errors.Trace(err) 197 } 198 destroy := m.ForceDestroy 199 if manual { 200 // Manually added machines should never be force- 201 // destroyed automatically. That should be a user- 202 // driven decision, since it may leak applications 203 // and resources on the machine. If something is 204 // stuck, then the user can still force-destroy 205 // the manual machines. 206 destroy = m.Destroy 207 } 208 if err := destroy(); err != nil { 209 return errors.Trace(err) 210 } 211 } 212 return nil 213 } 214 215 // cleanupServicesForDyingModel sets all services to Dying, if they are 216 // not already Dying or Dead. It's expected to be used when a model is 217 // destroyed. 218 func (st *State) cleanupServicesForDyingModel() (err error) { 219 // This won't miss services, because a Dying model cannot have 220 // services added to it. But we do have to remove the services themselves 221 // via individual transactions, because they could be in any state at all. 222 applications, closer := st.getCollection(applicationsC) 223 defer closer() 224 application := Application{st: st} 225 sel := bson.D{{"life", Alive}} 226 iter := applications.Find(sel).Iter() 227 defer closeIter(iter, &err, "reading service document") 228 for iter.Next(&application.doc) { 229 if err := application.Destroy(); err != nil { 230 return err 231 } 232 } 233 return nil 234 } 235 236 // cleanupUnitsForDyingService sets all units with the given prefix to Dying, 237 // if they are not already Dying or Dead. It's expected to be used when a 238 // service is destroyed. 239 func (st *State) cleanupUnitsForDyingService(applicationname string) (err error) { 240 // This won't miss units, because a Dying service cannot have units added 241 // to it. But we do have to remove the units themselves via individual 242 // transactions, because they could be in any state at all. 243 units, closer := st.getCollection(unitsC) 244 defer closer() 245 246 unit := Unit{st: st} 247 sel := bson.D{{"application", applicationname}, {"life", Alive}} 248 iter := units.Find(sel).Iter() 249 defer closeIter(iter, &err, "reading unit document") 250 for iter.Next(&unit.doc) { 251 if err := unit.Destroy(); err != nil { 252 return err 253 } 254 } 255 return nil 256 } 257 258 // cleanupCharm is speculative: it can abort without error for many 259 // reasons, because it's triggered somewhat overenthusiastically for 260 // simplicity's sake. 261 func (st *State) cleanupCharm(charmURL string) error { 262 curl, err := charm.ParseURL(charmURL) 263 if err != nil { 264 return errors.Annotatef(err, "invalid charm URL %v", charmURL) 265 } 266 if curl.Schema != "local" { 267 // No cleanup necessary or possible. 268 return nil 269 } 270 271 ch, err := st.Charm(curl) 272 if errors.IsNotFound(err) { 273 // Charm already removed. 274 return nil 275 } else if err != nil { 276 return errors.Annotate(err, "reading charm") 277 } 278 279 err = ch.Destroy() 280 switch errors.Cause(err) { 281 case nil: 282 case errCharmInUse: 283 // No cleanup necessary at this time. 284 return nil 285 default: 286 return errors.Annotate(err, "destroying charm") 287 } 288 289 if err := ch.Remove(); err != nil { 290 return errors.Trace(err) 291 } 292 return nil 293 } 294 295 // cleanupDyingUnit marks resources owned by the unit as dying, to ensure 296 // they are cleaned up as well. 297 func (st *State) cleanupDyingUnit(name string) error { 298 unit, err := st.Unit(name) 299 if errors.IsNotFound(err) { 300 return nil 301 } else if err != nil { 302 return err 303 } 304 // Mark the unit as departing from its joined relations, allowing 305 // related units to start converging to a state in which that unit 306 // is gone as quickly as possible. 307 relations, err := unit.RelationsJoined() 308 if err != nil { 309 return err 310 } 311 for _, relation := range relations { 312 relationUnit, err := relation.Unit(unit) 313 if errors.IsNotFound(err) { 314 continue 315 } else if err != nil { 316 return err 317 } 318 if err := relationUnit.PrepareLeaveScope(); err != nil { 319 return err 320 } 321 } 322 // Mark storage attachments as dying, so that they are detached 323 // and removed from state, allowing the unit to terminate. 324 return st.cleanupUnitStorageAttachments(unit.UnitTag(), false) 325 } 326 327 func (st *State) cleanupUnitStorageAttachments(unitTag names.UnitTag, remove bool) error { 328 storageAttachments, err := st.UnitStorageAttachments(unitTag) 329 if err != nil { 330 return err 331 } 332 for _, storageAttachment := range storageAttachments { 333 storageTag := storageAttachment.StorageInstance() 334 err := st.DestroyStorageAttachment(storageTag, unitTag) 335 if errors.IsNotFound(err) { 336 continue 337 } else if err != nil { 338 return err 339 } 340 if !remove { 341 continue 342 } 343 err = st.RemoveStorageAttachment(storageTag, unitTag) 344 if errors.IsNotFound(err) { 345 continue 346 } else if err != nil { 347 return err 348 } 349 } 350 return nil 351 } 352 353 // cleanupRemovedUnit takes care of all the final cleanup required when 354 // a unit is removed. 355 func (st *State) cleanupRemovedUnit(unitId string) error { 356 actions, err := st.matchingActionsByReceiverId(unitId) 357 if err != nil { 358 return errors.Trace(err) 359 } 360 cancelled := ActionResults{ 361 Status: ActionCancelled, 362 Message: "unit removed", 363 } 364 for _, action := range actions { 365 if _, err = action.Finish(cancelled); err != nil { 366 return errors.Trace(err) 367 } 368 } 369 370 change := payloadCleanupChange{ 371 Unit: unitId, 372 } 373 if err := Apply(st.database, change); err != nil { 374 return errors.Trace(err) 375 } 376 return nil 377 } 378 379 // cleanupDyingMachine marks resources owned by the machine as dying, to ensure 380 // they are cleaned up as well. 381 func (st *State) cleanupDyingMachine(machineId string) error { 382 machine, err := st.Machine(machineId) 383 if errors.IsNotFound(err) { 384 return nil 385 } else if err != nil { 386 return err 387 } 388 return cleanupDyingMachineResources(machine) 389 } 390 391 // cleanupForceDestroyedMachine systematically destroys and removes all entities 392 // that depend upon the supplied machine, and removes the machine from state. It's 393 // expected to be used in response to destroy-machine --force. 394 func (st *State) cleanupForceDestroyedMachine(machineId string) error { 395 machine, err := st.Machine(machineId) 396 if errors.IsNotFound(err) { 397 return nil 398 } else if err != nil { 399 return err 400 } 401 // In an ideal world, we'd call machine.Destroy() here, and thus prevent 402 // new dependencies being added while we clean up the ones we know about. 403 // But machine destruction is unsophisticated, and doesn't allow for 404 // destruction while dependencies exist; so we just have to deal with that 405 // possibility below. 406 if err := st.cleanupContainers(machine); err != nil { 407 return err 408 } 409 for _, unitName := range machine.doc.Principals { 410 if err := st.obliterateUnit(unitName); err != nil { 411 return err 412 } 413 } 414 if err := cleanupDyingMachineResources(machine); err != nil { 415 return err 416 } 417 // We need to refresh the machine at this point, because the local copy 418 // of the document will not reflect changes caused by the unit cleanups 419 // above, and may thus fail immediately. 420 if err := machine.Refresh(); errors.IsNotFound(err) { 421 return nil 422 } else if err != nil { 423 return err 424 } 425 // TODO(fwereade): 2013-11-11 bug 1250104 426 // If this fails, it's *probably* due to a race in which new dependencies 427 // were added while we cleaned up the old ones. If the cleanup doesn't run 428 // again -- which it *probably* will anyway -- the issue can be resolved by 429 // force-destroying the machine again; that's better than adding layer 430 // upon layer of complication here. 431 if err := machine.EnsureDead(); err != nil { 432 return err 433 } 434 removePortsOps, err := machine.removePortsOps() 435 if err != nil { 436 return err 437 } 438 return st.runTransaction(removePortsOps) 439 440 // Note that we do *not* remove the machine entirely: we leave it for the 441 // provisioner to clean up, so that we don't end up with an unreferenced 442 // instance that would otherwise be ignored when in provisioner-safe-mode. 443 } 444 445 // cleanupContainers recursively calls cleanupForceDestroyedMachine on the supplied 446 // machine's containers, and removes them from state entirely. 447 func (st *State) cleanupContainers(machine *Machine) error { 448 containerIds, err := machine.Containers() 449 if errors.IsNotFound(err) { 450 return nil 451 } else if err != nil { 452 return err 453 } 454 for _, containerId := range containerIds { 455 if err := st.cleanupForceDestroyedMachine(containerId); err != nil { 456 return err 457 } 458 container, err := st.Machine(containerId) 459 if errors.IsNotFound(err) { 460 return nil 461 } else if err != nil { 462 return err 463 } 464 if err := container.Remove(); err != nil { 465 return err 466 } 467 } 468 return nil 469 } 470 471 func cleanupDyingMachineResources(m *Machine) error { 472 volumeAttachments, err := m.st.MachineVolumeAttachments(m.MachineTag()) 473 if err != nil { 474 return errors.Annotate(err, "getting machine volume attachments") 475 } 476 for _, va := range volumeAttachments { 477 if err := m.st.DetachVolume(va.Machine(), va.Volume()); err != nil { 478 if IsContainsFilesystem(err) { 479 // The volume will be destroyed when the 480 // contained filesystem is removed, whose 481 // destruction is initiated below. 482 continue 483 } 484 return errors.Trace(err) 485 } 486 } 487 filesystemAttachments, err := m.st.MachineFilesystemAttachments(m.MachineTag()) 488 if err != nil { 489 return errors.Annotate(err, "getting machine filesystem attachments") 490 } 491 for _, fsa := range filesystemAttachments { 492 if err := m.st.DetachFilesystem(fsa.Machine(), fsa.Filesystem()); err != nil { 493 return errors.Trace(err) 494 } 495 } 496 return nil 497 } 498 499 // obliterateUnit removes a unit from state completely. It is not safe or 500 // sane to obliterate any unit in isolation; its only reasonable use is in 501 // the context of machine obliteration, in which we can be sure that unclean 502 // shutdown of units is not going to leave a machine in a difficult state. 503 func (st *State) obliterateUnit(unitName string) error { 504 unit, err := st.Unit(unitName) 505 if errors.IsNotFound(err) { 506 return nil 507 } else if err != nil { 508 return err 509 } 510 // Unlike the machine, we *can* always destroy the unit, and (at least) 511 // prevent further dependencies being added. If we're really lucky, the 512 // unit will be removed immediately. 513 if err := unit.Destroy(); err != nil { 514 return errors.Annotatef(err, "cannot destroy unit %q", unitName) 515 } 516 if err := unit.Refresh(); errors.IsNotFound(err) { 517 return nil 518 } else if err != nil { 519 return err 520 } 521 // Destroy and remove all storage attachments for the unit. 522 if err := st.cleanupUnitStorageAttachments(unit.UnitTag(), true); err != nil { 523 return errors.Annotatef(err, "cannot destroy storage for unit %q", unitName) 524 } 525 for _, subName := range unit.SubordinateNames() { 526 if err := st.obliterateUnit(subName); err != nil { 527 return err 528 } 529 } 530 if err := unit.EnsureDead(); err != nil { 531 return err 532 } 533 return unit.Remove() 534 } 535 536 // cleanupAttachmentsForDyingStorage sets all storage attachments related 537 // to the specified storage instance to Dying, if they are not already Dying 538 // or Dead. It's expected to be used when a storage instance is destroyed. 539 func (st *State) cleanupAttachmentsForDyingStorage(storageId string) (err error) { 540 storageTag := names.NewStorageTag(storageId) 541 542 // This won't miss attachments, because a Dying storage instance cannot 543 // have attachments added to it. But we do have to remove the attachments 544 // themselves via individual transactions, because they could be in 545 // any state at all. 546 coll, closer := st.getCollection(storageAttachmentsC) 547 defer closer() 548 549 var doc storageAttachmentDoc 550 fields := bson.D{{"unitid", 1}} 551 iter := coll.Find(bson.D{{"storageid", storageId}}).Select(fields).Iter() 552 defer closeIter(iter, &err, "reading storage attachment document") 553 for iter.Next(&doc) { 554 unitTag := names.NewUnitTag(doc.Unit) 555 if err := st.DestroyStorageAttachment(storageTag, unitTag); err != nil { 556 return errors.Annotate(err, "destroying storage attachment") 557 } 558 } 559 return nil 560 } 561 562 // cleanupAttachmentsForDyingVolume sets all volume attachments related 563 // to the specified volume to Dying, if they are not already Dying or 564 // Dead. It's expected to be used when a volume is destroyed. 565 func (st *State) cleanupAttachmentsForDyingVolume(volumeId string) (err error) { 566 volumeTag := names.NewVolumeTag(volumeId) 567 568 // This won't miss attachments, because a Dying volume cannot have 569 // attachments added to it. But we do have to remove the attachments 570 // themselves via individual transactions, because they could be in 571 // any state at all. 572 coll, closer := st.getCollection(volumeAttachmentsC) 573 defer closer() 574 575 var doc volumeAttachmentDoc 576 fields := bson.D{{"machineid", 1}} 577 iter := coll.Find(bson.D{{"volumeid", volumeId}}).Select(fields).Iter() 578 defer closeIter(iter, &err, "reading volume attachment document") 579 for iter.Next(&doc) { 580 machineTag := names.NewMachineTag(doc.Machine) 581 if err := st.DetachVolume(machineTag, volumeTag); err != nil { 582 return errors.Annotate(err, "destroying volume attachment") 583 } 584 } 585 return nil 586 } 587 588 // cleanupAttachmentsForDyingFilesystem sets all filesystem attachments related 589 // to the specified filesystem to Dying, if they are not already Dying or 590 // Dead. It's expected to be used when a filesystem is destroyed. 591 func (st *State) cleanupAttachmentsForDyingFilesystem(filesystemId string) (err error) { 592 filesystemTag := names.NewFilesystemTag(filesystemId) 593 594 // This won't miss attachments, because a Dying filesystem cannot have 595 // attachments added to it. But we do have to remove the attachments 596 // themselves via individual transactions, because they could be in 597 // any state at all. 598 coll, closer := st.getCollection(filesystemAttachmentsC) 599 defer closer() 600 601 var doc filesystemAttachmentDoc 602 fields := bson.D{{"machineid", 1}} 603 iter := coll.Find(bson.D{{"filesystemid", filesystemId}}).Select(fields).Iter() 604 defer closeIter(iter, &err, "reading filesystem attachment document") 605 for iter.Next(&doc) { 606 machineTag := names.NewMachineTag(doc.Machine) 607 if err := st.DetachFilesystem(machineTag, filesystemTag); err != nil { 608 return errors.Annotate(err, "destroying filesystem attachment") 609 } 610 } 611 return nil 612 } 613 614 func closeIter(iter *mgo.Iter, errOut *error, message string) { 615 err := iter.Close() 616 if err == nil { 617 return 618 } 619 err = errors.Annotate(err, message) 620 if *errOut == nil { 621 *errOut = err 622 return 623 } 624 logger.Errorf("%v", err) 625 }