github.com/cloudbase/juju-core@v0.0.0-20140504232958-a7271ac7912f/state/cleanup.go (about) 1 package state 2 3 import ( 4 "fmt" 5 6 "labix.org/v2/mgo/bson" 7 "labix.org/v2/mgo/txn" 8 9 "launchpad.net/juju-core/errors" 10 ) 11 12 // cleanupDoc represents a potentially large set of documents that should be 13 // removed. 14 type cleanupDoc struct { 15 Id bson.ObjectId `bson:"_id"` 16 Kind string 17 Prefix string 18 } 19 20 // newCleanupOp returns a txn.Op that creates a cleanup document with a unique 21 // id and the supplied kind and prefix. 22 func (st *State) newCleanupOp(kind, prefix string) txn.Op { 23 doc := &cleanupDoc{ 24 Id: bson.NewObjectId(), 25 Kind: kind, 26 Prefix: prefix, 27 } 28 return txn.Op{ 29 C: st.cleanups.Name, 30 Id: doc.Id, 31 Insert: doc, 32 } 33 } 34 35 // NeedsCleanup returns true if documents previously marked for removal exist. 36 func (st *State) NeedsCleanup() (bool, error) { 37 count, err := st.cleanups.Count() 38 if err != nil { 39 return false, err 40 } 41 return count > 0, nil 42 } 43 44 // Cleanup removes all documents that were previously marked for removal, if 45 // any such exist. It should be called periodically by at least one element 46 // of the system. 47 func (st *State) Cleanup() error { 48 doc := cleanupDoc{} 49 iter := st.cleanups.Find(nil).Iter() 50 for iter.Next(&doc) { 51 var err error 52 logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix) 53 switch doc.Kind { 54 case "settings": 55 err = st.cleanupSettings(doc.Prefix) 56 case "units": 57 err = st.cleanupUnits(doc.Prefix) 58 case "services": 59 err = st.cleanupServices() 60 case "machine": 61 err = st.cleanupMachine(doc.Prefix) 62 default: 63 err = fmt.Errorf("unknown cleanup kind %q", doc.Kind) 64 } 65 if err != nil { 66 logger.Warningf("cleanup failed: %v", err) 67 continue 68 } 69 ops := []txn.Op{{ 70 C: st.cleanups.Name, 71 Id: doc.Id, 72 Remove: true, 73 }} 74 if err := st.runTransaction(ops); err != nil { 75 logger.Warningf("cannot remove empty cleanup document: %v", err) 76 } 77 } 78 if err := iter.Err(); err != nil { 79 return fmt.Errorf("cannot read cleanup document: %v", err) 80 } 81 return nil 82 } 83 84 func (st *State) cleanupSettings(prefix string) error { 85 // Documents marked for cleanup are not otherwise referenced in the 86 // system, and will not be under watch, and are therefore safe to 87 // delete directly. 88 sel := D{{"_id", D{{"$regex", "^" + prefix}}}} 89 if count, err := st.settings.Find(sel).Count(); err != nil { 90 return fmt.Errorf("cannot detect cleanup targets: %v", err) 91 } else if count != 0 { 92 if _, err := st.settings.RemoveAll(sel); err != nil { 93 return fmt.Errorf("cannot remove documents marked for cleanup: %v", err) 94 } 95 } 96 return nil 97 } 98 99 // cleanupServices sets all services to Dying, if they are not already Dying 100 // or Dead. It's expected to be used when an environment is destroyed. 101 func (st *State) cleanupServices() error { 102 // This won't miss services, because a Dying environment cannot have 103 // services added to it. But we do have to remove the services themselves 104 // via individual transactions, because they could be in any state at all. 105 service := &Service{st: st} 106 sel := D{{"life", Alive}} 107 iter := st.services.Find(sel).Iter() 108 for iter.Next(&service.doc) { 109 if err := service.Destroy(); err != nil { 110 return err 111 } 112 } 113 if err := iter.Err(); err != nil { 114 return fmt.Errorf("cannot read service document: %v", err) 115 } 116 return nil 117 } 118 119 // cleanupUnits sets all units with the given prefix to Dying, if they are not 120 // already Dying or Dead. It's expected to be used when a service is destroyed. 121 func (st *State) cleanupUnits(prefix string) error { 122 // This won't miss units, because a Dying service cannot have units added 123 // to it. But we do have to remove the units themselves via individual 124 // transactions, because they could be in any state at all. 125 unit := &Unit{st: st} 126 sel := D{{"_id", D{{"$regex", "^" + prefix}}}, {"life", Alive}} 127 iter := st.units.Find(sel).Iter() 128 for iter.Next(&unit.doc) { 129 if err := unit.Destroy(); err != nil { 130 return err 131 } 132 } 133 if err := iter.Err(); err != nil { 134 return fmt.Errorf("cannot read unit document: %v", err) 135 } 136 return nil 137 } 138 139 // cleanupMachine systematically destroys and removes all entities that 140 // depend upon the supplied machine, and removes the machine from state. It's 141 // expected to be used in response to destroy-machine --force. 142 func (st *State) cleanupMachine(machineId string) error { 143 machine, err := st.Machine(machineId) 144 if errors.IsNotFoundError(err) { 145 return nil 146 } else if err != nil { 147 return err 148 } 149 // In an ideal world, we'd call machine.Destroy() here, and thus prevent 150 // new dependencies being added while we clean up the ones we know about. 151 // But machine destruction is unsophisticated, and doesn't allow for 152 // destruction while dependencies exist; so we just have to deal with that 153 // possibility below. 154 if err := st.cleanupContainers(machine); err != nil { 155 return err 156 } 157 for _, unitName := range machine.doc.Principals { 158 if err := st.obliterateUnit(unitName); err != nil { 159 return err 160 } 161 } 162 // We need to refresh the machine at this point, because the local copy 163 // of the document will not reflect changes caused by the unit cleanups 164 // above, and may thus fail immediately. 165 if err := machine.Refresh(); errors.IsNotFoundError(err) { 166 return nil 167 } else if err != nil { 168 return err 169 } 170 // TODO(fwereade): 2013-11-11 bug 1250104 171 // If this fails, it's *probably* due to a race in which new dependencies 172 // were added while we cleaned up the old ones. If the cleanup doesn't run 173 // again -- which it *probably* will anyway -- the issue can be resolved by 174 // force-destroying the machine again; that's better than adding layer 175 // upon layer of complication here. 176 return machine.EnsureDead() 177 178 // Note that we do *not* remove the machine entirely: we leave it for the 179 // provisioner to clean up, so that we don't end up with an unreferenced 180 // instance that would otherwise be ignored when in provisioner-safe-mode. 181 } 182 183 // cleanupContainers recursively calls cleanupMachine on the supplied 184 // machine's containers, and removes them from state entirely. 185 func (st *State) cleanupContainers(machine *Machine) error { 186 containerIds, err := machine.Containers() 187 if errors.IsNotFoundError(err) { 188 return nil 189 } else if err != nil { 190 return err 191 } 192 for _, containerId := range containerIds { 193 if err := st.cleanupMachine(containerId); err != nil { 194 return err 195 } 196 container, err := st.Machine(containerId) 197 if errors.IsNotFoundError(err) { 198 return nil 199 } else if err != nil { 200 return err 201 } 202 if err := container.Remove(); err != nil { 203 return err 204 } 205 } 206 return nil 207 } 208 209 // obliterateUnit removes a unit from state completely. It is not safe or 210 // sane to obliterate any unit in isolation; its only reasonable use is in 211 // the context of machine obliteration, in which we can be sure that unclean 212 // shutdown of units is not going to leave a machine in a difficult state. 213 func (st *State) obliterateUnit(unitName string) error { 214 unit, err := st.Unit(unitName) 215 if errors.IsNotFoundError(err) { 216 return nil 217 } else if err != nil { 218 return err 219 } 220 // Unlike the machine, we *can* always destroy the unit, and (at least) 221 // prevent further dependencies being added. If we're really lucky, the 222 // unit will be removed immediately. 223 if err := unit.Destroy(); err != nil { 224 return err 225 } 226 if err := unit.Refresh(); errors.IsNotFoundError(err) { 227 return nil 228 } else if err != nil { 229 return err 230 } 231 for _, subName := range unit.SubordinateNames() { 232 if err := st.obliterateUnit(subName); err != nil { 233 return err 234 } 235 } 236 if err := unit.EnsureDead(); err != nil { 237 return err 238 } 239 return unit.Remove() 240 }