launchpad.net/~rogpeppe/juju-core/500-errgo-fix@v0.0.0-20140213181702-000000002356/state/cleanup.go (about) 1 package state 2 3 import ( 4 "labix.org/v2/mgo/bson" 5 "labix.org/v2/mgo/txn" 6 7 errgo "launchpad.net/errgo/errors" 8 "launchpad.net/juju-core/errors" 9 ) 10 11 // cleanupDoc represents a potentially large set of documents that should be 12 // removed. 13 type cleanupDoc struct { 14 Id bson.ObjectId `bson:"_id"` 15 Kind string 16 Prefix string 17 } 18 19 // newCleanupOp returns a txn.Op that creates a cleanup document with a unique 20 // id and the supplied kind and prefix. 21 func (st *State) newCleanupOp(kind, prefix string) txn.Op { 22 doc := &cleanupDoc{ 23 Id: bson.NewObjectId(), 24 Kind: kind, 25 Prefix: prefix, 26 } 27 return txn.Op{ 28 C: st.cleanups.Name, 29 Id: doc.Id, 30 Insert: doc, 31 } 32 } 33 34 // NeedsCleanup returns true if documents previously marked for removal exist. 35 func (st *State) NeedsCleanup() (bool, error) { 36 count, err := st.cleanups.Count() 37 if err != nil { 38 return false, mask(err) 39 } 40 return count > 0, nil 41 } 42 43 // Cleanup removes all documents that were previously marked for removal, if 44 // any such exist. It should be called periodically by at least one element 45 // of the system. 46 func (st *State) Cleanup() error { 47 doc := cleanupDoc{} 48 iter := st.cleanups.Find(nil).Iter() 49 for iter.Next(&doc) { 50 var err error 51 logger.Debugf("running %q cleanup: %q", doc.Kind, doc.Prefix) 52 switch doc.Kind { 53 case "settings": 54 err = st.cleanupSettings(doc.Prefix) 55 case "units": 56 err = st.cleanupUnits(doc.Prefix) 57 case "services": 58 err = st.cleanupServices() 59 case "machine": 60 err = st.cleanupMachine(doc.Prefix) 61 default: 62 err = errgo.Newf("unknown cleanup kind %q", doc.Kind) 63 } 64 if err != nil { 65 logger.Warningf("cleanup failed: %v", err) 66 continue 67 } 68 ops := []txn.Op{{ 69 C: st.cleanups.Name, 70 Id: doc.Id, 71 Remove: true, 72 }} 73 if err := st.runTransaction(ops); err != nil { 74 logger.Warningf("cannot remove empty cleanup document: %v", err) 75 } 76 } 77 if err := iter.Err(); err != nil { 78 return errgo.Notef(err, "cannot read cleanup document") 79 } 80 return nil 81 } 82 83 func (st *State) cleanupSettings(prefix string) error { 84 // Documents marked for cleanup are not otherwise referenced in the 85 // system, and will not be under watch, and are therefore safe to 86 // delete directly. 87 sel := D{{"_id", D{{"$regex", "^" + prefix}}}} 88 if count, err := st.settings.Find(sel).Count(); err != nil { 89 return errgo.Notef(err, "cannot detect cleanup targets") 90 } else if count != 0 { 91 if _, err := st.settings.RemoveAll(sel); err != nil { 92 return errgo.Notef(err, "cannot remove documents marked for cleanup") 93 } 94 } 95 return nil 96 } 97 98 // cleanupServices sets all services to Dying, if they are not already Dying 99 // or Dead. It's expected to be used when an environment is destroyed. 100 func (st *State) cleanupServices() error { 101 // This won't miss services, because a Dying environment cannot have 102 // services added to it. But we do have to remove the services themselves 103 // via individual transactions, because they could be in any state at all. 104 service := &Service{st: st} 105 sel := D{{"life", Alive}} 106 iter := st.services.Find(sel).Iter() 107 for iter.Next(&service.doc) { 108 if err := service.Destroy(); err != nil { 109 return mask(err) 110 } 111 } 112 if err := iter.Err(); err != nil { 113 return errgo.Notef(err, "cannot read service document") 114 } 115 return nil 116 } 117 118 // cleanupUnits sets all units with the given prefix to Dying, if they are not 119 // already Dying or Dead. It's expected to be used when a service is destroyed. 120 func (st *State) cleanupUnits(prefix string) error { 121 // This won't miss units, because a Dying service cannot have units added 122 // to it. But we do have to remove the units themselves via individual 123 // transactions, because they could be in any state at all. 124 unit := &Unit{st: st} 125 sel := D{{"_id", D{{"$regex", "^" + prefix}}}, {"life", Alive}} 126 iter := st.units.Find(sel).Iter() 127 for iter.Next(&unit.doc) { 128 if err := unit.Destroy(); err != nil { 129 return mask(err) 130 } 131 } 132 if err := iter.Err(); err != nil { 133 return errgo.Notef(err, "cannot read unit document") 134 } 135 return nil 136 } 137 138 // cleanupMachine systematically destroys and removes all entities that 139 // depend upon the supplied machine, and removes the machine from state. It's 140 // expected to be used in response to destroy-machine --force. 141 func (st *State) cleanupMachine(machineId string) error { 142 machine, err := st.Machine(machineId) 143 if errors.IsNotFoundError(err) { 144 return nil 145 } else if err != nil { 146 return mask(err) 147 } 148 149 // In an ideal world, we'd call machine.Destroy() here, and thus prevent 150 // new dependencies being added while we clean up the ones we know about. 151 // But machine destruction is unsophisticated, and doesn't allow for 152 // destruction while dependencies exist; so we just have to deal with that 153 // possibility below. 154 if err := st.cleanupContainers(machine); err != nil { 155 return mask(err) 156 } 157 for _, unitName := range machine.doc.Principals { 158 if err := st.obliterateUnit(unitName); err != nil { 159 return mask(err) 160 } 161 } 162 // We need to refresh the machine at this point, because the local copy 163 // of the document will not reflect changes caused by the unit cleanups 164 // above, and may thus fail immediately. 165 if err := machine.Refresh(); errors.IsNotFoundError(err) { 166 return nil 167 } else if err != nil { 168 return mask(err) 169 } 170 171 // TODO(fwereade): 2013-11-11 bug 1250104 172 // If this fails, it's *probably* due to a race in which new dependencies 173 // were added while we cleaned up the old ones. If the cleanup doesn't run 174 // again -- which it *probably* will anyway -- the issue can be resolved by 175 // force-destroying the machine again; that's better than adding layer 176 // upon layer of complication here. 177 return machine.EnsureDead() 178 179 // Note that we do *not* remove the machine entirely: we leave it for the 180 // provisioner to clean up, so that we don't end up with an unreferenced 181 // instance that would otherwise be ignored when in provisioner-safe-mode. 182 } 183 184 // cleanupContainers recursively calls cleanupMachine on the supplied 185 // machine's containers, and removes them from state entirely. 186 func (st *State) cleanupContainers(machine *Machine) error { 187 containerIds, err := machine.Containers() 188 if errors.IsNotFoundError(err) { 189 return nil 190 } else if err != nil { 191 return mask(err) 192 } 193 for _, containerId := range containerIds { 194 if err := st.cleanupMachine(containerId); err != nil { 195 return mask(err) 196 } 197 container, err := st.Machine(containerId) 198 if errors.IsNotFoundError(err) { 199 return nil 200 } else if err != nil { 201 return mask(err) 202 } 203 if err := container.Remove(); err != nil { 204 return mask(err) 205 } 206 } 207 return nil 208 } 209 210 // obliterateUnit removes a unit from state completely. It is not safe or 211 // sane to obliterate any unit in isolation; its only reasonable use is in 212 // the context of machine obliteration, in which we can be sure that unclean 213 // shutdown of units is not going to leave a machine in a difficult state. 214 func (st *State) obliterateUnit(unitName string) error { 215 unit, err := st.Unit(unitName) 216 if errors.IsNotFoundError(err) { 217 return nil 218 } else if err != nil { 219 return mask(err) 220 } 221 222 // Unlike the machine, we *can* always destroy the unit, and (at least) 223 // prevent further dependencies being added. If we're really lucky, the 224 // unit will be removed immediately. 225 if err := unit.Destroy(); err != nil { 226 return mask(err) 227 } 228 if err := unit.Refresh(); errors.IsNotFoundError(err) { 229 return nil 230 } else if err != nil { 231 return mask(err) 232 } 233 for _, subName := range unit.SubordinateNames() { 234 if err := st.obliterateUnit(subName); err != nil { 235 return mask(err) 236 } 237 } 238 if err := unit.EnsureDead(); err != nil { 239 return mask(err) 240 } 241 return unit.Remove() 242 }