github.com/Cloud-Foundations/Dominator@v0.3.4/sub/rpcd/disruption.go (about) 1 package rpcd 2 3 import ( 4 "fmt" 5 "os/exec" 6 "strings" 7 "time" 8 9 proto "github.com/Cloud-Foundations/Dominator/proto/sub" 10 ) 11 12 const ( 13 intervalCheckChangeToDisrupt = time.Second 14 intervalCheckChangeToNonDisrupt = 5 * time.Second 15 intervalCheckDisrupt = 15 * time.Second 16 intervalCheckNonDisrupt = 5 * time.Minute 17 intervalCheckStartup = 10 * time.Second 18 intervalCancelWhenPermitted = 31 * time.Minute 19 intervalCancelWhenRequested = 15 * time.Minute 20 intervalRequestWhenDenied = time.Minute 21 intervalRequestWhenRequested = 15 * time.Minute 22 intervalResendMinimum = time.Second 23 intervalResendSameMutation = time.Minute 24 ) 25 26 type runInfoType struct { 27 command string 28 state proto.DisruptionState 29 } 30 31 type runResultType struct { 32 command string 33 err error 34 state proto.DisruptionState 35 } 36 37 func clearTimer(timer *time.Timer) { 38 timer.Stop() 39 select { 40 case <-timer.C: 41 default: 42 } 43 } 44 45 func resetTimer(timer *time.Timer, duration time.Duration) { 46 clearTimer(timer) 47 timer.Reset(duration) 48 } 49 50 // This must be called with the lock held. 51 func (t *rpcType) disruptionCancel() { 52 if t.config.DisruptionManager == "" { 53 return 54 } 55 t.disruptionManagerControl <- false 56 } 57 58 // This will grab the lock. 59 func (t *rpcType) disruptionRequest() proto.DisruptionState { 60 if t.config.DisruptionManager == "" { 61 return proto.DisruptionStateAnytime 62 } 63 t.rwLock.RLock() 64 disruptionState := t.disruptionState 65 t.rwLock.RUnlock() 66 t.disruptionManagerControl <- true 67 return disruptionState 68 } 69 70 func (t *rpcType) runDisruptionManager(command string) ( 71 proto.DisruptionState, error) { 72 switch command { 73 case disruptionManagerCancel, disruptionManagerRequest: 74 t.params.Logger.Printf("Running: %s %s\n", 75 t.config.DisruptionManager, command) 76 default: 77 t.params.Logger.Debugf(0, "Running: %s %s\n", 78 t.config.DisruptionManager, command) 79 } 80 _output, err := exec.Command(t.config.DisruptionManager, 81 command).CombinedOutput() 82 if err == nil { 83 return proto.DisruptionStatePermitted, nil 84 } 85 output := strings.TrimSpace(string(_output)) 86 e, ok := err.(*exec.ExitError) 87 if !ok { 88 if len(output) > 0 { 89 return 0, fmt.Errorf("%s: %s", err, output) 90 } else { 91 return 0, fmt.Errorf("%s", err) 92 } 93 } 94 switch e.ExitCode() { 95 case 0: 96 return proto.DisruptionStatePermitted, nil 97 case 1: 98 return proto.DisruptionStateRequested, nil 99 case 2: 100 return proto.DisruptionStateDenied, nil 101 default: 102 if len(output) > 0 { 103 return 0, 104 fmt.Errorf("invalid exit code: %d: %s", e.ExitCode(), output) 105 } else { 106 return 0, fmt.Errorf("invalid exit code: %d", e.ExitCode()) 107 } 108 } 109 } 110 111 func (t *rpcType) startDisruptionManager() { 112 if t.config.DisruptionManager == "" { 113 return 114 } 115 commandChannel := make(chan string, 1) 116 controlChannel := make(chan bool, 1) 117 resultChannel := make(chan runInfoType, 1) 118 t.disruptionManagerControl = controlChannel 119 go t.disruptionManagerLoop(controlChannel, commandChannel, resultChannel) 120 go t.disruptionManagerQueue(commandChannel, resultChannel) 121 } 122 123 func (t *rpcType) disruptionManagerLoop(controlChannel <-chan bool, 124 commandChannel chan<- string, resultChannel <-chan runInfoType) { 125 checkInterval := intervalCheckStartup 126 checkTimer := time.NewTimer(0) 127 var currentState proto.DisruptionState 128 initialCancelTimer := time.NewTimer(intervalCancelWhenPermitted) 129 var lastCommandTime time.Time 130 var allowCancels, wantToDisrupt bool 131 for { 132 var resetCheckInterval bool 133 select { 134 case newWantToDisrupt := <-controlChannel: 135 allowCancels = true 136 clearTimer(initialCancelTimer) 137 if newWantToDisrupt != wantToDisrupt { 138 lastCommandTime = time.Time{} 139 resetCheckInterval = true 140 } 141 wantToDisrupt = newWantToDisrupt 142 case <-checkTimer.C: 143 checkInterval += checkInterval >> 1 144 if wantToDisrupt { 145 if checkInterval > intervalCheckDisrupt { 146 checkInterval = intervalCheckDisrupt 147 } 148 } else { 149 if checkInterval > intervalCheckNonDisrupt { 150 checkInterval = intervalCheckNonDisrupt 151 } 152 } 153 commandChannel <- disruptionManagerCheck 154 checkTimer.Reset(checkInterval) 155 case <-initialCancelTimer.C: 156 if !allowCancels { 157 allowCancels = true 158 lastCommandTime = time.Time{} 159 resetCheckInterval = true 160 } 161 case result := <-resultChannel: 162 if result.state != currentState { 163 t.rwLock.Lock() 164 t.disruptionState = result.state 165 t.rwLock.Unlock() 166 t.params.Logger.Printf( 167 "Ran DisruptionManager(%s): %s->%s\n", 168 result.command, currentState, result.state) 169 currentState = result.state 170 lastCommandTime = time.Time{} 171 resetCheckInterval = true 172 } else { 173 t.params.Logger.Debugf(0, "Ran DisruptionManager(%s): %s\n", 174 result.command, result.state) 175 } 176 } 177 if wantToDisrupt { 178 switch currentState { 179 case proto.DisruptionStateRequested: 180 if time.Since(lastCommandTime) > intervalRequestWhenRequested { 181 commandChannel <- disruptionManagerRequest 182 lastCommandTime = time.Now() 183 } 184 case proto.DisruptionStateDenied: 185 if time.Since(lastCommandTime) > intervalRequestWhenDenied { 186 commandChannel <- disruptionManagerRequest 187 lastCommandTime = time.Now() 188 } 189 } 190 if resetCheckInterval { 191 checkInterval = intervalCheckChangeToDisrupt 192 resetTimer(checkTimer, checkInterval) 193 } 194 } else if allowCancels { 195 switch currentState { 196 case proto.DisruptionStatePermitted: 197 if time.Since(lastCommandTime) > intervalCancelWhenPermitted { 198 commandChannel <- disruptionManagerCancel 199 lastCommandTime = time.Now() 200 } 201 case proto.DisruptionStateRequested: 202 if time.Since(lastCommandTime) > intervalCancelWhenRequested { 203 commandChannel <- disruptionManagerCancel 204 lastCommandTime = time.Now() 205 } 206 } 207 if resetCheckInterval { 208 checkInterval = intervalCheckChangeToNonDisrupt 209 resetTimer(checkTimer, checkInterval) 210 } 211 } 212 } 213 } 214 215 func (t *rpcType) disruptionManagerQueue(commandChannel <-chan string, 216 resultChannel chan<- runInfoType) { 217 commandIsRunning := false 218 delayTimer := time.NewTimer(0) 219 var lastCommandTime, lastMutatingCommandTime time.Time 220 var lastMutatingCommand, nextCommand string 221 runResultChannel := make(chan runResultType, 1) 222 for { 223 select { 224 case <-delayTimer.C: 225 if !commandIsRunning && nextCommand != "" { 226 commandIsRunning = true 227 go func(command string) { 228 state, err := t.runDisruptionManager(command) 229 runResultChannel <- runResultType{command, err, state} 230 }(nextCommand) 231 nextCommand = "" 232 } 233 case command := <-commandChannel: 234 if command != disruptionManagerCheck && 235 command == lastMutatingCommand && 236 time.Since(lastMutatingCommandTime) < 237 intervalResendSameMutation { 238 continue 239 } 240 resetTimer(delayTimer, 241 intervalResendMinimum-time.Since(lastCommandTime)) 242 if command != disruptionManagerCheck || nextCommand == "" { 243 nextCommand = command 244 } 245 case runResult := <-runResultChannel: 246 commandIsRunning = false 247 lastCommandTime = time.Now() 248 if runResult.err != nil { 249 if runResult.command != disruptionManagerCheck && 250 nextCommand == "" { 251 nextCommand = runResult.command 252 resetTimer(delayTimer, time.Minute) 253 } 254 t.params.Logger.Printf("Error running DisruptionManager: %s\n", 255 runResult.err) 256 } else { 257 if runResult.command != disruptionManagerCheck { 258 lastMutatingCommand = runResult.command 259 lastMutatingCommandTime = lastCommandTime 260 } 261 resultChannel <- runInfoType{runResult.command, runResult.state} 262 } 263 } 264 } 265 }