github.com/Cloud-Foundations/Dominator@v0.3.4/sub/rpcd/disruption.go (about)

     1  package rpcd
     2  
     3  import (
     4  	"fmt"
     5  	"os/exec"
     6  	"strings"
     7  	"time"
     8  
     9  	proto "github.com/Cloud-Foundations/Dominator/proto/sub"
    10  )
    11  
    12  const (
    13  	intervalCheckChangeToDisrupt    = time.Second
    14  	intervalCheckChangeToNonDisrupt = 5 * time.Second
    15  	intervalCheckDisrupt            = 15 * time.Second
    16  	intervalCheckNonDisrupt         = 5 * time.Minute
    17  	intervalCheckStartup            = 10 * time.Second
    18  	intervalCancelWhenPermitted     = 31 * time.Minute
    19  	intervalCancelWhenRequested     = 15 * time.Minute
    20  	intervalRequestWhenDenied       = time.Minute
    21  	intervalRequestWhenRequested    = 15 * time.Minute
    22  	intervalResendMinimum           = time.Second
    23  	intervalResendSameMutation      = time.Minute
    24  )
    25  
    26  type runInfoType struct {
    27  	command string
    28  	state   proto.DisruptionState
    29  }
    30  
    31  type runResultType struct {
    32  	command string
    33  	err     error
    34  	state   proto.DisruptionState
    35  }
    36  
    37  func clearTimer(timer *time.Timer) {
    38  	timer.Stop()
    39  	select {
    40  	case <-timer.C:
    41  	default:
    42  	}
    43  }
    44  
    45  func resetTimer(timer *time.Timer, duration time.Duration) {
    46  	clearTimer(timer)
    47  	timer.Reset(duration)
    48  }
    49  
    50  // This must be called with the lock held.
    51  func (t *rpcType) disruptionCancel() {
    52  	if t.config.DisruptionManager == "" {
    53  		return
    54  	}
    55  	t.disruptionManagerControl <- false
    56  }
    57  
    58  // This will grab the lock.
    59  func (t *rpcType) disruptionRequest() proto.DisruptionState {
    60  	if t.config.DisruptionManager == "" {
    61  		return proto.DisruptionStateAnytime
    62  	}
    63  	t.rwLock.RLock()
    64  	disruptionState := t.disruptionState
    65  	t.rwLock.RUnlock()
    66  	t.disruptionManagerControl <- true
    67  	return disruptionState
    68  }
    69  
    70  func (t *rpcType) runDisruptionManager(command string) (
    71  	proto.DisruptionState, error) {
    72  	switch command {
    73  	case disruptionManagerCancel, disruptionManagerRequest:
    74  		t.params.Logger.Printf("Running: %s %s\n",
    75  			t.config.DisruptionManager, command)
    76  	default:
    77  		t.params.Logger.Debugf(0, "Running: %s %s\n",
    78  			t.config.DisruptionManager, command)
    79  	}
    80  	_output, err := exec.Command(t.config.DisruptionManager,
    81  		command).CombinedOutput()
    82  	if err == nil {
    83  		return proto.DisruptionStatePermitted, nil
    84  	}
    85  	output := strings.TrimSpace(string(_output))
    86  	e, ok := err.(*exec.ExitError)
    87  	if !ok {
    88  		if len(output) > 0 {
    89  			return 0, fmt.Errorf("%s: %s", err, output)
    90  		} else {
    91  			return 0, fmt.Errorf("%s", err)
    92  		}
    93  	}
    94  	switch e.ExitCode() {
    95  	case 0:
    96  		return proto.DisruptionStatePermitted, nil
    97  	case 1:
    98  		return proto.DisruptionStateRequested, nil
    99  	case 2:
   100  		return proto.DisruptionStateDenied, nil
   101  	default:
   102  		if len(output) > 0 {
   103  			return 0,
   104  				fmt.Errorf("invalid exit code: %d: %s", e.ExitCode(), output)
   105  		} else {
   106  			return 0, fmt.Errorf("invalid exit code: %d", e.ExitCode())
   107  		}
   108  	}
   109  }
   110  
   111  func (t *rpcType) startDisruptionManager() {
   112  	if t.config.DisruptionManager == "" {
   113  		return
   114  	}
   115  	commandChannel := make(chan string, 1)
   116  	controlChannel := make(chan bool, 1)
   117  	resultChannel := make(chan runInfoType, 1)
   118  	t.disruptionManagerControl = controlChannel
   119  	go t.disruptionManagerLoop(controlChannel, commandChannel, resultChannel)
   120  	go t.disruptionManagerQueue(commandChannel, resultChannel)
   121  }
   122  
   123  func (t *rpcType) disruptionManagerLoop(controlChannel <-chan bool,
   124  	commandChannel chan<- string, resultChannel <-chan runInfoType) {
   125  	checkInterval := intervalCheckStartup
   126  	checkTimer := time.NewTimer(0)
   127  	var currentState proto.DisruptionState
   128  	initialCancelTimer := time.NewTimer(intervalCancelWhenPermitted)
   129  	var lastCommandTime time.Time
   130  	var allowCancels, wantToDisrupt bool
   131  	for {
   132  		var resetCheckInterval bool
   133  		select {
   134  		case newWantToDisrupt := <-controlChannel:
   135  			allowCancels = true
   136  			clearTimer(initialCancelTimer)
   137  			if newWantToDisrupt != wantToDisrupt {
   138  				lastCommandTime = time.Time{}
   139  				resetCheckInterval = true
   140  			}
   141  			wantToDisrupt = newWantToDisrupt
   142  		case <-checkTimer.C:
   143  			checkInterval += checkInterval >> 1
   144  			if wantToDisrupt {
   145  				if checkInterval > intervalCheckDisrupt {
   146  					checkInterval = intervalCheckDisrupt
   147  				}
   148  			} else {
   149  				if checkInterval > intervalCheckNonDisrupt {
   150  					checkInterval = intervalCheckNonDisrupt
   151  				}
   152  			}
   153  			commandChannel <- disruptionManagerCheck
   154  			checkTimer.Reset(checkInterval)
   155  		case <-initialCancelTimer.C:
   156  			if !allowCancels {
   157  				allowCancels = true
   158  				lastCommandTime = time.Time{}
   159  				resetCheckInterval = true
   160  			}
   161  		case result := <-resultChannel:
   162  			if result.state != currentState {
   163  				t.rwLock.Lock()
   164  				t.disruptionState = result.state
   165  				t.rwLock.Unlock()
   166  				t.params.Logger.Printf(
   167  					"Ran DisruptionManager(%s): %s->%s\n",
   168  					result.command, currentState, result.state)
   169  				currentState = result.state
   170  				lastCommandTime = time.Time{}
   171  				resetCheckInterval = true
   172  			} else {
   173  				t.params.Logger.Debugf(0, "Ran DisruptionManager(%s): %s\n",
   174  					result.command, result.state)
   175  			}
   176  		}
   177  		if wantToDisrupt {
   178  			switch currentState {
   179  			case proto.DisruptionStateRequested:
   180  				if time.Since(lastCommandTime) > intervalRequestWhenRequested {
   181  					commandChannel <- disruptionManagerRequest
   182  					lastCommandTime = time.Now()
   183  				}
   184  			case proto.DisruptionStateDenied:
   185  				if time.Since(lastCommandTime) > intervalRequestWhenDenied {
   186  					commandChannel <- disruptionManagerRequest
   187  					lastCommandTime = time.Now()
   188  				}
   189  			}
   190  			if resetCheckInterval {
   191  				checkInterval = intervalCheckChangeToDisrupt
   192  				resetTimer(checkTimer, checkInterval)
   193  			}
   194  		} else if allowCancels {
   195  			switch currentState {
   196  			case proto.DisruptionStatePermitted:
   197  				if time.Since(lastCommandTime) > intervalCancelWhenPermitted {
   198  					commandChannel <- disruptionManagerCancel
   199  					lastCommandTime = time.Now()
   200  				}
   201  			case proto.DisruptionStateRequested:
   202  				if time.Since(lastCommandTime) > intervalCancelWhenRequested {
   203  					commandChannel <- disruptionManagerCancel
   204  					lastCommandTime = time.Now()
   205  				}
   206  			}
   207  			if resetCheckInterval {
   208  				checkInterval = intervalCheckChangeToNonDisrupt
   209  				resetTimer(checkTimer, checkInterval)
   210  			}
   211  		}
   212  	}
   213  }
   214  
   215  func (t *rpcType) disruptionManagerQueue(commandChannel <-chan string,
   216  	resultChannel chan<- runInfoType) {
   217  	commandIsRunning := false
   218  	delayTimer := time.NewTimer(0)
   219  	var lastCommandTime, lastMutatingCommandTime time.Time
   220  	var lastMutatingCommand, nextCommand string
   221  	runResultChannel := make(chan runResultType, 1)
   222  	for {
   223  		select {
   224  		case <-delayTimer.C:
   225  			if !commandIsRunning && nextCommand != "" {
   226  				commandIsRunning = true
   227  				go func(command string) {
   228  					state, err := t.runDisruptionManager(command)
   229  					runResultChannel <- runResultType{command, err, state}
   230  				}(nextCommand)
   231  				nextCommand = ""
   232  			}
   233  		case command := <-commandChannel:
   234  			if command != disruptionManagerCheck &&
   235  				command == lastMutatingCommand &&
   236  				time.Since(lastMutatingCommandTime) <
   237  					intervalResendSameMutation {
   238  				continue
   239  			}
   240  			resetTimer(delayTimer,
   241  				intervalResendMinimum-time.Since(lastCommandTime))
   242  			if command != disruptionManagerCheck || nextCommand == "" {
   243  				nextCommand = command
   244  			}
   245  		case runResult := <-runResultChannel:
   246  			commandIsRunning = false
   247  			lastCommandTime = time.Now()
   248  			if runResult.err != nil {
   249  				if runResult.command != disruptionManagerCheck &&
   250  					nextCommand == "" {
   251  					nextCommand = runResult.command
   252  					resetTimer(delayTimer, time.Minute)
   253  				}
   254  				t.params.Logger.Printf("Error running DisruptionManager: %s\n",
   255  					runResult.err)
   256  			} else {
   257  				if runResult.command != disruptionManagerCheck {
   258  					lastMutatingCommand = runResult.command
   259  					lastMutatingCommandTime = lastCommandTime
   260  				}
   261  				resultChannel <- runInfoType{runResult.command, runResult.state}
   262  			}
   263  		}
   264  	}
   265  }