yunion.io/x/cloudmux@v0.3.10-0-alpha.1/pkg/multicloud/azure/concurrent/worker.go (about)

     1  // Copyright 2019 Yunion
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package concurrent
    16  
    17  import "fmt"
    18  
    19  // Worker represents a type which can listen for work from a channel and run them
    20  //
    21  type Worker struct {
    22  	RequestsToHandleChan chan *Request  // The buffered channel of works this worker needs to handle
    23  	Pending              int            // The number of pending requests this worker needs to handle (i.e. worker load)
    24  	errorChan            chan<- error   // The channel to report failure in executing work
    25  	requestHandledChan   chan<- *Worker // The channel to report that a work is done (irrespective of success or failure)
    26  	workerFinishedChan   chan<- *Worker // The channel to signal that worker has finished (worker go-routine exited)
    27  	ID                   int            // Unique Id for worker (Debugging purpose)
    28  	Index                int            // The index of the item in the heap.
    29  	pool                 *Pool          // The parent pool holding all workers (used for work stealing)
    30  }
    31  
    32  // The maximum number of times a work needs to be retried before reporting failure on errorChan.
    33  //
    34  const maxRetryCount int = 5
    35  
    36  // NewWorker creates a new instance of the worker with the given work channel size.
    37  // errorChan is the channel to report the failure in addressing a work request after all
    38  // retries, each time a work is completed (failure or success) doneChan will be signalled
    39  //
    40  func NewWorker(id int, workChannelSize int, pool *Pool, errorChan chan<- error, requestHandledChan chan<- *Worker, workerFinishedChan chan<- *Worker) *Worker {
    41  	return &Worker{
    42  		ID:                   id,
    43  		RequestsToHandleChan: make(chan *Request, workChannelSize),
    44  		errorChan:            errorChan,
    45  		requestHandledChan:   requestHandledChan,
    46  		workerFinishedChan:   workerFinishedChan,
    47  		pool:                 pool,
    48  	}
    49  }
    50  
    51  // Run starts a go-routine that read work from work-queue associated with the worker and executes one
    52  // at a time. The go-routine returns/exit once one of the following condition is met:
    53  //   1. The work-queue is closed and drained and there is no work to steal from peers worker's work-queue
    54  //   2. A signal is received in the tearDownChan channel parameter
    55  //
    56  // After executing each work, this method sends report to Worker::requestHandledChan channel
    57  // If a work fails after maximum retry, this method sends report to Worker::errorChan channel
    58  //
    59  func (w *Worker) Run(tearDownChan <-chan bool) {
    60  	go func() {
    61  		defer func() {
    62  			// Signal balancer that worker is finished
    63  			w.workerFinishedChan <- w
    64  		}()
    65  
    66  		var requestToHandle *Request
    67  		var ok bool
    68  		for {
    69  			select {
    70  			case requestToHandle, ok = <-w.RequestsToHandleChan:
    71  				if !ok {
    72  					// Request channel is closed and drained, worker can try to steal work from others.
    73  					//
    74  					// Note: load balancer does not play any role in stealing, load balancer closes send-end
    75  					// of all worker queue's at the same time, at this point we are sure that no more new job
    76  					// will be scheduled. Once we start stealing "Worker::Pending" won't reflect correct load.
    77  					requestToHandle = w.tryStealWork()
    78  					if requestToHandle == nil {
    79  						// Could not steal then return
    80  						return
    81  					}
    82  				}
    83  			case <-tearDownChan:
    84  				// immediate stop, no need to drain the request channel
    85  				return
    86  			}
    87  
    88  			var err error
    89  			// Do work, retry on failure.
    90  		Loop:
    91  			for count := 0; count < maxRetryCount+1; count++ {
    92  				select {
    93  				case <-tearDownChan:
    94  					return
    95  				default:
    96  					err = requestToHandle.Work() // Run work
    97  					if err == nil || !requestToHandle.ShouldRetry(err) {
    98  						break Loop
    99  					}
   100  				}
   101  			}
   102  
   103  			if err != nil {
   104  				select {
   105  				case w.errorChan <- fmt.Errorf("%s: %v", requestToHandle.ID, err):
   106  				case <-tearDownChan:
   107  					return
   108  				}
   109  			}
   110  
   111  			select {
   112  			case w.requestHandledChan <- w: // One work finished (successfully or unsuccessfully)
   113  			case <-tearDownChan:
   114  				return
   115  			}
   116  		}
   117  	}()
   118  }
   119  
   120  // tryStealWork will try to steal a work from peer worker if available. If all peer channels are
   121  // empty then return nil
   122  //
   123  func (w *Worker) tryStealWork() *Request {
   124  	for _, w1 := range w.pool.Workers {
   125  		request, ok := <-w1.RequestsToHandleChan
   126  		if ok {
   127  			return request
   128  		}
   129  	}
   130  	return nil
   131  }