go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/sync/parallel/runmulti.go

// Copyright 2016 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package parallel

import (
	"context"

	"go.chromium.org/luci/common/errors"
)

// RunMulti initiates a nested RunMulti operation. It invokes an entry function,
// passing it a MultiRunner instance bound to the supplied constraints. Any
// nested parallel operations scheduled through that MultiRunner will not
// starve each other regardless of execution order.
//
// This is useful when sharing the same outer Runner constraints with multiple
// tiers of parallel operations. A naive approach would be to re-use a Runner's
// WorkC() or Run() functions, but this can result in deadlock if the outer
// functions consume all available resources running their inner payloads,
// forcing their inner payloads to block forever.
//
// The supplied Context will be monitored for cancellation. If the Context is
// canceled, new work dispatch will be inhibited. Any work items added to the
// work channel will not be executed, and RunMulti will treat them as if they
// ran and immediately returned the Context's Err() value.
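//
// A minimal usage sketch (the groups iterated below are a hypothetical
// [][]func() error, not part of this package): an outer tier fans out one
// task per group, and each of those tasks fans out an inner tier through the
// same MultiRunner, so both tiers share one worker pool without deadlocking:
//
//	err := RunMulti(ctx, 8, func(mr MultiRunner) error {
//		return mr.RunMulti(func(workC chan<- func() error) {
//			for _, group := range groups {
//				group := group
//				workC <- func() error {
//					return mr.RunMulti(func(innerC chan<- func() error) {
//						for _, item := range group {
//							innerC <- item
//						}
//					})
//				}
//			}
//		})
//	})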
func RunMulti(ctx context.Context, workers int, fn func(MultiRunner) error) error {
	// Create a Runner to manage our goroutines. We will not set its Maximum,
	// since we will be metering that internally using our own semaphore.
	r := Runner{
		Sustained: workers,
	}
	defer r.Close()

	nrc := nestedRunnerContext{
		ctx:   ctx,
		workC: r.WorkC(),
	}
	if workers > 0 {
		// Create a semaphore with (workers-1) tokens. We subtract one because the
		// runner has an implicit token by virtue of running the work.
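		//
		// For example, with workers == 4 the semaphore is created with three
		// tokens; the fourth "worker" is the implicit token held by the caller
		// that is itself blocked inside a RunMulti call.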
		nrc.sem = make(Semaphore, workers-1)
	}
	return fn(&nrc)
}

// MultiRunner can execute nested RunMulti against the same outer Runner.
type MultiRunner interface {
	// RunMulti runs the supplied generator, returning an errors.MultiError with
	// the task results.
	//
	// Since it blocks on its result, RunMulti is safe to chain with other
	// RunMulti operations without risk of deadlock, as the caller's blocking
	// counts as one of the run tokens.
	//
	// Note that there is no association between the MultiError's error order and
	// the generated task order.
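	//
	// A minimal calling sketch (tasks below is a hypothetical []func() error):
	//
	//	err := mr.RunMulti(func(workC chan<- func() error) {
	//		for _, t := range tasks {
	//			workC <- t
	//		}
	//	})
	//	// err is nil if every task returned nil; otherwise it is an
	//	// errors.MultiError holding each failure, in no particular order.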
	RunMulti(func(chan<- func() error)) error
}

type nestedRunnerContext struct {
	ctx   context.Context
	workC chan<- WorkItem
	sem   Semaphore
}

func (nrc *nestedRunnerContext) RunMulti(gen func(chan<- func() error)) error {
	var (
		result    errors.MultiError
		doneC     = make(chan error)
		realWorkC = make(chan func() error)
	)
	defer close(doneC)

	// Run the task generator in its own goroutine; closing realWorkC signals
	// that it has finished producing work.
	go func() {
		defer close(realWorkC)
		gen(realWorkC)
	}()

	var (
		outstanding = 0
		contextErr  error

		// We will toggle these based on what we want to block on.
		activeWorkC     = realWorkC
		activeSem       Semaphore
		newWorkC        = activeWorkC
		pendingWorkItem func() error
	)

	// Main dispatch control loop. Our goal is to have at least one task executing
	// at any given time. If we want to execute more, we must acquire a token from
	// the main semaphore.
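	//
	// Each iteration blocks on at most one of: a new work item (newWorkC), a
	// completed dispatch (doneC), Context cancellation, or a semaphore token
	// (activeSem). The tail of the loop re-arms newWorkC and activeSem so that
	// at most one undispatched item is held in pendingWorkItem at a time.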
	for activeWorkC != nil || outstanding > 0 {
		// Track whether we have a semaphore token. If we aren't using a semaphore,
		// we have an implicit token (unthrottled).
		hasToken := (nrc.sem == nil)

		select {
		case workItem, ok := <-newWorkC:
			// Incoming task.
			switch {
			case !ok:
				// Clear activeWorkC, instructing our select loop to stop accepting new
				// requests.
				activeWorkC = nil

			case contextErr != nil:
				// Ignore this request and pretend that it returned the Context error.
				result = append(result, contextErr)

			default:
				// Enqueue this request for dispatch.
				pendingWorkItem = workItem
			}

		case err := <-doneC:
			// A dispatched task has finished.
			if err != nil {
				result = append(result, err)
			}

			// If more than one task was outstanding, one of them was holding a real
			// semaphore token (only one runs on the implicit token), so reclaim it
			// here; it will be reused for a pending dispatch or released below.
			if outstanding > 1 {
				hasToken = true
			}
			outstanding--

		case <-nrc.ctx.Done():
			// Record our Context error. Future jobs will immediately fail with this
			// error.
			contextErr = nrc.ctx.Err()

		case activeSem <- SemaphoreToken{}:
			// We have a pending task, and we just acquired a semaphore token.
			hasToken = true
		}

		// If we have a pending task, maybe dispatch it.
		if pendingWorkItem != nil {
			// If we have no outstanding tasks, use "our" semaphore token to dispatch
			// this one immediately.
			//
			// If we have a token, use it immediately for this task.
			if outstanding == 0 || hasToken {
				nrc.workC <- WorkItem{
					F:    pendingWorkItem,
					ErrC: doneC,
				}

				outstanding++
				pendingWorkItem = nil
				hasToken = false
			}
		}

		// If we still have a token at this point, release it.
		if hasToken {
			nrc.sem.Unlock()
		}

		// Toggle blocking criteria.
		if pendingWorkItem == nil {
			// We have no currently-pending task, so pull a new one.
			newWorkC = activeWorkC

			// Don't try to acquire a semaphore token anymore, since we have nothing
			// to dispatch with it at the moment.
			activeSem = nil
		} else {
			// We have a pending task, but didn't dispatch, so we are blocking on
			// token acquisition.
			activeSem = nrc.sem

			// We only handle one pending task, so don't accept any new tasks until
			// this one has been dispatched.
			newWorkC = nil
		}
	}

	// Choose our error response.
	if len(result) > 0 {
		return result
	}
	return nil
}