golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/relui/worker.go (about)

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package relui
     6  
     7  import (
     8  	"context"
     9  	"encoding/json"
    10  	"errors"
    11  	"fmt"
    12  	"log"
    13  	"reflect"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/google/uuid"
    18  	"github.com/jackc/pgx/v4"
    19  	"golang.org/x/build/internal/relui/db"
    20  	"golang.org/x/build/internal/workflow"
    21  	"golang.org/x/sync/errgroup"
    22  )
    23  
    24  type Listener interface {
    25  	workflow.Listener
    26  
    27  	WorkflowStarted(ctx context.Context, workflowID uuid.UUID, name string, params map[string]interface{}, scheduleID int) error
    28  	WorkflowFinished(ctx context.Context, workflowID uuid.UUID, outputs map[string]interface{}, err error) error
    29  }
    30  
    31  // Worker runs workflows, and persists their state.
    32  type Worker struct {
    33  	dh *DefinitionHolder
    34  
    35  	db db.PGDBTX
    36  	l  Listener
    37  
    38  	done    chan struct{}
    39  	pending chan *workflow.Workflow
    40  
    41  	mu sync.Mutex
    42  	// running is a set of currently running Workflow ids. Run uses
    43  	// this set to prevent starting a simultaneous execution of a
    44  	// currently running Workflow.
    45  	running map[string]runningWorkflow
    46  }
    47  
    48  type runningWorkflow struct {
    49  	w    *workflow.Workflow
    50  	stop func()
    51  }
    52  
    53  // NewWorker returns a Worker ready to accept and run workflows.
    54  func NewWorker(dh *DefinitionHolder, db db.PGDBTX, l Listener) *Worker {
    55  	return &Worker{
    56  		dh:      dh,
    57  		db:      db,
    58  		l:       l,
    59  		done:    make(chan struct{}),
    60  		pending: make(chan *workflow.Workflow, 1),
    61  		running: make(map[string]runningWorkflow),
    62  	}
    63  }
    64  
    65  // Run runs started workflows, waiting for new workflows to start.
    66  //
    67  // On context cancellation, Run waits for all running workflows to
    68  // finish.
    69  func (w *Worker) Run(ctx context.Context) error {
    70  	eg, ctx := errgroup.WithContext(ctx)
    71  	for {
    72  		select {
    73  		case <-ctx.Done():
    74  			close(w.done)
    75  			if err := eg.Wait(); err != nil {
    76  				return err
    77  			}
    78  			return ctx.Err()
    79  		case wf := <-w.pending:
    80  			eg.Go(func() error {
    81  				runCtx, cancel := context.WithCancel(ctx)
    82  				defer cancel()
    83  				if err := w.markRunning(wf, cancel); err != nil {
    84  					log.Println(err)
    85  					return nil
    86  				}
    87  				defer w.markStopped(wf)
    88  
    89  				outputs, err := wf.Run(runCtx, w.l)
    90  				if wfErr := w.l.WorkflowFinished(ctx, wf.ID, outputs, err); wfErr != nil {
    91  					return fmt.Errorf("w.l.WorkflowFinished(_, %q, %v, %q) = %w", wf.ID, outputs, err, wfErr)
    92  				}
    93  				return nil
    94  			})
    95  		}
    96  	}
    97  }
    98  
    99  func (w *Worker) markRunning(wf *workflow.Workflow, stop func()) error {
   100  	w.mu.Lock()
   101  	defer w.mu.Unlock()
   102  	if _, ok := w.running[wf.ID.String()]; ok {
   103  		return fmt.Errorf("workflow %q already running", wf.ID)
   104  	}
   105  	w.running[wf.ID.String()] = runningWorkflow{wf, stop}
   106  	return nil
   107  }
   108  
   109  func (w *Worker) markStopped(wf *workflow.Workflow) {
   110  	w.mu.Lock()
   111  	defer w.mu.Unlock()
   112  	delete(w.running, wf.ID.String())
   113  }
   114  
   115  func (w *Worker) cancelWorkflow(id uuid.UUID) bool {
   116  	w.mu.Lock()
   117  	defer w.mu.Unlock()
   118  	rwf, ok := w.running[id.String()]
   119  	if !ok {
   120  		return ok
   121  	}
   122  	rwf.stop()
   123  	return ok
   124  }
   125  
   126  func (w *Worker) run(wf *workflow.Workflow) error {
   127  	select {
   128  	case <-w.done:
   129  		return errors.New("worker stopped")
   130  	case w.pending <- wf:
   131  		return nil
   132  	}
   133  }
   134  
   135  func (w *Worker) workflowRunning(id uuid.UUID) bool {
   136  	w.mu.Lock()
   137  	defer w.mu.Unlock()
   138  	_, ok := w.running[id.String()]
   139  	return ok
   140  }
   141  
   142  // StartWorkflow persists and starts running a workflow.
   143  func (w *Worker) StartWorkflow(ctx context.Context, name string, params map[string]interface{}, scheduleID int) (uuid.UUID, error) {
   144  	d := w.dh.Definition(name)
   145  	if d == nil {
   146  		return uuid.UUID{}, fmt.Errorf("no workflow named %q", name)
   147  	}
   148  	wf, err := workflow.Start(d, params)
   149  	if err != nil {
   150  		return uuid.UUID{}, err
   151  	}
   152  	if err := w.l.WorkflowStarted(ctx, wf.ID, name, params, scheduleID); err != nil {
   153  		return wf.ID, err
   154  	}
   155  	if err := w.run(wf); err != nil {
   156  		return wf.ID, err
   157  	}
   158  	return wf.ID, err
   159  }
   160  
   161  // ResumeAll resumes all workflows with unfinished tasks.
   162  func (w *Worker) ResumeAll(ctx context.Context) error {
   163  	q := db.New(w.db)
   164  	wfs, err := q.UnfinishedWorkflows(ctx)
   165  	if err != nil {
   166  		return fmt.Errorf("q.UnfinishedWorkflows() = _, %w", err)
   167  	}
   168  	for _, wf := range wfs {
   169  		if err := w.Resume(ctx, wf.ID); err != nil {
   170  			log.Printf("w.Resume(_, %q) = %v", wf.ID, err)
   171  		}
   172  	}
   173  	return nil
   174  }
   175  
   176  // Resume resumes a workflow.
   177  func (w *Worker) Resume(ctx context.Context, id uuid.UUID) error {
   178  	var err error
   179  	var wf db.Workflow
   180  	var tasks []db.Task
   181  	err = w.db.BeginFunc(ctx, func(tx pgx.Tx) error {
   182  		q := db.New(w.db)
   183  		wf, err = q.Workflow(ctx, id)
   184  		if err != nil {
   185  			return fmt.Errorf("q.Workflow(_, %v) = %w", id, err)
   186  		}
   187  		// The worker may have crashed, or been re-deployed. Any
   188  		// started but unfinished tasks are in an unknown state.
   189  		// Mark them as such for human review.
   190  		if err := q.FailUnfinishedTasks(ctx, db.FailUnfinishedTasksParams{WorkflowID: id, UpdatedAt: time.Now()}); err != nil {
   191  			return fmt.Errorf("q.FailUnfinishedTasks(_, %v) = %w", id, err)
   192  		}
   193  		tasks, err = q.TasksForWorkflow(ctx, id)
   194  		if err != nil {
   195  			return fmt.Errorf("q.TasksForWorkflow(_, %v) = %w", id, err)
   196  		}
   197  		return nil
   198  	})
   199  	if err != nil {
   200  		return err
   201  	}
   202  	d := w.dh.Definition(wf.Name.String)
   203  	if d == nil {
   204  		err := fmt.Errorf("no workflow named %q", wf.Name.String)
   205  		w.l.WorkflowFinished(ctx, wf.ID, nil, err)
   206  		return err
   207  	}
   208  
   209  	params, err := UnmarshalWorkflow(wf.Params.String, d)
   210  	if err != nil {
   211  		err := fmt.Errorf("UnmarshalWorkflow %q: %w", wf.ID, err)
   212  		w.l.WorkflowFinished(ctx, wf.ID, nil, err)
   213  		return err
   214  	}
   215  	state := &workflow.WorkflowState{ID: wf.ID, Params: params}
   216  
   217  	taskStates := make(map[string]*workflow.TaskState)
   218  	for _, t := range tasks {
   219  		ts := &workflow.TaskState{
   220  			Name:       t.Name,
   221  			Finished:   t.Finished,
   222  			Error:      t.Error.String,
   223  			RetryCount: int(t.RetryCount),
   224  		}
   225  		if t.Result.Valid {
   226  			ts.SerializedResult = []byte(t.Result.String)
   227  		}
   228  		taskStates[t.Name] = ts
   229  	}
   230  	res, err := workflow.Resume(d, state, taskStates)
   231  	if err != nil {
   232  		w.l.WorkflowFinished(ctx, wf.ID, nil, err)
   233  		return err
   234  	}
   235  	return w.run(res)
   236  }
   237  
   238  func UnmarshalWorkflow(marshalled string, d *workflow.Definition) (map[string]any, error) {
   239  	params := map[string]any{}
   240  	rawParams := map[string]json.RawMessage{}
   241  	if err := json.Unmarshal([]byte(marshalled), &rawParams); err != nil {
   242  		return nil, err
   243  	}
   244  	for _, param := range d.Parameters() {
   245  		ptr := reflect.New(param.Type())
   246  		if err := json.Unmarshal(rawParams[param.Name()], ptr.Interface()); err != nil {
   247  			return nil, fmt.Errorf("unmarshaling param %q: %w", param.Name(), err)
   248  		}
   249  		params[param.Name()] = ptr.Elem().Interface()
   250  	}
   251  	return params, nil
   252  }
   253  
   254  // RetryTask retries a task in a running workflow.
   255  func (w *Worker) RetryTask(ctx context.Context, id uuid.UUID, name string) error {
   256  	w.mu.Lock()
   257  	rwf, ok := w.running[id.String()]
   258  	w.mu.Unlock()
   259  	if !ok {
   260  		return fmt.Errorf("no workflow with id %q", id)
   261  	}
   262  	return rwf.w.RetryTask(ctx, name)
   263  }