golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/relui/worker.go (about) 1 // Copyright 2021 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package relui 6 7 import ( 8 "context" 9 "encoding/json" 10 "errors" 11 "fmt" 12 "log" 13 "reflect" 14 "sync" 15 "time" 16 17 "github.com/google/uuid" 18 "github.com/jackc/pgx/v4" 19 "golang.org/x/build/internal/relui/db" 20 "golang.org/x/build/internal/workflow" 21 "golang.org/x/sync/errgroup" 22 ) 23 24 type Listener interface { 25 workflow.Listener 26 27 WorkflowStarted(ctx context.Context, workflowID uuid.UUID, name string, params map[string]interface{}, scheduleID int) error 28 WorkflowFinished(ctx context.Context, workflowID uuid.UUID, outputs map[string]interface{}, err error) error 29 } 30 31 // Worker runs workflows, and persists their state. 32 type Worker struct { 33 dh *DefinitionHolder 34 35 db db.PGDBTX 36 l Listener 37 38 done chan struct{} 39 pending chan *workflow.Workflow 40 41 mu sync.Mutex 42 // running is a set of currently running Workflow ids. Run uses 43 // this set to prevent starting a simultaneous execution of a 44 // currently running Workflow. 45 running map[string]runningWorkflow 46 } 47 48 type runningWorkflow struct { 49 w *workflow.Workflow 50 stop func() 51 } 52 53 // NewWorker returns a Worker ready to accept and run workflows. 54 func NewWorker(dh *DefinitionHolder, db db.PGDBTX, l Listener) *Worker { 55 return &Worker{ 56 dh: dh, 57 db: db, 58 l: l, 59 done: make(chan struct{}), 60 pending: make(chan *workflow.Workflow, 1), 61 running: make(map[string]runningWorkflow), 62 } 63 } 64 65 // Run runs started workflows, waiting for new workflows to start. 66 // 67 // On context cancellation, Run waits for all running workflows to 68 // finish. 69 func (w *Worker) Run(ctx context.Context) error { 70 eg, ctx := errgroup.WithContext(ctx) 71 for { 72 select { 73 case <-ctx.Done(): 74 close(w.done) 75 if err := eg.Wait(); err != nil { 76 return err 77 } 78 return ctx.Err() 79 case wf := <-w.pending: 80 eg.Go(func() error { 81 runCtx, cancel := context.WithCancel(ctx) 82 defer cancel() 83 if err := w.markRunning(wf, cancel); err != nil { 84 log.Println(err) 85 return nil 86 } 87 defer w.markStopped(wf) 88 89 outputs, err := wf.Run(runCtx, w.l) 90 if wfErr := w.l.WorkflowFinished(ctx, wf.ID, outputs, err); wfErr != nil { 91 return fmt.Errorf("w.l.WorkflowFinished(_, %q, %v, %q) = %w", wf.ID, outputs, err, wfErr) 92 } 93 return nil 94 }) 95 } 96 } 97 } 98 99 func (w *Worker) markRunning(wf *workflow.Workflow, stop func()) error { 100 w.mu.Lock() 101 defer w.mu.Unlock() 102 if _, ok := w.running[wf.ID.String()]; ok { 103 return fmt.Errorf("workflow %q already running", wf.ID) 104 } 105 w.running[wf.ID.String()] = runningWorkflow{wf, stop} 106 return nil 107 } 108 109 func (w *Worker) markStopped(wf *workflow.Workflow) { 110 w.mu.Lock() 111 defer w.mu.Unlock() 112 delete(w.running, wf.ID.String()) 113 } 114 115 func (w *Worker) cancelWorkflow(id uuid.UUID) bool { 116 w.mu.Lock() 117 defer w.mu.Unlock() 118 rwf, ok := w.running[id.String()] 119 if !ok { 120 return ok 121 } 122 rwf.stop() 123 return ok 124 } 125 126 func (w *Worker) run(wf *workflow.Workflow) error { 127 select { 128 case <-w.done: 129 return errors.New("worker stopped") 130 case w.pending <- wf: 131 return nil 132 } 133 } 134 135 func (w *Worker) workflowRunning(id uuid.UUID) bool { 136 w.mu.Lock() 137 defer w.mu.Unlock() 138 _, ok := w.running[id.String()] 139 return ok 140 } 141 142 // StartWorkflow persists and starts running a workflow. 143 func (w *Worker) StartWorkflow(ctx context.Context, name string, params map[string]interface{}, scheduleID int) (uuid.UUID, error) { 144 d := w.dh.Definition(name) 145 if d == nil { 146 return uuid.UUID{}, fmt.Errorf("no workflow named %q", name) 147 } 148 wf, err := workflow.Start(d, params) 149 if err != nil { 150 return uuid.UUID{}, err 151 } 152 if err := w.l.WorkflowStarted(ctx, wf.ID, name, params, scheduleID); err != nil { 153 return wf.ID, err 154 } 155 if err := w.run(wf); err != nil { 156 return wf.ID, err 157 } 158 return wf.ID, err 159 } 160 161 // ResumeAll resumes all workflows with unfinished tasks. 162 func (w *Worker) ResumeAll(ctx context.Context) error { 163 q := db.New(w.db) 164 wfs, err := q.UnfinishedWorkflows(ctx) 165 if err != nil { 166 return fmt.Errorf("q.UnfinishedWorkflows() = _, %w", err) 167 } 168 for _, wf := range wfs { 169 if err := w.Resume(ctx, wf.ID); err != nil { 170 log.Printf("w.Resume(_, %q) = %v", wf.ID, err) 171 } 172 } 173 return nil 174 } 175 176 // Resume resumes a workflow. 177 func (w *Worker) Resume(ctx context.Context, id uuid.UUID) error { 178 var err error 179 var wf db.Workflow 180 var tasks []db.Task 181 err = w.db.BeginFunc(ctx, func(tx pgx.Tx) error { 182 q := db.New(w.db) 183 wf, err = q.Workflow(ctx, id) 184 if err != nil { 185 return fmt.Errorf("q.Workflow(_, %v) = %w", id, err) 186 } 187 // The worker may have crashed, or been re-deployed. Any 188 // started but unfinished tasks are in an unknown state. 189 // Mark them as such for human review. 190 if err := q.FailUnfinishedTasks(ctx, db.FailUnfinishedTasksParams{WorkflowID: id, UpdatedAt: time.Now()}); err != nil { 191 return fmt.Errorf("q.FailUnfinishedTasks(_, %v) = %w", id, err) 192 } 193 tasks, err = q.TasksForWorkflow(ctx, id) 194 if err != nil { 195 return fmt.Errorf("q.TasksForWorkflow(_, %v) = %w", id, err) 196 } 197 return nil 198 }) 199 if err != nil { 200 return err 201 } 202 d := w.dh.Definition(wf.Name.String) 203 if d == nil { 204 err := fmt.Errorf("no workflow named %q", wf.Name.String) 205 w.l.WorkflowFinished(ctx, wf.ID, nil, err) 206 return err 207 } 208 209 params, err := UnmarshalWorkflow(wf.Params.String, d) 210 if err != nil { 211 err := fmt.Errorf("UnmarshalWorkflow %q: %w", wf.ID, err) 212 w.l.WorkflowFinished(ctx, wf.ID, nil, err) 213 return err 214 } 215 state := &workflow.WorkflowState{ID: wf.ID, Params: params} 216 217 taskStates := make(map[string]*workflow.TaskState) 218 for _, t := range tasks { 219 ts := &workflow.TaskState{ 220 Name: t.Name, 221 Finished: t.Finished, 222 Error: t.Error.String, 223 RetryCount: int(t.RetryCount), 224 } 225 if t.Result.Valid { 226 ts.SerializedResult = []byte(t.Result.String) 227 } 228 taskStates[t.Name] = ts 229 } 230 res, err := workflow.Resume(d, state, taskStates) 231 if err != nil { 232 w.l.WorkflowFinished(ctx, wf.ID, nil, err) 233 return err 234 } 235 return w.run(res) 236 } 237 238 func UnmarshalWorkflow(marshalled string, d *workflow.Definition) (map[string]any, error) { 239 params := map[string]any{} 240 rawParams := map[string]json.RawMessage{} 241 if err := json.Unmarshal([]byte(marshalled), &rawParams); err != nil { 242 return nil, err 243 } 244 for _, param := range d.Parameters() { 245 ptr := reflect.New(param.Type()) 246 if err := json.Unmarshal(rawParams[param.Name()], ptr.Interface()); err != nil { 247 return nil, fmt.Errorf("unmarshaling param %q: %w", param.Name(), err) 248 } 249 params[param.Name()] = ptr.Elem().Interface() 250 } 251 return params, nil 252 } 253 254 // RetryTask retries a task in a running workflow. 255 func (w *Worker) RetryTask(ctx context.Context, id uuid.UUID, name string) error { 256 w.mu.Lock() 257 rwf, ok := w.running[id.String()] 258 w.mu.Unlock() 259 if !ok { 260 return fmt.Errorf("no workflow with id %q", id) 261 } 262 return rwf.w.RetryTask(ctx, name) 263 }