// golang.org/x/build@v0.0.0-20240506185731-218518f32b70/internal/workflow/workflow.go (about)

// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package workflow declaratively defines computation graphs that support
// automatic parallelization, persistence, and monitoring.
//
// Workflows are a set of tasks and actions that produce and consume Values.
// Tasks don't run until the workflow is started, so Values represent data that
// doesn't exist yet, and can't be used directly.
//
// To wrap an existing Go object in a Value, use Const. To define a
// parameter that will be set when the workflow is started, use Param.
// To read a task's return value, register it as an Output, and it will be
// returned from Run. An arbitrary number of Values of the same type can
// be combined with Slice.
//
// Each Task has a set of input Values, and returns a single output Value.
// Calling Task defines a task that will run a Go function when it runs. That
// function must take a context.Context or *TaskContext, followed by arguments
// corresponding to the dynamic type of the Values passed to it. It must return
// a value of any type and an error. The TaskContext can be used as a normal
// Context, and also supports workflow features like unstructured logging.
// A task only runs once all of its inputs are ready. All task outputs must be
// used either as inputs to another task or as a workflow Output.
//
// In addition to Tasks, a workflow can have Actions, which represent functions
// that don't produce an output. Their Go function must only return an error,
// and their definition results in a Dependency rather than a Value.
Both 30 // Dependencies and Values can be passed to After and then to Task and Action 31 // definitions to create an ordering dependency that doesn't correspond to a 32 // function argument. 33 // 34 // Expansions are a third type of function that adds to a running workflow 35 // definition rather than producing an output. Unlike Actions and Tasks, they 36 // execute multiple times and must produce exactly the same workflow 37 // modifications each time. As such, they should be pure functions of their 38 // inputs. Producing different modifications, or running multiple expansions 39 // concurrently, is an error that will corrupt the workflow's state. 40 // 41 // Once a Definition is complete, call Start to set its parameters and 42 // instantiate it into a Workflow. Call Run to execute the workflow until 43 // completion. 44 package workflow 45 46 import ( 47 "context" 48 "encoding/json" 49 "fmt" 50 "reflect" 51 "strings" 52 "time" 53 54 "github.com/google/uuid" 55 ) 56 57 // New creates a new workflow definition. 58 func New() *Definition { 59 return &Definition{ 60 definitionState: &definitionState{ 61 tasks: make(map[string]*taskDefinition), 62 outputs: make(map[string]metaValue), 63 }, 64 } 65 } 66 67 // A Definition defines the structure of a workflow. 68 type Definition struct { 69 namePrefix string // For sub-workflows, the prefix that will be prepended to various names. 70 *definitionState 71 } 72 73 func (d *Definition) Sub(name string) *Definition { 74 return &Definition{ 75 namePrefix: name + ": " + d.namePrefix, 76 definitionState: d.definitionState, 77 } 78 } 79 80 func (d *Definition) name(name string) string { 81 return d.namePrefix + name 82 } 83 84 func (d *Definition) shallowClone() *Definition { 85 clone := New() 86 clone.namePrefix = d.namePrefix 87 clone.parameters = append([]MetaParameter(nil), d.parameters...) 
88 for k, v := range d.tasks { 89 clone.tasks[k] = v 90 } 91 for k, v := range d.outputs { 92 clone.outputs[k] = v 93 } 94 return clone 95 } 96 97 type definitionState struct { 98 parameters []MetaParameter // Ordered according to registration, unique parameter names. 99 tasks map[string]*taskDefinition 100 outputs map[string]metaValue 101 } 102 103 // A TaskOption affects the execution of a task but is not an argument to its function. 104 type TaskOption interface { 105 taskOption() 106 } 107 108 // A Value is a piece of data that will be produced or consumed when a task 109 // runs. It cannot be read directly. 110 type Value[T any] interface { 111 // This function prevents Values of different types from being convertible 112 // to each other. 113 valueType(T) 114 metaValue 115 } 116 117 type metaValue interface { 118 Dependency 119 typ() reflect.Type 120 value(*Workflow) reflect.Value 121 } 122 123 type MetaParameter interface { 124 // RequireNonZero reports whether parameter p is required to have a non-zero value. 125 RequireNonZero() bool 126 // Valid reports whether the given parameter value is valid. 127 // 128 // A value is considered to be valid if: 129 // - the type of v is the parameter type 130 // - if RequireNonZero is true, the value v is non-zero 131 // - if Check is set, it reports value v to be okay 132 Valid(v any) error 133 Name() string 134 Type() reflect.Type 135 HTMLElement() string 136 HTMLInputType() string 137 HTMLSelectOptions() []string 138 Doc() string 139 Example() string 140 } 141 142 // ParamDef describes a Value that is filled in at workflow creation time. 143 // 144 // It can be registered to a workflow with the Parameter function. 145 type ParamDef[T any] struct { 146 Name string // Name identifies the parameter within a workflow. Must be non-empty. 147 ParamType[T] // Parameter type. For strings, defaults to BasicString if not specified. 148 Doc string // Doc documents the parameter. Optional. 
149 Example string // Example is an example value. Optional. 150 151 // Check reports whether the given parameter value is okay. Optional. 152 Check func(T) error 153 } 154 155 // parameter adds Value methods to ParamDef, so that users can't accidentally 156 // use a ParamDef without registering it. 157 type parameter[T any] struct { 158 d ParamDef[T] 159 } 160 161 func (p parameter[T]) Name() string { return p.d.Name } 162 func (p parameter[T]) Type() reflect.Type { return p.typ() } 163 func (p parameter[T]) HTMLElement() string { return p.d.HTMLElement } 164 func (p parameter[T]) HTMLInputType() string { return p.d.HTMLInputType } 165 func (p parameter[T]) HTMLSelectOptions() []string { return p.d.HTMLSelectOptions } 166 func (p parameter[T]) Doc() string { return p.d.Doc } 167 func (p parameter[T]) Example() string { return p.d.Example } 168 func (p parameter[T]) RequireNonZero() bool { 169 return !strings.HasSuffix(p.d.Name, " (optional)") 170 } 171 func (p parameter[T]) Valid(v any) error { 172 vv, ok := v.(T) 173 if !ok { 174 var zero T 175 return fmt.Errorf("parameter %q must have a value of type %T, value %[3]v type is %[3]T", p.d.Name, zero, v) 176 } else if p.RequireNonZero() && reflect.ValueOf(vv).IsZero() { 177 return fmt.Errorf("parameter %q must have non-zero value", p.d.Name) 178 } 179 if p.d.Check == nil { 180 return nil 181 } 182 return p.d.Check(vv) 183 } 184 185 func (p parameter[T]) valueType(T) {} 186 func (p parameter[T]) typ() reflect.Type { 187 var zero T 188 return reflect.TypeOf(zero) 189 } 190 func (p parameter[T]) value(w *Workflow) reflect.Value { return reflect.ValueOf(w.params[p.d.Name]) } 191 func (p parameter[T]) ready(w *Workflow) bool { return true } 192 193 // ParamType defines the type of a workflow parameter. 194 // 195 // Since parameters are entered via an HTML form, 196 // there are some HTML-related knobs available. 
type ParamType[T any] struct {
	// HTMLElement names the HTML element used to enter the parameter value.
	// Supported values are "input", "textarea" and "select".
	HTMLElement string

	// HTMLInputType optionally sets the type attribute of an <input> element;
	// it applies when HTMLElement is "input". When it is not specified,
	// <input> elements default to type="text".
	// See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/input#input_types.
	HTMLInputType string

	// HTMLSelectOptions lists the available options when HTMLElement is "select".
	// See https://developer.mozilla.org/en-US/docs/Web/HTML/Element/option.
	HTMLSelectOptions []string
}

// String parameter types.
var (
	BasicString = ParamType[string]{HTMLElement: "input"}
	URL         = ParamType[string]{HTMLElement: "input", HTMLInputType: "url"}
	LongString  = ParamType[string]{HTMLElement: "textarea"}
)

// Slice of string parameter types.
var (
	SliceShort = ParamType[[]string]{HTMLElement: "input"}
	SliceLong  = ParamType[[]string]{HTMLElement: "textarea"}
)

// Bool is the checkbox bool parameter type.
var Bool = ParamType[bool]{HTMLElement: "input", HTMLInputType: "checkbox"}

// Param registers a new parameter p that is filled in at
// workflow creation time and returns the corresponding Value.
// Param name must be non-empty and uniquely identify the
// parameter in the workflow definition.
242 func Param[T any](d *Definition, p ParamDef[T]) Value[T] { 243 if p.Name == "" { 244 panic(fmt.Errorf("parameter name must be non-empty")) 245 } 246 p.Name = d.name(p.Name) 247 if p.HTMLElement == "" { 248 var zero T 249 switch any(zero).(type) { 250 case string: 251 p.HTMLElement = "input" 252 default: 253 panic(fmt.Errorf("must specify ParamType for %T", zero)) 254 } 255 } 256 if !(parameter[T]{p}).RequireNonZero() && p.Check != nil { 257 var zero T 258 if err := p.Check(zero); err != nil { 259 panic(fmt.Errorf("parameter %q is optional yet its check on zero value reports a non-nil error: %v", p.Name, err)) 260 } 261 } 262 for _, old := range d.parameters { 263 if p.Name == old.Name() { 264 panic(fmt.Errorf("parameter with name %q was already registered with this workflow definition", p.Name)) 265 } 266 } 267 d.parameters = append(d.parameters, parameter[T]{p}) 268 return parameter[T]{p} 269 } 270 271 // Parameters returns parameters associated with the Definition 272 // in the same order that they were registered. 273 func (d *Definition) Parameters() []MetaParameter { 274 return d.parameters 275 } 276 277 // Const creates a Value from an existing object. 278 func Const[T any](value T) Value[T] { 279 return &constant[T]{value} 280 } 281 282 type constant[T any] struct { 283 v T 284 } 285 286 func (c *constant[T]) valueType(T) {} 287 func (c *constant[T]) typ() reflect.Type { 288 var zero []T 289 return reflect.TypeOf(zero) 290 } 291 func (c *constant[T]) value(_ *Workflow) reflect.Value { return reflect.ValueOf(c.v) } 292 func (c *constant[T]) ready(_ *Workflow) bool { return true } 293 294 // Slice combines multiple Values of the same type into a Value containing 295 // a slice of that type. 
296 func Slice[T any](vs ...Value[T]) Value[[]T] { 297 return &slice[T]{vals: vs} 298 } 299 300 type slice[T any] struct { 301 vals []Value[T] 302 } 303 304 func (s *slice[T]) valueType([]T) {} 305 306 func (s *slice[T]) typ() reflect.Type { 307 var zero []T 308 return reflect.TypeOf(zero) 309 } 310 311 func (s *slice[T]) value(w *Workflow) reflect.Value { 312 value := reflect.ValueOf(make([]T, len(s.vals))) 313 for i, v := range s.vals { 314 value.Index(i).Set(v.value(w)) 315 } 316 return value 317 } 318 319 func (s *slice[T]) ready(w *Workflow) bool { 320 for _, val := range s.vals { 321 if !val.ready(w) { 322 return false 323 } 324 } 325 return true 326 } 327 328 // Output registers a Value as a workflow output which will be returned when 329 // the workflow finishes. 330 func Output[T any](d *Definition, name string, v Value[T]) { 331 d.outputs[d.name(name)] = v 332 } 333 334 // A Dependency represents a dependency on a prior task. 335 type Dependency interface { 336 ready(*Workflow) bool 337 } 338 339 // After represents an ordering dependency on another Task or Action. It can be 340 // passed in addition to any arguments to the task's function. 341 func After(afters ...Dependency) TaskOption { 342 return &after{afters} 343 } 344 345 type after struct { 346 deps []Dependency 347 } 348 349 func (a *after) taskOption() {} 350 351 // TaskN adds a task to the workflow definition. It takes N inputs, and returns 352 // one output. name must uniquely identify the task in the workflow. 353 // f must be a function that takes a context.Context or *TaskContext argument, 354 // followed by one argument for each Value in inputs, corresponding to the 355 // Value's dynamic type. It must return two values, the first of which will 356 // be returned as its Value, and an error that will be used by the workflow 357 // engine. See the package documentation for examples. 
358 func Task0[C context.Context, O1 any](d *Definition, name string, f func(C) (O1, error), opts ...TaskOption) Value[O1] { 359 return addTask[O1](d, name, f, nil, opts) 360 } 361 362 func Task1[C context.Context, I1, O1 any](d *Definition, name string, f func(C, I1) (O1, error), i1 Value[I1], opts ...TaskOption) Value[O1] { 363 return addTask[O1](d, name, f, []metaValue{i1}, opts) 364 } 365 366 func Task2[C context.Context, I1, I2, O1 any](d *Definition, name string, f func(C, I1, I2) (O1, error), i1 Value[I1], i2 Value[I2], opts ...TaskOption) Value[O1] { 367 return addTask[O1](d, name, f, []metaValue{i1, i2}, opts) 368 } 369 370 func Task3[C context.Context, I1, I2, I3, O1 any](d *Definition, name string, f func(C, I1, I2, I3) (O1, error), i1 Value[I1], i2 Value[I2], i3 Value[I3], opts ...TaskOption) Value[O1] { 371 return addTask[O1](d, name, f, []metaValue{i1, i2, i3}, opts) 372 } 373 374 func Task4[C context.Context, I1, I2, I3, I4, O1 any](d *Definition, name string, f func(C, I1, I2, I3, I4) (O1, error), i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], opts ...TaskOption) Value[O1] { 375 return addTask[O1](d, name, f, []metaValue{i1, i2, i3, i4}, opts) 376 } 377 378 func Task5[C context.Context, I1, I2, I3, I4, I5, O1 any](d *Definition, name string, f func(C, I1, I2, I3, I4, I5) (O1, error), i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], i5 Value[I5], opts ...TaskOption) Value[O1] { 379 return addTask[O1](d, name, f, []metaValue{i1, i2, i3, i4, i5}, opts) 380 } 381 382 func addFunc(d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *taskDefinition { 383 name = d.name(name) 384 td := &taskDefinition{name: name, f: f, args: inputs} 385 for _, input := range inputs { 386 td.deps = append(td.deps, input) 387 } 388 for _, opt := range opts { 389 td.deps = append(td.deps, opt.(*after).deps...) 
390 } 391 d.tasks[name] = td 392 return td 393 } 394 395 func addTask[O1 any](d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *taskResult[O1] { 396 td := addFunc(d, name, f, inputs, opts) 397 return &taskResult[O1]{td} 398 } 399 400 func addAction(d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *dependency { 401 td := addFunc(d, name, f, inputs, opts) 402 return &dependency{td} 403 } 404 405 func addExpansion[O1 any](d *Definition, name string, f interface{}, inputs []metaValue, opts []TaskOption) *expansionResult[O1] { 406 td := addFunc(d, name, f, inputs, opts) 407 td.isExpansion = true 408 return &expansionResult[O1]{td} 409 } 410 411 type expansionResult[T any] struct { 412 td *taskDefinition 413 } 414 415 func (er *expansionResult[T]) valueType(T) {} 416 417 func (er *expansionResult[T]) typ() reflect.Type { 418 var zero []T 419 return reflect.TypeOf(zero) 420 } 421 422 func (er *expansionResult[T]) value(w *Workflow) reflect.Value { 423 return w.tasks[er.td].resultValue.value(w) 424 } 425 426 func (er *expansionResult[T]) ready(w *Workflow) bool { 427 return w.taskReady(er.td) && w.tasks[er.td].resultValue.ready(w) 428 } 429 430 // ActionN adds an Action to the workflow definition. Its behavior and 431 // requirements are the same as Task, except that f must only return an error, 432 // and the result of the definition is a Dependency. 
433 func Action0[C context.Context](d *Definition, name string, f func(C) error, opts ...TaskOption) Dependency { 434 return addAction(d, name, f, nil, opts) 435 } 436 437 func Action1[C context.Context, I1 any](d *Definition, name string, f func(C, I1) error, i1 Value[I1], opts ...TaskOption) Dependency { 438 return addAction(d, name, f, []metaValue{i1}, opts) 439 } 440 441 func Action2[C context.Context, I1, I2 any](d *Definition, name string, f func(C, I1, I2) error, i1 Value[I1], i2 Value[I2], opts ...TaskOption) Dependency { 442 return addAction(d, name, f, []metaValue{i1, i2}, opts) 443 } 444 445 func Action3[C context.Context, I1, I2, I3 any](d *Definition, name string, f func(C, I1, I2, I3) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], opts ...TaskOption) Dependency { 446 return addAction(d, name, f, []metaValue{i1, i2, i3}, opts) 447 } 448 449 func Action4[C context.Context, I1, I2, I3, I4 any](d *Definition, name string, f func(C, I1, I2, I3, I4) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], opts ...TaskOption) Dependency { 450 return addAction(d, name, f, []metaValue{i1, i2, i3, i4}, opts) 451 } 452 453 func Action5[C context.Context, I1, I2, I3, I4, I5 any](d *Definition, name string, f func(C, I1, I2, I3, I4, I5) error, i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], i5 Value[I5], opts ...TaskOption) Dependency { 454 return addAction(d, name, f, []metaValue{i1, i2, i3, i4, i5}, opts) 455 } 456 457 type dependency struct { 458 task *taskDefinition 459 } 460 461 func (d *dependency) ready(w *Workflow) bool { 462 return w.taskReady(d.task) 463 } 464 465 // ExpandN adds a workflow expansion task to the workflow definition. 466 // Expansion tasks run similarly to normal tasks, but instead of computing 467 // a result, they can add to the workflow definition. 468 // 469 // Unlike normal tasks, expansions may run multiple times and must produce 470 // the exact same changes to the definition each time. 
471 // 472 // Running more than one expansion concurrently is an error and will corrupt 473 // the workflow. 474 func Expand0[O1 any](d *Definition, name string, f func(*Definition) (Value[O1], error), opts ...TaskOption) Value[O1] { 475 return addExpansion[O1](d, name, f, nil, opts) 476 } 477 478 func Expand1[I1, O1 any](d *Definition, name string, f func(*Definition, I1) (Value[O1], error), i1 Value[I1], opts ...TaskOption) Value[O1] { 479 return addExpansion[O1](d, name, f, []metaValue{i1}, opts) 480 } 481 482 func Expand2[I1, I2, O1 any](d *Definition, name string, f func(*Definition, I1, I2) (Value[O1], error), i1 Value[I1], i2 Value[I2], opts ...TaskOption) Value[O1] { 483 return addExpansion[O1](d, name, f, []metaValue{i1, i2}, opts) 484 } 485 486 func Expand3[I1, I2, I3, O1 any](d *Definition, name string, f func(*Definition, I1, I2, I3) (Value[O1], error), i1 Value[I1], i2 Value[I2], i3 Value[I3], opts ...TaskOption) Value[O1] { 487 return addExpansion[O1](d, name, f, []metaValue{i1, i2, i3}, opts) 488 } 489 490 func Expand4[I1, I2, I3, I4, O1 any](d *Definition, name string, f func(*Definition, I1, I2, I3, I4) (Value[O1], error), i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], opts ...TaskOption) Value[O1] { 491 return addExpansion[O1](d, name, f, []metaValue{i1, i2, i3, i4}, opts) 492 } 493 494 func Expand5[I1, I2, I3, I4, I5, O1 any](d *Definition, name string, f func(*Definition, I1, I2, I3, I4, I5) (Value[O1], error), i1 Value[I1], i2 Value[I2], i3 Value[I3], i4 Value[I4], i5 Value[I5], opts ...TaskOption) Value[O1] { 495 return addExpansion[O1](d, name, f, []metaValue{i1, i2, i3, i4, i5}, opts) 496 } 497 498 // A TaskContext is a context.Context, plus workflow-related features. 
499 type TaskContext struct { 500 disableRetries bool 501 context.Context 502 Logger Logger 503 TaskName string 504 WorkflowID uuid.UUID 505 506 watchdogTimer *time.Timer 507 watchdogScale int 508 } 509 510 func (c *TaskContext) Printf(format string, v ...interface{}) { 511 if false { 512 _ = fmt.Sprintf(format, v...) // enable printf checker 513 } 514 c.ResetWatchdog() 515 c.Logger.Printf(format, v...) 516 } 517 518 func (c *TaskContext) DisableRetries() { 519 c.disableRetries = true 520 } 521 522 func (c *TaskContext) ResetWatchdog() { 523 c.resetWatchdog(WatchdogDelay * time.Duration(c.watchdogScale)) 524 } 525 526 // SetWatchdogScale sets the watchdog delay scale factor to max(v, 1), 527 // and resets the watchdog with the new scale. 528 func (c *TaskContext) SetWatchdogScale(v int) { 529 if v < 1 { 530 v = 1 531 } 532 c.watchdogScale = v 533 c.ResetWatchdog() 534 } 535 536 func (c *TaskContext) DisableWatchdog() { 537 // Resetting with a very long delay is easier than canceling the timer. 538 c.resetWatchdog(365 * 24 * time.Hour) 539 } 540 541 func (c *TaskContext) resetWatchdog(d time.Duration) { 542 // Should only occur in tests. 543 if c.watchdogTimer == nil { 544 return 545 } 546 c.watchdogTimer.Reset(d) 547 } 548 549 // A Listener is used to notify the workflow host of state changes, for display 550 // and persistence. 551 type Listener interface { 552 // TaskStateChanged is called when the state of a task changes. 553 // state is safe to store or modify. 554 TaskStateChanged(workflowID uuid.UUID, taskID string, state *TaskState) error 555 // Logger is called to obtain a Logger for a particular task. 556 Logger(workflowID uuid.UUID, taskID string) Logger 557 // WorkflowStalled is called when there are no runnable tasks. 558 WorkflowStalled(workflowID uuid.UUID) error 559 } 560 561 // TaskState contains the state of a task in a running workflow. Once Finished 562 // is true, either Result or Error will be populated. 
563 type TaskState struct { 564 Name string 565 Started bool 566 Finished bool 567 Result interface{} 568 SerializedResult []byte 569 Error string 570 RetryCount int 571 } 572 573 // WorkflowState contains the shallow state of a running workflow. 574 type WorkflowState struct { 575 ID uuid.UUID 576 Params map[string]interface{} 577 } 578 579 // A Logger is a debug logger passed to a task implementation. 580 type Logger interface { 581 Printf(format string, v ...interface{}) 582 } 583 584 type taskDefinition struct { 585 name string 586 isExpansion bool 587 args []metaValue 588 deps []Dependency 589 f interface{} 590 } 591 592 type taskResult[T any] struct { 593 task *taskDefinition 594 } 595 596 func (tr *taskResult[T]) valueType(T) {} 597 598 func (tr *taskResult[T]) typ() reflect.Type { 599 var zero []T 600 return reflect.TypeOf(zero) 601 } 602 603 func (tr *taskResult[T]) value(w *Workflow) reflect.Value { 604 return reflect.ValueOf(w.tasks[tr.task].result) 605 } 606 607 func (tr *taskResult[T]) ready(w *Workflow) bool { 608 return w.taskReady(tr.task) 609 } 610 611 // A Workflow is an instantiated workflow instance, ready to run. 612 type Workflow struct { 613 ID uuid.UUID 614 params map[string]interface{} 615 retryCommands chan retryCommand 616 617 // Notes on ownership and concurrency: 618 // The taskDefinitions used below are immutable. Everything else should be 619 // treated as mutable, used only in the Run goroutine, and never published 620 // to a background goroutine. 621 622 def *Definition 623 tasks map[*taskDefinition]*taskState 624 // pendingStates stores states that haven't been loaded because their 625 // tasks didn't exist at Resume time. 
626 pendingStates map[string]*TaskState 627 } 628 629 func (w *Workflow) taskReady(td *taskDefinition) bool { 630 state := w.tasks[td] 631 return state.finished && state.err == nil 632 } 633 634 type taskState struct { 635 def *taskDefinition 636 created bool 637 started bool 638 finished bool 639 err error 640 641 // normal tasks 642 result interface{} 643 serializedResult []byte 644 retryCount int 645 646 // workflow expansion 647 expanded *Definition 648 resultValue metaValue 649 } 650 651 func (t *taskState) toExported() *TaskState { 652 state := &TaskState{ 653 Name: t.def.name, 654 Finished: t.finished, 655 Result: t.result, 656 SerializedResult: append([]byte(nil), t.serializedResult...), 657 Started: t.started, 658 RetryCount: t.retryCount, 659 } 660 if t.err != nil { 661 state.Error = t.err.Error() 662 } 663 return state 664 } 665 666 // Start instantiates a workflow with the given parameters. 667 func Start(def *Definition, params map[string]interface{}) (*Workflow, error) { 668 w := &Workflow{ 669 ID: uuid.New(), 670 def: def, 671 params: params, 672 tasks: map[*taskDefinition]*taskState{}, 673 retryCommands: make(chan retryCommand, len(def.tasks)), 674 } 675 if err := w.validate(); err != nil { 676 return nil, err 677 } 678 for _, taskDef := range def.tasks { 679 w.tasks[taskDef] = &taskState{def: taskDef} 680 } 681 return w, nil 682 } 683 684 func (w *Workflow) validate() error { 685 // Validate parameters. 686 if got, want := len(w.params), len(w.def.parameters); got != want { 687 return fmt.Errorf("parameter count mismatch: workflow instance has %d, but definition has %d", got, want) 688 } 689 paramDefs := map[string]MetaParameter{} // Key is parameter name. 
690 for _, p := range w.def.parameters { 691 if _, ok := w.params[p.Name()]; !ok { 692 return fmt.Errorf("parameter name mismatch: workflow instance doesn't have %q, but definition requires it", p.Name()) 693 } 694 paramDefs[p.Name()] = p 695 } 696 for name, v := range w.params { 697 if !paramDefs[name].Type().AssignableTo(reflect.TypeOf(v)) { 698 return fmt.Errorf("parameter type mismatch: value of parameter %q has type %v, but definition specifies %v", name, reflect.TypeOf(v), paramDefs[name].Type()) 699 } 700 } 701 702 return nil 703 } 704 705 // Resume restores a workflow from stored state. Tasks that had not finished 706 // will be restarted, but tasks that finished in errors will not be retried. 707 // 708 // The host must create the WorkflowState. TaskStates should be saved from 709 // listener callbacks, but for ease of storage, their Result field does not 710 // need to be populated. 711 func Resume(def *Definition, state *WorkflowState, taskStates map[string]*TaskState) (*Workflow, error) { 712 w := &Workflow{ 713 ID: state.ID, 714 params: state.Params, 715 retryCommands: make(chan retryCommand, len(def.tasks)), 716 def: def, 717 tasks: map[*taskDefinition]*taskState{}, 718 pendingStates: taskStates, 719 } 720 if err := w.validate(); err != nil { 721 return nil, err 722 } 723 for _, taskDef := range def.tasks { 724 var err error 725 w.tasks[taskDef], err = loadTaskState(w.pendingStates, taskDef, false) 726 if err != nil { 727 return nil, fmt.Errorf("loading state for %v: %v", taskDef.name, err) 728 } 729 } 730 return w, nil 731 } 732 733 func loadTaskState(states map[string]*TaskState, def *taskDefinition, allowMissing bool) (*taskState, error) { 734 tState, ok := states[def.name] 735 if !ok { 736 if !allowMissing { 737 return nil, fmt.Errorf("task state not found") 738 } 739 tState = &TaskState{} 740 } 741 // Can't resume tasks, so either it's new or done. 742 // Expansions need to run every time. 
743 finished := tState.Finished && !def.isExpansion 744 state := &taskState{ 745 def: def, 746 created: ok, 747 started: finished, 748 finished: finished, 749 serializedResult: tState.SerializedResult, 750 retryCount: tState.RetryCount, 751 } 752 if state.serializedResult != nil { 753 result, err := unmarshalNew(reflect.ValueOf(def.f).Type().Out(0), tState.SerializedResult) 754 if err != nil { 755 return nil, fmt.Errorf("failed to unmarshal result: %v", err) 756 } 757 state.result = result 758 } 759 if tState.Error != "" { 760 state.err = fmt.Errorf("serialized error: %v", tState.Error) // untyped, but hopefully that doesn't matter. 761 } 762 return state, nil 763 } 764 765 func unmarshalNew(t reflect.Type, data []byte) (interface{}, error) { 766 ptr := reflect.New(t) 767 if err := json.Unmarshal(data, ptr.Interface()); err != nil { 768 return nil, err 769 } 770 return ptr.Elem().Interface(), nil 771 } 772 773 // Run runs a workflow and returns its outputs. 774 // A workflow will either complete successfully, 775 // reach a blocking state waiting on a task to be approved or retried, 776 // or get stopped early via context cancellation. 777 // 778 // listener.TaskStateChanged can be used for monitoring and persistence purposes: 779 // it will be called immediately, when each task starts, and when they finish. 780 // 781 // Register Outputs to read task results. 
782 func (w *Workflow) Run(ctx context.Context, listener Listener) (map[string]interface{}, error) { 783 ctx, cancel := context.WithCancel(ctx) 784 defer cancel() 785 if listener == nil { 786 listener = &defaultListener{} 787 } 788 789 stateChan := make(chan taskState, 2*len(w.def.tasks)) 790 doneOnce := ctx.Done() 791 for { 792 running := 0 793 allDone := true 794 for _, task := range w.tasks { 795 if !task.created { 796 task.created = true 797 listener.TaskStateChanged(w.ID, task.def.name, task.toExported()) 798 } 799 if task.started && !task.finished { 800 running++ 801 } 802 if !task.finished || task.err != nil { 803 allDone = false 804 } 805 } 806 if allDone { 807 break 808 } 809 810 if ctx.Err() == nil { 811 // Start any idle tasks whose dependencies are all done. 812 for _, task := range w.tasks { 813 if task.started { 814 continue 815 } 816 args, ready := w.taskArgs(task.def) 817 if !ready { 818 continue 819 } 820 task.started = true 821 running++ 822 listener.TaskStateChanged(w.ID, task.def.name, task.toExported()) 823 taskCopy := *task 824 if task.def.isExpansion { 825 defCopy := w.def.shallowClone() 826 go func() { stateChan <- runExpansion(defCopy, taskCopy, args) }() 827 } else { 828 go func() { stateChan <- runTask(ctx, w.ID, listener, taskCopy, args) }() 829 } 830 } 831 } 832 833 // Honor context cancellation only after all tasks have exited. 
834 if running == 0 { 835 select { 836 case <-ctx.Done(): 837 return nil, ctx.Err() 838 default: 839 listener.WorkflowStalled(w.ID) 840 } 841 } 842 843 select { 844 case state := <-stateChan: 845 if state.def.isExpansion && state.finished && state.err == nil { 846 state.err = w.expand(state.expanded) 847 } 848 listener.TaskStateChanged(w.ID, state.def.name, state.toExported()) 849 w.tasks[state.def] = &state 850 case retry := <-w.retryCommands: 851 def, ok := w.def.tasks[retry.name] 852 if !ok { 853 retry.reply <- fmt.Errorf("unknown task %q", retry.name) 854 break 855 } 856 state := w.tasks[def] 857 if !state.finished || state.err == nil { 858 retry.reply <- fmt.Errorf("cannot retry task that did not finish in error") 859 break 860 } 861 listener.Logger(w.ID, def.name).Printf("Manual retry requested") 862 stateChan <- taskState{def: def, created: true} 863 retry.reply <- nil 864 // Don't get stuck when cancellation comes in after all tasks have 865 // finished, but also don't busy wait if something's still running. 866 case <-doneOnce: 867 doneOnce = nil 868 } 869 } 870 871 outs := map[string]interface{}{} 872 for name, def := range w.def.outputs { 873 outs[name] = def.value(w).Interface() 874 } 875 return outs, nil 876 } 877 878 func (w *Workflow) taskArgs(def *taskDefinition) ([]reflect.Value, bool) { 879 for _, dep := range def.deps { 880 if !dep.ready(w) { 881 return nil, false 882 } 883 } 884 var args []reflect.Value 885 for _, v := range def.args { 886 args = append(args, v.value(w)) 887 } 888 return args, true 889 } 890 891 // Maximum number of retries. This could be a workflow property. 892 var MaxRetries = 3 893 894 var WatchdogDelay = 11 * time.Minute // A little over go test -timeout's default value of 10 minutes. 
895 896 func runTask(ctx context.Context, workflowID uuid.UUID, listener Listener, state taskState, args []reflect.Value) taskState { 897 ctx, cancel := context.WithCancel(ctx) 898 defer cancel() 899 900 tctx := &TaskContext{ 901 Context: ctx, 902 Logger: listener.Logger(workflowID, state.def.name), 903 TaskName: state.def.name, 904 WorkflowID: workflowID, 905 watchdogTimer: time.AfterFunc(WatchdogDelay, cancel), 906 watchdogScale: 1, 907 } 908 909 in := append([]reflect.Value{reflect.ValueOf(tctx)}, args...) 910 fv := reflect.ValueOf(state.def.f) 911 out := fv.Call(in) 912 913 if !tctx.watchdogTimer.Stop() { 914 state.err = fmt.Errorf("task did not log for %v, assumed hung", WatchdogDelay) 915 } else if errIdx := len(out) - 1; !out[errIdx].IsNil() { 916 state.err = out[errIdx].Interface().(error) 917 } 918 state.finished = true 919 if len(out) == 2 && state.err == nil { 920 state.serializedResult, state.err = json.Marshal(out[0].Interface()) 921 if state.err == nil { 922 state.result, state.err = unmarshalNew(fv.Type().Out(0), state.serializedResult) 923 } 924 if state.err == nil && !reflect.DeepEqual(out[0].Interface(), state.result) { 925 state.err = fmt.Errorf("JSON marshaling changed result from %#v to %#v", out[0].Interface(), state.result) 926 } 927 } 928 929 if state.err != nil && !tctx.disableRetries && state.retryCount+1 < MaxRetries { 930 tctx.Printf("task failed, will retry (%v of %v): %v", state.retryCount+1, MaxRetries, state.err) 931 state = taskState{ 932 def: state.def, 933 created: true, 934 retryCount: state.retryCount + 1, 935 } 936 } 937 return state 938 } 939 940 func runExpansion(d *Definition, state taskState, args []reflect.Value) taskState { 941 in := append([]reflect.Value{reflect.ValueOf(d)}, args...) 
942 fv := reflect.ValueOf(state.def.f) 943 out := fv.Call(in) 944 state.finished = true 945 if out[1].IsNil() { 946 state.expanded = d 947 state.resultValue = out[0].Interface().(metaValue) 948 } else { 949 state.err = out[1].Interface().(error) 950 } 951 return state 952 } 953 954 func (w *Workflow) expand(expanded *Definition) error { 955 origDef := w.def 956 w.def = expanded 957 if err := w.validate(); err != nil { 958 w.def = origDef 959 return err 960 } 961 for _, def := range w.def.tasks { 962 if _, ok := w.tasks[def]; ok { 963 continue 964 } 965 // w.tasks[def] = &taskState{def: def} 966 var err error 967 w.tasks[def], err = loadTaskState(w.pendingStates, def, true) 968 if err != nil { 969 return err 970 } 971 } 972 return nil 973 } 974 975 type defaultListener struct{} 976 977 func (s *defaultListener) WorkflowStalled(workflowID uuid.UUID) error { 978 return nil 979 } 980 981 func (s *defaultListener) TaskStateChanged(_ uuid.UUID, _ string, _ *TaskState) error { 982 return nil 983 } 984 985 func (s *defaultListener) Logger(_ uuid.UUID, task string) Logger { 986 return &defaultLogger{} 987 } 988 989 type defaultLogger struct{} 990 991 func (l *defaultLogger) Printf(format string, v ...interface{}) {} 992 993 type retryCommand struct { 994 name string 995 reply chan error 996 } 997 998 // RetryTask retries the named task. 999 func (w *Workflow) RetryTask(ctx context.Context, name string) error { 1000 reply := make(chan error) 1001 w.retryCommands <- retryCommand{name, reply} 1002 select { 1003 case err := <-reply: 1004 return err 1005 case <-ctx.Done(): 1006 return ctx.Err() 1007 } 1008 }