// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package flow provides a low-level workflow manager based on a CUE Instance.
//
// A Task defines an operational unit in a Workflow and corresponds to a struct
// in a CUE instance. This package does not define what a Task looks like in a
// CUE Instance. Instead, the user of this package must supply a TaskFunc that
// creates a Runner for cue.Values that are deemed to be a Task.
//
// Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
// A Task A depends on another Task B if A, directly or indirectly, has a
// reference to any field of Task B, including its root.
package flow

// TODO: Add hooks. This would allow UIs, for instance, to report on progress.
//
// - New(inst *cue.Instance, options ...Option)
// - AddTask(v cue.Value, r Runner) *Task
// - AddDependency(a, b *Task)
// - AddTaskGraph(root cue.Value, fn taskFunc)
// - AddSequence(list cue.Value, fn taskFunc)
// - Err()

// TODO:
// Should we allow lists as a shorthand for a sequence of tasks?
// If so, how do we specify termination behavior?

// TODO:
// Should we allow tasks to be a child of another task? Currently, the search
// for tasks ends once a task root is found.
43 // 44 // Semantically it is somewhat unclear to do so: for instance, if an $after 45 // is used to refer to an explicit task dependency, it is logically 46 // indistinguishable whether this should be a subtask or is a dependency. 47 // Using higher-order constructs for analysis is generally undesirable. 48 // 49 // A possible solution would be to define specific "grouping tasks" whose sole 50 // purpose is to define sub tasks. The user of this package would then need 51 // to explicitly distinguish between tasks that are dependencies and tasks that 52 // are subtasks. 53 54 // TODO: streaming tasks/ server applications 55 // 56 // Workflows are currently implemented for batch processing, for instance to 57 // implement shell scripting or other kinds of batch processing. 58 // 59 // This API has been designed, however, to also allow for streaming 60 // applications. For instance, a streaming Task could listen for Etcd changes 61 // or incoming HTTP requests and send updates each time an input changes. 62 // Downstream tasks could then alternate between a Waiting and Running state. 63 // 64 // Note that such streaming applications would also cause configurations to 65 // potentially not become increasingly more specific. Instead, a Task would 66 // replace its old result each time it is updated. This would require tracking 67 // of which conjunct was previously created by a task. 68 69 import ( 70 "context" 71 "fmt" 72 "strings" 73 "sync/atomic" 74 75 "cuelang.org/go/cue" 76 "cuelang.org/go/cue/errors" 77 "cuelang.org/go/cue/stats" 78 "cuelang.org/go/internal/core/adt" 79 "cuelang.org/go/internal/core/convert" 80 "cuelang.org/go/internal/core/eval" 81 "cuelang.org/go/internal/value" 82 ) 83 84 var ( 85 // ErrAbort may be returned by a task to avoid processing downstream tasks. 86 // This can be used by control nodes to influence execution. 
87 ErrAbort = errors.New("abort dependant tasks without failure") 88 89 // TODO: ErrUpdate: update and run a dependency, but don't complete a 90 // dependency as more results may come. This is useful in server mode. 91 ) 92 93 // A TaskFunc creates a Runner for v if v defines a task or reports nil 94 // otherwise. It reports an error for illformed tasks. 95 // 96 // If TaskFunc returns a non-nil Runner the search for task within v stops. 97 // That is, subtasks are not supported. 98 type TaskFunc func(v cue.Value) (Runner, error) 99 100 // A Runner executes a Task. 101 type Runner interface { 102 // Run runs a Task. If any of the tasks it depends on returned an error it 103 // is passed to this task. It reports an error upon failure. 104 // 105 // Any results to be returned can be set by calling Fill on the passed task. 106 // 107 // TODO: what is a good contract for receiving and passing errors and abort. 108 // 109 // If for a returned error x errors.Is(x, ErrAbort), all dependant tasks 110 // will not be run, without this being an error. 111 Run(t *Task, err error) error 112 } 113 114 // A RunnerFunc runs a Task. 115 type RunnerFunc func(t *Task) error 116 117 func (f RunnerFunc) Run(t *Task, err error) error { 118 return f(t) 119 } 120 121 // A Config defines options for interpreting an Instance as a Workflow. 122 type Config struct { 123 // Root limits the search for tasks to be within the path indicated to root. 124 // For the cue command, this is set to ["command"]. The default value is 125 // for all tasks to be root. 126 Root cue.Path 127 128 // InferTasks allows tasks to be defined outside of the Root. Such tasks 129 // will only be included in the workflow if any of its fields is referenced 130 // by any of the tasks defined within Root. 131 // 132 // CAVEAT EMPTOR: this features is mostly provided for backwards 133 // compatibility with v0.2. A problem with this approach is that it will 134 // look for task structs within arbitrary data. 
So if not careful, there may 135 // be spurious matches. 136 InferTasks bool 137 138 // IgnoreConcrete ignores references for which the values are already 139 // concrete and cannot change. 140 IgnoreConcrete bool 141 142 // FindHiddenTasks allows tasks to be defined in hidden fields. 143 FindHiddenTasks bool 144 145 // UpdateFunc is called whenever the information in the controller is 146 // updated. This includes directly after initialization. The task may be 147 // nil if this call is not the result of a task completing. 148 UpdateFunc func(c *Controller, t *Task) error 149 } 150 151 // A Controller defines a set of Tasks to be executed. 152 type Controller struct { 153 cfg Config 154 isTask TaskFunc 155 156 inst cue.Value 157 valueSeqNum int64 158 159 env *adt.Environment 160 161 conjuncts []adt.Conjunct 162 conjunctSeq int64 163 164 taskCh chan *Task 165 166 opCtx *adt.OpContext 167 context context.Context 168 cancelFunc context.CancelFunc 169 170 // taskStats tracks counters for auxiliary operations done by tasks. It does 171 // not include the CUE operations done by the Controller on behalf of tasks, 172 // which is likely going to tbe the bulk of the operations. 173 taskStats stats.Counts 174 175 done atomic.Bool 176 177 // keys maps task keys to their index. This allows a recreation of the 178 // Instance while retaining the original task indices. 179 // 180 // TODO: do instance updating in place to allow for more efficient 181 // processing. 182 keys map[string]*Task 183 tasks []*Task 184 185 // Only used during task initialization. 186 nodes map[*adt.Vertex]*Task 187 188 errs errors.Error 189 } 190 191 // Stats reports statistics on the total number of CUE operations used. 192 // 193 // This is an experimental method and the API is likely to change. The 194 // Counts.String method will likely stay and is the safest way to use this API. 195 // 196 // This currently should only be called after completion or within a call to 197 // UpdateFunc. 
198 func (c *Controller) Stats() (counts stats.Counts) { 199 counts = *c.opCtx.Stats() 200 counts.Add(c.taskStats) 201 return counts 202 } 203 204 // Tasks reports the tasks that are currently registered with the controller. 205 // 206 // This may currently only be called before Run is called or from within 207 // a call to UpdateFunc. Task pointers returned by this call are not guaranteed 208 // to be the same between successive calls to this method. 209 func (c *Controller) Tasks() []*Task { 210 return c.tasks 211 } 212 213 func (c *Controller) cancel() { 214 if c.cancelFunc != nil { 215 c.cancelFunc() 216 } 217 } 218 219 func (c *Controller) addErr(err error, msg string) { 220 c.errs = errors.Append(c.errs, errors.Promote(err, msg)) 221 } 222 223 // New creates a Controller for a given Instance and TaskFunc. 224 // 225 // The instance value can either be a *cue.Instance or a cue.Value. 226 func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller { 227 v := inst.Value() 228 ctx := eval.NewContext(value.ToInternal(v)) 229 230 c := &Controller{ 231 isTask: f, 232 inst: v, 233 opCtx: ctx, 234 235 taskCh: make(chan *Task), 236 keys: map[string]*Task{}, 237 } 238 239 if cfg != nil { 240 c.cfg = *cfg 241 } 242 c.initTasks(true) 243 return c 244 245 } 246 247 // Run runs the tasks of a workflow until completion. 248 func (c *Controller) Run(ctx context.Context) error { 249 c.context, c.cancelFunc = context.WithCancel(ctx) 250 defer c.cancelFunc() 251 252 c.runLoop() 253 254 // NOTE: track state here as runLoop might add more tasks to the flow 255 // during the execution so checking current tasks state may not be 256 // accurate enough to determine that the flow is terminated. 257 // This is used to determine if the controller value can be retrieved. 258 // When the controller value is safe to be read concurrently this tracking 259 // can be removed. 
260 c.done.Store(true) 261 262 return c.errs 263 } 264 265 // Value returns the value managed by the controller. 266 // 267 // It is safe to use the value only after [Controller.Run] has returned. 268 // It panics if the flow is running. 269 func (c *Controller) Value() cue.Value { 270 if !c.done.Load() { 271 panic("can't retrieve value before flow has terminated") 272 } 273 return c.inst 274 } 275 276 // We need to escape quotes in the path, per 277 // https://mermaid-js.github.io/mermaid/#/flowchart?id=entity-codes-to-escape-characters 278 // This also requires that we escape the quoting character #. 279 var mermaidQuote = strings.NewReplacer("#", "#35;", `"`, "#quot;") 280 281 // mermaidGraph generates a mermaid graph of the current state. This can be 282 // pasted into https://mermaid-js.github.io/mermaid-live-editor/ for 283 // visualization. 284 func mermaidGraph(c *Controller) string { 285 w := &strings.Builder{} 286 fmt.Fprintln(w, "graph TD") 287 for i, t := range c.Tasks() { 288 path := mermaidQuote.Replace(t.Path().String()) 289 fmt.Fprintf(w, " t%d(\"%s [%s]\")\n", i, path, t.State()) 290 for _, t := range t.Dependencies() { 291 fmt.Fprintf(w, " t%d-->t%d\n", i, t.Index()) 292 } 293 } 294 return w.String() 295 } 296 297 // A State indicates the state of a Task. 298 // 299 // The following state diagram indicates the possible state transitions: 300 // 301 // Ready 302 // ↗︎ ↘︎ 303 // Waiting ← Running 304 // ↘︎ ↙︎ 305 // Terminated 306 // 307 // A Task may move from Waiting to Terminating if one of 308 // the tasks on which it depends fails. 309 // 310 // NOTE: transitions from Running to Waiting are currently not supported. In 311 // the future this may be possible if a task depends on continuously running 312 // tasks that send updates. 313 type State int 314 315 //go:generate go run golang.org/x/tools/cmd/stringer -type=State 316 317 const ( 318 // Waiting indicates a task is blocked on input from another task. 
319 // 320 // NOTE: although this is currently not implemented, a task could 321 // theoretically move from the Running to Waiting state. 322 Waiting State = iota 323 324 // Ready means a tasks is ready to run, but currently not running. 325 Ready 326 327 // Running indicates a goroutine is currently active for a task and that 328 // it is not Waiting. 329 Running 330 331 // Terminated means a task has stopped running either because it terminated 332 // while Running or was aborted by task on which it depends. The error 333 // value of a Task indicates the reason for the termination. 334 Terminated 335 ) 336 337 // A Task contains the context for a single task execution. 338 // Tasks may be run concurrently. 339 type Task struct { 340 // Static 341 c *Controller 342 ctxt *adt.OpContext 343 r Runner 344 345 index int 346 path cue.Path 347 key string 348 labels []adt.Feature 349 350 // Dynamic 351 update adt.Expr 352 deps map[*Task]bool 353 pathDeps map[string][]*Task 354 355 conjunctSeq int64 356 valueSeq int64 357 v cue.Value 358 err errors.Error 359 state State 360 depTasks []*Task 361 362 stats stats.Counts 363 } 364 365 // Stats reports statistics on the number of CUE operations used to complete 366 // this task. 367 // 368 // This is an experimental method and the API is likely to change. 369 // 370 // It only shows numbers upon completion. This may change in the future. 371 func (t *Task) Stats() stats.Counts { 372 return t.stats 373 } 374 375 // Context reports the Controller's Context. 376 func (t *Task) Context() context.Context { 377 return t.c.context 378 } 379 380 // Path reports the path of Task within the Instance in which it is defined. 381 // The Path is always valid. 382 func (t *Task) Path() cue.Path { 383 return t.path 384 } 385 386 // Index reports the sequence number of the Task. This will not change over 387 // time. 
388 func (t *Task) Index() int { 389 return t.index 390 } 391 392 func (t *Task) done() bool { 393 return t.state > Running 394 } 395 396 func (t *Task) isReady() bool { 397 for _, d := range t.depTasks { 398 if !d.done() { 399 return false 400 } 401 } 402 return true 403 } 404 405 func (t *Task) vertex() *adt.Vertex { 406 _, x := value.ToInternal(t.v) 407 return x 408 } 409 410 func (t *Task) addDep(path string, dep *Task) { 411 if dep == nil || dep == t { 412 return 413 } 414 if t.deps == nil { 415 t.deps = map[*Task]bool{} 416 t.pathDeps = map[string][]*Task{} 417 } 418 419 // Add the dependencies for a given path to the controller. We could compute 420 // this again later, but this ensures there will be no discrepancies. 421 a := t.pathDeps[path] 422 found := false 423 for _, t := range a { 424 if t == dep { 425 found = true 426 break 427 } 428 } 429 if !found { 430 t.pathDeps[path] = append(a, dep) 431 432 } 433 434 if !t.deps[dep] { 435 t.deps[dep] = true 436 t.depTasks = append(t.depTasks, dep) 437 } 438 } 439 440 // Fill fills in values of the Controller's configuration for the current task. 441 // The changes take effect after the task completes. 442 // 443 // This method may currently only be called by the runner. 444 func (t *Task) Fill(x interface{}) error { 445 expr := convert.GoValueToExpr(t.ctxt, true, x) 446 if t.update == nil { 447 t.update = expr 448 return nil 449 } 450 t.update = &adt.BinaryExpr{ 451 Op: adt.AndOp, 452 X: t.update, 453 Y: expr, 454 } 455 return nil 456 } 457 458 // Value reports the latest value of this task. 459 // 460 // This method may currently only be called before Run is called or after a 461 // Task completed, or from within a call to UpdateFunc. 462 func (t *Task) Value() cue.Value { 463 // TODO: synchronize 464 return t.v 465 } 466 467 // Dependencies reports the Tasks t depends on. 
468 // 469 // This method may currently only be called before Run is called or after a 470 // Task completed, or from within a call to UpdateFunc. 471 func (t *Task) Dependencies() []*Task { 472 // TODO: add synchronization. 473 return t.depTasks 474 } 475 476 // PathDependencies reports the dependencies found for a value at the given 477 // path. 478 // 479 // This may currently only be called before Run is called or from within 480 // a call to UpdateFunc. 481 func (t *Task) PathDependencies(p cue.Path) []*Task { 482 return t.pathDeps[p.String()] 483 } 484 485 // Err returns the error of a completed Task. 486 // 487 // This method may currently only be called before Run is called, after a 488 // Task completed, or from within a call to UpdateFunc. 489 func (t *Task) Err() error { 490 return t.err 491 } 492 493 // State is the current state of the Task. 494 // 495 // This method may currently only be called before Run is called or after a 496 // Task completed, or from within a call to UpdateFunc. 497 func (t *Task) State() State { 498 return t.state 499 }