// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package flow provides a low-level workflow manager based on a CUE Instance.
//
// A Task defines an operational unit in a Workflow and corresponds to a struct
// in a CUE instance. This package does not define what a Task looks like in a
// CUE Instance. Instead, the user of this package must supply a TaskFunc that
// creates a Runner for cue.Values that are deemed to be a Task.
//
// Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
// A Task A depends on another Task B if A, directly or indirectly, has a
// reference to any field of Task B, including its root.
package flow

// TODO: Add hooks. This would allow UIs, for instance, to report on progress.
//
//   - New(inst *cue.Instance, options ...Option)
//   - AddTask(v cue.Value, r Runner) *Task
//   - AddDependency(a, b *Task)
//   - AddTaskGraph(root cue.Value, fn taskFunc)
//   - AddSequence(list cue.Value, fn taskFunc)
//   - Err()

// TODO:
// Should we allow lists as a shorthand for a sequence of tasks?
// If so, how do we specify termination behavior?

// TODO:
// Should we allow tasks to be a child of another task? Currently, the search
// for tasks ends once a task root is found.
43 // 44 // Semantically it is somewhat unclear to do so: for instance, if an $after 45 // is used to refer to an explicit task dependency, it is logically 46 // indistinguishable whether this should be a subtask or is a dependency. 47 // Using higher-order constructs for analysis is generally undesirable. 48 // 49 // A possible solution would be to define specific "grouping tasks" whose sole 50 // purpose is to define sub tasks. The user of this package would then need 51 // to explicitly distinguish between tasks that are dependencies and tasks that 52 // are subtasks. 53 54 // TODO: streaming tasks/ server applications 55 // 56 // Workflows are currently implemented for batch processing, for instance to 57 // implement shell scripting or other kinds of batch processing. 58 // 59 // This API has been designed, however, to also allow for streaming 60 // applications. For instance, a streaming Task could listen for Etcd changes 61 // or incoming HTTP requests and send updates each time an input changes. 62 // Downstream tasks could then alternate between a Waiting and Running state. 63 // 64 // Note that such streaming applications would also cause configurations to 65 // potentially not become increasingly more specific. Instead, a Task would 66 // replace its old result each time it is updated. This would require tracking 67 // of which conjunct was previously created by a task. 68 69 import ( 70 "context" 71 "fmt" 72 "os" 73 "strings" 74 "sync/atomic" 75 76 "cuelang.org/go/cue" 77 "cuelang.org/go/cue/errors" 78 "cuelang.org/go/cue/stats" 79 "cuelang.org/go/internal/core/adt" 80 "cuelang.org/go/internal/core/convert" 81 "cuelang.org/go/internal/core/eval" 82 "cuelang.org/go/internal/value" 83 ) 84 85 var ( 86 // ErrAbort may be returned by a task to avoid processing downstream tasks. 87 // This can be used by control nodes to influence execution. 
88 ErrAbort = errors.New("abort dependant tasks without failure") 89 90 // TODO: ErrUpdate: update and run a dependency, but don't complete a 91 // dependency as more results may come. This is useful in server mode. 92 93 debug = os.Getenv("CUE_DEBUG_TOOLS_FLOW") != "" 94 ) 95 96 // A TaskFunc creates a Runner for v if v defines a task or reports nil 97 // otherwise. It reports an error for illformed tasks. 98 // 99 // If TaskFunc returns a non-nil Runner the search for task within v stops. 100 // That is, subtasks are not supported. 101 type TaskFunc func(v cue.Value) (Runner, error) 102 103 // A Runner executes a Task. 104 type Runner interface { 105 // Run runs a Task. If any of the tasks it depends on returned an error it 106 // is passed to this task. It reports an error upon failure. 107 // 108 // Any results to be returned can be set by calling Fill on the passed task. 109 // 110 // TODO: what is a good contract for receiving and passing errors and abort. 111 // 112 // If for a returned error x errors.Is(x, ErrAbort), all dependant tasks 113 // will not be run, without this being an error. 114 Run(t *Task, err error) error 115 } 116 117 // A RunnerFunc runs a Task. 118 type RunnerFunc func(t *Task) error 119 120 func (f RunnerFunc) Run(t *Task, err error) error { 121 return f(t) 122 } 123 124 // A Config defines options for interpreting an Instance as a Workflow. 125 type Config struct { 126 // Root limits the search for tasks to be within the path indicated to root. 127 // For the cue command, this is set to ["command"]. The default value is 128 // for all tasks to be root. 129 Root cue.Path 130 131 // InferTasks allows tasks to be defined outside of the Root. Such tasks 132 // will only be included in the workflow if any of its fields is referenced 133 // by any of the tasks defined within Root. 134 // 135 // CAVEAT EMPTOR: this features is mostly provided for backwards 136 // compatibility with v0.2. 
A problem with this approach is that it will 137 // look for task structs within arbitrary data. So if not careful, there may 138 // be spurious matches. 139 InferTasks bool 140 141 // IgnoreConcrete ignores references for which the values are already 142 // concrete and cannot change. 143 IgnoreConcrete bool 144 145 // FindHiddenTasks allows tasks to be defined in hidden fields. 146 FindHiddenTasks bool 147 148 // UpdateFunc is called whenever the information in the controller is 149 // updated. This includes directly after initialization. The task may be 150 // nil if this call is not the result of a task completing. 151 UpdateFunc func(c *Controller, t *Task) error 152 } 153 154 // A Controller defines a set of Tasks to be executed. 155 type Controller struct { 156 cfg Config 157 isTask TaskFunc 158 159 inst cue.Value 160 valueSeqNum int64 161 162 env *adt.Environment 163 164 conjuncts []adt.Conjunct 165 conjunctSeq int64 166 167 taskCh chan *Task 168 169 opCtx *adt.OpContext 170 context context.Context 171 cancelFunc context.CancelFunc 172 173 // taskStats tracks counters for auxiliary operations done by tasks. It does 174 // not include the CUE operations done by the Controller on behalf of tasks, 175 // which is likely going to tbe the bulk of the operations. 176 taskStats stats.Counts 177 178 done atomic.Bool 179 180 // keys maps task keys to their index. This allows a recreation of the 181 // Instance while retaining the original task indices. 182 // 183 // TODO: do instance updating in place to allow for more efficient 184 // processing. 185 keys map[string]*Task 186 tasks []*Task 187 188 // Only used during task initialization. 189 nodes map[*adt.Vertex]*Task 190 191 errs errors.Error 192 } 193 194 // Stats reports statistics on the total number of CUE operations used. 195 // 196 // This is an experimental method and the API is likely to change. The 197 // Counts.String method will likely stay and is the safest way to use this API. 
198 // 199 // This currently should only be called after completion or within a call to 200 // UpdateFunc. 201 func (c *Controller) Stats() (counts stats.Counts) { 202 counts = *c.opCtx.Stats() 203 counts.Add(c.taskStats) 204 return counts 205 } 206 207 // Tasks reports the tasks that are currently registered with the controller. 208 // 209 // This may currently only be called before Run is called or from within 210 // a call to UpdateFunc. Task pointers returned by this call are not guaranteed 211 // to be the same between successive calls to this method. 212 func (c *Controller) Tasks() []*Task { 213 return c.tasks 214 } 215 216 func (c *Controller) cancel() { 217 if c.cancelFunc != nil { 218 c.cancelFunc() 219 } 220 } 221 222 func (c *Controller) addErr(err error, msg string) { 223 c.errs = errors.Append(c.errs, errors.Promote(err, msg)) 224 } 225 226 // New creates a Controller for a given Instance and TaskFunc. 227 // 228 // The instance value can either be a *cue.Instance or a cue.Value. 229 func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller { 230 v := inst.Value() 231 ctx := eval.NewContext(value.ToInternal(v)) 232 233 c := &Controller{ 234 isTask: f, 235 inst: v, 236 opCtx: ctx, 237 238 taskCh: make(chan *Task), 239 keys: map[string]*Task{}, 240 } 241 242 if cfg != nil { 243 c.cfg = *cfg 244 } 245 c.initTasks(true) 246 return c 247 248 } 249 250 // Run runs the tasks of a workflow until completion. 251 func (c *Controller) Run(ctx context.Context) error { 252 c.context, c.cancelFunc = context.WithCancel(ctx) 253 defer c.cancelFunc() 254 255 c.runLoop() 256 257 // NOTE: track state here as runLoop might add more tasks to the flow 258 // during the execution so checking current tasks state may not be 259 // accurate enough to determine that the flow is terminated. 260 // This is used to determine if the controller value can be retrieved. 261 // When the controller value is safe to be read concurrently this tracking 262 // can be removed. 
263 c.done.Store(true) 264 265 return c.errs 266 } 267 268 // Value returns the value managed by the controller. 269 // 270 // It is safe to use the value only after Run() has returned. 271 // It panics if the flow is running. 272 func (c *Controller) Value() cue.Value { 273 if !c.done.Load() { 274 panic("can't retrieve value before flow has terminated") 275 } 276 return c.inst 277 } 278 279 // We need to escape quotes in the path, per 280 // https://mermaid-js.github.io/mermaid/#/flowchart?id=entity-codes-to-escape-characters 281 // This also requires that we escape the quoting character #. 282 var mermaidQuote = strings.NewReplacer("#", "#35;", `"`, "#quot;") 283 284 // mermaidGraph generates a mermaid graph of the current state. This can be 285 // pasted into https://mermaid-js.github.io/mermaid-live-editor/ for 286 // visualization. 287 func mermaidGraph(c *Controller) string { 288 w := &strings.Builder{} 289 fmt.Fprintln(w, "graph TD") 290 for i, t := range c.Tasks() { 291 path := mermaidQuote.Replace(t.Path().String()) 292 fmt.Fprintf(w, " t%d(\"%s [%s]\")\n", i, path, t.State()) 293 for _, t := range t.Dependencies() { 294 fmt.Fprintf(w, " t%d-->t%d\n", i, t.Index()) 295 } 296 } 297 return w.String() 298 } 299 300 // A State indicates the state of a Task. 301 // 302 // The following state diagram indicates the possible state transitions: 303 // 304 // Ready 305 // ↗︎ ↘︎ 306 // Waiting ← Running 307 // ↘︎ ↙︎ 308 // Terminated 309 // 310 // A Task may move from Waiting to Terminating if one of 311 // the tasks on which it depends fails. 312 // 313 // NOTE: transitions from Running to Waiting are currently not supported. In 314 // the future this may be possible if a task depends on continuously running 315 // tasks that send updates. 316 type State int 317 318 //go:generate go run golang.org/x/tools/cmd/stringer -type=State 319 320 const ( 321 // Waiting indicates a task is blocked on input from another task. 
322 // 323 // NOTE: although this is currently not implemented, a task could 324 // theoretically move from the Running to Waiting state. 325 Waiting State = iota 326 327 // Ready means a tasks is ready to run, but currently not running. 328 Ready 329 330 // Running indicates a goroutine is currently active for a task and that 331 // it is not Waiting. 332 Running 333 334 // Terminated means a task has stopped running either because it terminated 335 // while Running or was aborted by task on which it depends. The error 336 // value of a Task indicates the reason for the termination. 337 Terminated 338 ) 339 340 // A Task contains the context for a single task execution. 341 // Tasks may be run concurrently. 342 type Task struct { 343 // Static 344 c *Controller 345 ctxt *adt.OpContext 346 r Runner 347 348 index int 349 path cue.Path 350 key string 351 labels []adt.Feature 352 353 // Dynamic 354 update adt.Expr 355 deps map[*Task]bool 356 pathDeps map[string][]*Task 357 358 conjunctSeq int64 359 valueSeq int64 360 v cue.Value 361 err errors.Error 362 state State 363 depTasks []*Task 364 365 stats stats.Counts 366 } 367 368 // Stats reports statistics on the number of CUE operations used to complete 369 // this task. 370 // 371 // This is an experimental method and the API is likely to change. 372 // 373 // It only shows numbers upon completion. This may change in the future. 374 func (t *Task) Stats() stats.Counts { 375 return t.stats 376 } 377 378 // Context reports the Controller's Context. 379 func (t *Task) Context() context.Context { 380 return t.c.context 381 } 382 383 // Path reports the path of Task within the Instance in which it is defined. 384 // The Path is always valid. 385 func (t *Task) Path() cue.Path { 386 return t.path 387 } 388 389 // Index reports the sequence number of the Task. This will not change over 390 // time. 
391 func (t *Task) Index() int { 392 return t.index 393 } 394 395 func (t *Task) done() bool { 396 return t.state > Running 397 } 398 399 func (t *Task) isReady() bool { 400 for _, d := range t.depTasks { 401 if !d.done() { 402 return false 403 } 404 } 405 return true 406 } 407 408 func (t *Task) vertex() *adt.Vertex { 409 _, x := value.ToInternal(t.v) 410 return x 411 } 412 413 func (t *Task) addDep(path string, dep *Task) { 414 if dep == nil || dep == t { 415 return 416 } 417 if t.deps == nil { 418 t.deps = map[*Task]bool{} 419 t.pathDeps = map[string][]*Task{} 420 } 421 422 // Add the dependencies for a given path to the controller. We could compute 423 // this again later, but this ensures there will be no discrepancies. 424 a := t.pathDeps[path] 425 found := false 426 for _, t := range a { 427 if t == dep { 428 found = true 429 break 430 } 431 } 432 if !found { 433 t.pathDeps[path] = append(a, dep) 434 435 } 436 437 if !t.deps[dep] { 438 t.deps[dep] = true 439 t.depTasks = append(t.depTasks, dep) 440 } 441 } 442 443 // Fill fills in values of the Controller's configuration for the current task. 444 // The changes take effect after the task completes. 445 // 446 // This method may currently only be called by the runner. 447 func (t *Task) Fill(x interface{}) error { 448 expr := convert.GoValueToExpr(t.ctxt, true, x) 449 if t.update == nil { 450 t.update = expr 451 return nil 452 } 453 t.update = &adt.BinaryExpr{ 454 Op: adt.AndOp, 455 X: t.update, 456 Y: expr, 457 } 458 return nil 459 } 460 461 // Value reports the latest value of this task. 462 // 463 // This method may currently only be called before Run is called or after a 464 // Task completed, or from within a call to UpdateFunc. 465 func (t *Task) Value() cue.Value { 466 // TODO: synchronize 467 return t.v 468 } 469 470 // Dependencies reports the Tasks t depends on. 
471 // 472 // This method may currently only be called before Run is called or after a 473 // Task completed, or from within a call to UpdateFunc. 474 func (t *Task) Dependencies() []*Task { 475 // TODO: add synchronization. 476 return t.depTasks 477 } 478 479 // PathDependencies reports the dependencies found for a value at the given 480 // path. 481 // 482 // This may currently only be called before Run is called or from within 483 // a call to UpdateFunc. 484 func (t *Task) PathDependencies(p cue.Path) []*Task { 485 return t.pathDeps[p.String()] 486 } 487 488 // Err returns the error of a completed Task. 489 // 490 // This method may currently only be called before Run is called, after a 491 // Task completed, or from within a call to UpdateFunc. 492 func (t *Task) Err() error { 493 return t.err 494 } 495 496 // State is the current state of the Task. 497 // 498 // This method may currently only be called before Run is called or after a 499 // Task completed, or from within a call to UpdateFunc. 500 func (t *Task) State() State { 501 return t.state 502 }