// Source: github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/tools/flow/flow.go

// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package flow provides a low-level workflow manager based on a CUE Instance.
//
// A Task defines an operational unit in a Workflow and corresponds to a struct
// in a CUE instance. This package does not define what a Task looks like in a
// CUE Instance. Instead, the user of this package must supply a TaskFunc that
// creates a Runner for cue.Values that are deemed to be a Task.
//
// Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
// A Task A depends on another Task B if A, directly or indirectly, has a
// reference to any field of Task B, including its root.
package flow

// TODO: Add hooks. This would allow UIs, for instance, to report on progress.
//
// - New(inst *cue.Instance, options ...Option)
// - AddTask(v cue.Value, r Runner) *Task
// - AddDependency(a, b *Task)
// - AddTaskGraph(root cue.Value, fn taskFunc)
// - AddSequence(list cue.Value, fn taskFunc)
// - Err()

// TODO:
// Should we allow lists as a shorthand for a sequence of tasks?
// If so, how do we specify termination behavior?

// TODO:
// Should we allow tasks to be a child of another task? Currently, the search
// for tasks ends once a task root is found.
//
// Semantically it is somewhat unclear to do so: for instance, if an $after
// is used to refer to an explicit task dependency, it is logically
// indistinguishable whether this should be a subtask or is a dependency.
// Using higher-order constructs for analysis is generally undesirable.
//
// A possible solution would be to define specific "grouping tasks" whose sole
// purpose is to define sub tasks. The user of this package would then need
// to explicitly distinguish between tasks that are dependencies and tasks that
// are subtasks.

// TODO: streaming tasks/ server applications
//
// Workflows are currently implemented for batch processing, for instance to
// implement shell scripting or other kinds of batch processing.
//
// This API has been designed, however, to also allow for streaming
// applications. For instance, a streaming Task could listen for Etcd changes
// or incoming HTTP requests and send updates each time an input changes.
// Downstream tasks could then alternate between a Waiting and Running state.
//
// Note that such streaming applications would also cause configurations to
// potentially not become increasingly more specific. Instead, a Task would
// replace its old result each time it is updated. This would require tracking
// of which conjunct was previously created by a task.

import (
	"context"
	"sync"

	"github.com/joomcode/cue/cue"
	"github.com/joomcode/cue/cue/errors"
	"github.com/joomcode/cue/internal/core/adt"
	"github.com/joomcode/cue/internal/core/convert"
	"github.com/joomcode/cue/internal/core/eval"
	"github.com/joomcode/cue/internal/value"
)

var (
	// ErrAbort may be returned by a task to avoid processing downstream tasks.
	// This can be used by control nodes to influence execution.
	ErrAbort = errors.New("abort dependant tasks without failure")

	// TODO: ErrUpdate: update and run a dependency, but don't complete a
	// dependency as more results may come. This is useful in server mode.
)

// A TaskFunc creates a Runner for v if v defines a task or reports nil
// otherwise. It reports an error for ill-formed tasks.
//
// If TaskFunc returns a non-nil Runner the search for tasks within v stops.
// That is, subtasks are not supported.
type TaskFunc func(v cue.Value) (Runner, error)

// A Runner executes a Task.
type Runner interface {
	// Run runs a Task. If any of the tasks it depends on returned an error it
	// is passed to this task. It reports an error upon failure.
	//
	// Any results to be returned can be set by calling Fill on the passed task.
	//
	// TODO: what is a good contract for receiving and passing errors and abort.
	//
	// If for a returned error x errors.Is(x, ErrAbort), all dependant tasks
	// will not be run, without this being an error.
	Run(t *Task, err error) error
}

// A RunnerFunc runs a Task. It adapts a plain function to the Runner
// interface.
type RunnerFunc func(t *Task) error

// Run calls f(t) and returns its result. The err argument is ignored: it is
// not passed on to f.
func (f RunnerFunc) Run(t *Task, err error) error {
	return f(t)
}

// A Config defines options for interpreting an Instance as a Workflow.
type Config struct {
	// Root limits the search for tasks to be within the path indicated to root.
	// For the cue command, this is set to ["command"]. The default value is
	// for all tasks to be root.
	Root cue.Path

	// InferTasks allows tasks to be defined outside of the Root. Such tasks
	// will only be included in the workflow if any of their fields is
	// referenced by any of the tasks defined within Root.
	//
	// CAVEAT EMPTOR: this feature is mostly provided for backwards
	// compatibility with v0.2. A problem with this approach is that it will
	// look for task structs within arbitrary data. So if not careful, there may
	// be spurious matches.
	InferTasks bool

	// IgnoreConcrete ignores references for which the values are already
	// concrete and cannot change.
	IgnoreConcrete bool

	// FindHiddenTasks allows tasks to be defined in hidden fields.
	FindHiddenTasks bool

	// UpdateFunc is called whenever the information in the controller is
	// updated. This includes directly after initialization. The task may be
	// nil if this call is not the result of a task completing.
	UpdateFunc func(c *Controller, t *Task) error
}

// A Controller defines a set of Tasks to be executed.
type Controller struct {
	// cfg is a copy of the Config passed to New, or the zero Config if New
	// was given a nil *Config.
	cfg Config
	// isTask is the TaskFunc passed to New.
	isTask TaskFunc

	// inst is initialized by New from the instance it is given and is the
	// value returned by Value.
	inst        cue.Value
	valueSeqNum int64

	env *adt.Environment

	conjuncts   []adt.Conjunct
	conjunctSeq int64

	// taskCh is created unbuffered by New.
	taskCh chan *Task

	opCtx *adt.OpContext
	// context and cancelFunc are set by Run from context.WithCancel.
	context    context.Context
	cancelFunc context.CancelFunc

	// mut is held while done is written by Run and read by Value.
	mut *sync.Mutex
	// done is set by Run once runLoop has returned; Value panics until then.
	done bool

	// keys maps task keys to their index. This allows a recreation of the
	// Instance while retaining the original task indices.
	//
	// TODO: do instance updating in place to allow for more efficient
	// processing.
	keys  map[string]*Task
	tasks []*Task

	// Only used during task initialization.
	nodes map[*adt.Vertex]*Task

	// errs collects the errors recorded through addErr and is returned by Run.
	errs errors.Error
}

// Tasks reports the tasks that are currently registered with the controller.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc. Task pointers returned by this call are not guaranteed
// to be the same between successive calls to this method.
189 func (c *Controller) Tasks() []*Task { 190 return c.tasks 191 } 192 193 func (c *Controller) cancel() { 194 if c.cancelFunc != nil { 195 c.cancelFunc() 196 } 197 } 198 199 func (c *Controller) addErr(err error, msg string) { 200 c.errs = errors.Append(c.errs, errors.Promote(err, msg)) 201 } 202 203 // New creates a Controller for a given Instance and TaskFunc. 204 // 205 // The instance value can either be a *cue.Instance or a cue.Value. 206 func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller { 207 v := inst.Value() 208 ctx := eval.NewContext(value.ToInternal(v)) 209 210 c := &Controller{ 211 isTask: f, 212 inst: v, 213 opCtx: ctx, 214 215 taskCh: make(chan *Task), 216 keys: map[string]*Task{}, 217 mut: &sync.Mutex{}, 218 } 219 220 if cfg != nil { 221 c.cfg = *cfg 222 } 223 224 c.initTasks() 225 return c 226 227 } 228 229 // Run runs the tasks of a workflow until completion. 230 func (c *Controller) Run(ctx context.Context) error { 231 c.context, c.cancelFunc = context.WithCancel(ctx) 232 defer c.cancelFunc() 233 234 c.runLoop() 235 236 // NOTE: track state here as runLoop might add more tasks to the flow 237 // during the execution so checking current tasks state may not be 238 // accurate enough to determine that the flow is terminated. 239 // This is used to determine if the controller value can be retrieved. 240 // When the controller value is safe to be read concurrently this tracking 241 // can be removed. 242 c.mut.Lock() 243 defer c.mut.Unlock() 244 c.done = true 245 246 return c.errs 247 } 248 249 // Value returns the value managed by the controller. 250 // 251 // It is safe to use the value only after Run() has returned. 252 // It panics if the flow is running. 253 func (c *Controller) Value() cue.Value { 254 c.mut.Lock() 255 defer c.mut.Unlock() 256 if !c.done { 257 panic("can't retrieve value before flow has terminated") 258 } 259 return c.inst 260 } 261 262 // A State indicates the state of a Task. 
//
// The following state diagram indicates the possible state transitions:
//
//	       Ready
//	    ↗︎       ↘︎
//	Waiting  ←  Running
//	    ↘︎       ↙︎
//	    Terminated
//
// A Task may move from Waiting to Terminated if one of
// the tasks on which it depends fails.
//
// NOTE: transitions from Running to Waiting are currently not supported. In
// the future this may be possible if a task depends on continuously running
// tasks that send updates.
type State int

const (
	// Waiting indicates a task is blocked on input from another task.
	//
	// NOTE: although this is currently not implemented, a task could
	// theoretically move from the Running to Waiting state.
	Waiting State = iota

	// Ready means a task is ready to run, but currently not running.
	Ready

	// Running indicates a goroutine is currently active for a task and that
	// it is not Waiting.
	Running

	// Terminated means a task has stopped running either because it terminated
	// while Running or was aborted by a task on which it depends. The error
	// value of a Task indicates the reason for the termination.
	Terminated
)

// stateStrings holds the display name of every declared State.
var stateStrings = map[State]string{
	Waiting:    "Waiting",
	Ready:      "Ready",
	Running:    "Running",
	Terminated: "Terminated",
}

// String reports a human readable string of status s. It returns the empty
// string for a value that is not one of the declared states.
func (s State) String() string {
	if name, ok := stateStrings[s]; ok {
		return name
	}
	return ""
}

// A Task contains the context for a single task execution.
// Tasks may be run concurrently.
315 type Task struct { 316 // Static 317 c *Controller 318 ctxt *adt.OpContext 319 r Runner 320 321 index int 322 path cue.Path 323 key string 324 labels []adt.Feature 325 326 // Dynamic 327 update adt.Expr 328 deps map[*Task]bool 329 pathDeps map[string][]*Task 330 331 conjunctSeq int64 332 valueSeq int64 333 v cue.Value 334 err errors.Error 335 state State 336 depTasks []*Task 337 } 338 339 // Context reports the Controller's Context. 340 func (t *Task) Context() context.Context { 341 return t.c.context 342 } 343 344 // Path reports the path of Task within the Instance in which it is defined. 345 // The Path is always valid. 346 func (t *Task) Path() cue.Path { 347 return t.path 348 } 349 350 // Index reports the sequence number of the Task. This will not change over 351 // time. 352 func (t *Task) Index() int { 353 return t.index 354 } 355 356 func (t *Task) done() bool { 357 return t.state > Running 358 } 359 360 func (t *Task) isReady() bool { 361 for _, d := range t.depTasks { 362 if !d.done() { 363 return false 364 } 365 } 366 return true 367 } 368 369 func (t *Task) vertex() *adt.Vertex { 370 _, x := value.ToInternal(t.v) 371 return x 372 } 373 374 func (t *Task) addDep(path string, dep *Task) { 375 if dep == nil || dep == t { 376 return 377 } 378 if t.deps == nil { 379 t.deps = map[*Task]bool{} 380 t.pathDeps = map[string][]*Task{} 381 } 382 383 // Add the dependencies for a given path to the controller. We could compute 384 // this again later, but this ensures there will be no discrepancies. 385 a := t.pathDeps[path] 386 found := false 387 for _, t := range a { 388 if t == dep { 389 found = true 390 break 391 } 392 } 393 if !found { 394 t.pathDeps[path] = append(a, dep) 395 396 } 397 398 if !t.deps[dep] { 399 t.deps[dep] = true 400 t.depTasks = append(t.depTasks, dep) 401 } 402 } 403 404 // Fill fills in values of the Controller's configuration for the current task. 405 // The changes take effect after the task completes. 
406 // 407 // This method may currently only be called by the runner. 408 func (t *Task) Fill(x interface{}) error { 409 expr := convert.GoValueToExpr(t.ctxt, true, x) 410 if t.update == nil { 411 t.update = expr 412 return nil 413 } 414 t.update = &adt.BinaryExpr{ 415 Op: adt.AndOp, 416 X: t.update, 417 Y: expr, 418 } 419 return nil 420 } 421 422 // Value reports the latest value of this task. 423 // 424 // This method may currently only be called before Run is called or after a 425 // Task completed, or from within a call to UpdateFunc. 426 func (t *Task) Value() cue.Value { 427 // TODO: synchronize 428 return t.v 429 } 430 431 // Dependencies reports the Tasks t depends on. 432 // 433 // This method may currently only be called before Run is called or after a 434 // Task completed, or from within a call to UpdateFunc. 435 func (t *Task) Dependencies() []*Task { 436 // TODO: add synchronization. 437 return t.depTasks 438 } 439 440 // PathDependencies reports the dependencies found for a value at the given 441 // path. 442 // 443 // This may currently only be called before Run is called or from within 444 // a call to UpdateFunc. 445 func (t *Task) PathDependencies(p cue.Path) []*Task { 446 return t.pathDeps[p.String()] 447 } 448 449 // Err returns the error of a completed Task. 450 // 451 // This method may currently only be called before Run is called, after a 452 // Task completed, or from within a call to UpdateFunc. 453 func (t *Task) Err() error { 454 return t.err 455 } 456 457 // State is the current state of the Task. 458 // 459 // This method may currently only be called before Run is called or after a 460 // Task completed, or from within a call to UpdateFunc. 461 func (t *Task) State() State { 462 return t.state 463 }