github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/tools/flow/flow.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package flow provides a low-level workflow manager based on a CUE Instance.
    16  //
    17  // A Task defines an operational unit in a Workflow and corresponds to a struct
    18  // in a CUE instance. This package does not define what a Task looks like in a
    19  // CUE Instance. Instead, the user of this package must supply a TaskFunc that
    20  // creates a Runner for cue.Values that are deemed to be a Task.
    21  //
    22  // Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
    23  // A Task A depends on another Task B if A, directly or indirectly, has a
    24  // reference to any field of Task B, including its root.
    25  package flow
    26  
    27  // TODO: Add hooks. This would allow UIs, for instance, to report on progress.
    28  //
    29  // - New(inst *cue.Instance, options ...Option)
    30  // - AddTask(v cue.Value, r Runner) *Task
    31  // - AddDependency(a, b *Task)
    32  // - AddTaskGraph(root cue.Value, fn taskFunc)
    33  // - AddSequence(list cue.Value, fn taskFunc)
    34  // - Err()
    35  
    36  // TODO:
    37  // Should we allow lists as a shorthand for a sequence of tasks?
    38  // If so, how do we specify termination behavior?
    39  
    40  // TODO:
    41  // Should we allow tasks to be a child of another task? Currently, the search
// for tasks ends once a task root is found.
    43  //
    44  // Semantically it is somewhat unclear to do so: for instance, if an $after
    45  // is used to refer to an explicit task dependency, it is logically
    46  // indistinguishable whether this should be a subtask or is a dependency.
    47  // Using higher-order constructs for analysis is generally undesirable.
    48  //
    49  // A possible solution would be to define specific "grouping tasks" whose sole
    50  // purpose is to define sub tasks. The user of this package would then need
    51  // to explicitly distinguish between tasks that are dependencies and tasks that
    52  // are subtasks.
    53  
    54  // TODO: streaming tasks/ server applications
    55  //
    56  // Workflows are currently implemented for batch processing, for instance to
    57  // implement shell scripting or other kinds of batch processing.
    58  //
    59  // This API has been designed, however, to also allow for streaming
    60  // applications. For instance, a streaming Task could listen for Etcd changes
    61  // or incoming HTTP requests and send updates each time an input changes.
    62  // Downstream tasks could then alternate between a Waiting and Running state.
    63  //
    64  // Note that such streaming applications would also cause configurations to
    65  // potentially not become increasingly more specific. Instead, a Task would
    66  // replace its old result each time it is updated. This would require tracking
    67  // of which conjunct was previously created by a task.
    68  
    69  import (
    70  	"context"
    71  	"sync"
    72  
    73  	"github.com/joomcode/cue/cue"
    74  	"github.com/joomcode/cue/cue/errors"
    75  	"github.com/joomcode/cue/internal/core/adt"
    76  	"github.com/joomcode/cue/internal/core/convert"
    77  	"github.com/joomcode/cue/internal/core/eval"
    78  	"github.com/joomcode/cue/internal/value"
    79  )
    80  
var (
	// ErrAbort may be returned by a task to avoid processing downstream tasks.
	// This can be used by control nodes to influence execution.
	//
	// Returning it is not a failure: as documented on Runner.Run, the tasks
	// that depend on the aborting task are simply not run.
	ErrAbort = errors.New("abort dependant tasks without failure")

	// TODO: ErrUpdate: update and run a dependency, but don't complete a
	// dependency as more results may come. This is useful in server mode.
)
    89  
// A TaskFunc creates a Runner for v if v defines a task, or reports a nil
// Runner otherwise. It reports an error for ill-formed tasks.
//
// If TaskFunc returns a non-nil Runner, the search for tasks within v stops.
// That is, subtasks are not supported.
type TaskFunc func(v cue.Value) (Runner, error)
    96  
// A Runner executes a Task.
type Runner interface {
	// Run runs a Task. If any of the tasks it depends on returned an error,
	// that error is passed to this task as err. Run reports an error upon
	// failure.
	//
	// Any results to be returned can be set by calling Fill on the passed task.
	//
	// TODO: what is a good contract for receiving and passing errors and abort.
	//
	// If for a returned error x errors.Is(x, ErrAbort), all dependent tasks
	// will not be run, without this being an error.
	Run(t *Task, err error) error
}
   110  
// A RunnerFunc runs a Task. It adapts a plain function to the Runner
// interface.
type RunnerFunc func(t *Task) error

// Run implements Runner by calling f(t). The err argument is not forwarded
// to f, so a RunnerFunc never sees the error of a task it depends on.
func (f RunnerFunc) Run(t *Task, err error) error {
	return f(t)
}
   117  
// A Config defines options for interpreting an Instance as a Workflow.
type Config struct {
	// Root limits the search for tasks to be within the path indicated by
	// Root. For the cue command, this is set to ["command"]. The default value
	// is for all tasks to be root.
	Root cue.Path

	// InferTasks allows tasks to be defined outside of the Root. Such tasks
	// will only be included in the workflow if any of their fields is
	// referenced by any of the tasks defined within Root.
	//
	// CAVEAT EMPTOR: this feature is mostly provided for backwards
	// compatibility with v0.2. A problem with this approach is that it will
	// look for task structs within arbitrary data. So if not careful, there may
	// be spurious matches.
	InferTasks bool

	// IgnoreConcrete ignores references for which the values are already
	// concrete and cannot change.
	IgnoreConcrete bool

	// FindHiddenTasks allows tasks to be defined in hidden fields.
	FindHiddenTasks bool

	// UpdateFunc is called whenever the information in the controller is
	// updated. This includes directly after initialization. The task may be
	// nil if this call is not the result of a task completing.
	UpdateFunc func(c *Controller, t *Task) error
}
   147  
// A Controller defines a set of Tasks to be executed.
type Controller struct {
	// cfg is a copy of the Config passed to New (the zero Config if none was
	// given); isTask is the TaskFunc passed to New.
	cfg    Config
	isTask TaskFunc

	// inst is the value the controller was created for. It is the value
	// handed out by Value once the flow has terminated.
	inst        cue.Value
	valueSeqNum int64

	env *adt.Environment

	conjuncts   []adt.Conjunct
	conjunctSeq int64

	// taskCh is created unbuffered by New.
	// NOTE(review): presumably used to hand tasks back to the run loop —
	// confirm against runLoop.
	taskCh chan *Task

	// opCtx is the evaluation context created by New. context and cancelFunc
	// are installed by Run and derive from the context passed to it.
	opCtx      *adt.OpContext
	context    context.Context
	cancelFunc context.CancelFunc

	// mut guards done. done is set by Run after its run loop has returned
	// and is checked by Value.
	mut  *sync.Mutex
	done bool

	// keys maps task keys to their Task. This allows a recreation of the
	// Instance while retaining the original task indices.
	//
	// TODO: do instance updating in place to allow for more efficient
	// processing.
	keys  map[string]*Task
	tasks []*Task

	// Only used during task initialization.
	nodes map[*adt.Vertex]*Task

	// errs accumulates the errors recorded with addErr; Run returns it.
	errs errors.Error
}
   183  
// Tasks reports the tasks that are currently registered with the controller.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc. Task pointers returned by this call are not guaranteed
// to be the same between successive calls to this method.
//
// The returned slice is the controller's own slice, not a copy; callers
// should not modify it.
func (c *Controller) Tasks() []*Task {
	return c.tasks
}
   192  
   193  func (c *Controller) cancel() {
   194  	if c.cancelFunc != nil {
   195  		c.cancelFunc()
   196  	}
   197  }
   198  
   199  func (c *Controller) addErr(err error, msg string) {
   200  	c.errs = errors.Append(c.errs, errors.Promote(err, msg))
   201  }
   202  
   203  // New creates a Controller for a given Instance and TaskFunc.
   204  //
   205  // The instance value can either be a *cue.Instance or a cue.Value.
   206  func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller {
   207  	v := inst.Value()
   208  	ctx := eval.NewContext(value.ToInternal(v))
   209  
   210  	c := &Controller{
   211  		isTask: f,
   212  		inst:   v,
   213  		opCtx:  ctx,
   214  
   215  		taskCh: make(chan *Task),
   216  		keys:   map[string]*Task{},
   217  		mut:    &sync.Mutex{},
   218  	}
   219  
   220  	if cfg != nil {
   221  		c.cfg = *cfg
   222  	}
   223  
   224  	c.initTasks()
   225  	return c
   226  
   227  }
   228  
   229  // Run runs the tasks of a workflow until completion.
   230  func (c *Controller) Run(ctx context.Context) error {
   231  	c.context, c.cancelFunc = context.WithCancel(ctx)
   232  	defer c.cancelFunc()
   233  
   234  	c.runLoop()
   235  
   236  	// NOTE: track state here as runLoop might add more tasks to the flow
   237  	// during the execution so checking current tasks state may not be
   238  	// accurate enough to determine that the flow is terminated.
   239  	// This is used to determine if the controller value can be retrieved.
   240  	// When the controller value is safe to be read concurrently this tracking
   241  	// can be removed.
   242  	c.mut.Lock()
   243  	defer c.mut.Unlock()
   244  	c.done = true
   245  
   246  	return c.errs
   247  }
   248  
   249  // Value returns the value managed by the controller.
   250  //
   251  // It is safe to use the value only after Run() has returned.
   252  // It panics if the flow is running.
   253  func (c *Controller) Value() cue.Value {
   254  	c.mut.Lock()
   255  	defer c.mut.Unlock()
   256  	if !c.done {
   257  		panic("can't retrieve value before flow has terminated")
   258  	}
   259  	return c.inst
   260  }
   261  
   262  // A State indicates the state of a Task.
   263  //
   264  // The following state diagram indicates the possible state transitions:
   265  //
   266  //          Ready
   267  //       ↗︎        ↘︎
   268  //   Waiting  ←  Running
   269  //       ↘︎        ↙︎
   270  //       Terminated
   271  //
   272  // A Task may move from Waiting to Terminating if one of
   273  // the tasks on which it dependends fails.
   274  //
   275  // NOTE: transitions from Running to Waiting are currently not supported. In
   276  // the future this may be possible if a task depends on continuously running
   277  // tasks that send updates.
   278  //
   279  type State int
   280  
   281  const (
   282  	// Waiting indicates a task is blocked on input from another task.
   283  	//
   284  	// NOTE: although this is currently not implemented, a task could
   285  	// theoretically move from the Running to Waiting state.
   286  	Waiting State = iota
   287  
   288  	// Ready means a tasks is ready to run, but currently not running.
   289  	Ready
   290  
   291  	// Running indicates a goroutine is currently active for a task and that
   292  	// it is not Waiting.
   293  	Running
   294  
   295  	// Terminated means a task has stopped running either because it terminated
   296  	// while Running or was aborted by task on which it depends. The error
   297  	// value of a Task indicates the reason for the termination.
   298  	Terminated
   299  )
   300  
   301  var stateStrings = map[State]string{
   302  	Waiting:    "Waiting",
   303  	Ready:      "Ready",
   304  	Running:    "Running",
   305  	Terminated: "Terminated",
   306  }
   307  
   308  // String reports a human readable string of status s.
   309  func (s State) String() string {
   310  	return stateStrings[s]
   311  }
   312  
// A Task contains the context for a single task execution.
// Tasks may be run concurrently.
type Task struct {
	// Static
	//
	// c is the Controller the task belongs to, ctxt is the context Fill uses
	// to convert Go values into expressions, and r is the task's Runner.
	c    *Controller
	ctxt *adt.OpContext
	r    Runner

	// index and path are exposed through Index and Path.
	index  int
	path   cue.Path
	key    string
	labels []adt.Feature

	// Dynamic
	//
	// update accumulates the expressions passed to Fill. deps and pathDeps
	// are populated by addDep: deps is the set of tasks this task depends on
	// and pathDeps records the dependencies found per path.
	update   adt.Expr
	deps     map[*Task]bool
	pathDeps map[string][]*Task

	// v, err and state back the Value, Err and State methods. depTasks lists
	// the tasks in deps in the order in which addDep first recorded them.
	conjunctSeq int64
	valueSeq    int64
	v           cue.Value
	err         errors.Error
	state       State
	depTasks    []*Task
}
   338  
// Context reports the Controller's Context. This is the context installed by
// Controller.Run, so it is nil before Run has been called.
func (t *Task) Context() context.Context {
	return t.c.context
}
   343  
// Path reports the path of the Task within the Instance in which it is
// defined. The Path is always valid.
func (t *Task) Path() cue.Path {
	return t.path
}
   349  
// Index reports the sequence number of the Task. This will not change over
// time.
func (t *Task) Index() int {
	return t.index
}
   355  
// done reports whether the task has stopped running, that is, whether its
// state lies beyond Running.
func (t *Task) done() bool {
	return t.state > Running
}
   359  
   360  func (t *Task) isReady() bool {
   361  	for _, d := range t.depTasks {
   362  		if !d.done() {
   363  			return false
   364  		}
   365  	}
   366  	return true
   367  }
   368  
// vertex returns the internal vertex of the task's current value, as obtained
// from value.ToInternal.
func (t *Task) vertex() *adt.Vertex {
	_, x := value.ToInternal(t.v)
	return x
}
   373  
   374  func (t *Task) addDep(path string, dep *Task) {
   375  	if dep == nil || dep == t {
   376  		return
   377  	}
   378  	if t.deps == nil {
   379  		t.deps = map[*Task]bool{}
   380  		t.pathDeps = map[string][]*Task{}
   381  	}
   382  
   383  	// Add the dependencies for a given path to the controller. We could compute
   384  	// this again later, but this ensures there will be no discrepancies.
   385  	a := t.pathDeps[path]
   386  	found := false
   387  	for _, t := range a {
   388  		if t == dep {
   389  			found = true
   390  			break
   391  		}
   392  	}
   393  	if !found {
   394  		t.pathDeps[path] = append(a, dep)
   395  
   396  	}
   397  
   398  	if !t.deps[dep] {
   399  		t.deps[dep] = true
   400  		t.depTasks = append(t.depTasks, dep)
   401  	}
   402  }
   403  
   404  // Fill fills in values of the Controller's configuration for the current task.
   405  // The changes take effect after the task completes.
   406  //
   407  // This method may currently only be called by the runner.
   408  func (t *Task) Fill(x interface{}) error {
   409  	expr := convert.GoValueToExpr(t.ctxt, true, x)
   410  	if t.update == nil {
   411  		t.update = expr
   412  		return nil
   413  	}
   414  	t.update = &adt.BinaryExpr{
   415  		Op: adt.AndOp,
   416  		X:  t.update,
   417  		Y:  expr,
   418  	}
   419  	return nil
   420  }
   421  
// Value reports the latest value of this task.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc. The access is not
// synchronized.
func (t *Task) Value() cue.Value {
	// TODO: synchronize
	return t.v
}
   430  
// Dependencies reports the Tasks t depends on.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc. The returned slice is
// the task's own slice, not a copy; callers should not modify it.
func (t *Task) Dependencies() []*Task {
	// TODO: add synchronization.
	return t.depTasks
}
   439  
   440  // PathDependencies reports the dependencies found for a value at the given
   441  // path.
   442  //
   443  // This may currently only be called before Run is called or from within
   444  // a call to UpdateFunc.
   445  func (t *Task) PathDependencies(p cue.Path) []*Task {
   446  	return t.pathDeps[p.String()]
   447  }
   448  
// Err returns the error of a completed Task.
//
// This method may currently only be called before Run is called, after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) Err() error {
	return t.err
}
   456  
// State reports the current state of the Task.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) State() State {
	return t.state
}
   463  }