cuelang.org/go@v0.13.0/tools/flow/flow.go

cuelang.org/go@v0.13.0/tools/flow/flow.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package flow provides a low-level workflow manager based on a CUE Instance.
    16  //
    17  // A Task defines an operational unit in a Workflow and corresponds to a struct
    18  // in a CUE instance. This package does not define what a Task looks like in a
    19  // CUE Instance. Instead, the user of this package must supply a TaskFunc that
    20  // creates a Runner for cue.Values that are deemed to be a Task.
    21  //
    22  // Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
    23  // A Task A depends on another Task B if A, directly or indirectly, has a
    24  // reference to any field of Task B, including its root.
    25  package flow
    26  
    27  // TODO: Add hooks. This would allow UIs, for instance, to report on progress.
    28  //
    29  // - New(inst *cue.Instance, options ...Option)
    30  // - AddTask(v cue.Value, r Runner) *Task
    31  // - AddDependency(a, b *Task)
    32  // - AddTaskGraph(root cue.Value, fn taskFunc)
    33  // - AddSequence(list cue.Value, fn taskFunc)
    34  // - Err()
    35  
    36  // TODO:
    37  // Should we allow lists as a shorthand for a sequence of tasks?
    38  // If so, how do we specify termination behavior?
    39  
    40  // TODO:
    41  // Should we allow tasks to be a child of another task? Currently, the search
    42  // for tasks ends once a task root is found.
    43  //
    44  // Semantically it is somewhat unclear to do so: for instance, if an $after
    45  // is used to refer to an explicit task dependency, it is logically
    46  // indistinguishable whether this should be a subtask or is a dependency.
    47  // Using higher-order constructs for analysis is generally undesirable.
    48  //
    49  // A possible solution would be to define specific "grouping tasks" whose sole
    50  // purpose is to define sub tasks. The user of this package would then need
    51  // to explicitly distinguish between tasks that are dependencies and tasks that
    52  // are subtasks.
    53  
    54  // TODO: streaming tasks/ server applications
    55  //
    56  // Workflows are currently implemented for batch processing, for instance to
    57  // implement shell scripting or other kinds of batch processing.
    58  //
    59  // This API has been designed, however, to also allow for streaming
    60  // applications. For instance, a streaming Task could listen for Etcd changes
    61  // or incoming HTTP requests and send updates each time an input changes.
    62  // Downstream tasks could then alternate between a Waiting and Running state.
    63  //
    64  // Note that such streaming applications would also cause configurations to
    65  // potentially not become increasingly more specific. Instead, a Task would
    66  // replace its old result each time it is updated. This would require tracking
    67  // of which conjunct was previously created by a task.
    68  
    69  import (
    70  	"context"
    71  	"fmt"
    72  	"strings"
    73  	"sync/atomic"
    74  
    75  	"cuelang.org/go/cue"
    76  	"cuelang.org/go/cue/errors"
    77  	"cuelang.org/go/cue/stats"
    78  	"cuelang.org/go/internal/core/adt"
    79  	"cuelang.org/go/internal/core/convert"
    80  	"cuelang.org/go/internal/core/eval"
    81  	"cuelang.org/go/internal/value"
    82  )
    83  
var (
	// ErrAbort may be returned by a task to avoid processing downstream tasks.
	// This can be used by control nodes to influence execution.
	//
	// It is a sentinel value meant to be matched with errors.Is.
	ErrAbort = errors.New("abort dependant tasks without failure")

	// TODO: ErrUpdate: update and run a dependency, but don't complete a
	// dependency as more results may come. This is useful in server mode.
)
    92  
// A TaskFunc creates a Runner for v if v defines a task, or reports a nil
// Runner otherwise. It reports an error for ill-formed tasks.
//
// If TaskFunc returns a non-nil Runner, the search for tasks within v stops.
// That is, subtasks are not supported.
type TaskFunc func(v cue.Value) (Runner, error)
    99  
// A Runner executes a Task.
type Runner interface {
	// Run runs the Task t. If any of the tasks it depends on returned an
	// error, that error is passed to this task as err. Run reports an error
	// upon failure.
	//
	// Any results to be returned can be set by calling Fill on the passed task.
	//
	// TODO: what is a good contract for receiving and passing errors and abort.
	//
	// If for a returned error x errors.Is(x, ErrAbort), all dependant tasks
	// will not be run, without this being an error.
	Run(t *Task, err error) error
}
   113  
// A RunnerFunc runs a Task. It adapts a plain function to the Runner
// interface.
type RunnerFunc func(t *Task) error

// Run implements Runner by calling f(t) and returning its result.
//
// The err argument is not forwarded to f: a RunnerFunc never sees the error
// passed in by the caller.
func (f RunnerFunc) Run(t *Task, err error) error {
	return f(t)
}
   120  
// A Config defines options for interpreting an Instance as a Workflow.
//
// Passing a nil *Config to New is equivalent to passing the zero Config.
type Config struct {
	// Root limits the search for tasks to be within the path indicated by
	// Root. For the cue command, this is set to ["command"]. The default value
	// is for all tasks to be root.
	Root cue.Path

	// InferTasks allows tasks to be defined outside of the Root. Such tasks
	// will only be included in the workflow if any of their fields is
	// referenced by any of the tasks defined within Root.
	//
	// CAVEAT EMPTOR: this feature is mostly provided for backwards
	// compatibility with v0.2. A problem with this approach is that it will
	// look for task structs within arbitrary data. So if not careful, there may
	// be spurious matches.
	InferTasks bool

	// IgnoreConcrete ignores references for which the values are already
	// concrete and cannot change.
	IgnoreConcrete bool

	// FindHiddenTasks allows tasks to be defined in hidden fields.
	FindHiddenTasks bool

	// UpdateFunc is called whenever the information in the controller is
	// updated. This includes directly after initialization. The task may be
	// nil if this call is not the result of a task completing.
	UpdateFunc func(c *Controller, t *Task) error
}
   150  
// A Controller defines a set of Tasks to be executed.
//
// A Controller is created with New and executed with Run.
type Controller struct {
	// cfg is a copy of the Config passed to New (the zero Config if nil was
	// passed). isTask is the TaskFunc passed to New.
	cfg    Config
	isTask TaskFunc

	// inst is the value under control. It is initialized from the value
	// passed to New and is what Value returns.
	inst        cue.Value
	valueSeqNum int64

	env *adt.Environment

	conjuncts   []adt.Conjunct
	conjunctSeq int64

	// taskCh is created as an unbuffered channel by New.
	// NOTE(review): presumably used by the run loop to receive tasks; the run
	// loop is not defined in this part of the file — confirm.
	taskCh chan *Task

	// opCtx is the evaluation context created by New. context and cancelFunc
	// are assigned by Run; cancel tolerates a nil cancelFunc.
	opCtx      *adt.OpContext
	context    context.Context
	cancelFunc context.CancelFunc

	// taskStats tracks counters for auxiliary operations done by tasks. It does
	// not include the CUE operations done by the Controller on behalf of tasks,
	// which is likely going to be the bulk of the operations.
	taskStats stats.Counts

	// done is set by Run once the run loop has returned. Value panics while
	// it is still false.
	done atomic.Bool

	// keys maps task keys to their index. This allows a recreation of the
	// Instance while retaining the original task indices.
	//
	// TODO: do instance updating in place to allow for more efficient
	// processing.
	keys  map[string]*Task
	tasks []*Task

	// Only used during task initialization.
	nodes map[*adt.Vertex]*Task

	// errs accumulates the errors recorded through addErr. It is the value
	// returned by Run.
	errs errors.Error
}
   190  
   191  // Stats reports statistics on the total number of CUE operations used.
   192  //
   193  // This is an experimental method and the API is likely to change. The
   194  // Counts.String method will likely stay and is the safest way to use this API.
   195  //
   196  // This currently should only be called after completion or within a call to
   197  // UpdateFunc.
   198  func (c *Controller) Stats() (counts stats.Counts) {
   199  	counts = *c.opCtx.Stats()
   200  	counts.Add(c.taskStats)
   201  	return counts
   202  }
   203  
// Tasks reports the tasks that are currently registered with the controller.
//
// The returned slice is the controller's own slice, not a copy; callers
// should treat it as read-only.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc. Task pointers returned by this call are not guaranteed
// to be the same between successive calls to this method.
func (c *Controller) Tasks() []*Task {
	return c.tasks
}
   212  
   213  func (c *Controller) cancel() {
   214  	if c.cancelFunc != nil {
   215  		c.cancelFunc()
   216  	}
   217  }
   218  
   219  func (c *Controller) addErr(err error, msg string) {
   220  	c.errs = errors.Append(c.errs, errors.Promote(err, msg))
   221  }
   222  
   223  // New creates a Controller for a given Instance and TaskFunc.
   224  //
   225  // The instance value can either be a *cue.Instance or a cue.Value.
   226  func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller {
   227  	v := inst.Value()
   228  	ctx := eval.NewContext(value.ToInternal(v))
   229  
   230  	c := &Controller{
   231  		isTask: f,
   232  		inst:   v,
   233  		opCtx:  ctx,
   234  
   235  		taskCh: make(chan *Task),
   236  		keys:   map[string]*Task{},
   237  	}
   238  
   239  	if cfg != nil {
   240  		c.cfg = *cfg
   241  	}
   242  	c.initTasks(true)
   243  	return c
   244  
   245  }
   246  
   247  // Run runs the tasks of a workflow until completion.
   248  func (c *Controller) Run(ctx context.Context) error {
   249  	c.context, c.cancelFunc = context.WithCancel(ctx)
   250  	defer c.cancelFunc()
   251  
   252  	c.runLoop()
   253  
   254  	// NOTE: track state here as runLoop might add more tasks to the flow
   255  	// during the execution so checking current tasks state may not be
   256  	// accurate enough to determine that the flow is terminated.
   257  	// This is used to determine if the controller value can be retrieved.
   258  	// When the controller value is safe to be read concurrently this tracking
   259  	// can be removed.
   260  	c.done.Store(true)
   261  
   262  	return c.errs
   263  }
   264  
   265  // Value returns the value managed by the controller.
   266  //
   267  // It is safe to use the value only after [Controller.Run] has returned.
   268  // It panics if the flow is running.
   269  func (c *Controller) Value() cue.Value {
   270  	if !c.done.Load() {
   271  		panic("can't retrieve value before flow has terminated")
   272  	}
   273  	return c.inst
   274  }
   275  
   276  // We need to escape quotes in the path, per
   277  // https://mermaid-js.github.io/mermaid/#/flowchart?id=entity-codes-to-escape-characters
   278  // This also requires that we escape the quoting character #.
   279  var mermaidQuote = strings.NewReplacer("#", "#35;", `"`, "#quot;")
   280  
   281  // mermaidGraph generates a mermaid graph of the current state. This can be
   282  // pasted into https://mermaid-js.github.io/mermaid-live-editor/ for
   283  // visualization.
   284  func mermaidGraph(c *Controller) string {
   285  	w := &strings.Builder{}
   286  	fmt.Fprintln(w, "graph TD")
   287  	for i, t := range c.Tasks() {
   288  		path := mermaidQuote.Replace(t.Path().String())
   289  		fmt.Fprintf(w, "  t%d(\"%s [%s]\")\n", i, path, t.State())
   290  		for _, t := range t.Dependencies() {
   291  			fmt.Fprintf(w, "  t%d-->t%d\n", i, t.Index())
   292  		}
   293  	}
   294  	return w.String()
   295  }
   296  
// A State indicates the state of a Task.
//
// The following state diagram indicates the possible state transitions:
//
//	       Ready
//	    ↗︎        ↘︎
//	Waiting  ←  Running
//	    ↘︎        ↙︎
//	    Terminated
//
// A Task may move from Waiting to Terminated if one of
// the tasks on which it depends fails.
//
// NOTE: transitions from Running to Waiting are currently not supported. In
// the future this may be possible if a task depends on continuously running
// tasks that send updates.
type State int
   314  
   315  //go:generate go run golang.org/x/tools/cmd/stringer -type=State
   316  
// The possible states of a Task. The numeric order matters: any state greater
// than Running is treated as done.
const (
	// Waiting indicates a task is blocked on input from another task.
	//
	// NOTE: although this is currently not implemented, a task could
	// theoretically move from the Running to Waiting state.
	Waiting State = iota

	// Ready means a task is ready to run, but currently not running.
	Ready

	// Running indicates a goroutine is currently active for a task and that
	// it is not Waiting.
	Running

	// Terminated means a task has stopped running either because it terminated
	// while Running or was aborted by a task on which it depends. The error
	// value of a Task indicates the reason for the termination.
	Terminated
)
   336  
// A Task contains the context for a single task execution.
// Tasks may be run concurrently.
type Task struct {
	// Static
	//
	// c is the owning Controller, ctxt is the context used by Fill to convert
	// Go values, and r is the Runner for this task.
	c    *Controller
	ctxt *adt.OpContext
	r    Runner

	// index and path are reported by Index and Path respectively.
	index  int
	path   cue.Path
	key    string
	labels []adt.Feature

	// Dynamic
	//
	// update accumulates the values passed to Fill. deps is the set of tasks
	// this task depends on, and pathDeps records those dependencies per path
	// string; both are maintained by addDep.
	update   adt.Expr
	deps     map[*Task]bool
	pathDeps map[string][]*Task

	// v, err and state are reported by Value, Err and State respectively.
	// depTasks lists the tasks in deps in the order in which they were first
	// added; it is what Dependencies returns.
	conjunctSeq int64
	valueSeq    int64
	v           cue.Value
	err         errors.Error
	state       State
	depTasks    []*Task

	// stats is reported by Stats.
	stats stats.Counts
}
   364  
// Stats reports statistics on the number of CUE operations used to complete
// this task. The counters are returned by value.
//
// This is an experimental method and the API is likely to change.
//
// It only shows numbers upon completion. This may change in the future.
func (t *Task) Stats() stats.Counts {
	return t.stats
}
   374  
// Context reports the Controller's Context.
//
// NOTE(review): the controller's context appears to be assigned only by
// Controller.Run, so this presumably returns nil before Run is called —
// confirm.
func (t *Task) Context() context.Context {
	return t.c.context
}
   379  
// Path reports the path of the Task within the Instance in which it is
// defined. The Path is always valid.
func (t *Task) Path() cue.Path {
	return t.path
}
   385  
// Index reports the sequence number of the Task. This will not change over
// time.
//
// mermaidGraph uses this number to identify the target node of a dependency
// edge.
func (t *Task) Index() int {
	return t.index
}
   391  
   392  func (t *Task) done() bool {
   393  	return t.state > Running
   394  }
   395  
   396  func (t *Task) isReady() bool {
   397  	for _, d := range t.depTasks {
   398  		if !d.done() {
   399  			return false
   400  		}
   401  	}
   402  	return true
   403  }
   404  
   405  func (t *Task) vertex() *adt.Vertex {
   406  	_, x := value.ToInternal(t.v)
   407  	return x
   408  }
   409  
   410  func (t *Task) addDep(path string, dep *Task) {
   411  	if dep == nil || dep == t {
   412  		return
   413  	}
   414  	if t.deps == nil {
   415  		t.deps = map[*Task]bool{}
   416  		t.pathDeps = map[string][]*Task{}
   417  	}
   418  
   419  	// Add the dependencies for a given path to the controller. We could compute
   420  	// this again later, but this ensures there will be no discrepancies.
   421  	a := t.pathDeps[path]
   422  	found := false
   423  	for _, t := range a {
   424  		if t == dep {
   425  			found = true
   426  			break
   427  		}
   428  	}
   429  	if !found {
   430  		t.pathDeps[path] = append(a, dep)
   431  
   432  	}
   433  
   434  	if !t.deps[dep] {
   435  		t.deps[dep] = true
   436  		t.depTasks = append(t.depTasks, dep)
   437  	}
   438  }
   439  
   440  // Fill fills in values of the Controller's configuration for the current task.
   441  // The changes take effect after the task completes.
   442  //
   443  // This method may currently only be called by the runner.
   444  func (t *Task) Fill(x interface{}) error {
   445  	expr := convert.GoValueToExpr(t.ctxt, true, x)
   446  	if t.update == nil {
   447  		t.update = expr
   448  		return nil
   449  	}
   450  	t.update = &adt.BinaryExpr{
   451  		Op: adt.AndOp,
   452  		X:  t.update,
   453  		Y:  expr,
   454  	}
   455  	return nil
   456  }
   457  
// Value reports the latest value of this task.
//
// The value is read without synchronization, so this method may currently
// only be called before Run is called or after a Task completed, or from
// within a call to UpdateFunc.
func (t *Task) Value() cue.Value {
	// TODO: synchronize
	return t.v
}
   466  
// Dependencies reports the Tasks t depends on.
//
// The returned slice is the task's own slice, not a copy; callers should
// treat it as read-only.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) Dependencies() []*Task {
	// TODO: add synchronization.
	return t.depTasks
}
   475  
// PathDependencies reports the dependencies found for a value at the given
// path. Dependencies are looked up by the string form of p; the result is nil
// if none were recorded for that path.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc.
func (t *Task) PathDependencies(p cue.Path) []*Task {
	return t.pathDeps[p.String()]
}
   484  
// Err returns the error of a completed Task. For a Terminated task, this
// error indicates the reason for the termination.
//
// This method may currently only be called before Run is called, after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) Err() error {
	return t.err
}
   492  
// State reports the current state of the Task: one of Waiting, Ready,
// Running or Terminated.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) State() State {
	return t.state
}