cuelang.org/go@v0.10.1/tools/flow/flow.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package flow provides a low-level workflow manager based on a CUE Instance.
    16  //
    17  // A Task defines an operational unit in a Workflow and corresponds to a struct
    18  // in a CUE instance. This package does not define what a Task looks like in a
    19  // CUE Instance. Instead, the user of this package must supply a TaskFunc that
    20  // creates a Runner for cue.Values that are deemed to be a Task.
    21  //
    22  // Tasks may depend on other tasks. Cyclic dependencies are thereby not allowed.
    23  // A Task A depends on another Task B if A, directly or indirectly, has a
    24  // reference to any field of Task B, including its root.
    25  package flow
    26  
    27  // TODO: Add hooks. This would allow UIs, for instance, to report on progress.
    28  //
    29  // - New(inst *cue.Instance, options ...Option)
    30  // - AddTask(v cue.Value, r Runner) *Task
    31  // - AddDependency(a, b *Task)
    32  // - AddTaskGraph(root cue.Value, fn taskFunc)
    33  // - AddSequence(list cue.Value, fn taskFunc)
    34  // - Err()
    35  
    36  // TODO:
    37  // Should we allow lists as a shorthand for a sequence of tasks?
    38  // If so, how do we specify termination behavior?
    39  
    40  // TODO:
    41  // Should we allow tasks to be a child of another task? Currently, the search
    42  // for tasks ends once a task root is found.
    43  //
    44  // Semantically it is somewhat unclear to do so: for instance, if an $after
    45  // is used to refer to an explicit task dependency, it is logically
    46  // indistinguishable whether this should be a subtask or is a dependency.
    47  // Using higher-order constructs for analysis is generally undesirable.
    48  //
    49  // A possible solution would be to define specific "grouping tasks" whose sole
    50  // purpose is to define sub tasks. The user of this package would then need
    51  // to explicitly distinguish between tasks that are dependencies and tasks that
    52  // are subtasks.
    53  
    54  // TODO: streaming tasks/ server applications
    55  //
    56  // Workflows are currently implemented for batch processing, for instance to
    57  // implement shell scripting or other kinds of batch processing.
    58  //
    59  // This API has been designed, however, to also allow for streaming
    60  // applications. For instance, a streaming Task could listen for Etcd changes
    61  // or incoming HTTP requests and send updates each time an input changes.
    62  // Downstream tasks could then alternate between a Waiting and Running state.
    63  //
    64  // Note that such streaming applications would also cause configurations to
    65  // potentially not become increasingly more specific. Instead, a Task would
    66  // replace its old result each time it is updated. This would require tracking
    67  // of which conjunct was previously created by a task.
    68  
    69  import (
    70  	"context"
    71  	"fmt"
    72  	"os"
    73  	"strings"
    74  	"sync/atomic"
    75  
    76  	"cuelang.org/go/cue"
    77  	"cuelang.org/go/cue/errors"
    78  	"cuelang.org/go/cue/stats"
    79  	"cuelang.org/go/internal/core/adt"
    80  	"cuelang.org/go/internal/core/convert"
    81  	"cuelang.org/go/internal/core/eval"
    82  	"cuelang.org/go/internal/value"
    83  )
    84  
var (
	// ErrAbort may be returned by a task to avoid processing downstream tasks.
	// This can be used by control nodes to influence execution.
	//
	// As described on Runner.Run, it is matched with errors.Is, so a wrapped
	// ErrAbort has the same effect.
	ErrAbort = errors.New("abort dependant tasks without failure")

	// TODO: ErrUpdate: update and run a dependency, but don't complete a
	// dependency as more results may come. This is useful in server mode.

	// debug is true when the CUE_DEBUG_TOOLS_FLOW environment variable is set
	// to a non-empty value. It is evaluated once, at package initialization.
	debug = os.Getenv("CUE_DEBUG_TOOLS_FLOW") != ""
)
    95  
// A TaskFunc creates a Runner for v if v defines a task or reports nil
// otherwise. It reports an error for ill-formed tasks.
//
// If TaskFunc returns a non-nil Runner the search for tasks within v stops.
// That is, subtasks are not supported.
type TaskFunc func(v cue.Value) (Runner, error)
   102  
// A Runner executes a Task.
//
// A plain function can be used as a Runner by converting it to a RunnerFunc.
type Runner interface {
	// Run runs a Task. If any of the tasks it depends on returned an error it
	// is passed to this task. It reports an error upon failure.
	//
	// Any results to be returned can be set by calling Fill on the passed task.
	//
	// TODO: what is a good contract for receiving and passing errors and abort.
	//
	// If for a returned error x errors.Is(x, ErrAbort), all dependent tasks
	// will not be run, without this being an error.
	Run(t *Task, err error) error
}
   116  
// A RunnerFunc runs a Task. It adapts an ordinary function so that it can be
// used as a Runner.
type RunnerFunc func(t *Task) error
   119  
// Run implements Runner by calling f with t and returning its result.
//
// The err argument, which carries an error from a task t depends on, is
// ignored and not made available to f.
func (f RunnerFunc) Run(t *Task, err error) error {
	return f(t)
}
   123  
// A Config defines options for interpreting an Instance as a Workflow.
//
// New accepts a nil *Config, in which case the zero Config is used.
type Config struct {
	// Root limits the search for tasks to be within the path indicated to root.
	// For the cue command, this is set to ["command"]. The default value is
	// for all tasks to be root.
	Root cue.Path

	// InferTasks allows tasks to be defined outside of the Root. Such tasks
	// will only be included in the workflow if any of its fields is referenced
	// by any of the tasks defined within Root.
	//
	// CAVEAT EMPTOR: this feature is mostly provided for backwards
	// compatibility with v0.2. A problem with this approach is that it will
	// look for task structs within arbitrary data. So if not careful, there may
	// be spurious matches.
	InferTasks bool

	// IgnoreConcrete ignores references for which the values are already
	// concrete and cannot change.
	IgnoreConcrete bool

	// FindHiddenTasks allows tasks to be defined in hidden fields.
	FindHiddenTasks bool

	// UpdateFunc is called whenever the information in the controller is
	// updated. This includes directly after initialization. The task may be
	// nil if this call is not the result of a task completing.
	UpdateFunc func(c *Controller, t *Task) error
}
   153  
// A Controller defines a set of Tasks to be executed.
//
// A Controller is created with New and executed with Run.
type Controller struct {
	// cfg is a copy of the Config passed to New, or the zero Config if that
	// was nil. isTask is the TaskFunc passed to New.
	cfg    Config
	isTask TaskFunc

	// inst is the value managed by the controller, as reported by Value.
	inst        cue.Value
	valueSeqNum int64

	env *adt.Environment

	conjuncts   []adt.Conjunct
	conjunctSeq int64

	// taskCh is created unbuffered by New.
	taskCh chan *Task

	// opCtx is the evaluation context created by New. context and cancelFunc
	// are set by Run from the context it is given.
	opCtx      *adt.OpContext
	context    context.Context
	cancelFunc context.CancelFunc

	// taskStats tracks counters for auxiliary operations done by tasks. It does
	// not include the CUE operations done by the Controller on behalf of tasks,
	// which is likely going to be the bulk of the operations.
	taskStats stats.Counts

	// done is set by Run once its run loop has returned. Value panics while it
	// is still false.
	done atomic.Bool

	// keys maps task keys to their index. This allows a recreation of the
	// Instance while retaining the original task indices.
	//
	// TODO: do instance updating in place to allow for more efficient
	// processing.
	keys  map[string]*Task
	tasks []*Task

	// Only used during task initialization.
	nodes map[*adt.Vertex]*Task

	// errs accumulates the errors recorded with addErr; Run returns it.
	errs errors.Error
}
   193  
   194  // Stats reports statistics on the total number of CUE operations used.
   195  //
   196  // This is an experimental method and the API is likely to change. The
   197  // Counts.String method will likely stay and is the safest way to use this API.
   198  //
   199  // This currently should only be called after completion or within a call to
   200  // UpdateFunc.
   201  func (c *Controller) Stats() (counts stats.Counts) {
   202  	counts = *c.opCtx.Stats()
   203  	counts.Add(c.taskStats)
   204  	return counts
   205  }
   206  
// Tasks reports the tasks that are currently registered with the controller.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc. Task pointers returned by this call are not guaranteed
// to be the same between successive calls to this method.
//
// The result is the controller's own slice, not a copy.
func (c *Controller) Tasks() []*Task {
	return c.tasks
}
   215  
   216  func (c *Controller) cancel() {
   217  	if c.cancelFunc != nil {
   218  		c.cancelFunc()
   219  	}
   220  }
   221  
   222  func (c *Controller) addErr(err error, msg string) {
   223  	c.errs = errors.Append(c.errs, errors.Promote(err, msg))
   224  }
   225  
   226  // New creates a Controller for a given Instance and TaskFunc.
   227  //
   228  // The instance value can either be a *cue.Instance or a cue.Value.
   229  func New(cfg *Config, inst cue.InstanceOrValue, f TaskFunc) *Controller {
   230  	v := inst.Value()
   231  	ctx := eval.NewContext(value.ToInternal(v))
   232  
   233  	c := &Controller{
   234  		isTask: f,
   235  		inst:   v,
   236  		opCtx:  ctx,
   237  
   238  		taskCh: make(chan *Task),
   239  		keys:   map[string]*Task{},
   240  	}
   241  
   242  	if cfg != nil {
   243  		c.cfg = *cfg
   244  	}
   245  	c.initTasks(true)
   246  	return c
   247  
   248  }
   249  
   250  // Run runs the tasks of a workflow until completion.
   251  func (c *Controller) Run(ctx context.Context) error {
   252  	c.context, c.cancelFunc = context.WithCancel(ctx)
   253  	defer c.cancelFunc()
   254  
   255  	c.runLoop()
   256  
   257  	// NOTE: track state here as runLoop might add more tasks to the flow
   258  	// during the execution so checking current tasks state may not be
   259  	// accurate enough to determine that the flow is terminated.
   260  	// This is used to determine if the controller value can be retrieved.
   261  	// When the controller value is safe to be read concurrently this tracking
   262  	// can be removed.
   263  	c.done.Store(true)
   264  
   265  	return c.errs
   266  }
   267  
   268  // Value returns the value managed by the controller.
   269  //
   270  // It is safe to use the value only after Run() has returned.
   271  // It panics if the flow is running.
   272  func (c *Controller) Value() cue.Value {
   273  	if !c.done.Load() {
   274  		panic("can't retrieve value before flow has terminated")
   275  	}
   276  	return c.inst
   277  }
   278  
   279  // We need to escape quotes in the path, per
   280  // https://mermaid-js.github.io/mermaid/#/flowchart?id=entity-codes-to-escape-characters
   281  // This also requires that we escape the quoting character #.
   282  var mermaidQuote = strings.NewReplacer("#", "#35;", `"`, "#quot;")
   283  
   284  // mermaidGraph generates a mermaid graph of the current state. This can be
   285  // pasted into https://mermaid-js.github.io/mermaid-live-editor/ for
   286  // visualization.
   287  func mermaidGraph(c *Controller) string {
   288  	w := &strings.Builder{}
   289  	fmt.Fprintln(w, "graph TD")
   290  	for i, t := range c.Tasks() {
   291  		path := mermaidQuote.Replace(t.Path().String())
   292  		fmt.Fprintf(w, "  t%d(\"%s [%s]\")\n", i, path, t.State())
   293  		for _, t := range t.Dependencies() {
   294  			fmt.Fprintf(w, "  t%d-->t%d\n", i, t.Index())
   295  		}
   296  	}
   297  	return w.String()
   298  }
   299  
// A State indicates the state of a Task.
//
// The following state diagram indicates the possible state transitions:
//
//	       Ready
//	    ↗︎        ↘︎
//	Waiting  ←  Running
//	    ↘︎        ↙︎
//	    Terminated
//
// A Task may move from Waiting to Terminated if one of
// the tasks on which it depends fails.
//
// NOTE: transitions from Running to Waiting are currently not supported. In
// the future this may be possible if a task depends on continuously running
// tasks that send updates.
type State int
   317  
   318  //go:generate go run golang.org/x/tools/cmd/stringer -type=State
   319  
// The possible States of a Task. Their numeric order matters: Task.done
// treats any state ordered after Running as finished.
const (
	// Waiting indicates a task is blocked on input from another task.
	//
	// NOTE: although this is currently not implemented, a task could
	// theoretically move from the Running to Waiting state.
	Waiting State = iota

	// Ready means a task is ready to run, but currently not running.
	Ready

	// Running indicates a goroutine is currently active for a task and that
	// it is not Waiting.
	Running

	// Terminated means a task has stopped running either because it terminated
	// while Running or was aborted by a task on which it depends. The error
	// value of a Task indicates the reason for the termination.
	Terminated
)
   339  
// A Task contains the context for a single task execution.
// Tasks may be run concurrently.
type Task struct {
	// Static

	// c is the Controller the task belongs to; Context reports its context.
	c    *Controller
	ctxt *adt.OpContext
	r    Runner

	// index and path are reported by Index and Path.
	index  int
	path   cue.Path
	key    string
	labels []adt.Feature

	// Dynamic

	// update accumulates the expressions passed to Fill.
	update adt.Expr
	// deps is the set of tasks this task depends on and pathDeps groups them
	// by the path at which the dependency was found. Both are allocated
	// lazily by addDep.
	deps     map[*Task]bool
	pathDeps map[string][]*Task

	conjunctSeq int64
	valueSeq    int64
	// v, err and state are reported by Value, Err and State.
	v     cue.Value
	err   errors.Error
	state State
	// depTasks lists each task in deps once, in the order addDep first saw
	// it. It is reported by Dependencies.
	depTasks []*Task

	// stats is reported by Stats.
	stats stats.Counts
}
   367  
// Stats reports statistics on the number of CUE operations used to complete
// this task. The result is a copy of the task's counters.
//
// This is an experimental method and the API is likely to change.
//
// It only shows numbers upon completion. This may change in the future.
func (t *Task) Stats() stats.Counts {
	return t.stats
}
   377  
// Context reports the Controller's Context.
//
// The Controller's context is assigned by Controller.Run.
// NOTE(review): it is presumably nil before Run is called — confirm that
// task initialization does not set it.
func (t *Task) Context() context.Context {
	return t.c.context
}
   382  
// Path reports the path of Task within the Instance in which it is defined.
// The Path is always valid. It belongs to the static part of a Task.
func (t *Task) Path() cue.Path {
	return t.path
}
   388  
// Index reports the sequence number of the Task. This will not change over
// time. mermaidGraph uses it to identify the target node of a dependency
// edge.
func (t *Task) Index() int {
	return t.index
}
   394  
// done reports whether the task has finished, that is, whether its state is
// ordered after Running. Of the declared states only Terminated qualifies.
func (t *Task) done() bool {
	return t.state > Running
}
   398  
   399  func (t *Task) isReady() bool {
   400  	for _, d := range t.depTasks {
   401  		if !d.done() {
   402  			return false
   403  		}
   404  	}
   405  	return true
   406  }
   407  
// vertex returns the internal *adt.Vertex of the task's current value. The
// runtime that value.ToInternal returns alongside it is discarded.
func (t *Task) vertex() *adt.Vertex {
	_, x := value.ToInternal(t.v)
	return x
}
   412  
   413  func (t *Task) addDep(path string, dep *Task) {
   414  	if dep == nil || dep == t {
   415  		return
   416  	}
   417  	if t.deps == nil {
   418  		t.deps = map[*Task]bool{}
   419  		t.pathDeps = map[string][]*Task{}
   420  	}
   421  
   422  	// Add the dependencies for a given path to the controller. We could compute
   423  	// this again later, but this ensures there will be no discrepancies.
   424  	a := t.pathDeps[path]
   425  	found := false
   426  	for _, t := range a {
   427  		if t == dep {
   428  			found = true
   429  			break
   430  		}
   431  	}
   432  	if !found {
   433  		t.pathDeps[path] = append(a, dep)
   434  
   435  	}
   436  
   437  	if !t.deps[dep] {
   438  		t.deps[dep] = true
   439  		t.depTasks = append(t.depTasks, dep)
   440  	}
   441  }
   442  
   443  // Fill fills in values of the Controller's configuration for the current task.
   444  // The changes take effect after the task completes.
   445  //
   446  // This method may currently only be called by the runner.
   447  func (t *Task) Fill(x interface{}) error {
   448  	expr := convert.GoValueToExpr(t.ctxt, true, x)
   449  	if t.update == nil {
   450  		t.update = expr
   451  		return nil
   452  	}
   453  	t.update = &adt.BinaryExpr{
   454  		Op: adt.AndOp,
   455  		X:  t.update,
   456  		Y:  expr,
   457  	}
   458  	return nil
   459  }
   460  
// Value reports the latest value of this task.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc. The read is not
// synchronized.
func (t *Task) Value() cue.Value {
	// TODO: synchronize
	return t.v
}
   469  
// Dependencies reports the Tasks t depends on. Each task is listed once, in
// the order in which the dependency was first recorded.
//
// The result is the task's own slice, not a copy.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) Dependencies() []*Task {
	// TODO: add synchronization.
	return t.depTasks
}
   478  
// PathDependencies reports the dependencies found for a value at the given
// path. Dependencies are looked up by the string form of p; the result is nil
// if none were recorded for that path.
//
// This may currently only be called before Run is called or from within
// a call to UpdateFunc.
func (t *Task) PathDependencies(p cue.Path) []*Task {
	return t.pathDeps[p.String()]
}
   487  
// Err returns the error of a completed Task, or nil if no error has been
// recorded for it.
//
// This method may currently only be called before Run is called, after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) Err() error {
	return t.err
}
   495  
// State reports the current state of the Task. See the State type for the
// possible values and the transitions between them.
//
// This method may currently only be called before Run is called or after a
// Task completed, or from within a call to UpdateFunc.
func (t *Task) State() State {
	return t.state
}
   502  }