github.com/databricks/cli@v0.203.0/bundle/run/pipeline.go

github.com/databricks/cli@v0.203.0/bundle/run/pipeline.go (about)

     1  package run
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  	"time"
     8  
     9  	"github.com/databricks/cli/bundle"
    10  	"github.com/databricks/cli/bundle/config/resources"
    11  	"github.com/databricks/cli/bundle/run/output"
    12  	"github.com/databricks/cli/bundle/run/progress"
    13  	"github.com/databricks/cli/libs/cmdio"
    14  	"github.com/databricks/cli/libs/log"
    15  	"github.com/databricks/databricks-sdk-go/service/pipelines"
    16  	flag "github.com/spf13/pflag"
    17  )
    18  
    19  func filterEventsByUpdateId(events []pipelines.PipelineEvent, updateId string) []pipelines.PipelineEvent {
    20  	result := []pipelines.PipelineEvent{}
    21  	for i := 0; i < len(events); i++ {
    22  		if events[i].Origin.UpdateId == updateId {
    23  			result = append(result, events[i])
    24  		}
    25  	}
    26  	return result
    27  }
    28  
    29  func (r *pipelineRunner) logEvent(ctx context.Context, event pipelines.PipelineEvent) {
    30  	logString := ""
    31  	if event.Message != "" {
    32  		logString += fmt.Sprintf(" %s\n", event.Message)
    33  	}
    34  	if event.Error != nil && len(event.Error.Exceptions) > 0 {
    35  		logString += "trace for most recent exception: \n"
    36  		for i := 0; i < len(event.Error.Exceptions); i++ {
    37  			logString += fmt.Sprintf("%s\n", event.Error.Exceptions[i].Message)
    38  		}
    39  	}
    40  	if logString != "" {
    41  		log.Errorf(ctx, fmt.Sprintf("[%s] %s", event.EventType, logString))
    42  	}
    43  }
    44  
    45  func (r *pipelineRunner) logErrorEvent(ctx context.Context, pipelineId string, updateId string) error {
    46  	w := r.bundle.WorkspaceClient()
    47  
    48  	// Note: For a 100 percent correct and complete solution we should use the
    49  	// w.Pipelines.ListPipelineEventsAll method to find all relevant events. However the
    50  	// probablity of the relevant last error event not being present in the most
    51  	// recent 100 error events is very close to 0 and the first 100 error events
    52  	// should give us a good picture of the error.
    53  	//
    54  	// Otherwise for long lived pipelines, there can be a lot of unnecessary
    55  	// latency due to multiple pagination API calls needed underneath the hood for
    56  	// ListPipelineEventsAll
    57  	res, err := w.Pipelines.Impl().ListPipelineEvents(ctx, pipelines.ListPipelineEventsRequest{
    58  		Filter:     `level='ERROR'`,
    59  		MaxResults: 100,
    60  		PipelineId: pipelineId,
    61  	})
    62  	if err != nil {
    63  		return err
    64  	}
    65  	updateEvents := filterEventsByUpdateId(res.Events, updateId)
    66  	// The events API returns most recent events first. We iterate in a reverse order
    67  	// to print the events chronologically
    68  	for i := len(updateEvents) - 1; i >= 0; i-- {
    69  		r.logEvent(ctx, updateEvents[i])
    70  	}
    71  	return nil
    72  }
    73  
// PipelineOptions defines options for running a pipeline update.
//
// The four options are mutually exclusive: Validate returns an error when
// more than one of them is set.
type PipelineOptions struct {
	// Perform a full graph update.
	// Bound to the --refresh-all flag by Define.
	RefreshAll bool

	// List of tables to update.
	// Bound to the --refresh flag by Define.
	Refresh []string

	// Perform a full graph reset and recompute.
	// Bound to the --full-refresh-all flag by Define.
	FullRefreshAll bool

	// List of tables to reset and recompute.
	// Bound to the --full-refresh flag by Define.
	FullRefresh []string
}
    88  
// Define registers the pipeline run flags on fs and binds each of them to the
// corresponding field of o. The boolean flags default to false and the list
// flags default to nil.
func (o *PipelineOptions) Define(fs *flag.FlagSet) {
	fs.BoolVar(&o.RefreshAll, "refresh-all", false, "Perform a full graph update.")
	fs.StringSliceVar(&o.Refresh, "refresh", nil, "List of tables to update.")
	fs.BoolVar(&o.FullRefreshAll, "full-refresh-all", false, "Perform a full graph reset and recompute.")
	fs.StringSliceVar(&o.FullRefresh, "full-refresh", nil, "List of tables to reset and recompute.")
}
    95  
    96  // Validate returns if the combination of options is valid.
    97  func (o *PipelineOptions) Validate() error {
    98  	set := []string{}
    99  	if o.RefreshAll {
   100  		set = append(set, "--refresh-all")
   101  	}
   102  	if len(o.Refresh) > 0 {
   103  		set = append(set, "--refresh")
   104  	}
   105  	if o.FullRefreshAll {
   106  		set = append(set, "--full-refresh-all")
   107  	}
   108  	if len(o.FullRefresh) > 0 {
   109  		set = append(set, "--full-refresh")
   110  	}
   111  	if len(set) > 1 {
   112  		return fmt.Errorf("pipeline run arguments are mutually exclusive (got %s)", strings.Join(set, ", "))
   113  	}
   114  	return nil
   115  }
   116  
   117  func (o *PipelineOptions) toPayload(pipelineID string) (*pipelines.StartUpdate, error) {
   118  	if err := o.Validate(); err != nil {
   119  		return nil, err
   120  	}
   121  	payload := &pipelines.StartUpdate{
   122  		PipelineId: pipelineID,
   123  
   124  		// Note: `RefreshAll` is implied if the fields below are not set.
   125  		RefreshSelection:     o.Refresh,
   126  		FullRefresh:          o.FullRefreshAll,
   127  		FullRefreshSelection: o.FullRefresh,
   128  	}
   129  	return payload, nil
   130  }
   131  
// pipelineRunner starts an update of a single bundle pipeline and follows it
// until it finishes (see Run).
type pipelineRunner struct {
	// key is embedded; Run calls its Key method to tag log output with the
	// resource key.
	key

	// bundle supplies the workspace client used for every API call;
	// pipeline is the resource whose ID identifies the pipeline to update.
	bundle   *bundle.Bundle
	pipeline *resources.Pipeline
}
   138  
   139  func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) {
   140  	var pipelineID = r.pipeline.ID
   141  
   142  	// Include resource key in logger.
   143  	ctx = log.NewContext(ctx, log.GetLogger(ctx).With("resource", r.Key()))
   144  	w := r.bundle.WorkspaceClient()
   145  	_, err := w.Pipelines.GetByPipelineId(ctx, pipelineID)
   146  	if err != nil {
   147  		log.Warnf(ctx, "Cannot get pipeline: %s", err)
   148  		return nil, err
   149  	}
   150  
   151  	req, err := opts.Pipeline.toPayload(pipelineID)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  
   156  	res, err := w.Pipelines.StartUpdate(ctx, *req)
   157  	if err != nil {
   158  		return nil, err
   159  	}
   160  
   161  	updateID := res.UpdateId
   162  
   163  	// setup progress logger and tracker to query events
   164  	updateTracker := progress.NewUpdateTracker(pipelineID, updateID, w)
   165  	progressLogger, ok := cmdio.FromContext(ctx)
   166  	if !ok {
   167  		return nil, fmt.Errorf("no progress logger found")
   168  	}
   169  
   170  	// Log the pipeline update URL as soon as it is available.
   171  	progressLogger.Log(progress.NewPipelineUpdateUrlEvent(w.Config.Host, updateID, pipelineID))
   172  
   173  	if opts.NoWait {
   174  		return nil, nil
   175  	}
   176  
   177  	// Poll update for completion and post status.
   178  	// Note: there is no "StartUpdateAndWait" wrapper for this API.
   179  	var prevState *pipelines.UpdateInfoState
   180  	for {
   181  		events, err := updateTracker.Events(ctx)
   182  		if err != nil {
   183  			return nil, err
   184  		}
   185  		for _, event := range events {
   186  			progressLogger.Log(&event)
   187  			log.Infof(ctx, event.String())
   188  		}
   189  
   190  		update, err := w.Pipelines.GetUpdateByPipelineIdAndUpdateId(ctx, pipelineID, updateID)
   191  		if err != nil {
   192  			return nil, err
   193  		}
   194  
   195  		// Log only if the current state is different from the previous state.
   196  		state := update.Update.State
   197  		if prevState == nil || *prevState != state {
   198  			log.Infof(ctx, "Update status: %s", state)
   199  			prevState = &state
   200  		}
   201  
   202  		if state == pipelines.UpdateInfoStateCanceled {
   203  			log.Infof(ctx, "Update was cancelled!")
   204  			return nil, fmt.Errorf("update cancelled")
   205  		}
   206  		if state == pipelines.UpdateInfoStateFailed {
   207  			log.Infof(ctx, "Update has failed!")
   208  			err := r.logErrorEvent(ctx, pipelineID, updateID)
   209  			if err != nil {
   210  				return nil, err
   211  			}
   212  			return nil, fmt.Errorf("update failed")
   213  		}
   214  		if state == pipelines.UpdateInfoStateCompleted {
   215  			log.Infof(ctx, "Update has completed successfully!")
   216  			return nil, nil
   217  		}
   218  
   219  		time.Sleep(time.Second)
   220  	}
   221  }