github.com/databricks/cli@v0.203.0/bundle/run/pipeline.go (about) 1 package run 2 3 import ( 4 "context" 5 "fmt" 6 "strings" 7 "time" 8 9 "github.com/databricks/cli/bundle" 10 "github.com/databricks/cli/bundle/config/resources" 11 "github.com/databricks/cli/bundle/run/output" 12 "github.com/databricks/cli/bundle/run/progress" 13 "github.com/databricks/cli/libs/cmdio" 14 "github.com/databricks/cli/libs/log" 15 "github.com/databricks/databricks-sdk-go/service/pipelines" 16 flag "github.com/spf13/pflag" 17 ) 18 19 func filterEventsByUpdateId(events []pipelines.PipelineEvent, updateId string) []pipelines.PipelineEvent { 20 result := []pipelines.PipelineEvent{} 21 for i := 0; i < len(events); i++ { 22 if events[i].Origin.UpdateId == updateId { 23 result = append(result, events[i]) 24 } 25 } 26 return result 27 } 28 29 func (r *pipelineRunner) logEvent(ctx context.Context, event pipelines.PipelineEvent) { 30 logString := "" 31 if event.Message != "" { 32 logString += fmt.Sprintf(" %s\n", event.Message) 33 } 34 if event.Error != nil && len(event.Error.Exceptions) > 0 { 35 logString += "trace for most recent exception: \n" 36 for i := 0; i < len(event.Error.Exceptions); i++ { 37 logString += fmt.Sprintf("%s\n", event.Error.Exceptions[i].Message) 38 } 39 } 40 if logString != "" { 41 log.Errorf(ctx, fmt.Sprintf("[%s] %s", event.EventType, logString)) 42 } 43 } 44 45 func (r *pipelineRunner) logErrorEvent(ctx context.Context, pipelineId string, updateId string) error { 46 w := r.bundle.WorkspaceClient() 47 48 // Note: For a 100 percent correct and complete solution we should use the 49 // w.Pipelines.ListPipelineEventsAll method to find all relevant events. However the 50 // probablity of the relevant last error event not being present in the most 51 // recent 100 error events is very close to 0 and the first 100 error events 52 // should give us a good picture of the error. 53 // 54 // Otherwise for long lived pipelines, there can be a lot of unnecessary 55 // latency due to multiple pagination API calls needed underneath the hood for 56 // ListPipelineEventsAll 57 res, err := w.Pipelines.Impl().ListPipelineEvents(ctx, pipelines.ListPipelineEventsRequest{ 58 Filter: `level='ERROR'`, 59 MaxResults: 100, 60 PipelineId: pipelineId, 61 }) 62 if err != nil { 63 return err 64 } 65 updateEvents := filterEventsByUpdateId(res.Events, updateId) 66 // The events API returns most recent events first. We iterate in a reverse order 67 // to print the events chronologically 68 for i := len(updateEvents) - 1; i >= 0; i-- { 69 r.logEvent(ctx, updateEvents[i]) 70 } 71 return nil 72 } 73 74 // PipelineOptions defines options for running a pipeline update. 75 type PipelineOptions struct { 76 // Perform a full graph update. 77 RefreshAll bool 78 79 // List of tables to update. 80 Refresh []string 81 82 // Perform a full graph reset and recompute. 83 FullRefreshAll bool 84 85 // List of tables to reset and recompute. 86 FullRefresh []string 87 } 88 89 func (o *PipelineOptions) Define(fs *flag.FlagSet) { 90 fs.BoolVar(&o.RefreshAll, "refresh-all", false, "Perform a full graph update.") 91 fs.StringSliceVar(&o.Refresh, "refresh", nil, "List of tables to update.") 92 fs.BoolVar(&o.FullRefreshAll, "full-refresh-all", false, "Perform a full graph reset and recompute.") 93 fs.StringSliceVar(&o.FullRefresh, "full-refresh", nil, "List of tables to reset and recompute.") 94 } 95 96 // Validate returns if the combination of options is valid. 97 func (o *PipelineOptions) Validate() error { 98 set := []string{} 99 if o.RefreshAll { 100 set = append(set, "--refresh-all") 101 } 102 if len(o.Refresh) > 0 { 103 set = append(set, "--refresh") 104 } 105 if o.FullRefreshAll { 106 set = append(set, "--full-refresh-all") 107 } 108 if len(o.FullRefresh) > 0 { 109 set = append(set, "--full-refresh") 110 } 111 if len(set) > 1 { 112 return fmt.Errorf("pipeline run arguments are mutually exclusive (got %s)", strings.Join(set, ", ")) 113 } 114 return nil 115 } 116 117 func (o *PipelineOptions) toPayload(pipelineID string) (*pipelines.StartUpdate, error) { 118 if err := o.Validate(); err != nil { 119 return nil, err 120 } 121 payload := &pipelines.StartUpdate{ 122 PipelineId: pipelineID, 123 124 // Note: `RefreshAll` is implied if the fields below are not set. 125 RefreshSelection: o.Refresh, 126 FullRefresh: o.FullRefreshAll, 127 FullRefreshSelection: o.FullRefresh, 128 } 129 return payload, nil 130 } 131 132 type pipelineRunner struct { 133 key 134 135 bundle *bundle.Bundle 136 pipeline *resources.Pipeline 137 } 138 139 func (r *pipelineRunner) Run(ctx context.Context, opts *Options) (output.RunOutput, error) { 140 var pipelineID = r.pipeline.ID 141 142 // Include resource key in logger. 143 ctx = log.NewContext(ctx, log.GetLogger(ctx).With("resource", r.Key())) 144 w := r.bundle.WorkspaceClient() 145 _, err := w.Pipelines.GetByPipelineId(ctx, pipelineID) 146 if err != nil { 147 log.Warnf(ctx, "Cannot get pipeline: %s", err) 148 return nil, err 149 } 150 151 req, err := opts.Pipeline.toPayload(pipelineID) 152 if err != nil { 153 return nil, err 154 } 155 156 res, err := w.Pipelines.StartUpdate(ctx, *req) 157 if err != nil { 158 return nil, err 159 } 160 161 updateID := res.UpdateId 162 163 // setup progress logger and tracker to query events 164 updateTracker := progress.NewUpdateTracker(pipelineID, updateID, w) 165 progressLogger, ok := cmdio.FromContext(ctx) 166 if !ok { 167 return nil, fmt.Errorf("no progress logger found") 168 } 169 170 // Log the pipeline update URL as soon as it is available. 171 progressLogger.Log(progress.NewPipelineUpdateUrlEvent(w.Config.Host, updateID, pipelineID)) 172 173 if opts.NoWait { 174 return nil, nil 175 } 176 177 // Poll update for completion and post status. 178 // Note: there is no "StartUpdateAndWait" wrapper for this API. 179 var prevState *pipelines.UpdateInfoState 180 for { 181 events, err := updateTracker.Events(ctx) 182 if err != nil { 183 return nil, err 184 } 185 for _, event := range events { 186 progressLogger.Log(&event) 187 log.Infof(ctx, event.String()) 188 } 189 190 update, err := w.Pipelines.GetUpdateByPipelineIdAndUpdateId(ctx, pipelineID, updateID) 191 if err != nil { 192 return nil, err 193 } 194 195 // Log only if the current state is different from the previous state. 196 state := update.Update.State 197 if prevState == nil || *prevState != state { 198 log.Infof(ctx, "Update status: %s", state) 199 prevState = &state 200 } 201 202 if state == pipelines.UpdateInfoStateCanceled { 203 log.Infof(ctx, "Update was cancelled!") 204 return nil, fmt.Errorf("update cancelled") 205 } 206 if state == pipelines.UpdateInfoStateFailed { 207 log.Infof(ctx, "Update has failed!") 208 err := r.logErrorEvent(ctx, pipelineID, updateID) 209 if err != nil { 210 return nil, err 211 } 212 return nil, fmt.Errorf("update failed") 213 } 214 if state == pipelines.UpdateInfoStateCompleted { 215 log.Infof(ctx, "Update has completed successfully!") 216 return nil, nil 217 } 218 219 time.Sleep(time.Second) 220 } 221 }