github.com/databricks/cli@v0.203.0/bundle/run/progress/pipeline.go (about)

     1  package progress
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"strings"
     7  
     8  	"github.com/databricks/databricks-sdk-go"
     9  	"github.com/databricks/databricks-sdk-go/service/pipelines"
    10  )
    11  
// ProgressEvent is a pipelines.PipelineEvent that can be rendered as a single
// console log entry (see String).
//
// The DLT backend computes events for pipeline runs which are accessible through
// the 2.0/pipelines/{pipeline_id}/events API.
//
// There are 4 levels for these events: ("ERROR", "WARN", "INFO", "METRICS")
//
// Here's a short introduction to a few important events we display on the console:
//
//  1. `update_progress`: A state transition occurred for the entire pipeline update
//  2. `flow_progress`: A state transition occurred for a single flow in the pipeline
type ProgressEvent pipelines.PipelineEvent
    22  
    23  func (event *ProgressEvent) String() string {
    24  	result := strings.Builder{}
    25  	result.WriteString(event.Timestamp + " ")
    26  
    27  	// Print event type with some padding to make output more pretty
    28  	result.WriteString(fmt.Sprintf("%-15s", event.EventType) + " ")
    29  
    30  	result.WriteString(event.Level.String() + " ")
    31  	result.WriteString(fmt.Sprintf(`"%s"`, event.Message))
    32  
    33  	// construct error string if level=`Error`
    34  	if event.Level == pipelines.EventLevelError && event.Error != nil {
    35  		for _, exception := range event.Error.Exceptions {
    36  			result.WriteString(fmt.Sprintf("\n%s", exception.Message))
    37  		}
    38  	}
    39  	return result.String()
    40  }
    41  
// IsInplaceSupported always returns false for pipeline progress events.
//
// NOTE(review): presumably this tells the progress logger that these events
// cannot be rendered in place (overwriting the previous line) — confirm
// against the caller.
func (event *ProgressEvent) IsInplaceSupported() bool {
	return false
}
    45  
// UpdateTracker fetches the progress events of a single pipeline update.
//
// UpdateId and PipelineId identify the update and the pipeline it belongs to.
// LatestEventTimestamp holds the timestamp of the most recent progress event
// returned by Events; it is empty until the first progress event is seen and
// is used to restrict later queries to newer events. w is the workspace client
// used to call the pipelines API.
//
// TODO: Add inplace logging to pipelines. https://github.com/databricks/cli/issues/280
type UpdateTracker struct {
	UpdateId             string
	PipelineId           string
	LatestEventTimestamp string
	w                    *databricks.WorkspaceClient
}
    53  
    54  func NewUpdateTracker(pipelineId string, updateId string, w *databricks.WorkspaceClient) *UpdateTracker {
    55  	return &UpdateTracker{
    56  		w:                    w,
    57  		PipelineId:           pipelineId,
    58  		UpdateId:             updateId,
    59  		LatestEventTimestamp: "",
    60  	}
    61  }
    62  
    63  // To keep the logic simple we do not use pagination. This means that if there are
    64  // more than 100 new events since the last query then we will miss out on progress events.
    65  //
    66  // This is fine because:
    67  // 1. This should happen fairly rarely if ever
    68  // 2. There is no expectation of the console progress logs being a complete representation
    69  //
    70  // # If a user needs the complete logs, they can always visit the run URL
    71  //
    72  // NOTE: Incase we want inplace logging, then we will need to implement pagination
    73  func (l *UpdateTracker) Events(ctx context.Context) ([]ProgressEvent, error) {
    74  	// create filter to fetch only new events
    75  	filter := fmt.Sprintf(`update_id = '%s'`, l.UpdateId)
    76  	if l.LatestEventTimestamp != "" {
    77  		filter = filter + fmt.Sprintf(" AND timestamp > '%s'", l.LatestEventTimestamp)
    78  	}
    79  
    80  	// we only check the most recent 100 events for progress
    81  	response, err := l.w.Pipelines.Impl().ListPipelineEvents(ctx, pipelines.ListPipelineEventsRequest{
    82  		PipelineId: l.PipelineId,
    83  		MaxResults: 100,
    84  		Filter:     filter,
    85  	})
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	result := make([]ProgressEvent, 0)
    91  	// we iterate in reverse to return events in chronological order
    92  	for i := len(response.Events) - 1; i >= 0; i-- {
    93  		event := response.Events[i]
    94  		// filter to only include update_progress and flow_progress events
    95  		if event.EventType == "flow_progress" || event.EventType == "update_progress" {
    96  			result = append(result, ProgressEvent(event))
    97  		}
    98  	}
    99  
   100  	// update latest event timestamp for next time
   101  	if len(result) > 0 {
   102  		l.LatestEventTimestamp = result[len(result)-1].Timestamp
   103  	}
   104  
   105  	return result, nil
   106  }