github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/dashboard/api/pkg/model/job_event_handler.go (about)

     1  package model
     2  
     3  import (
     4  	"context"
     5  	"sync"
     6  	"time"
     7  
     8  	"github.com/filecoin-project/bacalhau/pkg/localdb"
     9  	bacalhau_model "github.com/filecoin-project/bacalhau/pkg/model/v1beta1"
    10  	"github.com/rs/zerolog/log"
    11  )
    12  
    13  type jobEventBuffer struct {
    14  	created time.Time
    15  	exists  bool
    16  	ignore  bool
    17  	events  []bacalhau_model.JobEvent
    18  }
    19  
// jobEventHandler receives job events and forwards them to the local
// database, buffering per job until that job's Created event has been seen.
type jobEventHandler struct {
	// localDB is the database handle passed to newJobEventHandler.
	localDB      localdb.LocalDB
	// eventHandler is built from localDB and performs the actual writes.
	eventHandler *localdb.LocalDBEventHandler
	// eventBuffers maps a job ID to the buffer tracking that job.
	eventBuffers map[string]*jobEventBuffer
	// eventMutex guards eventBuffers; readEvent and cleanEventBuffer hold it.
	eventMutex   sync.Mutex
}
    26  
    27  func newJobEventHandler(localDB localdb.LocalDB) *jobEventHandler {
    28  	return &jobEventHandler{
    29  		localDB:      localDB,
    30  		eventHandler: localdb.NewLocalDBEventHandler(localDB),
    31  		eventBuffers: map[string]*jobEventBuffer{},
    32  	}
    33  }
    34  
    35  func (handler *jobEventHandler) startBufferGC(ctx context.Context) {
    36  	// reap the event buffer so we don't accumulate memory forever
    37  	ticker := time.NewTicker(1 * time.Minute)
    38  	go func() {
    39  		for {
    40  			select {
    41  			case <-ctx.Done():
    42  				return
    43  			case <-ticker.C:
    44  				handler.cleanEventBuffer()
    45  			}
    46  		}
    47  	}()
    48  }
    49  
    50  func (handler *jobEventHandler) writeEventToDatabase(ctx context.Context, event bacalhau_model.JobEvent) error {
    51  	return handler.eventHandler.HandleJobEvent(ctx, event)
    52  }
    53  
    54  // sometimes events can be out of order and we need the job to exist
    55  // before we record events against the job - it's OK if we hear about
    56  // out of order events once the job exists in db (they have timestamps)
    57  func (handler *jobEventHandler) readEvent(ctx context.Context, event bacalhau_model.JobEvent) error {
    58  	handler.eventMutex.Lock()
    59  	defer handler.eventMutex.Unlock()
    60  	eventBuffer, ok := handler.eventBuffers[event.JobID]
    61  
    62  	// so this is the first event we have seen for this job
    63  	// let's create a buffer for it
    64  	if !ok {
    65  		eventBuffer = &jobEventBuffer{
    66  			created: time.Now(),
    67  			exists:  false,
    68  			ignore:  false,
    69  			events:  []bacalhau_model.JobEvent{},
    70  		}
    71  		handler.eventBuffers[event.JobID] = eventBuffer
    72  	}
    73  
    74  	if event.EventName == bacalhau_model.JobEventCreated {
    75  		isCanary := false
    76  		for _, label := range event.Spec.Annotations {
    77  			if label == "canary" {
    78  				isCanary = true
    79  				break
    80  			}
    81  		}
    82  		for _, entrypointPart := range event.Spec.Docker.Entrypoint {
    83  			if entrypointPart == "hello λ!" {
    84  				isCanary = true
    85  				break
    86  			}
    87  		}
    88  		if isCanary {
    89  			eventBuffer.ignore = true
    90  			return nil
    91  		}
    92  		eventBuffer.exists = true
    93  		err := handler.writeEventToDatabase(ctx, event)
    94  		if err != nil {
    95  			log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error())
    96  		}
    97  		for _, bufferedEvent := range eventBuffer.events {
    98  			err := handler.writeEventToDatabase(ctx, bufferedEvent)
    99  			if err != nil {
   100  				log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error())
   101  			}
   102  		}
   103  	} else if !eventBuffer.exists {
   104  		eventBuffer.events = append(eventBuffer.events, event)
   105  	} else {
   106  		err := handler.writeEventToDatabase(ctx, event)
   107  		if err != nil {
   108  			log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error())
   109  		}
   110  	}
   111  	return nil
   112  }
   113  
   114  func (handler *jobEventHandler) cleanEventBuffer() {
   115  	handler.eventMutex.Lock()
   116  	defer handler.eventMutex.Unlock()
   117  	// clean up all event buffers that are older than 1 minute
   118  	// if there is a 1 minute gap between hearing the first out of order
   119  	// event and then hearing the create event then something has
   120  	// gone badly wrong - this should be more like < 100ms in reality
   121  	for jobID, eventBuffer := range handler.eventBuffers {
   122  		if time.Since(eventBuffer.created) > 1*time.Minute {
   123  			delete(handler.eventBuffers, jobID)
   124  		}
   125  	}
   126  }