github.com/filecoin-project/bacalhau@v0.3.23-0.20230228154132-45c989550ace/dashboard/api/pkg/model/job_event_handler.go (about) 1 package model 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 "github.com/filecoin-project/bacalhau/pkg/localdb" 9 bacalhau_model "github.com/filecoin-project/bacalhau/pkg/model/v1beta1" 10 "github.com/rs/zerolog/log" 11 ) 12 13 type jobEventBuffer struct { 14 created time.Time 15 exists bool 16 ignore bool 17 events []bacalhau_model.JobEvent 18 } 19 20 type jobEventHandler struct { 21 localDB localdb.LocalDB 22 eventHandler *localdb.LocalDBEventHandler 23 eventBuffers map[string]*jobEventBuffer 24 eventMutex sync.Mutex 25 } 26 27 func newJobEventHandler(localDB localdb.LocalDB) *jobEventHandler { 28 return &jobEventHandler{ 29 localDB: localDB, 30 eventHandler: localdb.NewLocalDBEventHandler(localDB), 31 eventBuffers: map[string]*jobEventBuffer{}, 32 } 33 } 34 35 func (handler *jobEventHandler) startBufferGC(ctx context.Context) { 36 // reap the event buffer so we don't accumulate memory forever 37 ticker := time.NewTicker(1 * time.Minute) 38 go func() { 39 for { 40 select { 41 case <-ctx.Done(): 42 return 43 case <-ticker.C: 44 handler.cleanEventBuffer() 45 } 46 } 47 }() 48 } 49 50 func (handler *jobEventHandler) writeEventToDatabase(ctx context.Context, event bacalhau_model.JobEvent) error { 51 return handler.eventHandler.HandleJobEvent(ctx, event) 52 } 53 54 // sometimes events can be out of order and we need the job to exist 55 // before we record events against the job - it's OK if we hear about 56 // out of order events once the job exists in db (they have timestamps) 57 func (handler *jobEventHandler) readEvent(ctx context.Context, event bacalhau_model.JobEvent) error { 58 handler.eventMutex.Lock() 59 defer handler.eventMutex.Unlock() 60 eventBuffer, ok := handler.eventBuffers[event.JobID] 61 62 // so this is the first event we have seen for this job 63 // let's create a buffer for it 64 if !ok { 65 eventBuffer = &jobEventBuffer{ 66 created: time.Now(), 67 exists: false, 68 ignore: false, 69 events: []bacalhau_model.JobEvent{}, 70 } 71 handler.eventBuffers[event.JobID] = eventBuffer 72 } 73 74 if event.EventName == bacalhau_model.JobEventCreated { 75 isCanary := false 76 for _, label := range event.Spec.Annotations { 77 if label == "canary" { 78 isCanary = true 79 break 80 } 81 } 82 for _, entrypointPart := range event.Spec.Docker.Entrypoint { 83 if entrypointPart == "hello λ!" { 84 isCanary = true 85 break 86 } 87 } 88 if isCanary { 89 eventBuffer.ignore = true 90 return nil 91 } 92 eventBuffer.exists = true 93 err := handler.writeEventToDatabase(ctx, event) 94 if err != nil { 95 log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error()) 96 } 97 for _, bufferedEvent := range eventBuffer.events { 98 err := handler.writeEventToDatabase(ctx, bufferedEvent) 99 if err != nil { 100 log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error()) 101 } 102 } 103 } else if !eventBuffer.exists { 104 eventBuffer.events = append(eventBuffer.events, event) 105 } else { 106 err := handler.writeEventToDatabase(ctx, event) 107 if err != nil { 108 log.Ctx(ctx).Error().Msgf("error writing event to database: %s", err.Error()) 109 } 110 } 111 return nil 112 } 113 114 func (handler *jobEventHandler) cleanEventBuffer() { 115 handler.eventMutex.Lock() 116 defer handler.eventMutex.Unlock() 117 // clean up all event buffers that are older than 1 minute 118 // if there is a 1 minute gap between hearing the first out of order 119 // event and then hearing the create event then something has 120 // gone badly wrong - this should be more like < 100ms in reality 121 for jobID, eventBuffer := range handler.eventBuffers { 122 if time.Since(eventBuffer.created) > 1*time.Minute { 123 delete(handler.eventBuffers, jobID) 124 } 125 } 126 }