github.com/onflow/flow-go@v0.33.17/module/jobqueue/consumer.go (about) 1 package jobqueue 2 3 import ( 4 "errors" 5 "fmt" 6 "sync" 7 8 "github.com/rs/zerolog" 9 "go.uber.org/atomic" 10 11 "github.com/onflow/flow-go/module" 12 "github.com/onflow/flow-go/storage" 13 ) 14 15 type Worker interface { 16 // returned error must be unexpected fatal error 17 Run(job module.Job) error 18 } 19 20 type Consumer struct { 21 mu sync.Mutex 22 log zerolog.Logger 23 24 // Storage 25 jobs module.Jobs // storage to read jobs from 26 progress storage.ConsumerProgress // to resume from first unprocessed job after restarting 27 28 // dependency 29 worker Worker // to process job and notify consumer when finish processing a job 30 31 // Config 32 maxProcessing uint64 // max number of jobs to be processed concurrently 33 maxSearchAhead uint64 // max number of jobs beyond processedIndex to process. 0 means no limit 34 35 // State Variables 36 running bool // a signal to control whether to start processing more jobs. Useful for waiting 37 // until the workers are ready 38 isChecking *atomic.Bool // allow only one process checking job processable 39 // are ready, and stop when shutting down. 40 runningJobs sync.WaitGroup // to wait for all existing jobs to finish for graceful shutdown 41 42 processedIndex uint64 43 processings map[uint64]*jobStatus // keep track of the status of each on going job 44 processingsIndex map[module.JobID]uint64 // lookup the index of the job, useful when fast forwarding the 45 // `processed` variable 46 47 started *atomic.Bool // only allow the consumer to be started once, and forbid calls to Check before Start 48 } 49 50 func NewConsumer( 51 log zerolog.Logger, 52 jobs module.Jobs, 53 progress storage.ConsumerProgress, 54 worker Worker, 55 maxProcessing uint64, 56 maxSearchAhead uint64, 57 defaultIndex uint64, 58 ) (*Consumer, error) { 59 60 processedIndex, err := readProcessedIndex(log, progress, defaultIndex) 61 if err != nil { 62 return nil, fmt.Errorf("could not read processed index: %w", err) 63 } 64 65 return &Consumer{ 66 log: log.With().Str("sub_module", "job_queue").Logger(), 67 68 // store dependency 69 jobs: jobs, 70 progress: progress, 71 worker: worker, 72 73 // update config 74 maxProcessing: maxProcessing, 75 maxSearchAhead: maxSearchAhead, 76 77 // init state variables 78 running: false, 79 isChecking: atomic.NewBool(false), 80 started: atomic.NewBool(false), 81 processedIndex: processedIndex, 82 processings: make(map[uint64]*jobStatus), 83 processingsIndex: make(map[module.JobID]uint64), 84 }, nil 85 } 86 87 func readProcessedIndex(log zerolog.Logger, progress storage.ConsumerProgress, defaultIndex uint64) (uint64, error) { 88 // on startup, sync with storage for the processed index 89 // to ensure the consistency 90 processedIndex, err := progress.ProcessedIndex() 91 if errors.Is(err, storage.ErrNotFound) { 92 err := progress.InitProcessedIndex(defaultIndex) 93 if errors.Is(err, storage.ErrAlreadyExists) { 94 return 0, fmt.Errorf("processed index has already been inited, no effect for the second time. default index: %v", 95 defaultIndex) 96 } 97 98 if err != nil { 99 return 0, fmt.Errorf("could not init processed index: %w", err) 100 } 101 102 log.Warn().Uint64("processed index", processedIndex). 103 Msg("processed index not found, initialized.") 104 return defaultIndex, nil 105 } 106 107 if err != nil { 108 return 0, fmt.Errorf("could not read processed index: %w", err) 109 } 110 111 return processedIndex, nil 112 } 113 114 // Start starts consuming the jobs from the job queue. 115 func (c *Consumer) Start() error { 116 c.mu.Lock() 117 defer c.mu.Unlock() 118 119 if !c.started.CompareAndSwap(false, true) { 120 return fmt.Errorf("consumer has already been started") 121 } 122 c.running = true 123 124 c.log.Info(). 125 Uint64("processed", c.processedIndex). 126 Msg("consumer started") 127 128 c.checkProcessable() 129 130 return nil 131 } 132 133 // Stop stops consuming jobs from the job queue. 134 // It blocks until the existing worker finish processing the job 135 // Note, it won't stop the existing worker from finishing their job 136 func (c *Consumer) Stop() { 137 c.mu.Lock() 138 c.running = false 139 // not to use `defer`, otherwise runningJobs.Wait will hold the lock and cause deadlock 140 c.mu.Unlock() 141 142 c.log.Info().Msg("stopping consumer") 143 c.runningJobs.Wait() 144 c.log.Info().Msg("consumer stopped") 145 } 146 147 // Size returns number of in-memory jobs that consumer is processing. 148 func (c *Consumer) Size() uint { 149 c.mu.Lock() 150 defer c.mu.Unlock() 151 152 return uint(len(c.processings)) 153 } 154 155 // LastProcessedIndex returns the last processed job index 156 func (c *Consumer) LastProcessedIndex() uint64 { 157 c.mu.Lock() 158 defer c.mu.Unlock() 159 160 return c.processedIndex 161 } 162 163 // NotifyJobIsDone let the consumer know a job has been finished, so that consumer will take 164 // the next job from the job queue if there are workers available. It returns the last processed job index. 165 func (c *Consumer) NotifyJobIsDone(jobID module.JobID) uint64 { 166 c.mu.Lock() 167 defer c.mu.Unlock() 168 c.log.Debug().Str("job_id", string(jobID)).Msg("finishing job") 169 170 if c.doneJob(jobID) { 171 c.checkProcessable() 172 } 173 174 return c.processedIndex 175 } 176 177 // Check allows the job publisher to notify the consumer that a new job has been added, so that 178 // the consumer can check if the job is processable 179 // since multiple checks at the same time are unnecessary, we could only keep one check by checking. 180 // an atomic isChecking value. 181 func (c *Consumer) Check() { 182 if !c.started.Load() { 183 // Check is not allowed before the consumer is started 184 c.log.Warn().Msg("ignoring Check before Start") 185 return 186 } 187 188 if !c.isChecking.CompareAndSwap(false, true) { 189 // other process is checking, we could exit and rely on that process to check 190 // processable jobs 191 return 192 } 193 194 // still need to lock here, since checkProcessable might update the state vars. 195 c.mu.Lock() 196 defer c.mu.Unlock() 197 198 c.checkProcessable() 199 200 c.isChecking.Store(false) 201 } 202 203 // checkProcessable is a wrap of the `run` function with logging 204 func (c *Consumer) checkProcessable() { 205 c.log.Debug().Msg("checking processable jobs") 206 207 processingCount, err := c.run() 208 if err != nil { 209 c.log.Error().Err(err).Msg("failed to check processables") 210 return 211 } 212 213 if processingCount > 0 { 214 c.log.Info().Int64("processing", processingCount).Msg("processing jobs") 215 } else { 216 c.log.Debug().Bool("running", c.running).Msg("no job found") 217 } 218 219 } 220 221 // run checks if there are processable jobs and process them by giving 222 // them to the callback functions. 223 // this function is passive, it won't trigger itself, but can only be 224 // triggered by either Start or NotifyJobIsDone 225 func (c *Consumer) run() (int64, error) { 226 processedFrom := c.processedIndex 227 processables, processedTo, err := c.processableJobs() 228 if err != nil { 229 return 0, fmt.Errorf("could not query processable jobs: %w", err) 230 } 231 232 c.log.Debug(). 233 Uint64("processed_from", processedFrom). 234 Uint64("processed_to", processedTo). 235 Int("processables", len(processables)). 236 Bool("running", c.running). 237 Msg("running") 238 239 for _, indexedJob := range processables { 240 jobID := indexedJob.job.ID() 241 242 c.processingsIndex[jobID] = indexedJob.index 243 c.processings[indexedJob.index] = &jobStatus{ 244 jobID: jobID, 245 done: false, 246 } 247 248 c.runningJobs.Add(1) 249 go func(j *jobAtIndex) { 250 err := c.worker.Run(j.job) 251 if err != nil { 252 c.log.Fatal().Err(err).Msg("could not run the job") 253 } 254 c.runningJobs.Done() 255 }(indexedJob) 256 } 257 258 err = c.progress.SetProcessedIndex(processedTo) 259 if err != nil { 260 return 0, fmt.Errorf("could not set processed index %v, %w", processedTo, err) 261 } 262 263 for index := c.processedIndex + 1; index <= processedTo; index++ { 264 jobStatus, ok := c.processings[index] 265 if !ok { 266 continue 267 } 268 269 delete(c.processings, index) 270 delete(c.processingsIndex, jobStatus.jobID) 271 } 272 273 c.processedIndex = processedTo 274 275 return int64(len(processables)), nil 276 } 277 278 func (c *Consumer) processableJobs() ([]*jobAtIndex, uint64, error) { 279 processables, processedTo, err := processableJobs( 280 c.jobs, 281 c.processings, 282 c.maxProcessing, 283 c.maxSearchAhead, 284 c.processedIndex, 285 ) 286 287 if err != nil { 288 return nil, 0, err 289 } 290 291 // if the consumer has been stopped, we allow the existing worker to update the progressed index 292 // but won't return any new job for processing 293 if !c.running { 294 return nil, processedTo, nil 295 } 296 297 return processables, processedTo, nil 298 } 299 300 // processableJobs check the worker's capacity and if sufficient, read 301 // jobs from the storage, return the processable jobs, and the processed 302 // index 303 func processableJobs(jobs module.Jobs, processings map[uint64]*jobStatus, maxProcessing uint64, maxSearchAhead uint64, processedIndex uint64) ([]*jobAtIndex, uint64, 304 error) { 305 processables := make([]*jobAtIndex, 0) 306 307 // count how many jobs are still processing, 308 // in order to decide whether to process a new job 309 processing := uint64(0) 310 311 // determine if the consumer should pause processing new jobs because it's too far ahead of 312 // the lowest in progress index 313 shouldPause := func(index uint64) bool { 314 if maxSearchAhead == 0 { 315 return false 316 } 317 318 return index-processedIndex > maxSearchAhead 319 } 320 321 // if still have processing capacity, find the next processable job 322 for i := processedIndex + 1; processing < maxProcessing && !shouldPause(i); i++ { 323 status, ok := processings[i] 324 325 // if no worker is processing the next job, try to read it and process 326 if !ok { 327 // take one job 328 job, err := jobs.AtIndex(i) 329 330 // if there is no more job at this index, we could stop 331 if errors.Is(err, storage.ErrNotFound) { 332 break 333 } 334 335 // exception 336 if err != nil { 337 return nil, 0, fmt.Errorf("could not read job at index %v, %w", i, err) 338 } 339 340 processing++ 341 342 processables = append(processables, &jobAtIndex{ 343 job: job, 344 index: i, 345 }) 346 continue 347 } 348 349 // only increment the processing variable when 350 // the job is not done, meaning still processing 351 if !status.done { 352 processing++ 353 continue 354 } 355 356 if i == processedIndex+1 { 357 processedIndex++ 358 } 359 } 360 361 return processables, processedIndex, nil 362 } 363 364 // doneJob updates the internal state to mark the job has been processed 365 // return true if the job is changed from processing to finished 366 // return false if the job is already finished, or removed 367 func (c *Consumer) doneJob(jobID module.JobID) bool { 368 // lock 369 index, ok := c.processingsIndex[jobID] 370 if !ok { 371 // job must has been processed 372 return false 373 } 374 375 status, ok := c.processings[index] 376 if !ok { 377 // must be a bug, if went here 378 c.log.Fatal().Msgf("bug, job (%v) can not be found by index (%v)", jobID, index) 379 return false 380 } 381 382 if status.done { 383 // job has been done already 384 return false 385 } 386 387 status.done = true 388 return true 389 } 390 391 type jobAtIndex struct { 392 job module.Job 393 index uint64 394 } 395 396 type jobStatus struct { 397 jobID module.JobID 398 done bool 399 }