github.com/onflow/flow-go@v0.35.7-crescendo-preview.23-atree-inlining/module/jobqueue/consumer.go

package jobqueue

import (
	"errors"
	"fmt"
	"sync"

	"github.com/rs/zerolog"
	"go.uber.org/atomic"

	"github.com/onflow/flow-go/module"
	"github.com/onflow/flow-go/storage"
)

type Worker interface {
	// Run processes the given job; the returned error must be an unexpected fatal error
	Run(job module.Job) error
}

type Consumer struct {
	mu  sync.Mutex
	log zerolog.Logger

	// Storage
	jobs     module.Jobs              // storage to read jobs from
	progress storage.ConsumerProgress // to resume from the first unprocessed job after restarting

	// Dependency
	worker Worker // to process a job and notify the consumer when it has finished processing the job

	// Config
	maxProcessing  uint64 // max number of jobs to be processed concurrently
	maxSearchAhead uint64 // max number of jobs beyond processedIndex to process. 0 means no limit

	// State Variables
	running bool // a signal controlling whether to start processing more jobs. Useful for waiting
	// until the workers are ready, and for stopping when shutting down.
	isChecking  *atomic.Bool   // allow only one process at a time to check whether jobs are processable
	runningJobs sync.WaitGroup // to wait for all existing jobs to finish, for graceful shutdown

	processedIndex   uint64
	processings      map[uint64]*jobStatus   // keep track of the status of each ongoing job
	processingsIndex map[module.JobID]uint64 // look up the index of a job, useful when fast-forwarding the
	// `processedIndex` variable

	started *atomic.Bool // only allow the consumer to be started once, and forbid calls to Check before Start
}

func NewConsumer(
	log zerolog.Logger,
	jobs module.Jobs,
	progress storage.ConsumerProgress,
	worker Worker,
	maxProcessing uint64,
	maxSearchAhead uint64,
	defaultIndex uint64,
) (*Consumer, error) {

	processedIndex, err := readProcessedIndex(log, progress, defaultIndex)
	if err != nil {
		return nil, fmt.Errorf("could not read processed index: %w", err)
	}

	return &Consumer{
		log: log.With().Str("sub_module", "job_queue").Logger(),

		// store dependencies
		jobs:     jobs,
		progress: progress,
		worker:   worker,

		// store config
		maxProcessing:  maxProcessing,
		maxSearchAhead: maxSearchAhead,

		// init state variables
		running:          false,
		isChecking:       atomic.NewBool(false),
		started:          atomic.NewBool(false),
		processedIndex:   processedIndex,
		processings:      make(map[uint64]*jobStatus),
		processingsIndex: make(map[module.JobID]uint64),
	}, nil
}

func readProcessedIndex(log zerolog.Logger, progress storage.ConsumerProgress, defaultIndex uint64) (uint64, error) {
	// on startup, sync with storage for the processed index
	// to ensure consistency
	processedIndex, err := progress.ProcessedIndex()
	if errors.Is(err, storage.ErrNotFound) {
		err := progress.InitProcessedIndex(defaultIndex)
		if errors.Is(err, storage.ErrAlreadyExists) {
			return 0, fmt.Errorf("processed index has already been initialized, no effect for the second time. default index: %v",
				defaultIndex)
		}

		if err != nil {
			return 0, fmt.Errorf("could not init processed index: %w", err)
		}

		log.Warn().Uint64("processed_index", defaultIndex).
			Msg("processed index not found, initialized.")
		return defaultIndex, nil
	}

	if err != nil {
		return 0, fmt.Errorf("could not read processed index: %w", err)
	}

	return processedIndex, nil
}
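
// exampleConsumerLifecycle is an illustrative sketch (hypothetical, not part of the
// package's API) showing the typical lifecycle of a Consumer: construct it with its
// dependencies, start it, notify it when new jobs arrive, and stop it gracefully.
// The function name and the concrete parameter values are assumptions for the example.
func exampleConsumerLifecycle(
	log zerolog.Logger,
	jobs module.Jobs,
	progress storage.ConsumerProgress,
	worker Worker,
) error {
	// allow up to 3 jobs to be processed concurrently, no search-ahead limit,
	// and start from index 0 if no processed index has been stored yet
	consumer, err := NewConsumer(log, jobs, progress, worker, 3, 0, 0)
	if err != nil {
		return fmt.Errorf("could not create consumer: %w", err)
	}

	// Start marks the consumer as running and schedules the first batch of processable jobs
	if err := consumer.Start(); err != nil {
		return fmt.Errorf("could not start consumer: %w", err)
	}

	// the job producer calls Check whenever a new job is appended to the jobs storage
	consumer.Check()

	// Stop blocks until all in-flight jobs have finished
	consumer.Stop()
	return nil
}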
103 Msg("processed index not found, initialized.") 104 return defaultIndex, nil 105 } 106 107 if err != nil { 108 return 0, fmt.Errorf("could not read processed index: %w", err) 109 } 110 111 return processedIndex, nil 112 } 113 114 // Start starts consuming the jobs from the job queue. 115 func (c *Consumer) Start() error { 116 c.mu.Lock() 117 defer c.mu.Unlock() 118 119 if !c.started.CompareAndSwap(false, true) { 120 return fmt.Errorf("consumer has already been started") 121 } 122 c.running = true 123 124 c.log.Info(). 125 Uint64("processed", c.processedIndex). 126 Msg("consumer started") 127 128 c.checkProcessable() 129 130 return nil 131 } 132 133 // Stop stops consuming jobs from the job queue. 134 // It blocks until the existing worker finish processing the job 135 // Note, it won't stop the existing worker from finishing their job 136 func (c *Consumer) Stop() { 137 c.mu.Lock() 138 c.running = false 139 // not to use `defer`, otherwise runningJobs.Wait will hold the lock and cause deadlock 140 c.mu.Unlock() 141 142 c.log.Info().Msg("stopping consumer") 143 c.runningJobs.Wait() 144 c.log.Info().Msg("consumer stopped") 145 } 146 147 // Size returns number of in-memory jobs that consumer is processing. 148 func (c *Consumer) Size() uint { 149 c.mu.Lock() 150 defer c.mu.Unlock() 151 152 return uint(len(c.processings)) 153 } 154 155 // LastProcessedIndex returns the last processed job index 156 func (c *Consumer) LastProcessedIndex() uint64 { 157 c.mu.Lock() 158 defer c.mu.Unlock() 159 160 return c.processedIndex 161 } 162 163 // NotifyJobIsDone let the consumer know a job has been finished, so that consumer will take 164 // the next job from the job queue if there are workers available. It returns the last processed job index. 165 func (c *Consumer) NotifyJobIsDone(jobID module.JobID) uint64 { 166 c.mu.Lock() 167 defer c.mu.Unlock() 168 c.log.Debug().Str("job_id", string(jobID)).Msg("finishing job") 169 170 if c.doneJob(jobID) { 171 c.checkProcessable() 172 } 173 174 return c.processedIndex 175 } 176 177 // Check allows the job publisher to notify the consumer that a new job has been added, so that 178 // the consumer can check if the job is processable 179 // since multiple checks at the same time are unnecessary, we could only keep one check by checking. 180 // an atomic isChecking value. 181 func (c *Consumer) Check() { 182 if !c.started.Load() { 183 // Check is not allowed before the consumer is started 184 c.log.Warn().Msg("ignoring Check before Start") 185 return 186 } 187 188 if !c.isChecking.CompareAndSwap(false, true) { 189 // other process is checking, we could exit and rely on that process to check 190 // processable jobs 191 return 192 } 193 194 // still need to lock here, since checkProcessable might update the state vars. 195 c.mu.Lock() 196 defer c.mu.Unlock() 197 198 c.checkProcessable() 199 200 c.isChecking.Store(false) 201 } 202 203 // checkProcessable is a wrap of the `run` function with logging 204 func (c *Consumer) checkProcessable() { 205 c.log.Debug().Msg("checking processable jobs") 206 207 processingCount, err := c.run() 208 if err != nil { 209 c.log.Error().Err(err).Msg("failed to check processables") 210 return 211 } 212 213 if processingCount > 0 { 214 c.log.Info().Int64("processing", processingCount).Msg("processing jobs") 215 } else { 216 c.log.Debug().Bool("running", c.running).Msg("no job found") 217 } 218 } 219 220 // run checks if there are processable jobs and process them by giving 221 // them to the callback functions. 

// run checks if there are processable jobs and processes them by handing
// them to the worker.
// This function is passive: it does not trigger itself, and is only
// triggered by Start, Check, or NotifyJobIsDone.
func (c *Consumer) run() (int64, error) {
	processedFrom := c.processedIndex
	processables, processedTo, err := c.processableJobs()
	if err != nil {
		return 0, fmt.Errorf("could not query processable jobs: %w", err)
	}

	c.log.Debug().
		Uint64("processed_from", processedFrom).
		Uint64("processed_to", processedTo).
		Int("processables", len(processables)).
		Bool("running", c.running).
		Msg("running")

	for _, indexedJob := range processables {
		jobID := indexedJob.job.ID()

		c.processingsIndex[jobID] = indexedJob.index
		c.processings[indexedJob.index] = &jobStatus{
			jobID: jobID,
			done:  false,
		}

		c.runningJobs.Add(1)
		go func(j *jobAtIndex) {
			err := c.worker.Run(j.job)
			if err != nil {
				c.log.Fatal().Err(err).Msg("could not run the job")
			}
			c.runningJobs.Done()
		}(indexedJob)
	}

	err = c.progress.SetProcessedIndex(processedTo)
	if err != nil {
		return 0, fmt.Errorf("could not set processed index %v, %w", processedTo, err)
	}

	// remove the status of jobs that have been processed up to processedTo
	for index := c.processedIndex + 1; index <= processedTo; index++ {
		jobStatus, ok := c.processings[index]
		if !ok {
			continue
		}

		delete(c.processings, index)
		delete(c.processingsIndex, jobStatus.jobID)
	}

	c.processedIndex = processedTo

	return int64(len(processables)), nil
}

func (c *Consumer) processableJobs() ([]*jobAtIndex, uint64, error) {
	processables, processedTo, err := processableJobs(
		c.jobs,
		c.processings,
		c.maxProcessing,
		c.maxSearchAhead,
		c.processedIndex,
	)

	if err != nil {
		return nil, 0, err
	}

	// if the consumer has been stopped, we allow the existing workers to update the processed index,
	// but we won't return any new jobs for processing
	if !c.running {
		return nil, processedTo, nil
	}

	return processables, processedTo, nil
}
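
// Worked illustration of how maxProcessing and maxSearchAhead throttle the search
// (the numbers are assumptions for the example, not taken from the original source):
// with processedIndex = 10, maxProcessing = 10 and maxSearchAhead = 5, and jobs
// 11-13 still being processed, the loop in processableJobs below counts 11-13 as
// in-flight, selects the stored jobs at indexes 14 and 15, and pauses at index 16
// because 16 is more than maxSearchAhead (5) beyond the processed index (10);
// indexes above 15 are only considered once processedIndex has advanced.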

// processableJobs checks the workers' capacity and, if sufficient, reads
// jobs from storage and returns the processable jobs along with the new processed
// index.
func processableJobs(jobs module.Jobs, processings map[uint64]*jobStatus, maxProcessing uint64, maxSearchAhead uint64, processedIndex uint64) ([]*jobAtIndex, uint64,
	error) {
	processables := make([]*jobAtIndex, 0)

	// count how many jobs are still being processed,
	// in order to decide whether to process a new job
	processing := uint64(0)

	// determine whether the consumer should pause processing new jobs because it is
	// too far ahead of the last processed index
	shouldPause := func(index uint64) bool {
		if maxSearchAhead == 0 {
			return false
		}

		return index-processedIndex > maxSearchAhead
	}

	// while there is still processing capacity, find the next processable job
	for i := processedIndex + 1; processing < maxProcessing && !shouldPause(i); i++ {
		status, ok := processings[i]

		// if no worker is processing the job at this index, try to read it and process it
		if !ok {
			// take one job
			job, err := jobs.AtIndex(i)

			// if there is no job at this index yet, we can stop
			if errors.Is(err, storage.ErrNotFound) {
				break
			}

			// exception
			if err != nil {
				return nil, 0, fmt.Errorf("could not read job at index %v, %w", i, err)
			}

			processing++

			processables = append(processables, &jobAtIndex{
				job:   job,
				index: i,
			})
			continue
		}

		// only increment the processing counter when
		// the job is not done, meaning it is still being processed
		if !status.done {
			processing++
			continue
		}

		// the job at this index is done; if it is the next contiguous index,
		// fast-forward the processed index past it
		if i == processedIndex+1 {
			processedIndex++
		}
	}

	return processables, processedIndex, nil
}

// doneJob updates the internal state to mark that the job has been processed.
// It returns true if the job changed from processing to finished,
// and false if the job was already finished or has been removed.
func (c *Consumer) doneJob(jobID module.JobID) bool {
	// the caller must hold c.mu
	index, ok := c.processingsIndex[jobID]
	if !ok {
		// the job must have already been processed
		return false
	}

	status, ok := c.processings[index]
	if !ok {
		// this must be a bug: the job's index is known, but its status is missing
		c.log.Fatal().Msgf("bug, job (%v) can not be found by index (%v)", jobID, index)
		return false
	}

	if status.done {
		// the job has already been marked as done
		return false
	}

	status.done = true
	return true
}

type jobAtIndex struct {
	job   module.Job
	index uint64
}

type jobStatus struct {
	jobID module.JobID
	done  bool
}
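
// exampleJob is an illustrative sketch (hypothetical, not part of the package's API)
// of a job keyed by its index, as a Worker or Jobs storage might produce it. It assumes
// module.JobID has a string underlying type, consistent with the string(jobID)
// conversion used in this file.
type exampleJob struct {
	index uint64
}

// ID derives a unique job identifier from the job's index.
func (j exampleJob) ID() module.JobID {
	return module.JobID(fmt.Sprintf("%v", j.index))
}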