github.com/sunriselayer/sunrise-da@v0.13.1-sr3/das/coordinator.go (about) 1 package das 2 3 import ( 4 "context" 5 "sync" 6 "time" 7 8 libhead "github.com/celestiaorg/go-header" 9 10 "github.com/sunriselayer/sunrise-da/header" 11 "github.com/sunriselayer/sunrise-da/share/p2p/shrexsub" 12 ) 13 14 // samplingCoordinator runs and coordinates sampling workers and updates current sampling state 15 type samplingCoordinator struct { 16 concurrencyLimit int 17 samplingTimeout time.Duration 18 19 getter libhead.Getter[*header.ExtendedHeader] 20 sampleFn sampleFn 21 broadcastFn shrexsub.BroadcastFn 22 23 state coordinatorState 24 25 // resultCh fans-in sampling results from worker to coordinator 26 resultCh chan result 27 // updHeadCh signals to update network head header height 28 updHeadCh chan *header.ExtendedHeader 29 // waitCh signals to block coordinator for external access to state 30 waitCh chan *sync.WaitGroup 31 32 workersWg sync.WaitGroup 33 metrics *metrics 34 done 35 } 36 37 // result will carry errors to coordinator after worker finishes the job 38 type result struct { 39 job 40 failed map[uint64]int 41 err error 42 } 43 44 func newSamplingCoordinator( 45 params Parameters, 46 getter libhead.Getter[*header.ExtendedHeader], 47 sample sampleFn, 48 broadcast shrexsub.BroadcastFn, 49 ) *samplingCoordinator { 50 return &samplingCoordinator{ 51 concurrencyLimit: params.ConcurrencyLimit, 52 samplingTimeout: params.SampleTimeout, 53 getter: getter, 54 sampleFn: sample, 55 broadcastFn: broadcast, 56 state: newCoordinatorState(params), 57 resultCh: make(chan result), 58 updHeadCh: make(chan *header.ExtendedHeader), 59 waitCh: make(chan *sync.WaitGroup), 60 done: newDone("sampling coordinator"), 61 } 62 } 63 64 func (sc *samplingCoordinator) run(ctx context.Context, cp checkpoint) { 65 sc.state.resumeFromCheckpoint(cp) 66 67 // resume workers 68 for _, wk := range cp.Workers { 69 sc.runWorker(ctx, sc.state.newJob(wk.JobType, wk.From, wk.To)) 70 } 71 72 for { 73 for !sc.concurrencyLimitReached() { 74 next, found := sc.state.nextJob() 75 if !found { 76 break 77 } 78 sc.runWorker(ctx, next) 79 } 80 81 select { 82 case head := <-sc.updHeadCh: 83 if sc.state.isNewHead(head.Height()) { 84 if !sc.recentJobsLimitReached() { 85 sc.runWorker(ctx, sc.state.recentJob(head)) 86 } 87 sc.state.updateHead(head.Height()) 88 // run worker without concurrency limit restrictions to reduced delay 89 sc.metrics.observeNewHead(ctx) 90 } 91 case res := <-sc.resultCh: 92 sc.state.handleResult(res) 93 case wg := <-sc.waitCh: 94 wg.Wait() 95 case <-ctx.Done(): 96 sc.workersWg.Wait() 97 sc.indicateDone() 98 return 99 } 100 } 101 } 102 103 // runWorker runs job in separate worker go-routine 104 func (sc *samplingCoordinator) runWorker(ctx context.Context, j job) { 105 w := newWorker(j, sc.getter, sc.sampleFn, sc.broadcastFn, sc.metrics) 106 sc.state.putInProgress(j.id, w.getState) 107 108 // launch worker go-routine 109 sc.workersWg.Add(1) 110 go func() { 111 defer sc.workersWg.Done() 112 w.run(ctx, sc.samplingTimeout, sc.resultCh) 113 }() 114 } 115 116 // listen notifies the coordinator about a new network head received via subscription. 117 func (sc *samplingCoordinator) listen(ctx context.Context, h *header.ExtendedHeader) { 118 select { 119 case sc.updHeadCh <- h: 120 case <-ctx.Done(): 121 } 122 } 123 124 // stats pauses the coordinator to get stats in a concurrently safe manner 125 func (sc *samplingCoordinator) stats(ctx context.Context) (SamplingStats, error) { 126 var wg sync.WaitGroup 127 wg.Add(1) 128 defer wg.Done() 129 130 select { 131 case sc.waitCh <- &wg: 132 case <-ctx.Done(): 133 return SamplingStats{}, ctx.Err() 134 } 135 136 return sc.state.unsafeStats(), nil 137 } 138 139 func (sc *samplingCoordinator) getCheckpoint(ctx context.Context) (checkpoint, error) { 140 stats, err := sc.stats(ctx) 141 if err != nil { 142 return checkpoint{}, err 143 } 144 return newCheckpoint(stats), nil 145 } 146 147 // concurrencyLimitReached indicates whether concurrencyLimit has been reached 148 func (sc *samplingCoordinator) concurrencyLimitReached() bool { 149 return len(sc.state.inProgress) >= sc.concurrencyLimit 150 } 151 152 // recentJobsLimitReached indicates whether concurrency limit for recent jobs has been reached 153 func (sc *samplingCoordinator) recentJobsLimitReached() bool { 154 return len(sc.state.inProgress) >= 2*sc.concurrencyLimit 155 }