github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/events/event_poller.go (about) 1 // Copyright © 2021 Kaleido, Inc. 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 // 5 // Licensed under the Apache License, Version 2.0 (the "License"); 6 // you may not use this file except in compliance with the License. 7 // You may obtain a copy of the License at 8 // 9 // http://www.apache.org/licenses/LICENSE-2.0 10 // 11 // Unless required by applicable law or agreed to in writing, software 12 // distributed under the License is distributed on an "AS IS" BASIS, 13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 // See the License for the specific language governing permissions and 15 // limitations under the License. 16 17 package events 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "github.com/kaleido-io/firefly/internal/log" 26 "github.com/kaleido-io/firefly/internal/retry" 27 "github.com/kaleido-io/firefly/pkg/database" 28 "github.com/kaleido-io/firefly/pkg/fftypes" 29 ) 30 31 type eventPoller struct { 32 ctx context.Context 33 database database.Plugin 34 shoulderTaps chan bool 35 eventNotifier *eventNotifier 36 closed chan struct{} 37 offsetID *fftypes.UUID 38 pollingOffset int64 39 mux sync.Mutex 40 conf *eventPollerConf 41 } 42 43 type newEventsHandler func(events []fftypes.LocallySequenced) (bool, error) 44 45 type eventPollerConf struct { 46 ephemeral bool 47 eventBatchSize int 48 eventBatchTimeout time.Duration 49 eventPollTimeout time.Duration 50 firstEvent *fftypes.SubOptsFirstEvent 51 queryFactory database.QueryFactory 52 addCriteria func(database.AndFilter) database.AndFilter 53 getItems func(context.Context, database.Filter) ([]fftypes.LocallySequenced, error) 54 maybeRewind func() (bool, int64) 55 newEventsHandler newEventsHandler 56 offsetName string 57 offsetNamespace string 58 offsetType fftypes.OffsetType 59 retry retry.Retry 60 startupOffsetRetryAttempts int 61 } 62 63 func newEventPoller(ctx context.Context, di database.Plugin, en *eventNotifier, conf *eventPollerConf) *eventPoller { 64 ep := &eventPoller{ 65 ctx: log.WithLogField(ctx, "role", fmt.Sprintf("ep[%s:%s]", conf.offsetName, conf.offsetNamespace)), 66 database: di, 67 shoulderTaps: make(chan bool, 1), 68 eventNotifier: en, 69 closed: make(chan struct{}), 70 conf: conf, 71 } 72 if ep.conf.maybeRewind == nil { 73 ep.conf.maybeRewind = func() (bool, int64) { return false, -1 } 74 } 75 return ep 76 } 77 78 func (ep *eventPoller) restoreOffset() error { 79 return ep.conf.retry.Do(ep.ctx, "restore offset", func(attempt int) (retry bool, err error) { 80 retry = ep.conf.startupOffsetRetryAttempts == 0 || attempt <= ep.conf.startupOffsetRetryAttempts 81 var offset *fftypes.Offset 82 if ep.conf.ephemeral { 83 ep.pollingOffset, err = calcFirstOffset(ep.ctx, ep.database, ep.conf.firstEvent) 84 return retry, err 85 } 86 for offset == nil { 87 offset, err = ep.database.GetOffset(ep.ctx, ep.conf.offsetType, ep.conf.offsetNamespace, ep.conf.offsetName) 88 if err != nil { 89 return retry, err 90 } 91 if offset == nil { 92 firstOffset, err := calcFirstOffset(ep.ctx, ep.database, ep.conf.firstEvent) 93 if err != nil { 94 return retry, err 95 } 96 err = ep.database.UpsertOffset(ep.ctx, &fftypes.Offset{ 97 ID: fftypes.NewUUID(), 98 Type: ep.conf.offsetType, 99 Namespace: ep.conf.offsetNamespace, 100 Name: ep.conf.offsetName, 101 Current: firstOffset, 102 }, false) 103 if err != nil { 104 return retry, err 105 } 106 } 107 } 108 ep.offsetID = offset.ID 109 ep.pollingOffset = offset.Current 110 log.L(ep.ctx).Infof("Event offset restored %d", ep.pollingOffset) 111 return false, nil 112 }) 113 } 114 115 func (ep *eventPoller) start() error { 116 if err := ep.restoreOffset(); err != nil { 117 return err 118 } 119 go ep.newEventNotifications() 120 go ep.eventLoop() 121 return nil 122 } 123 124 func (ep *eventPoller) rewindPollingOffset(offset int64) { 125 log.L(ep.ctx).Infof("Event polling rewind to: %d", offset) 126 ep.mux.Lock() 127 defer ep.mux.Unlock() 128 if offset < ep.pollingOffset { 129 ep.pollingOffset = offset // this will be re-delivered 130 } 131 } 132 133 func (ep *eventPoller) getPollingOffset() int64 { 134 ep.mux.Lock() 135 defer ep.mux.Unlock() 136 return ep.pollingOffset 137 } 138 139 func (ep *eventPoller) commitOffset(ctx context.Context, offset int64) error { 140 // Next polling cycle should start one higher than this offset 141 ep.pollingOffset = offset 142 143 // Must be called from the event polling routine 144 l := log.L(ctx) 145 // No persistence for ephemeral (non-durable) subscriptions 146 if !ep.conf.ephemeral { 147 u := database.OffsetQueryFactory.NewUpdate(ep.ctx).Set("current", ep.pollingOffset) 148 if err := ep.database.UpdateOffset(ctx, ep.offsetID, u); err != nil { 149 return err 150 } 151 } 152 l.Debugf("Event polling offset committed %d", ep.pollingOffset) 153 return nil 154 } 155 156 func (ep *eventPoller) readPage() ([]fftypes.LocallySequenced, error) { 157 158 var items []fftypes.LocallySequenced 159 160 // We have a hook here to allow a safe to do operations that check pin state, and perform 161 // a rewind based on it. 162 rewind, pollingOffset := ep.conf.maybeRewind() 163 if rewind { 164 ep.rewindPollingOffset(pollingOffset) 165 } else { 166 // Ensure we go through the mutex to pickup rewinds that happened elsewhere 167 pollingOffset = ep.getPollingOffset() 168 } 169 170 err := ep.conf.retry.Do(ep.ctx, "retrieve events", func(attempt int) (retry bool, err error) { 171 fb := ep.conf.queryFactory.NewFilter(ep.ctx) 172 filter := fb.And( 173 fb.Gt("sequence", pollingOffset), 174 ) 175 filter = ep.conf.addCriteria(filter) 176 items, err = ep.conf.getItems(ep.ctx, filter.Sort("sequence").Limit(uint64(ep.conf.eventBatchSize))) 177 if err != nil { 178 return true, err // Retry indefinitely, until context cancelled 179 } 180 return false, nil 181 }) 182 return items, err 183 } 184 185 func (ep *eventPoller) eventLoop() { 186 l := log.L(ep.ctx) 187 l.Debugf("Started event detector") 188 defer close(ep.closed) 189 190 for { 191 // Read messages from the DB - in an error condition we retry until success, or a closed context 192 events, err := ep.readPage() 193 if err != nil { 194 l.Debugf("Exiting: %s", err) 195 return 196 } 197 198 eventCount := len(events) 199 repoll := false 200 if eventCount > 0 { 201 // We process all the events in the page in a single database run group, and 202 // keep retrying on all retryable errors, indefinitely (). 203 var err error 204 repoll, err = ep.dispatchEventsRetry(events) 205 if err != nil { 206 l.Debugf("Exiting: %s", err) 207 return 208 } 209 } 210 211 // Once we run out of events, wait to be woken 212 if !repoll { 213 if ok := ep.waitForShoulderTapOrPollTimeout(eventCount); !ok { 214 return 215 } 216 } 217 } 218 } 219 220 func (ep *eventPoller) dispatchEventsRetry(events []fftypes.LocallySequenced) (repoll bool, err error) { 221 err = ep.conf.retry.Do(ep.ctx, "process events", func(attempt int) (retry bool, err error) { 222 repoll, err = ep.conf.newEventsHandler(events) 223 return err != nil, err // always retry (retry will end on cancelled context) 224 }) 225 return repoll, err 226 } 227 228 // newEventNotifications just consumes new events, logs them, then ensures there's a shoulderTap 229 // in the channel - without blocking. This is important as we must not block the notifier 230 // - which might be our own eventLoop 231 func (ep *eventPoller) newEventNotifications() { 232 defer close(ep.shoulderTaps) 233 var lastNotified int64 = -1 234 for { 235 latestSequence := ep.getPollingOffset() 236 if latestSequence <= lastNotified { 237 latestSequence = lastNotified + 1 238 } 239 err := ep.eventNotifier.waitNext(latestSequence) 240 if err != nil { 241 log.L(ep.ctx).Debugf("event notifier closing") 242 return 243 } 244 ep.shoulderTap() 245 lastNotified = latestSequence 246 } 247 } 248 249 func (ep *eventPoller) shoulderTap() { 250 // Do not block sending to the shoulderTap - as it can only contain one 251 select { 252 case ep.shoulderTaps <- true: 253 default: 254 } 255 } 256 257 func (ep *eventPoller) waitForShoulderTapOrPollTimeout(lastEventCount int) bool { 258 l := log.L(ep.ctx) 259 longTimeoutDuration := ep.conf.eventPollTimeout 260 // We avoid a tight spin with the eventBatchingTimeout to allow messages to arrive 261 if ep.conf.eventBatchTimeout > 0 && lastEventCount > 0 && lastEventCount < ep.conf.eventBatchSize { 262 shortTimeout := time.NewTimer(ep.conf.eventBatchTimeout) 263 select { 264 case <-shortTimeout.C: 265 l.Tracef("Woken after batch timeout") 266 case <-ep.ctx.Done(): 267 l.Debugf("Exiting due to cancelled context") 268 return false 269 } 270 longTimeoutDuration -= ep.conf.eventBatchTimeout 271 } 272 273 longTimeout := time.NewTimer(longTimeoutDuration) 274 select { 275 case <-longTimeout.C: 276 l.Debugf("Woken after poll timeout") 277 case <-ep.shoulderTaps: 278 l.Debug("Woken for trigger on event") 279 case <-ep.ctx.Done(): 280 l.Debugf("Exiting due to cancelled context") 281 return false 282 } 283 return true 284 }