github.com/kaleido-io/firefly@v0.0.0-20210622132723-8b4b6aacb971/internal/events/event_poller.go (about)

     1  // Copyright © 2021 Kaleido, Inc.
     2  //
     3  // SPDX-License-Identifier: Apache-2.0
     4  //
     5  // Licensed under the Apache License, Version 2.0 (the "License");
     6  // you may not use this file except in compliance with the License.
     7  // You may obtain a copy of the License at
     8  //
     9  //     http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  package events
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/kaleido-io/firefly/internal/log"
    26  	"github.com/kaleido-io/firefly/internal/retry"
    27  	"github.com/kaleido-io/firefly/pkg/database"
    28  	"github.com/kaleido-io/firefly/pkg/fftypes"
    29  )
    30  
// eventPoller reads pages of locally sequenced items from the database, starting
// after a tracked polling offset, and hands each page to a configured handler.
// It is woken either by a notification ("shoulder tap") or by a poll timeout.
type eventPoller struct {
	// ctx is the poller's context, decorated with a "role" log field by newEventPoller.
	ctx context.Context
	// database is the plugin used to read, create and update the offset record.
	database database.Plugin
	// shoulderTaps is a buffered (size 1) wake-up channel for the event loop.
	shoulderTaps chan bool
	// eventNotifier is waited upon (waitNext) to learn that newer sequences exist.
	eventNotifier *eventNotifier
	// closed is closed when eventLoop returns.
	closed chan struct{}
	// offsetID is the ID of the persisted offset record, set by restoreOffset
	// for non-ephemeral pollers.
	offsetID *fftypes.UUID
	// pollingOffset is the sequence after which the next page is read.
	pollingOffset int64
	// mux guards pollingOffset in getPollingOffset and rewindPollingOffset.
	mux sync.Mutex
	// conf holds the poller configuration and callbacks.
	conf *eventPollerConf
}
    42  
// newEventsHandler processes one page of items read by the poller. The boolean
// result requests an immediate re-poll (the event loop skips its wait when it is
// true); a non-nil error causes the dispatch to be retried.
type newEventsHandler func(events []fftypes.LocallySequenced) (bool, error)
    44  
// eventPollerConf carries the settings and callbacks that customise an eventPoller.
type eventPollerConf struct {
	// ephemeral pollers do not persist their offset to the database.
	ephemeral bool
	// eventBatchSize is the maximum number of items requested per page.
	eventBatchSize int
	// eventBatchTimeout is a short pause taken after a partially full page,
	// before the main wait, to let more items arrive.
	eventBatchTimeout time.Duration
	// eventPollTimeout is the longest the loop waits before polling again.
	eventPollTimeout time.Duration
	// firstEvent is passed to calcFirstOffset when no offset exists yet.
	firstEvent *fftypes.SubOptsFirstEvent
	// queryFactory builds the filter used to read each page.
	queryFactory database.QueryFactory
	// addCriteria lets the owner add extra conditions to the page filter.
	addCriteria func(database.AndFilter) database.AndFilter
	// getItems executes the page query.
	getItems func(context.Context, database.Filter) ([]fftypes.LocallySequenced, error)
	// maybeRewind is called before every page read; returning true asks for the
	// polling offset to be rewound to the returned value. newEventPoller
	// installs a no-op when this is nil.
	maybeRewind func() (bool, int64)
	// newEventsHandler receives each non-empty page.
	newEventsHandler newEventsHandler
	// offsetName, offsetNamespace and offsetType identify the persisted offset record.
	offsetName      string
	offsetNamespace string
	offsetType      fftypes.OffsetType
	// retry is the retry policy used for database reads and event dispatch.
	retry retry.Retry
	// startupOffsetRetryAttempts limits the retries in restoreOffset; zero
	// means the retry flag is always set.
	startupOffsetRetryAttempts int
}
    62  
    63  func newEventPoller(ctx context.Context, di database.Plugin, en *eventNotifier, conf *eventPollerConf) *eventPoller {
    64  	ep := &eventPoller{
    65  		ctx:           log.WithLogField(ctx, "role", fmt.Sprintf("ep[%s:%s]", conf.offsetName, conf.offsetNamespace)),
    66  		database:      di,
    67  		shoulderTaps:  make(chan bool, 1),
    68  		eventNotifier: en,
    69  		closed:        make(chan struct{}),
    70  		conf:          conf,
    71  	}
    72  	if ep.conf.maybeRewind == nil {
    73  		ep.conf.maybeRewind = func() (bool, int64) { return false, -1 }
    74  	}
    75  	return ep
    76  }
    77  
    78  func (ep *eventPoller) restoreOffset() error {
    79  	return ep.conf.retry.Do(ep.ctx, "restore offset", func(attempt int) (retry bool, err error) {
    80  		retry = ep.conf.startupOffsetRetryAttempts == 0 || attempt <= ep.conf.startupOffsetRetryAttempts
    81  		var offset *fftypes.Offset
    82  		if ep.conf.ephemeral {
    83  			ep.pollingOffset, err = calcFirstOffset(ep.ctx, ep.database, ep.conf.firstEvent)
    84  			return retry, err
    85  		}
    86  		for offset == nil {
    87  			offset, err = ep.database.GetOffset(ep.ctx, ep.conf.offsetType, ep.conf.offsetNamespace, ep.conf.offsetName)
    88  			if err != nil {
    89  				return retry, err
    90  			}
    91  			if offset == nil {
    92  				firstOffset, err := calcFirstOffset(ep.ctx, ep.database, ep.conf.firstEvent)
    93  				if err != nil {
    94  					return retry, err
    95  				}
    96  				err = ep.database.UpsertOffset(ep.ctx, &fftypes.Offset{
    97  					ID:        fftypes.NewUUID(),
    98  					Type:      ep.conf.offsetType,
    99  					Namespace: ep.conf.offsetNamespace,
   100  					Name:      ep.conf.offsetName,
   101  					Current:   firstOffset,
   102  				}, false)
   103  				if err != nil {
   104  					return retry, err
   105  				}
   106  			}
   107  		}
   108  		ep.offsetID = offset.ID
   109  		ep.pollingOffset = offset.Current
   110  		log.L(ep.ctx).Infof("Event offset restored %d", ep.pollingOffset)
   111  		return false, nil
   112  	})
   113  }
   114  
   115  func (ep *eventPoller) start() error {
   116  	if err := ep.restoreOffset(); err != nil {
   117  		return err
   118  	}
   119  	go ep.newEventNotifications()
   120  	go ep.eventLoop()
   121  	return nil
   122  }
   123  
   124  func (ep *eventPoller) rewindPollingOffset(offset int64) {
   125  	log.L(ep.ctx).Infof("Event polling rewind to: %d", offset)
   126  	ep.mux.Lock()
   127  	defer ep.mux.Unlock()
   128  	if offset < ep.pollingOffset {
   129  		ep.pollingOffset = offset // this will be re-delivered
   130  	}
   131  }
   132  
   133  func (ep *eventPoller) getPollingOffset() int64 {
   134  	ep.mux.Lock()
   135  	defer ep.mux.Unlock()
   136  	return ep.pollingOffset
   137  }
   138  
   139  func (ep *eventPoller) commitOffset(ctx context.Context, offset int64) error {
   140  	// Next polling cycle should start one higher than this offset
   141  	ep.pollingOffset = offset
   142  
   143  	// Must be called from the event polling routine
   144  	l := log.L(ctx)
   145  	// No persistence for ephemeral (non-durable) subscriptions
   146  	if !ep.conf.ephemeral {
   147  		u := database.OffsetQueryFactory.NewUpdate(ep.ctx).Set("current", ep.pollingOffset)
   148  		if err := ep.database.UpdateOffset(ctx, ep.offsetID, u); err != nil {
   149  			return err
   150  		}
   151  	}
   152  	l.Debugf("Event polling offset committed %d", ep.pollingOffset)
   153  	return nil
   154  }
   155  
   156  func (ep *eventPoller) readPage() ([]fftypes.LocallySequenced, error) {
   157  
   158  	var items []fftypes.LocallySequenced
   159  
   160  	// We have a hook here to allow a safe to do operations that check pin state, and perform
   161  	// a rewind based on it.
   162  	rewind, pollingOffset := ep.conf.maybeRewind()
   163  	if rewind {
   164  		ep.rewindPollingOffset(pollingOffset)
   165  	} else {
   166  		// Ensure we go through the mutex to pickup rewinds that happened elsewhere
   167  		pollingOffset = ep.getPollingOffset()
   168  	}
   169  
   170  	err := ep.conf.retry.Do(ep.ctx, "retrieve events", func(attempt int) (retry bool, err error) {
   171  		fb := ep.conf.queryFactory.NewFilter(ep.ctx)
   172  		filter := fb.And(
   173  			fb.Gt("sequence", pollingOffset),
   174  		)
   175  		filter = ep.conf.addCriteria(filter)
   176  		items, err = ep.conf.getItems(ep.ctx, filter.Sort("sequence").Limit(uint64(ep.conf.eventBatchSize)))
   177  		if err != nil {
   178  			return true, err // Retry indefinitely, until context cancelled
   179  		}
   180  		return false, nil
   181  	})
   182  	return items, err
   183  }
   184  
   185  func (ep *eventPoller) eventLoop() {
   186  	l := log.L(ep.ctx)
   187  	l.Debugf("Started event detector")
   188  	defer close(ep.closed)
   189  
   190  	for {
   191  		// Read messages from the DB - in an error condition we retry until success, or a closed context
   192  		events, err := ep.readPage()
   193  		if err != nil {
   194  			l.Debugf("Exiting: %s", err)
   195  			return
   196  		}
   197  
   198  		eventCount := len(events)
   199  		repoll := false
   200  		if eventCount > 0 {
   201  			// We process all the events in the page in a single database run group, and
   202  			// keep retrying on all retryable errors, indefinitely ().
   203  			var err error
   204  			repoll, err = ep.dispatchEventsRetry(events)
   205  			if err != nil {
   206  				l.Debugf("Exiting: %s", err)
   207  				return
   208  			}
   209  		}
   210  
   211  		// Once we run out of events, wait to be woken
   212  		if !repoll {
   213  			if ok := ep.waitForShoulderTapOrPollTimeout(eventCount); !ok {
   214  				return
   215  			}
   216  		}
   217  	}
   218  }
   219  
   220  func (ep *eventPoller) dispatchEventsRetry(events []fftypes.LocallySequenced) (repoll bool, err error) {
   221  	err = ep.conf.retry.Do(ep.ctx, "process events", func(attempt int) (retry bool, err error) {
   222  		repoll, err = ep.conf.newEventsHandler(events)
   223  		return err != nil, err // always retry (retry will end on cancelled context)
   224  	})
   225  	return repoll, err
   226  }
   227  
   228  // newEventNotifications just consumes new events, logs them, then ensures there's a shoulderTap
   229  // in the channel - without blocking. This is important as we must not block the notifier
   230  // - which might be our own eventLoop
   231  func (ep *eventPoller) newEventNotifications() {
   232  	defer close(ep.shoulderTaps)
   233  	var lastNotified int64 = -1
   234  	for {
   235  		latestSequence := ep.getPollingOffset()
   236  		if latestSequence <= lastNotified {
   237  			latestSequence = lastNotified + 1
   238  		}
   239  		err := ep.eventNotifier.waitNext(latestSequence)
   240  		if err != nil {
   241  			log.L(ep.ctx).Debugf("event notifier closing")
   242  			return
   243  		}
   244  		ep.shoulderTap()
   245  		lastNotified = latestSequence
   246  	}
   247  }
   248  
   249  func (ep *eventPoller) shoulderTap() {
   250  	// Do not block sending to the shoulderTap - as it can only contain one
   251  	select {
   252  	case ep.shoulderTaps <- true:
   253  	default:
   254  	}
   255  }
   256  
   257  func (ep *eventPoller) waitForShoulderTapOrPollTimeout(lastEventCount int) bool {
   258  	l := log.L(ep.ctx)
   259  	longTimeoutDuration := ep.conf.eventPollTimeout
   260  	// We avoid a tight spin with the eventBatchingTimeout to allow messages to arrive
   261  	if ep.conf.eventBatchTimeout > 0 && lastEventCount > 0 && lastEventCount < ep.conf.eventBatchSize {
   262  		shortTimeout := time.NewTimer(ep.conf.eventBatchTimeout)
   263  		select {
   264  		case <-shortTimeout.C:
   265  			l.Tracef("Woken after batch timeout")
   266  		case <-ep.ctx.Done():
   267  			l.Debugf("Exiting due to cancelled context")
   268  			return false
   269  		}
   270  		longTimeoutDuration -= ep.conf.eventBatchTimeout
   271  	}
   272  
   273  	longTimeout := time.NewTimer(longTimeoutDuration)
   274  	select {
   275  	case <-longTimeout.C:
   276  		l.Debugf("Woken after poll timeout")
   277  	case <-ep.shoulderTaps:
   278  		l.Debug("Woken for trigger on event")
   279  	case <-ep.ctx.Done():
   280  		l.Debugf("Exiting due to cancelled context")
   281  		return false
   282  	}
   283  	return true
   284  }