github.com/argoproj/argo-events@v1.9.1/eventbus/stan/sensor/trigger_conn.go (about)

     1  package sensor
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"strings"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/Knetic/govaluate"
    12  	cloudevents "github.com/cloudevents/sdk-go/v2"
    13  	"github.com/gobwas/glob"
    14  	"github.com/nats-io/stan.go"
    15  	"github.com/nats-io/stan.go/pb"
    16  	"go.uber.org/zap"
    17  
    18  	eventbuscommon "github.com/argoproj/argo-events/eventbus/common"
    19  
    20  	stanbase "github.com/argoproj/argo-events/eventbus/stan/base"
    21  )
    22  
// STANTriggerConn is a NATS Streaming connection dedicated to a single sensor
// trigger. It embeds the shared STAN connection and carries the information
// needed to evaluate the trigger's dependency expression.
type STANTriggerConn struct {
	*stanbase.STANConnection

	// name of the sensor owning the trigger (only used by String in this file)
	sensorName string
	// name of the trigger; attached to the logger in NewSTANTriggerConn
	triggerName string
	// boolean expression over dependency names, handed to the message holder in Subscribe
	dependencyExpression string
	// dependencies of the trigger, handed to the message holder in Subscribe
	deps []eventbuscommon.Dependency
}
    31  
    32  func NewSTANTriggerConn(conn *stanbase.STANConnection, sensorName string, triggerName string, dependencyExpression string, deps []eventbuscommon.Dependency) *STANTriggerConn {
    33  	n := &STANTriggerConn{conn, sensorName, triggerName, dependencyExpression, deps}
    34  	n.Logger = n.Logger.With("triggerName", n.triggerName).With("clientID", n.ClientID)
    35  	return n
    36  }
    37  
    38  func (n *STANTriggerConn) String() string {
    39  	if n == nil {
    40  		return ""
    41  	}
    42  	return fmt.Sprintf("STANTriggerConn{ClientID:%s,Sensor:%s,Trigger:%s}", n.ClientID, n.sensorName, n.triggerName)
    43  }
    44  
    45  func (conn *STANTriggerConn) IsClosed() bool {
    46  	return conn == nil || conn.STANConnection.IsClosed()
    47  }
    48  
    49  func (conn *STANTriggerConn) Close() error {
    50  	if conn == nil {
    51  		return fmt.Errorf("can't close STAN trigger connection, STANTriggerConn is nil")
    52  	}
    53  	return conn.STANConnection.Close()
    54  }
    55  
    56  // Subscribe is used to subscribe to multiple event source dependencies
    57  // Parameter - ctx, context
    58  // Parameter - conn, eventbus connection
    59  // Parameter - group, queue group name
    60  // Parameter - closeCh, channel to indicate to close the subscription
    61  // Parameter - resetConditionsCh, channel to indicate to reset trigger conditions
    62  // Parameter - lastResetTime, the last time reset would have occurred, if any
    63  // Parameter - dependencyExpr, example: "(dep1 || dep2) && dep3"
    64  // Parameter - dependencies, array of dependencies information
    65  // Parameter - filter, a function used to filter the message
    66  // Parameter - action, a function to be triggered after all conditions meet
    67  func (n *STANTriggerConn) Subscribe(
    68  	ctx context.Context,
    69  	closeCh <-chan struct{},
    70  	resetConditionsCh <-chan struct{},
    71  	lastResetTime time.Time,
    72  	transform func(depName string, event cloudevents.Event) (*cloudevents.Event, error),
    73  	filter func(string, cloudevents.Event) bool,
    74  	action func(map[string]cloudevents.Event),
    75  	defaultSubject *string) error {
    76  	if n == nil {
    77  		return fmt.Errorf("Subscribe() failed; STANTriggerConn is nil")
    78  	}
    79  
    80  	log := n.Logger
    81  
    82  	if defaultSubject == nil {
    83  		log.Error("can't subscribe over NATS streaming: defaultSubject not set")
    84  	}
    85  
    86  	msgHolder, err := newEventSourceMessageHolder(log, n.dependencyExpression, n.deps, lastResetTime)
    87  	if err != nil {
    88  		return err
    89  	}
    90  	// use group name as durable name
    91  	group, err := n.getGroupNameFromClientID(n.ClientID)
    92  	if err != nil {
    93  		return err
    94  	}
    95  	durableName := group
    96  	sub, err := n.STANConn.QueueSubscribe(*defaultSubject, group, func(m *stan.Msg) {
    97  		n.processEventSourceMsg(m, msgHolder, transform, filter, action, log)
    98  	}, stan.DurableName(durableName),
    99  		stan.SetManualAckMode(),
   100  		stan.StartAt(pb.StartPosition_NewOnly),
   101  		stan.AckWait(1*time.Second),
   102  		stan.MaxInflight(len(msgHolder.depNames)+2))
   103  	if err != nil {
   104  		log.Errorf("failed to subscribe to subject %s", *defaultSubject)
   105  		return err
   106  	}
   107  	log.Infof("Subscribed to subject %s using durable name %s", *defaultSubject, durableName)
   108  
   109  	// Daemon to evict cache and reset trigger conditions
   110  	wg := &sync.WaitGroup{}
   111  	daemonStopCh := make(chan struct{})
   112  	wg.Add(1)
   113  	go func() {
   114  		defer wg.Done()
   115  		log.Info("starting ExactOnce cache clean up daemon ...")
   116  		ticker := time.NewTicker(60 * time.Second)
   117  		defer ticker.Stop()
   118  		for {
   119  			select {
   120  			case <-daemonStopCh:
   121  				log.Info("exiting ExactOnce cache clean up daemon...")
   122  				return
   123  			case <-ticker.C:
   124  				now := time.Now().UnixNano()
   125  				num := 0
   126  				msgHolder.smap.Range(func(key, value interface{}) bool {
   127  					v := value.(int64)
   128  					// Evict cached ID older than 5 minutes
   129  					if now-v > 5*60*1000*1000*1000 {
   130  						msgHolder.smap.Delete(key)
   131  						num++
   132  						log.Debugw("cached ID evicted", "id", key)
   133  					}
   134  					return true
   135  				})
   136  				log.Debugf("finished evicting %v cached IDs, time cost: %v ms", num, (time.Now().UnixNano()-now)/1000/1000)
   137  			case <-resetConditionsCh:
   138  				log.Info("reset conditions")
   139  				msgHolder.setLastResetTime(time.Now())
   140  			}
   141  		}
   142  	}()
   143  
   144  	for {
   145  		select {
   146  		case <-ctx.Done():
   147  			log.Info("exiting, unsubscribing and closing connection...")
   148  			_ = sub.Close()
   149  			log.Infof("subscription on subject %s closed", *defaultSubject)
   150  			daemonStopCh <- struct{}{}
   151  			wg.Wait()
   152  			return nil
   153  		case <-closeCh:
   154  			log.Info("closing subscription...")
   155  			_ = sub.Close()
   156  			log.Infof("subscription on subject %s closed", *defaultSubject)
   157  			daemonStopCh <- struct{}{}
   158  			wg.Wait()
   159  			return nil
   160  		}
   161  	}
   162  }
   163  
   164  func (n *STANTriggerConn) processEventSourceMsg(m *stan.Msg, msgHolder *eventSourceMessageHolder, transform func(depName string, event cloudevents.Event) (*cloudevents.Event, error), filter func(dependencyName string, event cloudevents.Event) bool, action func(map[string]cloudevents.Event), log *zap.SugaredLogger) {
   165  	var event *cloudevents.Event
   166  	if err := json.Unmarshal(m.Data, &event); err != nil {
   167  		log.Errorf("Failed to convert to a cloudevent, discarding it... err: %v", err)
   168  		_ = m.Ack()
   169  		return
   170  	}
   171  
   172  	depName, err := msgHolder.getDependencyName(event.Source(), event.Subject())
   173  	if err != nil {
   174  		log.Errorf("Failed to get the dependency name, discarding it... err: %v", err)
   175  		_ = m.Ack()
   176  		return
   177  	}
   178  
   179  	log.Debugf("New incoming Event Source Message, dependency name=%s", depName)
   180  
   181  	if depName == "" {
   182  		_ = m.Ack()
   183  		return
   184  	}
   185  
   186  	event, err = transform(depName, *event)
   187  	if err != nil {
   188  		log.Errorw("failed to apply event transformation", zap.Error(err))
   189  		_ = m.Ack()
   190  		return
   191  	}
   192  
   193  	if !filter(depName, *event) {
   194  		// message not interested
   195  		log.Debugf("not interested in dependency %s", depName)
   196  		_ = m.Ack()
   197  		return
   198  	}
   199  
   200  	// NATS Streaming guarantees At Least Once delivery,
   201  	// so need to check if the message is duplicate
   202  	if _, ok := msgHolder.smap.Load(event.ID()); ok {
   203  		log.Infow("ATTENTION: Duplicate delivered message detected", "message", m)
   204  		_ = m.Ack()
   205  		return
   206  	}
   207  
   208  	// Acknowledge any old messages that occurred before the last reset (standard reset after trigger or conditional reset)
   209  	if m.Timestamp <= msgHolder.getLastResetTime().UnixNano() {
   210  		if depName != "" {
   211  			msgHolder.reset(depName)
   212  		}
   213  		msgHolder.ackAndCache(m, event.ID())
   214  
   215  		log.Debugf("reset and acked dependency=%s due to message time occurred before reset, m.Timestamp=%d, msgHolder.getLastResetTime()=%d",
   216  			depName, m.Timestamp, msgHolder.getLastResetTime().UnixNano())
   217  		return
   218  	}
   219  	// make sure that everything has been cleared within a certain amount of time
   220  	if msgHolder.fullResetTimeout() {
   221  		log.Infof("ATTENTION: Resetting the flags because they didn't get cleared before the timeout: msgHolder=%+v", msgHolder)
   222  		msgHolder.resetAll()
   223  	}
   224  
   225  	now := time.Now().Unix()
   226  
   227  	// Start a new round
   228  	if existingMsg, ok := msgHolder.msgs[depName]; ok {
   229  		if m.Timestamp == existingMsg.timestamp {
   230  			// Re-delivered latest messge, update delivery timestamp and return
   231  			existingMsg.lastDeliveredTime = now
   232  			msgHolder.msgs[depName] = existingMsg
   233  			log.Debugf("Updating timestamp for dependency=%s", depName)
   234  			return
   235  		} else if m.Timestamp < existingMsg.timestamp {
   236  			// Re-delivered old message, ack and return
   237  			msgHolder.ackAndCache(m, event.ID())
   238  			log.Debugw("Dropping this message because later ones also satisfy", "eventID", event.ID())
   239  			return
   240  		}
   241  	}
   242  	// New message, set and check
   243  	msgHolder.msgs[depName] = &eventSourceMessage{seq: m.Sequence, timestamp: m.Timestamp, event: event, lastDeliveredTime: now}
   244  	msgHolder.parameters[depName] = true
   245  
   246  	// Check if there's any stale message being held.
   247  	// Stale message could be message age has been longer than NATS streaming max message age,
   248  	// which means it has ben deleted from NATS server side, but it's still held here.
   249  	// Use last delivery timestamp to determine that.
   250  	for k, v := range msgHolder.msgs {
   251  		// Since the message is not acked, the server will keep re-sending it.
   252  		// If a message being held didn't get re-delivered in the last 10 minutes, treat it as stale.
   253  		if (now - v.lastDeliveredTime) > 10*60 {
   254  			msgHolder.reset(k)
   255  		}
   256  	}
   257  
   258  	result, err := msgHolder.expr.Evaluate(msgHolder.parameters)
   259  	if err != nil {
   260  		log.Errorf("failed to evaluate dependency expression: %v", err)
   261  		// TODO: how to handle this situation?
   262  		return
   263  	}
   264  	if result != true {
   265  		// Log current meet dependency information
   266  		meetDeps := []string{}
   267  		meetMsgIds := []string{}
   268  		for k, v := range msgHolder.msgs {
   269  			meetDeps = append(meetDeps, k)
   270  			meetMsgIds = append(meetMsgIds, v.event.ID())
   271  		}
   272  		log.Infow("trigger conditions not met", zap.Any("meetDependencies", meetDeps), zap.Any("meetEvents", meetMsgIds))
   273  		return
   274  	}
   275  
   276  	msgHolder.setLastResetTime(time.Unix(m.Timestamp/1e9, m.Timestamp%1e9))
   277  	// Trigger actions
   278  	messages := make(map[string]cloudevents.Event)
   279  	for k, v := range msgHolder.msgs {
   280  		messages[k] = *v.event
   281  	}
   282  	log.Debugf("Triggering actions for client %s", n.ClientID)
   283  
   284  	action(messages)
   285  
   286  	msgHolder.reset(depName)
   287  	msgHolder.ackAndCache(m, event.ID())
   288  }
   289  
   290  func (n *STANTriggerConn) getGroupNameFromClientID(clientID string) (string, error) {
   291  	log := n.Logger.With("clientID", n.ClientID)
   292  	// take off the last part: clientID should have a dash at the end and we can remove that part
   293  	strs := strings.Split(clientID, "-")
   294  	if len(strs) < 2 {
   295  		err := fmt.Errorf("Expected client ID to contain dash: %s", clientID)
   296  		log.Error(err)
   297  		return "", err
   298  	}
   299  	return strings.Join(strs[:len(strs)-1], "-"), nil
   300  }
   301  
// eventSourceMessage is used by messageHolder to hold the latest message
// received for one dependency.
type eventSourceMessage struct {
	// sequence number of the STAN message
	seq uint64
	// timestamp of the STAN message; compared against UnixNano values, i.e. nanoseconds
	timestamp int64
	// the (transformed) cloudevent carried by the message
	event *cloudevents.Event
	// timestamp of last delivered, in Unix seconds
	lastDeliveredTime int64
}
   310  
// eventSourceMessageHolder is a struct used to hold the message information of subscribed dependencies
type eventSourceMessageHolder struct {
	// time that resets conditions, usually the time all conditions meet,
	// or the time getting an external signal to reset. Guarded by lock.
	lastResetTime time.Time
	// if we reach this time (Unix seconds), we reset everything (set to 60 seconds
	// after lastResetTime by setLastResetTime); 0 means no timeout is armed.
	// Guarded by timeoutLock.
	resetTimeout int64
	// parsed dependency expression, evaluated against parameters
	expr *govaluate.EvaluableExpression
	// de-duplicated variable names found in the dependency expression
	depNames []string
	// Mapping of [eventSourceName + "__" + eventName]dependencyName;
	// the keys are used as glob patterns in getDependencyName
	sourceDepMap map[string]string
	// dependency name -> whether a message currently satisfies it (bool values)
	parameters map[string]interface{}
	// dependency name -> latest message held for that dependency
	msgs map[string]*eventSourceMessage
	// A sync map used to cache the message IDs, it is used to guarantee Exact Once triggering.
	// Values are the UnixNano time (int64) at which the ID was cached.
	smap *sync.Map
	// guards lastResetTime
	lock sync.RWMutex
	// guards resetTimeout
	timeoutLock sync.RWMutex

	// logger supplied at construction time
	logger *zap.SugaredLogger
}
   331  
   332  func newEventSourceMessageHolder(logger *zap.SugaredLogger, dependencyExpr string, dependencies []eventbuscommon.Dependency, lastResetTime time.Time) (*eventSourceMessageHolder, error) {
   333  	dependencyExpr = strings.ReplaceAll(dependencyExpr, "-", "\\-")
   334  	expression, err := govaluate.NewEvaluableExpression(dependencyExpr)
   335  	if err != nil {
   336  		return nil, err
   337  	}
   338  	deps := unique(expression.Vars())
   339  	if len(dependencyExpr) == 0 {
   340  		return nil, fmt.Errorf("no dependencies found: %s", dependencyExpr)
   341  	}
   342  
   343  	srcDepMap := make(map[string]string)
   344  	for _, d := range dependencies {
   345  		key := d.EventSourceName + "__" + d.EventName
   346  		srcDepMap[key] = d.Name
   347  	}
   348  
   349  	parameters := make(map[string]interface{}, len(deps))
   350  	msgs := make(map[string]*eventSourceMessage)
   351  	for _, dep := range deps {
   352  		parameters[dep] = false
   353  	}
   354  
   355  	return &eventSourceMessageHolder{
   356  		lastResetTime: lastResetTime,
   357  		expr:          expression,
   358  		depNames:      deps,
   359  		sourceDepMap:  srcDepMap,
   360  		parameters:    parameters,
   361  		msgs:          msgs,
   362  		smap:          new(sync.Map),
   363  		lock:          sync.RWMutex{},
   364  		logger:        logger,
   365  	}, nil
   366  }
   367  
   368  func (mh *eventSourceMessageHolder) getLastResetTime() time.Time {
   369  	mh.lock.RLock()
   370  	defer mh.lock.RUnlock()
   371  	return mh.lastResetTime
   372  }
   373  
   374  func (mh *eventSourceMessageHolder) setLastResetTime(t time.Time) {
   375  	{
   376  		mh.lock.Lock() // since this can be called asyncronously as part of a ConditionReset, we neeed to lock this code
   377  		defer mh.lock.Unlock()
   378  		mh.lastResetTime = t
   379  	}
   380  	mh.setResetTimeout(t.Add(time.Second * 60).Unix()) // failsafe condition: determine if we for some reason we haven't acknowledged all dependencies within 60 seconds of the lastResetTime
   381  }
   382  
   383  func (mh *eventSourceMessageHolder) setResetTimeout(t int64) {
   384  	mh.timeoutLock.Lock() // since this can be called asyncronously as part of a ConditionReset, we neeed to lock this code
   385  	defer mh.timeoutLock.Unlock()
   386  	mh.resetTimeout = t
   387  }
   388  
   389  func (mh *eventSourceMessageHolder) getResetTimeout() int64 {
   390  	mh.timeoutLock.RLock()
   391  	defer mh.timeoutLock.RUnlock()
   392  	return mh.resetTimeout
   393  }
   394  
   395  // failsafe condition after lastResetTime
   396  func (mh *eventSourceMessageHolder) fullResetTimeout() bool {
   397  	resetTimeout := mh.getResetTimeout()
   398  	return resetTimeout != 0 && time.Now().Unix() > resetTimeout
   399  }
   400  
   401  func (mh *eventSourceMessageHolder) getDependencyName(eventSourceName, eventName string) (string, error) {
   402  	for k, v := range mh.sourceDepMap {
   403  		sourceGlob, err := glob.Compile(k)
   404  		if err != nil {
   405  			return "", err
   406  		}
   407  		if sourceGlob.Match(eventSourceName + "__" + eventName) {
   408  			return v, nil
   409  		}
   410  	}
   411  	return "", nil
   412  }
   413  
   414  // Ack the stan message and cache the ID to make sure Exact Once triggering
   415  func (mh *eventSourceMessageHolder) ackAndCache(m *stan.Msg, id string) {
   416  	_ = m.Ack()
   417  	mh.smap.Store(id, time.Now().UnixNano())
   418  }
   419  
   420  // Reset the parameter and message that a dependency holds
   421  func (mh *eventSourceMessageHolder) reset(depName string) {
   422  	mh.parameters[depName] = false
   423  	delete(mh.msgs, depName)
   424  
   425  	if mh.isCleanedUp() {
   426  		mh.setResetTimeout(0)
   427  	}
   428  }
   429  
   430  func (mh *eventSourceMessageHolder) resetAll() {
   431  	for k := range mh.msgs {
   432  		delete(mh.msgs, k)
   433  	}
   434  
   435  	for k := range mh.parameters {
   436  		mh.parameters[k] = false
   437  	}
   438  	mh.setResetTimeout(0)
   439  }
   440  
   441  // Check if all the parameters and messages have been cleaned up
   442  func (mh *eventSourceMessageHolder) isCleanedUp() bool {
   443  	for _, v := range mh.parameters {
   444  		if v == true {
   445  			return false
   446  		}
   447  	}
   448  	return len(mh.msgs) == 0
   449  }
   450  
   451  func unique(stringSlice []string) []string {
   452  	if len(stringSlice) == 0 {
   453  		return stringSlice
   454  	}
   455  	keys := make(map[string]bool)
   456  	list := []string{}
   457  	for _, entry := range stringSlice {
   458  		if _, value := keys[entry]; !value {
   459  			keys[entry] = true
   460  			list = append(list, entry)
   461  		}
   462  	}
   463  	return list
   464  }