github.com/argoproj/argo-events@v1.9.1/eventbus/stan/sensor/trigger_conn.go (about) 1 package sensor 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/Knetic/govaluate" 12 cloudevents "github.com/cloudevents/sdk-go/v2" 13 "github.com/gobwas/glob" 14 "github.com/nats-io/stan.go" 15 "github.com/nats-io/stan.go/pb" 16 "go.uber.org/zap" 17 18 eventbuscommon "github.com/argoproj/argo-events/eventbus/common" 19 20 stanbase "github.com/argoproj/argo-events/eventbus/stan/base" 21 ) 22 23 type STANTriggerConn struct { 24 *stanbase.STANConnection 25 26 sensorName string 27 triggerName string 28 dependencyExpression string 29 deps []eventbuscommon.Dependency 30 } 31 32 func NewSTANTriggerConn(conn *stanbase.STANConnection, sensorName string, triggerName string, dependencyExpression string, deps []eventbuscommon.Dependency) *STANTriggerConn { 33 n := &STANTriggerConn{conn, sensorName, triggerName, dependencyExpression, deps} 34 n.Logger = n.Logger.With("triggerName", n.triggerName).With("clientID", n.ClientID) 35 return n 36 } 37 38 func (n *STANTriggerConn) String() string { 39 if n == nil { 40 return "" 41 } 42 return fmt.Sprintf("STANTriggerConn{ClientID:%s,Sensor:%s,Trigger:%s}", n.ClientID, n.sensorName, n.triggerName) 43 } 44 45 func (conn *STANTriggerConn) IsClosed() bool { 46 return conn == nil || conn.STANConnection.IsClosed() 47 } 48 49 func (conn *STANTriggerConn) Close() error { 50 if conn == nil { 51 return fmt.Errorf("can't close STAN trigger connection, STANTriggerConn is nil") 52 } 53 return conn.STANConnection.Close() 54 } 55 56 // Subscribe is used to subscribe to multiple event source dependencies 57 // Parameter - ctx, context 58 // Parameter - conn, eventbus connection 59 // Parameter - group, queue group name 60 // Parameter - closeCh, channel to indicate to close the subscription 61 // Parameter - resetConditionsCh, channel to indicate to reset trigger conditions 62 // Parameter - lastResetTime, the last time reset would have occurred, if any 63 // Parameter - dependencyExpr, example: "(dep1 || dep2) && dep3" 64 // Parameter - dependencies, array of dependencies information 65 // Parameter - filter, a function used to filter the message 66 // Parameter - action, a function to be triggered after all conditions meet 67 func (n *STANTriggerConn) Subscribe( 68 ctx context.Context, 69 closeCh <-chan struct{}, 70 resetConditionsCh <-chan struct{}, 71 lastResetTime time.Time, 72 transform func(depName string, event cloudevents.Event) (*cloudevents.Event, error), 73 filter func(string, cloudevents.Event) bool, 74 action func(map[string]cloudevents.Event), 75 defaultSubject *string) error { 76 if n == nil { 77 return fmt.Errorf("Subscribe() failed; STANTriggerConn is nil") 78 } 79 80 log := n.Logger 81 82 if defaultSubject == nil { 83 log.Error("can't subscribe over NATS streaming: defaultSubject not set") 84 } 85 86 msgHolder, err := newEventSourceMessageHolder(log, n.dependencyExpression, n.deps, lastResetTime) 87 if err != nil { 88 return err 89 } 90 // use group name as durable name 91 group, err := n.getGroupNameFromClientID(n.ClientID) 92 if err != nil { 93 return err 94 } 95 durableName := group 96 sub, err := n.STANConn.QueueSubscribe(*defaultSubject, group, func(m *stan.Msg) { 97 n.processEventSourceMsg(m, msgHolder, transform, filter, action, log) 98 }, stan.DurableName(durableName), 99 stan.SetManualAckMode(), 100 stan.StartAt(pb.StartPosition_NewOnly), 101 stan.AckWait(1*time.Second), 102 stan.MaxInflight(len(msgHolder.depNames)+2)) 103 if err != nil { 104 log.Errorf("failed to subscribe to subject %s", *defaultSubject) 105 return err 106 } 107 log.Infof("Subscribed to subject %s using durable name %s", *defaultSubject, durableName) 108 109 // Daemon to evict cache and reset trigger conditions 110 wg := &sync.WaitGroup{} 111 daemonStopCh := make(chan struct{}) 112 wg.Add(1) 113 go func() { 114 defer wg.Done() 115 log.Info("starting ExactOnce cache clean up daemon ...") 116 ticker := time.NewTicker(60 * time.Second) 117 defer ticker.Stop() 118 for { 119 select { 120 case <-daemonStopCh: 121 log.Info("exiting ExactOnce cache clean up daemon...") 122 return 123 case <-ticker.C: 124 now := time.Now().UnixNano() 125 num := 0 126 msgHolder.smap.Range(func(key, value interface{}) bool { 127 v := value.(int64) 128 // Evict cached ID older than 5 minutes 129 if now-v > 5*60*1000*1000*1000 { 130 msgHolder.smap.Delete(key) 131 num++ 132 log.Debugw("cached ID evicted", "id", key) 133 } 134 return true 135 }) 136 log.Debugf("finished evicting %v cached IDs, time cost: %v ms", num, (time.Now().UnixNano()-now)/1000/1000) 137 case <-resetConditionsCh: 138 log.Info("reset conditions") 139 msgHolder.setLastResetTime(time.Now()) 140 } 141 } 142 }() 143 144 for { 145 select { 146 case <-ctx.Done(): 147 log.Info("exiting, unsubscribing and closing connection...") 148 _ = sub.Close() 149 log.Infof("subscription on subject %s closed", *defaultSubject) 150 daemonStopCh <- struct{}{} 151 wg.Wait() 152 return nil 153 case <-closeCh: 154 log.Info("closing subscription...") 155 _ = sub.Close() 156 log.Infof("subscription on subject %s closed", *defaultSubject) 157 daemonStopCh <- struct{}{} 158 wg.Wait() 159 return nil 160 } 161 } 162 } 163 164 func (n *STANTriggerConn) processEventSourceMsg(m *stan.Msg, msgHolder *eventSourceMessageHolder, transform func(depName string, event cloudevents.Event) (*cloudevents.Event, error), filter func(dependencyName string, event cloudevents.Event) bool, action func(map[string]cloudevents.Event), log *zap.SugaredLogger) { 165 var event *cloudevents.Event 166 if err := json.Unmarshal(m.Data, &event); err != nil { 167 log.Errorf("Failed to convert to a cloudevent, discarding it... err: %v", err) 168 _ = m.Ack() 169 return 170 } 171 172 depName, err := msgHolder.getDependencyName(event.Source(), event.Subject()) 173 if err != nil { 174 log.Errorf("Failed to get the dependency name, discarding it... err: %v", err) 175 _ = m.Ack() 176 return 177 } 178 179 log.Debugf("New incoming Event Source Message, dependency name=%s", depName) 180 181 if depName == "" { 182 _ = m.Ack() 183 return 184 } 185 186 event, err = transform(depName, *event) 187 if err != nil { 188 log.Errorw("failed to apply event transformation", zap.Error(err)) 189 _ = m.Ack() 190 return 191 } 192 193 if !filter(depName, *event) { 194 // message not interested 195 log.Debugf("not interested in dependency %s", depName) 196 _ = m.Ack() 197 return 198 } 199 200 // NATS Streaming guarantees At Least Once delivery, 201 // so need to check if the message is duplicate 202 if _, ok := msgHolder.smap.Load(event.ID()); ok { 203 log.Infow("ATTENTION: Duplicate delivered message detected", "message", m) 204 _ = m.Ack() 205 return 206 } 207 208 // Acknowledge any old messages that occurred before the last reset (standard reset after trigger or conditional reset) 209 if m.Timestamp <= msgHolder.getLastResetTime().UnixNano() { 210 if depName != "" { 211 msgHolder.reset(depName) 212 } 213 msgHolder.ackAndCache(m, event.ID()) 214 215 log.Debugf("reset and acked dependency=%s due to message time occurred before reset, m.Timestamp=%d, msgHolder.getLastResetTime()=%d", 216 depName, m.Timestamp, msgHolder.getLastResetTime().UnixNano()) 217 return 218 } 219 // make sure that everything has been cleared within a certain amount of time 220 if msgHolder.fullResetTimeout() { 221 log.Infof("ATTENTION: Resetting the flags because they didn't get cleared before the timeout: msgHolder=%+v", msgHolder) 222 msgHolder.resetAll() 223 } 224 225 now := time.Now().Unix() 226 227 // Start a new round 228 if existingMsg, ok := msgHolder.msgs[depName]; ok { 229 if m.Timestamp == existingMsg.timestamp { 230 // Re-delivered latest messge, update delivery timestamp and return 231 existingMsg.lastDeliveredTime = now 232 msgHolder.msgs[depName] = existingMsg 233 log.Debugf("Updating timestamp for dependency=%s", depName) 234 return 235 } else if m.Timestamp < existingMsg.timestamp { 236 // Re-delivered old message, ack and return 237 msgHolder.ackAndCache(m, event.ID()) 238 log.Debugw("Dropping this message because later ones also satisfy", "eventID", event.ID()) 239 return 240 } 241 } 242 // New message, set and check 243 msgHolder.msgs[depName] = &eventSourceMessage{seq: m.Sequence, timestamp: m.Timestamp, event: event, lastDeliveredTime: now} 244 msgHolder.parameters[depName] = true 245 246 // Check if there's any stale message being held. 247 // Stale message could be message age has been longer than NATS streaming max message age, 248 // which means it has ben deleted from NATS server side, but it's still held here. 249 // Use last delivery timestamp to determine that. 250 for k, v := range msgHolder.msgs { 251 // Since the message is not acked, the server will keep re-sending it. 252 // If a message being held didn't get re-delivered in the last 10 minutes, treat it as stale. 253 if (now - v.lastDeliveredTime) > 10*60 { 254 msgHolder.reset(k) 255 } 256 } 257 258 result, err := msgHolder.expr.Evaluate(msgHolder.parameters) 259 if err != nil { 260 log.Errorf("failed to evaluate dependency expression: %v", err) 261 // TODO: how to handle this situation? 262 return 263 } 264 if result != true { 265 // Log current meet dependency information 266 meetDeps := []string{} 267 meetMsgIds := []string{} 268 for k, v := range msgHolder.msgs { 269 meetDeps = append(meetDeps, k) 270 meetMsgIds = append(meetMsgIds, v.event.ID()) 271 } 272 log.Infow("trigger conditions not met", zap.Any("meetDependencies", meetDeps), zap.Any("meetEvents", meetMsgIds)) 273 return 274 } 275 276 msgHolder.setLastResetTime(time.Unix(m.Timestamp/1e9, m.Timestamp%1e9)) 277 // Trigger actions 278 messages := make(map[string]cloudevents.Event) 279 for k, v := range msgHolder.msgs { 280 messages[k] = *v.event 281 } 282 log.Debugf("Triggering actions for client %s", n.ClientID) 283 284 action(messages) 285 286 msgHolder.reset(depName) 287 msgHolder.ackAndCache(m, event.ID()) 288 } 289 290 func (n *STANTriggerConn) getGroupNameFromClientID(clientID string) (string, error) { 291 log := n.Logger.With("clientID", n.ClientID) 292 // take off the last part: clientID should have a dash at the end and we can remove that part 293 strs := strings.Split(clientID, "-") 294 if len(strs) < 2 { 295 err := fmt.Errorf("Expected client ID to contain dash: %s", clientID) 296 log.Error(err) 297 return "", err 298 } 299 return strings.Join(strs[:len(strs)-1], "-"), nil 300 } 301 302 // eventSourceMessage is used by messageHolder to hold the latest message 303 type eventSourceMessage struct { 304 seq uint64 305 timestamp int64 306 event *cloudevents.Event 307 // timestamp of last delivered 308 lastDeliveredTime int64 309 } 310 311 // eventSourceMessageHolder is a struct used to hold the message information of subscribed dependencies 312 type eventSourceMessageHolder struct { 313 // time that resets conditions, usually the time all conditions meet, 314 // or the time getting an external signal to reset. 315 lastResetTime time.Time 316 // if we reach this time, we reset everything (occurs 60 seconds after lastResetTime) 317 resetTimeout int64 318 expr *govaluate.EvaluableExpression 319 depNames []string 320 // Mapping of [eventSourceName + eventName]dependencyName 321 sourceDepMap map[string]string 322 parameters map[string]interface{} 323 msgs map[string]*eventSourceMessage 324 // A sync map used to cache the message IDs, it is used to guarantee Exact Once triggering 325 smap *sync.Map 326 lock sync.RWMutex 327 timeoutLock sync.RWMutex 328 329 logger *zap.SugaredLogger 330 } 331 332 func newEventSourceMessageHolder(logger *zap.SugaredLogger, dependencyExpr string, dependencies []eventbuscommon.Dependency, lastResetTime time.Time) (*eventSourceMessageHolder, error) { 333 dependencyExpr = strings.ReplaceAll(dependencyExpr, "-", "\\-") 334 expression, err := govaluate.NewEvaluableExpression(dependencyExpr) 335 if err != nil { 336 return nil, err 337 } 338 deps := unique(expression.Vars()) 339 if len(dependencyExpr) == 0 { 340 return nil, fmt.Errorf("no dependencies found: %s", dependencyExpr) 341 } 342 343 srcDepMap := make(map[string]string) 344 for _, d := range dependencies { 345 key := d.EventSourceName + "__" + d.EventName 346 srcDepMap[key] = d.Name 347 } 348 349 parameters := make(map[string]interface{}, len(deps)) 350 msgs := make(map[string]*eventSourceMessage) 351 for _, dep := range deps { 352 parameters[dep] = false 353 } 354 355 return &eventSourceMessageHolder{ 356 lastResetTime: lastResetTime, 357 expr: expression, 358 depNames: deps, 359 sourceDepMap: srcDepMap, 360 parameters: parameters, 361 msgs: msgs, 362 smap: new(sync.Map), 363 lock: sync.RWMutex{}, 364 logger: logger, 365 }, nil 366 } 367 368 func (mh *eventSourceMessageHolder) getLastResetTime() time.Time { 369 mh.lock.RLock() 370 defer mh.lock.RUnlock() 371 return mh.lastResetTime 372 } 373 374 func (mh *eventSourceMessageHolder) setLastResetTime(t time.Time) { 375 { 376 mh.lock.Lock() // since this can be called asyncronously as part of a ConditionReset, we neeed to lock this code 377 defer mh.lock.Unlock() 378 mh.lastResetTime = t 379 } 380 mh.setResetTimeout(t.Add(time.Second * 60).Unix()) // failsafe condition: determine if we for some reason we haven't acknowledged all dependencies within 60 seconds of the lastResetTime 381 } 382 383 func (mh *eventSourceMessageHolder) setResetTimeout(t int64) { 384 mh.timeoutLock.Lock() // since this can be called asyncronously as part of a ConditionReset, we neeed to lock this code 385 defer mh.timeoutLock.Unlock() 386 mh.resetTimeout = t 387 } 388 389 func (mh *eventSourceMessageHolder) getResetTimeout() int64 { 390 mh.timeoutLock.RLock() 391 defer mh.timeoutLock.RUnlock() 392 return mh.resetTimeout 393 } 394 395 // failsafe condition after lastResetTime 396 func (mh *eventSourceMessageHolder) fullResetTimeout() bool { 397 resetTimeout := mh.getResetTimeout() 398 return resetTimeout != 0 && time.Now().Unix() > resetTimeout 399 } 400 401 func (mh *eventSourceMessageHolder) getDependencyName(eventSourceName, eventName string) (string, error) { 402 for k, v := range mh.sourceDepMap { 403 sourceGlob, err := glob.Compile(k) 404 if err != nil { 405 return "", err 406 } 407 if sourceGlob.Match(eventSourceName + "__" + eventName) { 408 return v, nil 409 } 410 } 411 return "", nil 412 } 413 414 // Ack the stan message and cache the ID to make sure Exact Once triggering 415 func (mh *eventSourceMessageHolder) ackAndCache(m *stan.Msg, id string) { 416 _ = m.Ack() 417 mh.smap.Store(id, time.Now().UnixNano()) 418 } 419 420 // Reset the parameter and message that a dependency holds 421 func (mh *eventSourceMessageHolder) reset(depName string) { 422 mh.parameters[depName] = false 423 delete(mh.msgs, depName) 424 425 if mh.isCleanedUp() { 426 mh.setResetTimeout(0) 427 } 428 } 429 430 func (mh *eventSourceMessageHolder) resetAll() { 431 for k := range mh.msgs { 432 delete(mh.msgs, k) 433 } 434 435 for k := range mh.parameters { 436 mh.parameters[k] = false 437 } 438 mh.setResetTimeout(0) 439 } 440 441 // Check if all the parameters and messages have been cleaned up 442 func (mh *eventSourceMessageHolder) isCleanedUp() bool { 443 for _, v := range mh.parameters { 444 if v == true { 445 return false 446 } 447 } 448 return len(mh.msgs) == 0 449 } 450 451 func unique(stringSlice []string) []string { 452 if len(stringSlice) == 0 { 453 return stringSlice 454 } 455 keys := make(map[string]bool) 456 list := []string{} 457 for _, entry := range stringSlice { 458 if _, value := keys[entry]; !value { 459 keys[entry] = true 460 list = append(list, entry) 461 } 462 } 463 return list 464 }