github.com/choria-io/go-choria@v0.28.1-0.20240416190746-b3bf9c7d5a45/aagent/watchers/gossipwatcher/gossip.go (about) 1 // Copyright (c) 2022-2024, R.I. Pienaar and the Choria Project contributors 2 // 3 // SPDX-License-Identifier: Apache-2.0 4 5 package gossipwatcher 6 7 import ( 8 "context" 9 "encoding/json" 10 "fmt" 11 "net" 12 "regexp" 13 "strings" 14 "sync" 15 "time" 16 17 "github.com/choria-io/go-choria/aagent/model" 18 "github.com/choria-io/go-choria/aagent/util" 19 "github.com/choria-io/go-choria/aagent/watchers/event" 20 "github.com/choria-io/go-choria/aagent/watchers/watcher" 21 iu "github.com/choria-io/go-choria/internal/util" 22 "github.com/nats-io/nats.go" 23 ) 24 25 type State int 26 27 const ( 28 Stopped State = iota 29 Running 30 31 wtype = "gossip" 32 version = "v1" 33 ) 34 35 var ( 36 validBasicName = `[a-zA-Z][a-zA-Z\d_-]*` 37 validServiceRegex = regexp.MustCompile(`^` + validBasicName + `$`) 38 ) 39 40 type Registration struct { 41 Cluster string `json:"cluster"` 42 Service string `json:"service"` 43 Protocol string `json:"protocol"` 44 IP string `json:"address"` 45 Port uint `json:"port"` 46 Priority uint `json:"priority"` 47 Annotations map[string]string `json:"annotations,omitempty"` 48 Prefix string `json:"-"` 49 } 50 51 type properties struct { 52 Subject string 53 Payload string 54 Registration *Registration 55 } 56 57 type Watcher struct { 58 *watcher.Watcher 59 properties *properties 60 61 name string 62 machine model.Machine 63 nc *nats.Conn 64 interval time.Duration 65 gossipCancel context.CancelFunc 66 runCtx context.Context 67 state State 68 lastSubject string 69 lastPayload string 70 lastGossip time.Time 71 72 terminate chan struct{} 73 mu *sync.Mutex 74 } 75 76 func New(machine model.Machine, name string, states []string, failEvent string, successEvent string, interval string, ai time.Duration, properties map[string]any) (any, error) { 77 var err error 78 79 tw := &Watcher{ 80 name: name, 81 machine: machine, 82 terminate: make(chan struct{}), 83 mu: &sync.Mutex{}, 84 } 85 86 tw.interval, err = iu.ParseDuration(interval) 87 if err != nil { 88 return nil, err 89 } 90 91 tw.Watcher, err = watcher.NewWatcher(name, wtype, ai, states, machine, failEvent, successEvent) 92 if err != nil { 93 return nil, err 94 } 95 96 err = tw.setProperties(properties) 97 if err != nil { 98 return nil, fmt.Errorf("could not set properties: %s", err) 99 } 100 101 return tw, nil 102 } 103 104 func (w *Watcher) getConn() (*nats.Conn, error) { 105 w.mu.Lock() 106 defer w.mu.Unlock() 107 108 if w.nc != nil { 109 return w.nc, nil 110 } 111 112 mgr, err := w.machine.JetStreamConnection() 113 if err != nil { 114 return nil, err 115 } 116 117 w.nc = mgr.NatsConn() 118 119 return w.nc, nil 120 } 121 122 func (w *Watcher) stopGossip() { 123 w.mu.Lock() 124 cancel := w.gossipCancel 125 w.state = Stopped 126 w.mu.Unlock() 127 128 if cancel != nil { 129 w.Infof("Stopping gossip on transition to %s", w.machine.State()) 130 cancel() 131 } 132 } 133 134 func (w *Watcher) startGossip() { 135 w.mu.Lock() 136 cancel := w.gossipCancel 137 ctx := w.runCtx 138 w.mu.Unlock() 139 140 if cancel != nil { 141 return 142 } 143 144 go func() { 145 tick := time.NewTicker(w.interval) 146 gCtx, cancel := context.WithCancel(ctx) 147 148 var err error 149 150 w.mu.Lock() 151 w.state = Running 152 w.gossipCancel = cancel 153 w.mu.Unlock() 154 155 if err != nil { 156 w.Errorf("Could not get a NATS connection to publish Gossip") 157 } 158 159 stop := func() { 160 w.mu.Lock() 161 w.gossipCancel = nil 162 w.state = Stopped 163 tick.Stop() 164 w.mu.Unlock() 165 } 166 167 publish := func() { 168 if !w.ShouldWatch() { 169 return 170 } 171 172 w.Infof("Gossiping while in state %v", w.machine.State()) 173 nc, err := w.getConn() 174 if err != nil { 175 w.Errorf("Could not get NATS connection: %v", err) 176 return 177 } 178 179 subject, err := w.ProcessTemplate(w.properties.Subject) 180 if err != nil { 181 w.Errorf("Could not template parse subject: %v", err) 182 return 183 } 184 185 payload, err := w.ProcessTemplate(w.properties.Payload) 186 if err != nil { 187 w.Errorf("Could not template parse payload: %v", err) 188 return 189 } 190 191 w.Debugf("Publishing gossip to %s", subject) 192 nc.Publish(subject, []byte(payload)) 193 194 w.mu.Lock() 195 w.lastGossip = time.Now() 196 w.lastSubject = subject 197 w.lastPayload = payload 198 w.mu.Unlock() 199 } 200 201 publish() 202 203 for { 204 select { 205 case <-tick.C: 206 publish() 207 case <-gCtx.Done(): 208 stop() 209 return 210 case <-w.terminate: 211 stop() 212 return 213 } 214 } 215 }() 216 } 217 218 func (w *Watcher) watch() { 219 if !w.ShouldWatch() { 220 w.stopGossip() 221 return 222 } 223 224 w.Infof("Starting gossip timer") 225 w.startGossip() 226 } 227 228 func (w *Watcher) Run(ctx context.Context, wg *sync.WaitGroup) { 229 defer wg.Done() 230 231 w.mu.Lock() 232 w.runCtx = ctx 233 w.mu.Unlock() 234 235 w.Infof("Gossip watcher starting with subject %q on interval %v", w.properties.Subject, w.interval) 236 237 w.watch() 238 239 for { 240 select { 241 case <-w.StateChangeC(): 242 w.watch() 243 244 case <-w.terminate: 245 w.Infof("Handling terminate notification") 246 return 247 case <-ctx.Done(): 248 w.Infof("Stopping on context interrupt") 249 return 250 } 251 } 252 } 253 254 func (w *Watcher) setProperties(props map[string]any) error { 255 if w.properties == nil { 256 w.properties = &properties{} 257 } 258 259 err := util.ParseMapStructure(props, w.properties) 260 if err != nil { 261 return err 262 } 263 264 return w.validate() 265 } 266 267 func (w *Watcher) validate() error { 268 switch { 269 case w.properties.Registration == nil: 270 if w.properties.Subject == "" { 271 return fmt.Errorf("subject is required") 272 } 273 if w.properties.Payload == "" { 274 return fmt.Errorf("payload is required") 275 } 276 default: 277 if w.properties.Subject != "" { 278 return fmt.Errorf("subject cannot be set with registration") 279 } 280 if w.properties.Payload != "" { 281 return fmt.Errorf("payload cannot be set with registration") 282 } 283 reg := w.properties.Registration 284 if reg.Cluster == "" { 285 return fmt.Errorf("cluster is required") 286 } 287 if !validServiceRegex.MatchString(reg.Cluster) { 288 return fmt.Errorf("invalid cluster") 289 } 290 if reg.Service == "" { 291 return fmt.Errorf("service is required") 292 } 293 if !validServiceRegex.MatchString(reg.Service) { 294 return fmt.Errorf("invalid service") 295 } 296 if reg.Protocol == "" { 297 return fmt.Errorf("protocol is required") 298 } 299 if !validServiceRegex.MatchString(reg.Protocol) { 300 return fmt.Errorf("invalid protocol") 301 } 302 if reg.IP == "" { 303 return fmt.Errorf("ip is required") 304 } 305 if net.ParseIP(reg.IP) == nil { 306 return fmt.Errorf("invalid ip") 307 } 308 if reg.Port == 0 { 309 return fmt.Errorf("port is required") 310 } 311 312 subj := fmt.Sprintf("%s.%s.%s.%s", reg.Cluster, reg.Protocol, reg.Service, w.machine.InstanceID()) 313 if reg.Prefix == "" { 314 w.properties.Subject = fmt.Sprintf("$KV.CHORIA_SERVICES.%s", subj) 315 } else { 316 w.properties.Subject = fmt.Sprintf("%s.%s", reg.Prefix, subj) 317 } 318 319 if strings.ContainsAny(w.properties.Subject, " ^*") || strings.Contains(w.properties.Subject, "..") { 320 return fmt.Errorf("invalid registration properties") 321 } 322 323 pj, err := json.Marshal(w.properties.Registration) 324 if err != nil { 325 return err 326 } 327 w.properties.Payload = string(pj) 328 } 329 330 if w.interval == 0 { 331 w.interval = 15 * time.Second 332 } 333 334 return nil 335 } 336 337 func (w *Watcher) Delete() { 338 close(w.terminate) 339 } 340 341 func (w *Watcher) CurrentState() any { 342 w.mu.Lock() 343 defer w.mu.Unlock() 344 345 s := &StateNotification{ 346 Event: event.New(w.name, wtype, version, w.machine), 347 Published: w.lastGossip.Unix(), 348 Payload: w.lastPayload, 349 Subject: w.lastSubject, 350 } 351 352 return s 353 }