github.com/simpleiot/simpleiot@v0.18.3/client/manager.go (about) 1 package client 2 3 import ( 4 "fmt" 5 "log" 6 "reflect" 7 "strings" 8 "time" 9 10 "github.com/nats-io/nats.go" 11 "github.com/simpleiot/simpleiot/data" 12 ) 13 14 // Manager manages a node type, watches for changes, adds/removes instances that get 15 // added/deleted 16 type Manager[T any] struct { 17 // initial state 18 nc *nats.Conn 19 root string 20 nodeType string 21 parentTypes []string 22 construct func(*nats.Conn, T) Client 23 24 // synchronization fields 25 stop chan struct{} 26 chScan chan struct{} 27 chAction chan func() 28 chCSStopped chan string 29 chDeleteCS chan string 30 31 // keep track of clients 32 clientStates map[string]*clientState[T] 33 clientUpSub map[string]*nats.Subscription 34 35 // subscription to listen for new points 36 upSub *nats.Subscription 37 } 38 39 // NewManager takes constructor for a node client and returns a Manager for that client 40 // The Node Type is inferred from the Go type passed in, so you must name Go client 41 // Types to manage the node type definitions. The manager recursively finds nodes 42 // that are children of group nodes and the node types found in parentTypes. 43 func NewManager[T any](nc *nats.Conn, 44 construct func(nc *nats.Conn, config T) Client, parentTypes []string) *Manager[T] { 45 var x T 46 nodeType := data.ToCamelCase(reflect.TypeOf(x).Name()) 47 48 return &Manager[T]{ 49 nc: nc, 50 nodeType: nodeType, 51 parentTypes: append(parentTypes, data.NodeTypeGroup), 52 construct: construct, 53 stop: make(chan struct{}), 54 chScan: make(chan struct{}), 55 chAction: make(chan func()), 56 chCSStopped: make(chan string), 57 chDeleteCS: make(chan string), 58 clientStates: make(map[string]*clientState[T]), 59 clientUpSub: make(map[string]*nats.Subscription), 60 } 61 } 62 63 // Run node manager. This function looks for children of a certain node type. 64 // When new nodes are found, the data is decoded into the client type config, and the 65 // constructor for the node client is called. This call blocks until Stop is called. 66 func (m *Manager[T]) Run() error { 67 nodes, err := GetNodes(m.nc, "root", "all", "", false) 68 if err != nil { 69 return fmt.Errorf("Manager: Error getting root node: %v", err) 70 } 71 72 if len(nodes) < 1 { 73 return fmt.Errorf("Manager: Error no root node") 74 } 75 76 m.root = nodes[0].ID 77 78 // TODO: it may make sense at some point to have a special topic 79 // for new nodes so that all client managers don't have to listen 80 // to all points 81 m.upSub, err = m.nc.Subscribe("up.root.>", func(msg *nats.Msg) { 82 points, err := data.PbDecodePoints(msg.Data) 83 if err != nil { 84 log.Println("Error decoding points") 85 return 86 } 87 88 for _, p := range points { 89 if p.Type == data.PointTypeNodeType { 90 m.chScan <- struct{}{} 91 } 92 } 93 }) 94 95 if err != nil { 96 return err 97 } 98 99 err = m.scan(m.root) 100 if err != nil { 101 log.Println("Error scanning for new nodes:", err) 102 } 103 104 shutdownTimer := time.NewTimer(time.Hour) 105 shutdownTimer.Stop() 106 107 restartTimer := time.NewTimer(time.Hour) 108 restartTimer.Stop() 109 110 stopping := false 111 112 scan := func() { 113 if stopping { 114 return 115 } 116 117 err := m.scan(m.root) 118 if err != nil { 119 log.Println("Error scanning for new nodes:", err) 120 } 121 } 122 123 done: 124 for { 125 select { 126 case <-m.stop: 127 stopping = true 128 _ = m.upSub.Unsubscribe() 129 if len(m.clientStates) > 0 { 130 for _, c := range m.clientStates { 131 c.stop(err) 132 } 133 shutdownTimer.Reset(time.Second * 5) 134 } else { 135 break done 136 } 137 case f := <-m.chAction: 138 f() 139 case <-time.After(time.Minute): 140 scan() 141 case <-m.chScan: 142 scan() 143 case key := <-m.chCSStopped: 144 // TODO: the following can be used to wait until all messages 145 // have been drained, but have not been able to get this to 146 // work reliably without deadlocking 147 err = m.clientUpSub[key].Drain() 148 if err != nil { 149 log.Println("Error unsubscribing subscription:", err) 150 } 151 start := time.Now() 152 for { 153 if !m.clientUpSub[key].IsValid() { 154 break 155 } 156 if time.Since(start) > time.Second*1 { 157 log.Println("Error: timeout waiting for subscription to drain:", key) 158 break 159 } 160 time.Sleep(10 * time.Millisecond) 161 } 162 163 m.chDeleteCS <- key 164 case key := <-m.chDeleteCS: 165 err = m.clientUpSub[key].Unsubscribe() 166 if err != nil { 167 log.Println("Error unsubscribing subscription:", err) 168 } 169 delete(m.clientUpSub, key) 170 // client state must be deleted after the subscription is stopped 171 // as the subscription uses it 172 delete(m.clientStates, key) 173 174 if stopping { 175 if len(m.clientStates) <= 0 { 176 break done 177 } 178 } else { 179 // client may have exitted itself due to child 180 // node changes so scan to re-initialize it again 181 scan() 182 } 183 case <-shutdownTimer.C: 184 // TODO: should we return an error here? 185 log.Println("BUG: Client manager: not all clients shutdown for node type:", m.nodeType) 186 for _, v := range m.clientStates { 187 log.Println("Client stuck for node:", v.node.ID) 188 } 189 break done 190 } 191 } 192 193 return nil 194 } 195 196 // Stop manager. This also stops all registered clients and causes Start to exit. 197 func (m *Manager[T]) Stop(_ error) { 198 close(m.stop) 199 } 200 201 func (m *Manager[T]) scanHelper(id string, nodes []data.NodeEdge) ([]data.NodeEdge, error) { 202 children, err := GetNodes(m.nc, id, "all", m.nodeType, false) 203 if err != nil { 204 return nil, err 205 } 206 207 nodes = append(nodes, children...) 208 209 // recurse into any nodes that may have children 210 for _, parentType := range m.parentTypes { 211 parentNodes, err := GetNodes(m.nc, id, "all", parentType, false) 212 if err != nil { 213 return []data.NodeEdge{}, err 214 } 215 for _, p := range parentNodes { 216 c, err := m.scanHelper(p.ID, nodes) 217 if err != nil { 218 return nil, err 219 } 220 nodes = append(nodes, c...) 221 } 222 } 223 224 return nodes, nil 225 } 226 227 func (m *Manager[T]) scan(id string) error { 228 nodes, err := m.scanHelper(id, []data.NodeEdge{}) 229 if err != nil { 230 return err 231 } 232 233 if len(nodes) == 0 { 234 return nil 235 } 236 237 found := make(map[string]bool) 238 239 // create new nodes 240 for _, n := range nodes { 241 key := mapKey(n) 242 found[key] = true 243 244 if _, ok := m.clientStates[key]; ok { 245 continue 246 } 247 248 // Need to create a new client 249 cs, err := newClientState(m.nc, m.construct, n) 250 251 if err != nil { 252 log.Printf("Error starting client %v: %v", n, err) 253 } 254 255 go func() { 256 err := cs.run() 257 258 if err != nil { 259 log.Printf("clientState error %v: %v\n", m.nodeType, err) 260 } 261 262 m.chDeleteCS <- key 263 }() 264 265 m.clientStates[key] = cs 266 267 // Set up subscriptions 268 subject := fmt.Sprintf("up.%v.>", cs.node.ID) 269 270 m.clientUpSub[key], err = cs.nc.Subscribe(subject, func(msg *nats.Msg) { 271 points, err := data.PbDecodePoints(msg.Data) 272 if err != nil { 273 log.Println("Error decoding points") 274 return 275 } 276 277 // find node ID for points 278 chunks := strings.Split(msg.Subject, ".") 279 280 if len(chunks) != 3 && len(chunks) != 4 { 281 log.Println("up subject malformed:", msg.Subject) 282 return 283 } 284 285 nodeID := chunks[2] 286 287 if len(chunks) == 3 { 288 // process node points 289 290 // only filter node points for now. The Shelly client broke badly 291 // when we applied the below filtering to edge points as well, 292 // probably because the tombstone edge points were filtered. 293 // We may optimize this later if we make extensive use of edge 294 // points. 295 for _, p := range points { 296 if p.Origin == "" && nodeID == cs.node.ID { 297 // if this point came from the owning client, it already knows about it 298 return 299 } 300 301 if p.Origin == cs.node.ID { 302 // if this client sent this point, it already knows about it 303 return 304 } 305 } 306 307 cs.client.Points(nodeID, points) 308 } else if len(chunks) == 4 { 309 // process edge points 310 parentID := chunks[3] 311 for _, p := range points { 312 switch { 313 case p.Type == data.PointTypeTombstone && p.Value == 1: 314 // node was deleted, make sure we don't see it in DB 315 // before restarting client 316 start := time.Now() 317 for { 318 if time.Since(start) > time.Second*5 { 319 log.Println("Client state timeout getting nodes") 320 cs.stop(nil) 321 return 322 } 323 nodes, err := GetNodes(cs.nc, parentID, nodeID, "", false) 324 if err != nil { 325 log.Println("Client state error getting nodes:", err) 326 cs.stop(nil) 327 return 328 } 329 if len(nodes) == 0 { 330 // confirmed the node was deleted 331 cs.stop(nil) 332 return 333 } 334 time.Sleep(time.Millisecond * 10) 335 } 336 337 case (p.Type == data.PointTypeTombstone && p.Value == 0) || 338 p.Type == data.PointTypeNodeType: 339 // node was created or undeleted, make sure we see it in DB 340 // before restarting client 341 start := time.Now() 342 for { 343 if time.Since(start) > time.Second*5 { 344 log.Println("Client state timeout getting nodes") 345 cs.stop(nil) 346 return 347 } 348 nodes, err := GetNodes(cs.nc, parentID, nodeID, "", false) 349 if err != nil { 350 log.Println("Client state error getting nodes:", err) 351 cs.stop(nil) 352 return 353 } 354 if len(nodes) > 0 { 355 // confirmed the node was added 356 cs.stop(nil) 357 return 358 } 359 time.Sleep(time.Millisecond * 10) 360 } 361 } 362 } 363 364 // send edge points to client 365 if cs.client == nil { 366 log.Fatal("Client is nil: ", cs.node.ID) 367 } 368 cs.client.EdgePoints(chunks[2], chunks[3], points) 369 } 370 }) 371 372 if err != nil { 373 return err 374 } 375 376 } 377 378 // remove nodes that have been deleted 379 for key, client := range m.clientStates { 380 if _, ok := found[key]; ok { 381 continue 382 } 383 384 // bus was deleted so close and clear it 385 log.Println("removing client node:", m.clientStates[key].node.ID) 386 client.stop(nil) 387 } 388 389 return nil 390 } 391 392 func mapKey(node data.NodeEdge) string { 393 return node.Parent + "-" + node.ID 394 }