github.com/simpleiot/simpleiot@v0.18.3/client/manager.go (about)

     1  package client
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"reflect"
     7  	"strings"
     8  	"time"
     9  
    10  	"github.com/nats-io/nats.go"
    11  	"github.com/simpleiot/simpleiot/data"
    12  )
    13  
    14  // Manager manages a node type, watches for changes, adds/removes instances that get
    15  // added/deleted
    16  type Manager[T any] struct {
    17  	// initial state
    18  	nc          *nats.Conn
    19  	root        string
    20  	nodeType    string
    21  	parentTypes []string
    22  	construct   func(*nats.Conn, T) Client
    23  
    24  	// synchronization fields
    25  	stop        chan struct{}
    26  	chScan      chan struct{}
    27  	chAction    chan func()
    28  	chCSStopped chan string
    29  	chDeleteCS  chan string
    30  
    31  	// keep track of clients
    32  	clientStates map[string]*clientState[T]
    33  	clientUpSub  map[string]*nats.Subscription
    34  
    35  	// subscription to listen for new points
    36  	upSub *nats.Subscription
    37  }
    38  
    39  // NewManager takes constructor for a node client and returns a Manager for that client
    40  // The Node Type is inferred from the Go type passed in, so you must name Go client
    41  // Types to manage the node type definitions. The manager recursively finds nodes
    42  // that are children of group nodes and the node types found in parentTypes.
    43  func NewManager[T any](nc *nats.Conn,
    44  	construct func(nc *nats.Conn, config T) Client, parentTypes []string) *Manager[T] {
    45  	var x T
    46  	nodeType := data.ToCamelCase(reflect.TypeOf(x).Name())
    47  
    48  	return &Manager[T]{
    49  		nc:           nc,
    50  		nodeType:     nodeType,
    51  		parentTypes:  append(parentTypes, data.NodeTypeGroup),
    52  		construct:    construct,
    53  		stop:         make(chan struct{}),
    54  		chScan:       make(chan struct{}),
    55  		chAction:     make(chan func()),
    56  		chCSStopped:  make(chan string),
    57  		chDeleteCS:   make(chan string),
    58  		clientStates: make(map[string]*clientState[T]),
    59  		clientUpSub:  make(map[string]*nats.Subscription),
    60  	}
    61  }
    62  
    63  // Run node manager. This function looks for children of a certain node type.
    64  // When new nodes are found, the data is decoded into the client type config, and the
    65  // constructor for the node client is called. This call blocks until Stop is called.
    66  func (m *Manager[T]) Run() error {
    67  	nodes, err := GetNodes(m.nc, "root", "all", "", false)
    68  	if err != nil {
    69  		return fmt.Errorf("Manager: Error getting root node: %v", err)
    70  	}
    71  
    72  	if len(nodes) < 1 {
    73  		return fmt.Errorf("Manager: Error no root node")
    74  	}
    75  
    76  	m.root = nodes[0].ID
    77  
    78  	// TODO: it may make sense at some point to have a special topic
    79  	// for new nodes so that all client managers don't have to listen
    80  	// to all points
    81  	m.upSub, err = m.nc.Subscribe("up.root.>", func(msg *nats.Msg) {
    82  		points, err := data.PbDecodePoints(msg.Data)
    83  		if err != nil {
    84  			log.Println("Error decoding points")
    85  			return
    86  		}
    87  
    88  		for _, p := range points {
    89  			if p.Type == data.PointTypeNodeType {
    90  				m.chScan <- struct{}{}
    91  			}
    92  		}
    93  	})
    94  
    95  	if err != nil {
    96  		return err
    97  	}
    98  
    99  	err = m.scan(m.root)
   100  	if err != nil {
   101  		log.Println("Error scanning for new nodes:", err)
   102  	}
   103  
   104  	shutdownTimer := time.NewTimer(time.Hour)
   105  	shutdownTimer.Stop()
   106  
   107  	restartTimer := time.NewTimer(time.Hour)
   108  	restartTimer.Stop()
   109  
   110  	stopping := false
   111  
   112  	scan := func() {
   113  		if stopping {
   114  			return
   115  		}
   116  
   117  		err := m.scan(m.root)
   118  		if err != nil {
   119  			log.Println("Error scanning for new nodes:", err)
   120  		}
   121  	}
   122  
   123  done:
   124  	for {
   125  		select {
   126  		case <-m.stop:
   127  			stopping = true
   128  			_ = m.upSub.Unsubscribe()
   129  			if len(m.clientStates) > 0 {
   130  				for _, c := range m.clientStates {
   131  					c.stop(err)
   132  				}
   133  				shutdownTimer.Reset(time.Second * 5)
   134  			} else {
   135  				break done
   136  			}
   137  		case f := <-m.chAction:
   138  			f()
   139  		case <-time.After(time.Minute):
   140  			scan()
   141  		case <-m.chScan:
   142  			scan()
   143  		case key := <-m.chCSStopped:
   144  			// TODO: the following can be used to wait until all messages
   145  			// have been drained, but have not been able to get this to
   146  			// work reliably without deadlocking
   147  			err = m.clientUpSub[key].Drain()
   148  			if err != nil {
   149  				log.Println("Error unsubscribing subscription:", err)
   150  			}
   151  			start := time.Now()
   152  			for {
   153  				if !m.clientUpSub[key].IsValid() {
   154  					break
   155  				}
   156  				if time.Since(start) > time.Second*1 {
   157  					log.Println("Error: timeout waiting for subscription to drain:", key)
   158  					break
   159  				}
   160  				time.Sleep(10 * time.Millisecond)
   161  			}
   162  
   163  			m.chDeleteCS <- key
   164  		case key := <-m.chDeleteCS:
   165  			err = m.clientUpSub[key].Unsubscribe()
   166  			if err != nil {
   167  				log.Println("Error unsubscribing subscription:", err)
   168  			}
   169  			delete(m.clientUpSub, key)
   170  			// client state must be deleted after the subscription is stopped
   171  			// as the subscription uses it
   172  			delete(m.clientStates, key)
   173  
   174  			if stopping {
   175  				if len(m.clientStates) <= 0 {
   176  					break done
   177  				}
   178  			} else {
   179  				// client may have exitted itself due to child
   180  				// node changes so scan to re-initialize it again
   181  				scan()
   182  			}
   183  		case <-shutdownTimer.C:
   184  			// TODO: should we return an error here?
   185  			log.Println("BUG: Client manager: not all clients shutdown for node type:", m.nodeType)
   186  			for _, v := range m.clientStates {
   187  				log.Println("Client stuck for node:", v.node.ID)
   188  			}
   189  			break done
   190  		}
   191  	}
   192  
   193  	return nil
   194  }
   195  
   196  // Stop manager. This also stops all registered clients and causes Start to exit.
   197  func (m *Manager[T]) Stop(_ error) {
   198  	close(m.stop)
   199  }
   200  
   201  func (m *Manager[T]) scanHelper(id string, nodes []data.NodeEdge) ([]data.NodeEdge, error) {
   202  	children, err := GetNodes(m.nc, id, "all", m.nodeType, false)
   203  	if err != nil {
   204  		return nil, err
   205  	}
   206  
   207  	nodes = append(nodes, children...)
   208  
   209  	// recurse into any nodes that may have children
   210  	for _, parentType := range m.parentTypes {
   211  		parentNodes, err := GetNodes(m.nc, id, "all", parentType, false)
   212  		if err != nil {
   213  			return []data.NodeEdge{}, err
   214  		}
   215  		for _, p := range parentNodes {
   216  			c, err := m.scanHelper(p.ID, nodes)
   217  			if err != nil {
   218  				return nil, err
   219  			}
   220  			nodes = append(nodes, c...)
   221  		}
   222  	}
   223  
   224  	return nodes, nil
   225  }
   226  
   227  func (m *Manager[T]) scan(id string) error {
   228  	nodes, err := m.scanHelper(id, []data.NodeEdge{})
   229  	if err != nil {
   230  		return err
   231  	}
   232  
   233  	if len(nodes) == 0 {
   234  		return nil
   235  	}
   236  
   237  	found := make(map[string]bool)
   238  
   239  	// create new nodes
   240  	for _, n := range nodes {
   241  		key := mapKey(n)
   242  		found[key] = true
   243  
   244  		if _, ok := m.clientStates[key]; ok {
   245  			continue
   246  		}
   247  
   248  		// Need to create a new client
   249  		cs, err := newClientState(m.nc, m.construct, n)
   250  
   251  		if err != nil {
   252  			log.Printf("Error starting client %v: %v", n, err)
   253  		}
   254  
   255  		go func() {
   256  			err := cs.run()
   257  
   258  			if err != nil {
   259  				log.Printf("clientState error %v: %v\n", m.nodeType, err)
   260  			}
   261  
   262  			m.chDeleteCS <- key
   263  		}()
   264  
   265  		m.clientStates[key] = cs
   266  
   267  		// Set up subscriptions
   268  		subject := fmt.Sprintf("up.%v.>", cs.node.ID)
   269  
   270  		m.clientUpSub[key], err = cs.nc.Subscribe(subject, func(msg *nats.Msg) {
   271  			points, err := data.PbDecodePoints(msg.Data)
   272  			if err != nil {
   273  				log.Println("Error decoding points")
   274  				return
   275  			}
   276  
   277  			// find node ID for points
   278  			chunks := strings.Split(msg.Subject, ".")
   279  
   280  			if len(chunks) != 3 && len(chunks) != 4 {
   281  				log.Println("up subject malformed:", msg.Subject)
   282  				return
   283  			}
   284  
   285  			nodeID := chunks[2]
   286  
   287  			if len(chunks) == 3 {
   288  				// process node points
   289  
   290  				// only filter node points for now. The Shelly client broke badly
   291  				// when we applied the below filtering to edge points as well,
   292  				// probably because the tombstone edge points were filtered.
   293  				// We may optimize this later if we make extensive use of edge
   294  				// points.
   295  				for _, p := range points {
   296  					if p.Origin == "" && nodeID == cs.node.ID {
   297  						// if this point came from the owning client, it already knows about it
   298  						return
   299  					}
   300  
   301  					if p.Origin == cs.node.ID {
   302  						// if this client sent this point, it already knows about it
   303  						return
   304  					}
   305  				}
   306  
   307  				cs.client.Points(nodeID, points)
   308  			} else if len(chunks) == 4 {
   309  				// process edge points
   310  				parentID := chunks[3]
   311  				for _, p := range points {
   312  					switch {
   313  					case p.Type == data.PointTypeTombstone && p.Value == 1:
   314  						// node was deleted, make sure we don't see it in DB
   315  						// before restarting client
   316  						start := time.Now()
   317  						for {
   318  							if time.Since(start) > time.Second*5 {
   319  								log.Println("Client state timeout getting nodes")
   320  								cs.stop(nil)
   321  								return
   322  							}
   323  							nodes, err := GetNodes(cs.nc, parentID, nodeID, "", false)
   324  							if err != nil {
   325  								log.Println("Client state error getting nodes:", err)
   326  								cs.stop(nil)
   327  								return
   328  							}
   329  							if len(nodes) == 0 {
   330  								// confirmed the node was deleted
   331  								cs.stop(nil)
   332  								return
   333  							}
   334  							time.Sleep(time.Millisecond * 10)
   335  						}
   336  
   337  					case (p.Type == data.PointTypeTombstone && p.Value == 0) ||
   338  						p.Type == data.PointTypeNodeType:
   339  						// node was created or undeleted, make sure we see it in DB
   340  						// before restarting client
   341  						start := time.Now()
   342  						for {
   343  							if time.Since(start) > time.Second*5 {
   344  								log.Println("Client state timeout getting nodes")
   345  								cs.stop(nil)
   346  								return
   347  							}
   348  							nodes, err := GetNodes(cs.nc, parentID, nodeID, "", false)
   349  							if err != nil {
   350  								log.Println("Client state error getting nodes:", err)
   351  								cs.stop(nil)
   352  								return
   353  							}
   354  							if len(nodes) > 0 {
   355  								// confirmed the node was added
   356  								cs.stop(nil)
   357  								return
   358  							}
   359  							time.Sleep(time.Millisecond * 10)
   360  						}
   361  					}
   362  				}
   363  
   364  				// send edge points to client
   365  				if cs.client == nil {
   366  					log.Fatal("Client is nil: ", cs.node.ID)
   367  				}
   368  				cs.client.EdgePoints(chunks[2], chunks[3], points)
   369  			}
   370  		})
   371  
   372  		if err != nil {
   373  			return err
   374  		}
   375  
   376  	}
   377  
   378  	// remove nodes that have been deleted
   379  	for key, client := range m.clientStates {
   380  		if _, ok := found[key]; ok {
   381  			continue
   382  		}
   383  
   384  		// bus was deleted so close and clear it
   385  		log.Println("removing client node:", m.clientStates[key].node.ID)
   386  		client.stop(nil)
   387  	}
   388  
   389  	return nil
   390  }
   391  
   392  func mapKey(node data.NodeEdge) string {
   393  	return node.Parent + "-" + node.ID
   394  }