gitee.com/liuxuezhan/go-micro-v1.18.0@v1.0.0/monitor/default.go (about)

     1  package monitor
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"sync"
     7  	"time"
     8  
     9  	"gitee.com/liuxuezhan/go-micro-v1.18.0/client"
    10  	pb "gitee.com/liuxuezhan/go-micro-v1.18.0/debug/service/proto"
    11  	"gitee.com/liuxuezhan/go-micro-v1.18.0/registry"
    12  	"gitee.com/liuxuezhan/go-micro-v1.18.0/registry/cache"
    13  )
    14  
    15  type monitor struct {
    16  	options Options
    17  
    18  	exit     chan bool
    19  	registry cache.Cache
    20  	client   client.Client
    21  
    22  	sync.RWMutex
    23  	running  bool
    24  	services map[string]*Status
    25  }
    26  
    27  func (m *monitor) Check(service string) error {
    28  	status, err := m.check(service)
    29  	if err != nil {
    30  		return err
    31  	}
    32  	m.Lock()
    33  	m.services[service] = status
    34  	m.Unlock()
    35  
    36  	if status.Code != StatusRunning {
    37  		return errors.New(status.Info)
    38  	}
    39  
    40  	return nil
    41  }
    42  
    43  // check provides binary running/failed status.
    44  // In the event Debug.Health cannot be called on a service we reap the node.
    45  func (m *monitor) check(service string) (*Status, error) {
    46  	services, err := m.registry.GetService(service)
    47  	if err != nil {
    48  		return nil, err
    49  	}
    50  
    51  	// create debug client
    52  	debug := pb.NewDebugService(service, m.client)
    53  
    54  	var status *Status
    55  	var gerr error
    56  
    57  	// iterate through multiple versions of a service
    58  	for _, service := range services {
    59  		for _, node := range service.Nodes {
    60  			// TODO: checks that are not just RPC based
    61  			// TODO: better matching of the protocol
    62  			// TODO: maybe everything has to be a go-micro service?
    63  			if node.Metadata["server"] != m.client.String() {
    64  				continue
    65  			}
    66  			// check the transport matches
    67  			if node.Metadata["transport"] != m.client.Options().Transport.String() {
    68  				continue
    69  			}
    70  
    71  			rsp, err := debug.Health(
    72  				context.Background(),
    73  				// empty health request
    74  				&pb.HealthRequest{},
    75  				// call this specific node
    76  				client.WithAddress(node.Address),
    77  				// retry in the event of failure
    78  				client.WithRetries(3),
    79  			)
    80  			if err != nil {
    81  				// save the error
    82  				gerr = err
    83  				continue
    84  			}
    85  
    86  			// expecting ok response status
    87  			if rsp.Status != "ok" {
    88  				gerr = errors.New(rsp.Status)
    89  				continue
    90  			}
    91  
    92  			// no error set status
    93  			status = &Status{
    94  				Code: StatusRunning,
    95  				Info: "running",
    96  			}
    97  		}
    98  	}
    99  
   100  	// if we got the success case return it
   101  	if status != nil {
   102  		return status, nil
   103  	}
   104  
   105  	// if gerr is not nil return it
   106  	if gerr != nil {
   107  		return &Status{
   108  			Code:  StatusFailed,
   109  			Info:  "not running",
   110  			Error: gerr.Error(),
   111  		}, nil
   112  	}
   113  
   114  	// otherwise unknown status
   115  	return &Status{
   116  		Code: StatusUnknown,
   117  		Info: "unknown status",
   118  	}, nil
   119  }
   120  
   121  func (m *monitor) reap() {
   122  	services, err := m.registry.ListServices()
   123  	if err != nil {
   124  		return
   125  	}
   126  
   127  	serviceMap := make(map[string]bool)
   128  	for _, service := range services {
   129  		serviceMap[service.Name] = true
   130  	}
   131  
   132  	m.Lock()
   133  	defer m.Unlock()
   134  
   135  	// range over our watched services
   136  	for service := range m.services {
   137  		// check if the service exists in the registry
   138  		if !serviceMap[service] {
   139  			// if not, delete it in our status map
   140  			delete(m.services, service)
   141  		}
   142  	}
   143  }
   144  
   145  func (m *monitor) run() {
   146  	// check the status every tick
   147  	t := time.NewTicker(time.Minute)
   148  	defer t.Stop()
   149  
   150  	// reap dead services
   151  	t2 := time.NewTicker(time.Hour)
   152  	defer t2.Stop()
   153  
   154  	// list the known services
   155  	services, _ := m.registry.ListServices()
   156  
   157  	// create a check chan of same length
   158  	check := make(chan string, len(services))
   159  
   160  	// front-load the services to watch
   161  	for _, service := range services {
   162  		check <- service.Name
   163  	}
   164  
   165  	for {
   166  		select {
   167  		// exit if we're told to
   168  		case <-m.exit:
   169  			return
   170  		// check a service when told to
   171  		case service := <-check:
   172  			// check the status
   173  			status, err := m.check(service)
   174  			if err != nil {
   175  				status = &Status{
   176  					Code: StatusUnknown,
   177  					Info: "unknown status",
   178  				}
   179  			}
   180  
   181  			// save the status
   182  			m.Lock()
   183  			m.services[service] = status
   184  			m.Unlock()
   185  		// on the tick interval get all services and issue a check
   186  		case <-t.C:
   187  			// create a list of services
   188  			serviceMap := make(map[string]bool)
   189  
   190  			m.RLock()
   191  			for service := range m.services {
   192  				serviceMap[service] = true
   193  			}
   194  			m.RUnlock()
   195  
   196  			go func() {
   197  				// check the status of all watched services
   198  				for service := range serviceMap {
   199  					select {
   200  					case <-m.exit:
   201  						return
   202  					case check <- service:
   203  					default:
   204  						// barf if we block
   205  					}
   206  				}
   207  
   208  				// list services
   209  				services, _ := m.registry.ListServices()
   210  
   211  				for _, service := range services {
   212  					// start watching the service
   213  					if ok := serviceMap[service.Name]; !ok {
   214  						m.Watch(service.Name)
   215  					}
   216  				}
   217  			}()
   218  		case <-t2.C:
   219  			// reap any dead/non-existent services
   220  			m.reap()
   221  		}
   222  	}
   223  }
   224  
   225  func (m *monitor) Reap(service string) error {
   226  	services, err := m.registry.GetService(service)
   227  	if err != nil {
   228  		return nil
   229  	}
   230  	m.Lock()
   231  	defer m.Unlock()
   232  	delete(m.services, service)
   233  	for _, service := range services {
   234  		m.registry.Deregister(service)
   235  	}
   236  	return nil
   237  }
   238  
   239  func (m *monitor) Status(service string) (Status, error) {
   240  	m.RLock()
   241  	defer m.RUnlock()
   242  	if status, ok := m.services[service]; ok {
   243  		return *status, nil
   244  	}
   245  	return Status{}, ErrNotWatching
   246  }
   247  
   248  func (m *monitor) Watch(service string) error {
   249  	m.Lock()
   250  	defer m.Unlock()
   251  
   252  	// check if we're watching
   253  	if _, ok := m.services[service]; ok {
   254  		return nil
   255  	}
   256  
   257  	// get the status
   258  	status, err := m.check(service)
   259  	if err != nil {
   260  		return err
   261  	}
   262  
   263  	// set the status
   264  	m.services[service] = status
   265  	return nil
   266  }
   267  
   268  func (m *monitor) Run() error {
   269  	m.Lock()
   270  	defer m.Unlock()
   271  
   272  	if m.running {
   273  		return nil
   274  	}
   275  
   276  	// reset the exit channel
   277  	m.exit = make(chan bool)
   278  	// setup a new cache
   279  	m.registry = cache.New(m.options.Registry)
   280  
   281  	// start running
   282  	go m.run()
   283  
   284  	// set to running
   285  	m.running = true
   286  
   287  	return nil
   288  }
   289  
   290  func (m *monitor) Stop() error {
   291  	m.Lock()
   292  	defer m.Unlock()
   293  
   294  	if !m.running {
   295  		return nil
   296  	}
   297  
   298  	select {
   299  	case <-m.exit:
   300  		return nil
   301  	default:
   302  		close(m.exit)
   303  		for s := range m.services {
   304  			delete(m.services, s)
   305  		}
   306  		m.registry.Stop()
   307  		m.running = false
   308  		return nil
   309  	}
   310  }
   311  
   312  func newMonitor(opts ...Option) Monitor {
   313  	options := Options{
   314  		Client:   client.DefaultClient,
   315  		Registry: registry.DefaultRegistry,
   316  	}
   317  
   318  	for _, o := range opts {
   319  		o(&options)
   320  	}
   321  
   322  	return &monitor{
   323  		options:  options,
   324  		exit:     make(chan bool),
   325  		client:   options.Client,
   326  		registry: cache.New(options.Registry),
   327  		services: make(map[string]*Status),
   328  	}
   329  }