gitee.com/liuxuezhan/go-micro-v1.18.0@v1.0.0/monitor/default.go (about) 1 package monitor 2 3 import ( 4 "context" 5 "errors" 6 "sync" 7 "time" 8 9 "gitee.com/liuxuezhan/go-micro-v1.18.0/client" 10 pb "gitee.com/liuxuezhan/go-micro-v1.18.0/debug/service/proto" 11 "gitee.com/liuxuezhan/go-micro-v1.18.0/registry" 12 "gitee.com/liuxuezhan/go-micro-v1.18.0/registry/cache" 13 ) 14 15 type monitor struct { 16 options Options 17 18 exit chan bool 19 registry cache.Cache 20 client client.Client 21 22 sync.RWMutex 23 running bool 24 services map[string]*Status 25 } 26 27 func (m *monitor) Check(service string) error { 28 status, err := m.check(service) 29 if err != nil { 30 return err 31 } 32 m.Lock() 33 m.services[service] = status 34 m.Unlock() 35 36 if status.Code != StatusRunning { 37 return errors.New(status.Info) 38 } 39 40 return nil 41 } 42 43 // check provides binary running/failed status. 44 // In the event Debug.Health cannot be called on a service we reap the node. 45 func (m *monitor) check(service string) (*Status, error) { 46 services, err := m.registry.GetService(service) 47 if err != nil { 48 return nil, err 49 } 50 51 // create debug client 52 debug := pb.NewDebugService(service, m.client) 53 54 var status *Status 55 var gerr error 56 57 // iterate through multiple versions of a service 58 for _, service := range services { 59 for _, node := range service.Nodes { 60 // TODO: checks that are not just RPC based 61 // TODO: better matching of the protocol 62 // TODO: maybe everything has to be a go-micro service? 63 if node.Metadata["server"] != m.client.String() { 64 continue 65 } 66 // check the transport matches 67 if node.Metadata["transport"] != m.client.Options().Transport.String() { 68 continue 69 } 70 71 rsp, err := debug.Health( 72 context.Background(), 73 // empty health request 74 &pb.HealthRequest{}, 75 // call this specific node 76 client.WithAddress(node.Address), 77 // retry in the event of failure 78 client.WithRetries(3), 79 ) 80 if err != nil { 81 // save the error 82 gerr = err 83 continue 84 } 85 86 // expecting ok response status 87 if rsp.Status != "ok" { 88 gerr = errors.New(rsp.Status) 89 continue 90 } 91 92 // no error set status 93 status = &Status{ 94 Code: StatusRunning, 95 Info: "running", 96 } 97 } 98 } 99 100 // if we got the success case return it 101 if status != nil { 102 return status, nil 103 } 104 105 // if gerr is not nil return it 106 if gerr != nil { 107 return &Status{ 108 Code: StatusFailed, 109 Info: "not running", 110 Error: gerr.Error(), 111 }, nil 112 } 113 114 // otherwise unknown status 115 return &Status{ 116 Code: StatusUnknown, 117 Info: "unknown status", 118 }, nil 119 } 120 121 func (m *monitor) reap() { 122 services, err := m.registry.ListServices() 123 if err != nil { 124 return 125 } 126 127 serviceMap := make(map[string]bool) 128 for _, service := range services { 129 serviceMap[service.Name] = true 130 } 131 132 m.Lock() 133 defer m.Unlock() 134 135 // range over our watched services 136 for service := range m.services { 137 // check if the service exists in the registry 138 if !serviceMap[service] { 139 // if not, delete it in our status map 140 delete(m.services, service) 141 } 142 } 143 } 144 145 func (m *monitor) run() { 146 // check the status every tick 147 t := time.NewTicker(time.Minute) 148 defer t.Stop() 149 150 // reap dead services 151 t2 := time.NewTicker(time.Hour) 152 defer t2.Stop() 153 154 // list the known services 155 services, _ := m.registry.ListServices() 156 157 // create a check chan of same length 158 check := make(chan string, len(services)) 159 160 // front-load the services to watch 161 for _, service := range services { 162 check <- service.Name 163 } 164 165 for { 166 select { 167 // exit if we're told to 168 case <-m.exit: 169 return 170 // check a service when told to 171 case service := <-check: 172 // check the status 173 status, err := m.check(service) 174 if err != nil { 175 status = &Status{ 176 Code: StatusUnknown, 177 Info: "unknown status", 178 } 179 } 180 181 // save the status 182 m.Lock() 183 m.services[service] = status 184 m.Unlock() 185 // on the tick interval get all services and issue a check 186 case <-t.C: 187 // create a list of services 188 serviceMap := make(map[string]bool) 189 190 m.RLock() 191 for service := range m.services { 192 serviceMap[service] = true 193 } 194 m.RUnlock() 195 196 go func() { 197 // check the status of all watched services 198 for service := range serviceMap { 199 select { 200 case <-m.exit: 201 return 202 case check <- service: 203 default: 204 // barf if we block 205 } 206 } 207 208 // list services 209 services, _ := m.registry.ListServices() 210 211 for _, service := range services { 212 // start watching the service 213 if ok := serviceMap[service.Name]; !ok { 214 m.Watch(service.Name) 215 } 216 } 217 }() 218 case <-t2.C: 219 // reap any dead/non-existent services 220 m.reap() 221 } 222 } 223 } 224 225 func (m *monitor) Reap(service string) error { 226 services, err := m.registry.GetService(service) 227 if err != nil { 228 return nil 229 } 230 m.Lock() 231 defer m.Unlock() 232 delete(m.services, service) 233 for _, service := range services { 234 m.registry.Deregister(service) 235 } 236 return nil 237 } 238 239 func (m *monitor) Status(service string) (Status, error) { 240 m.RLock() 241 defer m.RUnlock() 242 if status, ok := m.services[service]; ok { 243 return *status, nil 244 } 245 return Status{}, ErrNotWatching 246 } 247 248 func (m *monitor) Watch(service string) error { 249 m.Lock() 250 defer m.Unlock() 251 252 // check if we're watching 253 if _, ok := m.services[service]; ok { 254 return nil 255 } 256 257 // get the status 258 status, err := m.check(service) 259 if err != nil { 260 return err 261 } 262 263 // set the status 264 m.services[service] = status 265 return nil 266 } 267 268 func (m *monitor) Run() error { 269 m.Lock() 270 defer m.Unlock() 271 272 if m.running { 273 return nil 274 } 275 276 // reset the exit channel 277 m.exit = make(chan bool) 278 // setup a new cache 279 m.registry = cache.New(m.options.Registry) 280 281 // start running 282 go m.run() 283 284 // set to running 285 m.running = true 286 287 return nil 288 } 289 290 func (m *monitor) Stop() error { 291 m.Lock() 292 defer m.Unlock() 293 294 if !m.running { 295 return nil 296 } 297 298 select { 299 case <-m.exit: 300 return nil 301 default: 302 close(m.exit) 303 for s := range m.services { 304 delete(m.services, s) 305 } 306 m.registry.Stop() 307 m.running = false 308 return nil 309 } 310 } 311 312 func newMonitor(opts ...Option) Monitor { 313 options := Options{ 314 Client: client.DefaultClient, 315 Registry: registry.DefaultRegistry, 316 } 317 318 for _, o := range opts { 319 o(&options) 320 } 321 322 return &monitor{ 323 options: options, 324 exit: make(chan bool), 325 client: options.Client, 326 registry: cache.New(options.Registry), 327 services: make(map[string]*Status), 328 } 329 }