gitee.com/liuxuezhan/go-micro-v1.18.0@v1.0.0/router/default.go (about) 1 package router 2 3 import ( 4 "fmt" 5 "math" 6 "sort" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/google/uuid" 12 "gitee.com/liuxuezhan/go-micro-v1.18.0/registry" 13 "gitee.com/liuxuezhan/go-micro-v1.18.0/util/log" 14 ) 15 16 var ( 17 // AdvertiseEventsTick is time interval in which the router advertises route updates 18 AdvertiseEventsTick = 10 * time.Second 19 // AdvertiseTableTick is time interval in which router advertises all routes found in routing table 20 AdvertiseTableTick = 2 * time.Minute 21 // DefaultAdvertTTL is default advertisement TTL 22 DefaultAdvertTTL = 2 * time.Minute 23 // AdvertSuppress is advert suppression threshold 24 AdvertSuppress = 200.0 25 // AdvertRecover is advert recovery threshold 26 AdvertRecover = 20.0 27 // Penalty for routes processed multiple times 28 Penalty = 100.0 29 // PenaltyHalfLife is the time the advert penalty decays to half its value 30 PenaltyHalfLife = 30.0 31 // MaxSuppressTime defines time after which the suppressed advert is deleted 32 MaxSuppressTime = 90 * time.Second 33 // PenaltyDecay is a coefficient which controls the speed the advert penalty decays 34 PenaltyDecay = math.Log(2) / PenaltyHalfLife 35 ) 36 37 // router implements default router 38 type router struct { 39 sync.RWMutex 40 options Options 41 status Status 42 table *table 43 exit chan struct{} 44 errChan chan error 45 eventChan chan *Event 46 advertWg *sync.WaitGroup 47 wg *sync.WaitGroup 48 49 // advert subscribers 50 sub sync.RWMutex 51 subscribers map[string]chan *Advert 52 } 53 54 // newRouter creates new router and returns it 55 func newRouter(opts ...Option) Router { 56 // get default options 57 options := DefaultOptions() 58 59 // apply requested options 60 for _, o := range opts { 61 o(&options) 62 } 63 64 // set initial status to Stopped 65 status := Status{Code: Stopped, Error: nil} 66 67 return &router{ 68 options: options, 69 status: status, 70 table: newTable(), 71 advertWg: &sync.WaitGroup{}, 72 wg: &sync.WaitGroup{}, 73 subscribers: make(map[string]chan *Advert), 74 } 75 } 76 77 // Init initializes router with given options 78 func (r *router) Init(opts ...Option) error { 79 r.Lock() 80 defer r.Unlock() 81 82 for _, o := range opts { 83 o(&r.options) 84 } 85 86 return nil 87 } 88 89 // Options returns router options 90 func (r *router) Options() Options { 91 r.Lock() 92 options := r.options 93 r.Unlock() 94 95 return options 96 } 97 98 // Table returns routing table 99 func (r *router) Table() Table { 100 return r.table 101 } 102 103 // manageRoute applies action on a given route 104 func (r *router) manageRoute(route Route, action string) error { 105 switch action { 106 case "create": 107 if err := r.table.Create(route); err != nil && err != ErrDuplicateRoute { 108 return fmt.Errorf("failed adding route for service %s: %s", route.Service, err) 109 } 110 case "delete": 111 if err := r.table.Delete(route); err != nil && err != ErrRouteNotFound { 112 return fmt.Errorf("failed deleting route for service %s: %s", route.Service, err) 113 } 114 case "update": 115 if err := r.table.Update(route); err != nil { 116 return fmt.Errorf("failed updating route for service %s: %s", route.Service, err) 117 } 118 case "solicit": 119 // nothing to do here 120 return nil 121 default: 122 return fmt.Errorf("failed to manage route for service %s: unknown action %s", route.Service, action) 123 } 124 125 return nil 126 } 127 128 // manageServiceRoutes applies action to all routes of the service. 129 // It returns error of the action fails with error. 130 func (r *router) manageServiceRoutes(service *registry.Service, action string) error { 131 // action is the routing table action 132 action = strings.ToLower(action) 133 134 // take route action on each service node 135 for _, node := range service.Nodes { 136 route := Route{ 137 Service: service.Name, 138 Address: node.Address, 139 Gateway: "", 140 Network: r.options.Network, 141 Router: r.options.Id, 142 Link: DefaultLink, 143 Metric: DefaultLocalMetric, 144 } 145 146 if err := r.manageRoute(route, action); err != nil { 147 return err 148 } 149 } 150 151 return nil 152 } 153 154 // manageRegistryRoutes applies action to all routes of each service found in the registry. 155 // It returns error if either the services failed to be listed or the routing table action fails. 156 func (r *router) manageRegistryRoutes(reg registry.Registry, action string) error { 157 services, err := reg.ListServices() 158 if err != nil { 159 return fmt.Errorf("failed listing services: %v", err) 160 } 161 162 // add each service node as a separate route 163 for _, service := range services { 164 // get the service to retrieve all its info 165 srvs, err := reg.GetService(service.Name) 166 if err != nil { 167 continue 168 } 169 // manage the routes for all returned services 170 for _, srv := range srvs { 171 if err := r.manageServiceRoutes(srv, action); err != nil { 172 return err 173 } 174 } 175 } 176 177 return nil 178 } 179 180 // watchRegistry watches registry and updates routing table based on the received events. 181 // It returns error if either the registry watcher fails with error or if the routing table update fails. 182 func (r *router) watchRegistry(w registry.Watcher) error { 183 exit := make(chan bool) 184 185 defer func() { 186 // close the exit channel when the go routine finishes 187 close(exit) 188 }() 189 190 // wait in the background for the router to stop 191 // when the router stops, stop the watcher and exit 192 r.wg.Add(1) 193 go func() { 194 defer w.Stop() 195 defer r.wg.Done() 196 197 select { 198 case <-r.exit: 199 return 200 case <-exit: 201 return 202 } 203 }() 204 205 var watchErr error 206 207 for { 208 res, err := w.Next() 209 if err != nil { 210 if err != registry.ErrWatcherStopped { 211 watchErr = err 212 } 213 break 214 } 215 216 if err := r.manageServiceRoutes(res.Service, res.Action); err != nil { 217 return err 218 } 219 } 220 221 return watchErr 222 } 223 224 // watchTable watches routing table entries and either adds or deletes locally registered service to/from network registry 225 // It returns error if the locally registered services either fails to be added/deleted to/from network registry. 226 func (r *router) watchTable(w Watcher) error { 227 exit := make(chan bool) 228 229 defer func() { 230 // close the exit channel when the go routine finishes 231 close(exit) 232 }() 233 234 // wait in the background for the router to stop 235 // when the router stops, stop the watcher and exit 236 r.wg.Add(1) 237 go func() { 238 defer w.Stop() 239 defer r.wg.Done() 240 241 select { 242 case <-r.exit: 243 return 244 case <-exit: 245 return 246 } 247 }() 248 249 var watchErr error 250 251 for { 252 event, err := w.Next() 253 if err != nil { 254 if err != ErrWatcherStopped { 255 watchErr = err 256 } 257 break 258 } 259 260 select { 261 case <-r.exit: 262 close(r.eventChan) 263 return nil 264 case r.eventChan <- event: 265 } 266 } 267 268 // close event channel on error 269 close(r.eventChan) 270 271 return watchErr 272 } 273 274 // publishAdvert publishes router advert to advert channel 275 func (r *router) publishAdvert(advType AdvertType, events []*Event) { 276 a := &Advert{ 277 Id: r.options.Id, 278 Type: advType, 279 TTL: DefaultAdvertTTL, 280 Timestamp: time.Now(), 281 Events: events, 282 } 283 284 r.sub.RLock() 285 for _, sub := range r.subscribers { 286 // now send the message 287 select { 288 case sub <- a: 289 case <-r.exit: 290 r.sub.RUnlock() 291 return 292 } 293 } 294 r.sub.RUnlock() 295 } 296 297 // advertiseTable advertises the whole routing table to the network 298 func (r *router) advertiseTable() error { 299 // create table advertisement ticker 300 ticker := time.NewTicker(AdvertiseTableTick) 301 defer ticker.Stop() 302 303 for { 304 select { 305 case <-ticker.C: 306 // do full table flush 307 events, err := r.flushRouteEvents(Update) 308 if err != nil { 309 return fmt.Errorf("failed flushing routes: %s", err) 310 } 311 312 // advertise routes to subscribers 313 if len(events) > 0 { 314 log.Debugf("Router flushing table with %d events: %s", len(events), r.options.Id) 315 r.advertWg.Add(1) 316 go func() { 317 defer r.advertWg.Done() 318 r.publishAdvert(RouteUpdate, events) 319 }() 320 } 321 case <-r.exit: 322 return nil 323 } 324 } 325 } 326 327 // advert contains a route event to be advertised 328 type advert struct { 329 // event received from routing table 330 event *Event 331 // lastSeen records the time of the last advert update 332 lastSeen time.Time 333 // penalty is current advert penalty 334 penalty float64 335 // isSuppressed flags the advert suppression 336 isSuppressed bool 337 // suppressTime records the time interval the advert has been suppressed for 338 suppressTime time.Time 339 } 340 341 // adverts maintains a map of router adverts 342 type adverts map[uint64]*advert 343 344 // process processes advert 345 // It updates advert timestamp, increments its penalty and 346 // marks upresses or recovers it if it reaches configured thresholds 347 func (m adverts) process(a *advert) error { 348 // lookup advert in adverts 349 hash := a.event.Route.Hash() 350 a, ok := m[hash] 351 if !ok { 352 return fmt.Errorf("advert not found") 353 } 354 355 // decay the event penalty 356 delta := time.Since(a.lastSeen).Seconds() 357 358 // decay advert penalty 359 a.penalty = a.penalty * math.Exp(-delta*PenaltyDecay) 360 service := a.event.Route.Service 361 address := a.event.Route.Address 362 363 // suppress/recover the event based on its penalty level 364 switch { 365 case a.penalty > AdvertSuppress && !a.isSuppressed: 366 log.Debugf("Router suppressing advert %d %.2f for route %s %s", hash, a.penalty, service, address) 367 a.isSuppressed = true 368 a.suppressTime = time.Now() 369 case a.penalty < AdvertRecover && a.isSuppressed: 370 log.Debugf("Router recovering advert %d %.2f for route %s %s", hash, a.penalty, service, address) 371 a.isSuppressed = false 372 } 373 374 // if suppressed, checked how long has it been suppressed for 375 if a.isSuppressed { 376 // max suppression time threshold has been reached, delete the advert 377 if time.Since(a.suppressTime) > MaxSuppressTime { 378 delete(m, hash) 379 return nil 380 } 381 } 382 383 return nil 384 } 385 386 // advertiseEvents advertises routing table events 387 // It suppresses unhealthy flapping events and advertises healthy events upstream. 388 func (r *router) advertiseEvents() error { 389 // ticker to periodically scan event for advertising 390 ticker := time.NewTicker(AdvertiseEventsTick) 391 defer ticker.Stop() 392 393 // adverts is a map of advert events 394 adverts := make(adverts) 395 396 // routing table watcher 397 tableWatcher, err := r.Watch() 398 if err != nil { 399 return fmt.Errorf("failed creating routing table watcher: %v", err) 400 } 401 402 r.wg.Add(1) 403 go func() { 404 defer r.wg.Done() 405 select { 406 case r.errChan <- r.watchTable(tableWatcher): 407 case <-r.exit: 408 } 409 }() 410 411 for { 412 select { 413 case <-ticker.C: 414 // If we're not advertising any events then sip processing them entirely 415 if r.options.Advertise == AdvertiseNone { 416 continue 417 } 418 419 var events []*Event 420 421 // collect all events which are not flapping 422 for key, advert := range adverts { 423 // process the advert 424 if err := adverts.process(advert); err != nil { 425 log.Debugf("Router failed processing advert %d: %v", key, err) 426 continue 427 } 428 // if suppressed go to the next advert 429 if advert.isSuppressed { 430 continue 431 } 432 433 // if we only advertise local routes skip processing anything not link local 434 if r.options.Advertise == AdvertiseLocal && advert.event.Route.Link != "local" { 435 continue 436 } 437 438 // copy the event and append 439 e := new(Event) 440 // this is ok, because router.Event only contains builtin types 441 // and no references so this creates a deep copy of struct Event 442 *e = *(advert.event) 443 events = append(events, e) 444 // delete the advert from adverts 445 delete(adverts, key) 446 } 447 448 // advertise events to subscribers 449 if len(events) > 0 { 450 log.Debugf("Router publishing %d events", len(events)) 451 r.advertWg.Add(1) 452 go func() { 453 defer r.advertWg.Done() 454 r.publishAdvert(RouteUpdate, events) 455 }() 456 } 457 case e := <-r.eventChan: 458 // if event is nil, continue 459 if e == nil { 460 continue 461 } 462 463 // If we're not advertising any events then skip processing them entirely 464 if r.options.Advertise == AdvertiseNone { 465 continue 466 } 467 468 // if we only advertise local routes skip processing anything not link local 469 if r.options.Advertise == AdvertiseLocal && e.Route.Link != "local" { 470 continue 471 } 472 473 now := time.Now() 474 475 log.Debugf("Router processing table event %s for service %s %s", e.Type, e.Route.Service, e.Route.Address) 476 477 // check if we have already registered the route 478 hash := e.Route.Hash() 479 a, ok := adverts[hash] 480 if !ok { 481 a = &advert{ 482 event: e, 483 penalty: Penalty, 484 lastSeen: now, 485 } 486 adverts[hash] = a 487 continue 488 } 489 490 // override the route event only if the previous event was different 491 if a.event.Type != e.Type { 492 a.event = e 493 } 494 495 // process the advert 496 if err := adverts.process(a); err != nil { 497 log.Debugf("Router error processing advert %d: %v", hash, err) 498 continue 499 } 500 501 // update event penalty and timestamp 502 a.lastSeen = now 503 // increment the penalty 504 a.penalty += Penalty 505 log.Debugf("Router advert %d for route %s %s event penalty: %f", hash, a.event.Route.Service, a.event.Route.Address, a.penalty) 506 case <-r.exit: 507 // first wait for the advertiser to finish 508 r.advertWg.Wait() 509 return nil 510 } 511 } 512 } 513 514 // close closes exit channels 515 func (r *router) close() { 516 log.Debugf("Router closing remaining channels") 517 // drain the advertise channel only if advertising 518 if r.status.Code == Advertising { 519 // drain the event channel 520 for range r.eventChan { 521 } 522 523 // close advert subscribers 524 for id, sub := range r.subscribers { 525 select { 526 case <-sub: 527 default: 528 } 529 530 // close the channel 531 close(sub) 532 533 // delete the subscriber 534 r.sub.Lock() 535 delete(r.subscribers, id) 536 r.sub.Unlock() 537 } 538 } 539 540 // mark the router as Stopped and set its Error to nil 541 r.status = Status{Code: Stopped, Error: nil} 542 } 543 544 // watchErrors watches router errors and takes appropriate actions 545 func (r *router) watchErrors() { 546 var err error 547 548 select { 549 case <-r.exit: 550 return 551 case err = <-r.errChan: 552 } 553 554 r.Lock() 555 defer r.Unlock() 556 // if the router is not stopped, stop it 557 if r.status.Code != Stopped { 558 // notify all goroutines to finish 559 close(r.exit) 560 561 // close all the channels 562 r.close() 563 // set the status error 564 if err != nil { 565 r.status.Error = err 566 } 567 } 568 } 569 570 // Start starts the router 571 func (r *router) Start() error { 572 r.Lock() 573 defer r.Unlock() 574 575 // only start if we're stopped 576 if r.status.Code != Stopped { 577 return nil 578 } 579 580 // add all local service routes into the routing table 581 if err := r.manageRegistryRoutes(r.options.Registry, "create"); err != nil { 582 e := fmt.Errorf("failed adding registry routes: %s", err) 583 r.status = Status{Code: Error, Error: e} 584 return e 585 } 586 587 // add default gateway into routing table 588 if r.options.Gateway != "" { 589 // note, the only non-default value is the gateway 590 route := Route{ 591 Service: "*", 592 Address: "*", 593 Gateway: r.options.Gateway, 594 Network: "*", 595 Router: r.options.Id, 596 Link: DefaultLink, 597 Metric: DefaultLocalMetric, 598 } 599 if err := r.table.Create(route); err != nil { 600 e := fmt.Errorf("failed adding default gateway route: %s", err) 601 r.status = Status{Code: Error, Error: e} 602 return e 603 } 604 } 605 606 // create error and exit channels 607 r.errChan = make(chan error, 1) 608 r.exit = make(chan struct{}) 609 610 // registry watcher 611 regWatcher, err := r.options.Registry.Watch() 612 if err != nil { 613 e := fmt.Errorf("failed creating registry watcher: %v", err) 614 r.status = Status{Code: Error, Error: e} 615 return e 616 } 617 618 r.wg.Add(1) 619 go func() { 620 defer r.wg.Done() 621 select { 622 case r.errChan <- r.watchRegistry(regWatcher): 623 case <-r.exit: 624 } 625 }() 626 627 // watch for errors and cleanup 628 r.wg.Add(1) 629 go func() { 630 defer r.wg.Done() 631 r.watchErrors() 632 }() 633 634 // mark router as Running 635 r.status = Status{Code: Running, Error: nil} 636 637 return nil 638 } 639 640 // Advertise stars advertising the routes to the network and returns the advertisements channel to consume from. 641 // If the router is already advertising it returns the channel to consume from. 642 // It returns error if either the router is not running or if the routing table fails to list the routes to advertise. 643 func (r *router) Advertise() (<-chan *Advert, error) { 644 r.Lock() 645 defer r.Unlock() 646 647 switch r.status.Code { 648 case Advertising: 649 advertChan := make(chan *Advert, 128) 650 r.subscribers[uuid.New().String()] = advertChan 651 return advertChan, nil 652 case Running: 653 // list all the routes and pack them into even slice to advertise 654 events, err := r.flushRouteEvents(Create) 655 if err != nil { 656 return nil, fmt.Errorf("failed to flush routes: %s", err) 657 } 658 659 // create event channels 660 r.eventChan = make(chan *Event) 661 662 // create advert channel 663 advertChan := make(chan *Advert, 128) 664 r.subscribers[uuid.New().String()] = advertChan 665 666 // advertise your presence 667 r.advertWg.Add(1) 668 go func() { 669 defer r.advertWg.Done() 670 r.publishAdvert(Announce, events) 671 }() 672 673 r.wg.Add(1) 674 go func() { 675 defer r.wg.Done() 676 select { 677 case r.errChan <- r.advertiseEvents(): 678 case <-r.exit: 679 } 680 }() 681 682 r.advertWg.Add(1) 683 go func() { 684 defer r.advertWg.Done() 685 // advertise the whole routing table 686 select { 687 case r.errChan <- r.advertiseTable(): 688 case <-r.exit: 689 } 690 }() 691 692 // mark router as Running and set its Error to nil 693 r.status = Status{Code: Advertising, Error: nil} 694 695 log.Debugf("Router starting to advertise") 696 return advertChan, nil 697 case Stopped: 698 return nil, fmt.Errorf("not running") 699 } 700 701 return nil, fmt.Errorf("error: %s", r.status.Error) 702 } 703 704 // Process updates the routing table using the advertised values 705 func (r *router) Process(a *Advert) error { 706 // NOTE: event sorting might not be necessary 707 // copy update events intp new slices 708 events := make([]*Event, len(a.Events)) 709 copy(events, a.Events) 710 // sort events by timestamp 711 sort.Slice(events, func(i, j int) bool { 712 return events[i].Timestamp.Before(events[j].Timestamp) 713 }) 714 715 log.Debugf("Router %s processing advert from: %s", r.options.Id, a.Id) 716 717 for _, event := range events { 718 // skip if the router is the origin of this route 719 if event.Route.Router == r.options.Id { 720 log.Debugf("Router skipping processing its own route: %s", r.options.Id) 721 continue 722 } 723 // create a copy of the route 724 route := event.Route 725 action := event.Type 726 log.Debugf("Router %s applying %s from router %s for service %s %s", r.options.Id, action, route.Router, route.Service, route.Address) 727 if err := r.manageRoute(route, action.String()); err != nil { 728 return fmt.Errorf("failed applying action %s to routing table: %s", action, err) 729 } 730 } 731 732 return nil 733 } 734 735 // flushRouteEvents returns a slice of events, one per each route in the routing table 736 func (r *router) flushRouteEvents(evType EventType) ([]*Event, error) { 737 // Do not advertise anything 738 if r.options.Advertise == AdvertiseNone { 739 return []*Event{}, nil 740 } 741 742 // list all routes 743 routes, err := r.table.List() 744 if err != nil { 745 return nil, fmt.Errorf("failed listing routes: %s", err) 746 } 747 748 // Return all the routes 749 if r.options.Advertise == AdvertiseAll { 750 // build a list of events to advertise 751 events := make([]*Event, len(routes)) 752 for i, route := range routes { 753 event := &Event{ 754 Type: evType, 755 Timestamp: time.Now(), 756 Route: route, 757 } 758 events[i] = event 759 } 760 return events, nil 761 } 762 763 // routeMap stores the routes we're going to advertise 764 bestRoutes := make(map[string]Route) 765 766 // set whether we're advertising only local 767 advertiseLocal := r.options.Advertise == AdvertiseLocal 768 769 // go through all routes found in the routing table and collapse them to optimal routes 770 for _, route := range routes { 771 // if we're only advertising local routes 772 if advertiseLocal && route.Link != "local" { 773 continue 774 } 775 776 // now we're going to find the best routes 777 778 routeKey := route.Service + "@" + route.Network 779 current, ok := bestRoutes[routeKey] 780 if !ok { 781 bestRoutes[routeKey] = route 782 continue 783 } 784 // if the current optimal route metric is higher than routing table route, replace it 785 if current.Metric > route.Metric { 786 bestRoutes[routeKey] = route 787 continue 788 } 789 // if the metrics are the same, prefer advertising your own route 790 if current.Metric == route.Metric { 791 if route.Router == r.options.Id { 792 bestRoutes[routeKey] = route 793 continue 794 } 795 } 796 } 797 798 log.Debugf("Router advertising %d %s routes out of %d", len(bestRoutes), r.options.Advertise, len(routes)) 799 800 // build a list of events to advertise 801 events := make([]*Event, len(bestRoutes)) 802 var i int 803 804 for _, route := range bestRoutes { 805 event := &Event{ 806 Type: evType, 807 Timestamp: time.Now(), 808 Route: route, 809 } 810 events[i] = event 811 i++ 812 } 813 814 return events, nil 815 } 816 817 // Solicit advertises all of its routes to the network 818 // It returns error if the router fails to list the routes 819 func (r *router) Solicit() error { 820 events, err := r.flushRouteEvents(Update) 821 if err != nil { 822 return fmt.Errorf("failed solicit routes: %s", err) 823 } 824 825 // advertise the routes 826 r.advertWg.Add(1) 827 828 go func() { 829 r.publishAdvert(Solicitation, events) 830 r.advertWg.Done() 831 }() 832 833 return nil 834 } 835 836 // Lookup routes in the routing table 837 func (r *router) Lookup(q ...QueryOption) ([]Route, error) { 838 return r.table.Query(q...) 839 } 840 841 // Watch routes 842 func (r *router) Watch(opts ...WatchOption) (Watcher, error) { 843 return r.table.Watch(opts...) 844 } 845 846 // Status returns router status 847 func (r *router) Status() Status { 848 r.RLock() 849 defer r.RUnlock() 850 851 // make a copy of the status 852 status := r.status 853 854 return status 855 } 856 857 // Stop stops the router 858 func (r *router) Stop() error { 859 r.Lock() 860 861 log.Debugf("Router shutting down") 862 863 switch r.status.Code { 864 case Stopped, Error: 865 r.Unlock() 866 return r.status.Error 867 case Running, Advertising: 868 // notify all goroutines to finish 869 close(r.exit) 870 871 // close all the channels 872 // NOTE: close marks the router status as Stopped 873 r.close() 874 } 875 r.Unlock() 876 877 log.Debugf("Router waiting for all goroutines to finish") 878 879 // wait for all goroutines to finish 880 r.wg.Wait() 881 882 log.Debugf("Router successfully stopped") 883 884 return nil 885 } 886 887 // String prints debugging information about router 888 func (r *router) String() string { 889 return "memory" 890 }