github.com/osrg/gobgp/v3@v3.30.0/pkg/server/fsm.go (about) 1 // Copyright (C) 2014-2021 Nippon Telegraph and Telephone Corporation. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 12 // implied. 13 // See the License for the specific language governing permissions and 14 // limitations under the License. 15 16 package server 17 18 import ( 19 "context" 20 "fmt" 21 "io" 22 "math/rand" 23 "net" 24 "os" 25 "strconv" 26 "sync" 27 "syscall" 28 "time" 29 30 "github.com/eapache/channels" 31 "github.com/osrg/gobgp/v3/internal/pkg/table" 32 "github.com/osrg/gobgp/v3/internal/pkg/version" 33 "github.com/osrg/gobgp/v3/pkg/config/oc" 34 "github.com/osrg/gobgp/v3/pkg/log" 35 "github.com/osrg/gobgp/v3/pkg/packet/bgp" 36 "github.com/osrg/gobgp/v3/pkg/packet/bmp" 37 ) 38 39 const ( 40 minConnectRetryInterval = 5 41 ) 42 43 type fsmStateReasonType uint8 44 45 const ( 46 fsmDying fsmStateReasonType = iota 47 fsmAdminDown 48 fsmReadFailed 49 fsmWriteFailed 50 fsmNotificationSent 51 fsmNotificationRecv 52 fsmHoldTimerExpired 53 fsmIdleTimerExpired 54 fsmRestartTimerExpired 55 fsmGracefulRestart 56 fsmInvalidMsg 57 fsmNewConnection 58 fsmOpenMsgReceived 59 fsmOpenMsgNegotiated 60 fsmHardReset 61 fsmDeConfigured 62 ) 63 64 type fsmStateReason struct { 65 Type fsmStateReasonType 66 BGPNotification *bgp.BGPMessage 67 Data []byte 68 } 69 70 func newfsmStateReason(typ fsmStateReasonType, notif *bgp.BGPMessage, data []byte) *fsmStateReason { 71 return &fsmStateReason{ 72 Type: typ, 73 BGPNotification: notif, 74 Data: data, 75 } 76 } 77 78 func (r fsmStateReason) String() string { 79 switch r.Type { 80 case fsmDying: 81 return "dying" 82 case fsmAdminDown: 83 return "admin-down" 84 case fsmReadFailed: 85 return "read-failed" 86 case fsmWriteFailed: 87 return "write-failed" 88 case fsmNotificationSent: 89 body := r.BGPNotification.Body.(*bgp.BGPNotification) 90 return fmt.Sprintf("notification-sent %s", bgp.NewNotificationErrorCode(body.ErrorCode, body.ErrorSubcode).String()) 91 case fsmNotificationRecv: 92 body := r.BGPNotification.Body.(*bgp.BGPNotification) 93 return fmt.Sprintf("notification-received %s", bgp.NewNotificationErrorCode(body.ErrorCode, body.ErrorSubcode).String()) 94 case fsmHoldTimerExpired: 95 return "hold-timer-expired" 96 case fsmIdleTimerExpired: 97 return "idle-hold-timer-expired" 98 case fsmRestartTimerExpired: 99 return "restart-timer-expired" 100 case fsmGracefulRestart: 101 return "graceful-restart" 102 case fsmInvalidMsg: 103 return "invalid-msg" 104 case fsmNewConnection: 105 return "new-connection" 106 case fsmOpenMsgReceived: 107 return "open-msg-received" 108 case fsmOpenMsgNegotiated: 109 return "open-msg-negotiated" 110 case fsmHardReset: 111 return "hard-reset" 112 default: 113 return "unknown" 114 } 115 } 116 117 type fsmMsgType int 118 119 const ( 120 _ fsmMsgType = iota 121 fsmMsgStateChange 122 fsmMsgBGPMessage 123 fsmMsgRouteRefresh 124 ) 125 126 type fsmMsg struct { 127 MsgType fsmMsgType 128 fsm *fsm 129 MsgSrc string 130 MsgData interface{} 131 StateReason *fsmStateReason 132 PathList []*table.Path 133 timestamp time.Time 134 payload []byte 135 } 136 137 type fsmOutgoingMsg struct { 138 Paths []*table.Path 139 Notification *bgp.BGPMessage 140 StayIdle bool 141 } 142 143 const ( 144 holdtimeOpensent = 240 145 holdtimeIdle = 5 146 ) 147 148 type adminState int 149 150 const ( 151 adminStateUp adminState = iota 152 adminStateDown 153 adminStatePfxCt 154 ) 155 156 func (s adminState) String() string { 157 switch s { 158 case adminStateUp: 159 return "adminStateUp" 160 case adminStateDown: 161 return "adminStateDown" 162 case adminStatePfxCt: 163 return "adminStatePfxCt" 164 default: 165 return "Unknown" 166 } 167 } 168 169 type adminStateOperation struct { 170 State adminState 171 Communication []byte 172 } 173 174 type fsm struct { 175 gConf *oc.Global 176 pConf *oc.Neighbor 177 lock sync.RWMutex 178 state bgp.FSMState 179 outgoingCh *channels.InfiniteChannel 180 incomingCh *channels.InfiniteChannel 181 reason *fsmStateReason 182 conn net.Conn 183 connCh chan net.Conn 184 idleHoldTime float64 185 opensentHoldTime float64 186 adminState adminState 187 adminStateCh chan adminStateOperation 188 h *fsmHandler 189 rfMap map[bgp.RouteFamily]bgp.BGPAddPathMode 190 capMap map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface 191 recvOpen *bgp.BGPMessage 192 peerInfo *table.PeerInfo 193 gracefulRestartTimer *time.Timer 194 twoByteAsTrans bool 195 marshallingOptions *bgp.MarshallingOption 196 notification chan *bgp.BGPMessage 197 logger log.Logger 198 } 199 200 func (fsm *fsm) bgpMessageStateUpdate(MessageType uint8, isIn bool) { 201 fsm.lock.Lock() 202 defer fsm.lock.Unlock() 203 state := &fsm.pConf.State.Messages 204 timer := &fsm.pConf.Timers 205 if isIn { 206 state.Received.Total++ 207 } else { 208 state.Sent.Total++ 209 } 210 switch MessageType { 211 case bgp.BGP_MSG_OPEN: 212 if isIn { 213 state.Received.Open++ 214 } else { 215 state.Sent.Open++ 216 } 217 case bgp.BGP_MSG_UPDATE: 218 if isIn { 219 state.Received.Update++ 220 timer.State.UpdateRecvTime = time.Now().Unix() 221 } else { 222 state.Sent.Update++ 223 } 224 case bgp.BGP_MSG_NOTIFICATION: 225 if isIn { 226 state.Received.Notification++ 227 } else { 228 state.Sent.Notification++ 229 } 230 case bgp.BGP_MSG_KEEPALIVE: 231 if isIn { 232 state.Received.Keepalive++ 233 } else { 234 state.Sent.Keepalive++ 235 } 236 case bgp.BGP_MSG_ROUTE_REFRESH: 237 if isIn { 238 state.Received.Refresh++ 239 } else { 240 state.Sent.Refresh++ 241 } 242 default: 243 if isIn { 244 state.Received.Discarded++ 245 } else { 246 state.Sent.Discarded++ 247 } 248 } 249 } 250 251 func (fsm *fsm) bmpStatsUpdate(statType uint16, increment int) { 252 fsm.lock.Lock() 253 defer fsm.lock.Unlock() 254 stats := &fsm.pConf.State.Messages.Received 255 switch statType { 256 // TODO 257 // Support other stat types. 258 case bmp.BMP_STAT_TYPE_WITHDRAW_UPDATE: 259 stats.WithdrawUpdate += uint32(increment) 260 case bmp.BMP_STAT_TYPE_WITHDRAW_PREFIX: 261 stats.WithdrawPrefix += uint32(increment) 262 } 263 } 264 265 func newFSM(gConf *oc.Global, pConf *oc.Neighbor, logger log.Logger) *fsm { 266 adminState := adminStateUp 267 if pConf.Config.AdminDown { 268 adminState = adminStateDown 269 } 270 pConf.State.SessionState = oc.IntToSessionStateMap[int(bgp.BGP_FSM_IDLE)] 271 pConf.Timers.State.Downtime = time.Now().Unix() 272 fsm := &fsm{ 273 gConf: gConf, 274 pConf: pConf, 275 state: bgp.BGP_FSM_IDLE, 276 outgoingCh: channels.NewInfiniteChannel(), 277 incomingCh: channels.NewInfiniteChannel(), 278 connCh: make(chan net.Conn, 1), 279 opensentHoldTime: float64(holdtimeOpensent), 280 adminState: adminState, 281 adminStateCh: make(chan adminStateOperation, 1), 282 rfMap: make(map[bgp.RouteFamily]bgp.BGPAddPathMode), 283 capMap: make(map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface), 284 peerInfo: table.NewPeerInfo(gConf, pConf), 285 gracefulRestartTimer: time.NewTimer(time.Hour), 286 notification: make(chan *bgp.BGPMessage, 1), 287 logger: logger, 288 } 289 fsm.gracefulRestartTimer.Stop() 290 return fsm 291 } 292 293 func (fsm *fsm) StateChange(nextState bgp.FSMState) { 294 fsm.lock.Lock() 295 defer fsm.lock.Unlock() 296 297 fsm.logger.Debug("state changed", 298 log.Fields{ 299 "Topic": "Peer", 300 "Key": fsm.pConf.State.NeighborAddress, 301 "old": fsm.state.String(), 302 "new": nextState.String(), 303 "reason": fsm.reason}) 304 fsm.state = nextState 305 switch nextState { 306 case bgp.BGP_FSM_ESTABLISHED: 307 fsm.pConf.Timers.State.Uptime = time.Now().Unix() 308 fsm.pConf.State.EstablishedCount++ 309 // reset the state set by the previous session 310 fsm.twoByteAsTrans = false 311 if _, y := fsm.capMap[bgp.BGP_CAP_FOUR_OCTET_AS_NUMBER]; !y { 312 fsm.twoByteAsTrans = true 313 break 314 } 315 y := func() bool { 316 for _, c := range capabilitiesFromConfig(fsm.pConf) { 317 switch c.(type) { 318 case *bgp.CapFourOctetASNumber: 319 return true 320 } 321 } 322 return false 323 }() 324 if !y { 325 fsm.twoByteAsTrans = true 326 } 327 default: 328 fsm.pConf.Timers.State.Downtime = time.Now().Unix() 329 } 330 } 331 332 func hostport(addr net.Addr) (string, uint16) { 333 if addr != nil { 334 host, port, err := net.SplitHostPort(addr.String()) 335 if err != nil { 336 return "", 0 337 } 338 p, _ := strconv.ParseUint(port, 10, 16) 339 return host, uint16(p) 340 } 341 return "", 0 342 } 343 344 func (fsm *fsm) RemoteHostPort() (string, uint16) { 345 return hostport(fsm.conn.RemoteAddr()) 346 347 } 348 349 func (fsm *fsm) LocalHostPort() (string, uint16) { 350 return hostport(fsm.conn.LocalAddr()) 351 } 352 353 func (fsm *fsm) sendNotificationFromErrorMsg(e *bgp.MessageError) (*bgp.BGPMessage, error) { 354 fsm.lock.RLock() 355 established := fsm.h != nil && fsm.h.conn != nil 356 fsm.lock.RUnlock() 357 358 if established { 359 m := bgp.NewBGPNotificationMessage(e.TypeCode, e.SubTypeCode, e.Data) 360 b, _ := m.Serialize() 361 _, err := fsm.h.conn.Write(b) 362 if err == nil { 363 fsm.bgpMessageStateUpdate(m.Header.Type, false) 364 fsm.h.sentNotification = m 365 } 366 fsm.h.conn.Close() 367 fsm.logger.Warn("sent notification", 368 log.Fields{ 369 "Topic": "Peer", 370 "Key": fsm.pConf.State.NeighborAddress, 371 "Data": e}) 372 return m, nil 373 } 374 return nil, fmt.Errorf("can't send notification to %s since TCP connection is not established", fsm.pConf.State.NeighborAddress) 375 } 376 377 func (fsm *fsm) sendNotification(code, subType uint8, data []byte, msg string) (*bgp.BGPMessage, error) { 378 e := bgp.NewMessageError(code, subType, data, msg) 379 return fsm.sendNotificationFromErrorMsg(e.(*bgp.MessageError)) 380 } 381 382 type fsmHandler struct { 383 fsm *fsm 384 conn net.Conn 385 msgCh *channels.InfiniteChannel 386 stateReasonCh chan fsmStateReason 387 incoming *channels.InfiniteChannel 388 outgoing *channels.InfiniteChannel 389 holdTimerResetCh chan bool 390 sentNotification *bgp.BGPMessage 391 ctx context.Context 392 ctxCancel context.CancelFunc 393 wg *sync.WaitGroup 394 } 395 396 func newFSMHandler(fsm *fsm, outgoing *channels.InfiniteChannel) *fsmHandler { 397 ctx, cancel := context.WithCancel(context.Background()) 398 h := &fsmHandler{ 399 fsm: fsm, 400 stateReasonCh: make(chan fsmStateReason, 2), 401 incoming: fsm.incomingCh, 402 outgoing: outgoing, 403 holdTimerResetCh: make(chan bool, 2), 404 wg: &sync.WaitGroup{}, 405 ctx: ctx, 406 ctxCancel: cancel, 407 } 408 h.wg.Add(1) 409 go h.loop(ctx, h.wg) 410 return h 411 } 412 413 func (h *fsmHandler) idle(ctx context.Context) (bgp.FSMState, *fsmStateReason) { 414 fsm := h.fsm 415 416 fsm.lock.RLock() 417 idleHoldTimer := time.NewTimer(time.Second * time.Duration(fsm.idleHoldTime)) 418 fsm.lock.RUnlock() 419 420 for { 421 select { 422 case <-ctx.Done(): 423 return -1, newfsmStateReason(fsmDying, nil, nil) 424 case <-fsm.gracefulRestartTimer.C: 425 fsm.lock.RLock() 426 restarting := fsm.pConf.GracefulRestart.State.PeerRestarting 427 fsm.lock.RUnlock() 428 429 if restarting { 430 fsm.lock.RLock() 431 fsm.logger.Warn("graceful restart timer expired", 432 log.Fields{ 433 "Topic": "Peer", 434 "Key": fsm.pConf.State.NeighborAddress, 435 "State": fsm.state.String()}) 436 fsm.lock.RUnlock() 437 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmRestartTimerExpired, nil, nil) 438 } 439 case conn, ok := <-fsm.connCh: 440 if !ok { 441 break 442 } 443 conn.Close() 444 fsm.lock.RLock() 445 fsm.logger.Warn("Closed an accepted connection", 446 log.Fields{ 447 "Topic": "Peer", 448 "Key": fsm.pConf.State.NeighborAddress, 449 "State": fsm.state.String()}) 450 fsm.lock.RUnlock() 451 452 case <-idleHoldTimer.C: 453 fsm.lock.RLock() 454 adminStateUp := fsm.adminState == adminStateUp 455 fsm.lock.RUnlock() 456 457 if adminStateUp { 458 fsm.lock.Lock() 459 fsm.logger.Debug("IdleHoldTimer expired", 460 log.Fields{ 461 "Topic": "Peer", 462 "Key": fsm.pConf.State.NeighborAddress, 463 "Duration": fsm.idleHoldTime}) 464 fsm.idleHoldTime = holdtimeIdle 465 fsm.lock.Unlock() 466 return bgp.BGP_FSM_ACTIVE, newfsmStateReason(fsmIdleTimerExpired, nil, nil) 467 } else { 468 fsm.logger.Debug("IdleHoldTimer expired, but stay at idle because the admin state is DOWN", 469 log.Fields{ 470 "Topic": "Peer", 471 "Key": fsm.pConf.State.NeighborAddress}) 472 } 473 474 case stateOp := <-fsm.adminStateCh: 475 err := h.changeadminState(stateOp.State) 476 if err == nil { 477 switch stateOp.State { 478 case adminStateDown: 479 // stop idle hold timer 480 idleHoldTimer.Stop() 481 482 case adminStateUp: 483 // restart idle hold timer 484 fsm.lock.RLock() 485 idleHoldTimer.Reset(time.Second * time.Duration(fsm.idleHoldTime)) 486 fsm.lock.RUnlock() 487 } 488 } 489 } 490 } 491 } 492 493 func (h *fsmHandler) connectLoop(ctx context.Context, wg *sync.WaitGroup) { 494 defer wg.Done() 495 fsm := h.fsm 496 497 retry, addr, port, password, ttl, ttlMin, mss, localAddress, localPort, bindInterface := func() (int, string, int, string, uint8, uint8, uint16, string, int, string) { 498 fsm.lock.RLock() 499 defer fsm.lock.RUnlock() 500 501 tick := int(fsm.pConf.Timers.Config.ConnectRetry) 502 if tick < minConnectRetryInterval { 503 tick = minConnectRetryInterval 504 } 505 506 addr := fsm.pConf.State.NeighborAddress 507 port := int(bgp.BGP_PORT) 508 if fsm.pConf.Transport.Config.RemotePort != 0 { 509 port = int(fsm.pConf.Transport.Config.RemotePort) 510 } 511 password := fsm.pConf.Config.AuthPassword 512 ttl := uint8(0) 513 ttlMin := uint8(0) 514 515 if fsm.pConf.TtlSecurity.Config.Enabled { 516 ttl = 255 517 ttlMin = fsm.pConf.TtlSecurity.Config.TtlMin 518 } else if fsm.pConf.Config.PeerAs != 0 && fsm.pConf.Config.PeerType == oc.PEER_TYPE_EXTERNAL { 519 ttl = 1 520 if fsm.pConf.EbgpMultihop.Config.Enabled { 521 ttl = fsm.pConf.EbgpMultihop.Config.MultihopTtl 522 } 523 } 524 return tick, addr, port, password, ttl, ttlMin, fsm.pConf.Transport.Config.TcpMss, fsm.pConf.Transport.Config.LocalAddress, int(fsm.pConf.Transport.Config.LocalPort), fsm.pConf.Transport.Config.BindInterface 525 }() 526 527 tick := minConnectRetryInterval 528 for { 529 r := rand.New(rand.NewSource(time.Now().UnixNano())) 530 timer := time.NewTimer(time.Duration(r.Intn(tick)+tick) * time.Second) 531 select { 532 case <-ctx.Done(): 533 fsm.logger.Debug("stop connect loop", 534 log.Fields{ 535 "Topic": "Peer", 536 "Key": addr}) 537 timer.Stop() 538 return 539 case <-timer.C: 540 if fsm.logger.GetLevel() >= log.DebugLevel { 541 fsm.logger.Debug("try to connect", 542 log.Fields{ 543 "Topic": "Peer", 544 "Key": addr}) 545 } 546 } 547 548 laddr, err := net.ResolveTCPAddr("tcp", net.JoinHostPort(localAddress, strconv.Itoa(localPort))) 549 if err != nil { 550 fsm.logger.Warn("failed to resolve local address", 551 log.Fields{ 552 "Topic": "Peer", 553 "Key": addr}) 554 } 555 556 if err == nil { 557 d := net.Dialer{ 558 LocalAddr: laddr, 559 Timeout: time.Duration(tick-1) * time.Second, 560 Control: func(network, address string, c syscall.RawConn) error { 561 return dialerControl(fsm.logger, network, address, c, ttl, ttlMin, mss, password, bindInterface) 562 }, 563 } 564 565 conn, err := d.DialContext(ctx, "tcp", net.JoinHostPort(addr, strconv.Itoa(port))) 566 select { 567 case <-ctx.Done(): 568 fsm.logger.Debug("stop connect loop", 569 log.Fields{ 570 "Topic": "Peer", 571 "Key": addr}) 572 return 573 default: 574 } 575 576 if err == nil { 577 select { 578 case fsm.connCh <- conn: 579 return 580 default: 581 conn.Close() 582 fsm.logger.Warn("active conn is closed to avoid being blocked", 583 log.Fields{ 584 "Topic": "Peer", 585 "Key": addr}) 586 } 587 } else { 588 if fsm.logger.GetLevel() >= log.DebugLevel { 589 fsm.logger.Debug("failed to connect", 590 log.Fields{ 591 "Topic": "Peer", 592 "Key": addr, 593 "Error": err}) 594 } 595 } 596 } 597 tick = retry 598 } 599 } 600 601 func (h *fsmHandler) active(ctx context.Context) (bgp.FSMState, *fsmStateReason) { 602 c, cancel := context.WithCancel(ctx) 603 604 fsm := h.fsm 605 var wg sync.WaitGroup 606 607 fsm.lock.RLock() 608 tryConnect := !fsm.pConf.Transport.Config.PassiveMode 609 fsm.lock.RUnlock() 610 if tryConnect { 611 wg.Add(1) 612 go h.connectLoop(c, &wg) 613 } 614 615 defer func() { 616 cancel() 617 wg.Wait() 618 }() 619 620 for { 621 select { 622 case <-ctx.Done(): 623 return -1, newfsmStateReason(fsmDying, nil, nil) 624 case conn, ok := <-fsm.connCh: 625 if !ok { 626 break 627 } 628 fsm.lock.Lock() 629 fsm.conn = conn 630 fsm.lock.Unlock() 631 632 fsm.lock.RLock() 633 if err := setPeerConnTTL(fsm); err != nil { 634 fsm.logger.Warn("cannot set TTL for peer", 635 log.Fields{ 636 "Topic": "Peer", 637 "Key": fsm.pConf.Config.NeighborAddress, 638 "State": fsm.state.String(), 639 "Error": err}) 640 } 641 if err := setPeerConnMSS(fsm); err != nil { 642 fsm.logger.Warn("cannot set MSS for peer", 643 log.Fields{ 644 "Topic": "Peer", 645 "Key": fsm.pConf.Config.NeighborAddress, 646 "State": fsm.state.String(), 647 "Error": err}) 648 } 649 fsm.lock.RUnlock() 650 // we don't implement delayed open timer so move to opensent right 651 // away. 652 return bgp.BGP_FSM_OPENSENT, newfsmStateReason(fsmNewConnection, nil, nil) 653 case <-fsm.gracefulRestartTimer.C: 654 fsm.lock.RLock() 655 restarting := fsm.pConf.GracefulRestart.State.PeerRestarting 656 fsm.lock.RUnlock() 657 if restarting { 658 fsm.lock.RLock() 659 fsm.logger.Warn("graceful restart timer expired", 660 log.Fields{ 661 "Topic": "Peer", 662 "Key": fsm.pConf.State.NeighborAddress, 663 "State": fsm.state.String()}) 664 fsm.lock.RUnlock() 665 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmRestartTimerExpired, nil, nil) 666 } 667 case err := <-h.stateReasonCh: 668 return bgp.BGP_FSM_IDLE, &err 669 case stateOp := <-fsm.adminStateCh: 670 err := h.changeadminState(stateOp.State) 671 if err == nil { 672 switch stateOp.State { 673 case adminStateDown: 674 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmAdminDown, nil, nil) 675 case adminStateUp: 676 fsm.logger.Panic("code logic bug", 677 log.Fields{ 678 "Topic": "Peer", 679 "Key": fsm.pConf.State.NeighborAddress, 680 "State": fsm.state.String(), 681 "AdminState": stateOp.State.String()}) 682 } 683 } 684 } 685 } 686 } 687 688 func setPeerConnTTL(fsm *fsm) error { 689 ttl := 0 690 ttlMin := 0 691 692 if fsm.pConf.TtlSecurity.Config.Enabled { 693 ttl = 255 694 ttlMin = int(fsm.pConf.TtlSecurity.Config.TtlMin) 695 } else if fsm.pConf.Config.PeerAs != 0 && fsm.pConf.Config.PeerType == oc.PEER_TYPE_EXTERNAL { 696 if fsm.pConf.EbgpMultihop.Config.Enabled { 697 ttl = int(fsm.pConf.EbgpMultihop.Config.MultihopTtl) 698 } else if fsm.pConf.Transport.Config.Ttl != 0 { 699 ttl = int(fsm.pConf.Transport.Config.Ttl) 700 } else { 701 ttl = 1 702 } 703 } else if fsm.pConf.Transport.Config.Ttl != 0 { 704 ttl = int(fsm.pConf.Transport.Config.Ttl) 705 } 706 707 if ttl != 0 { 708 if err := setTCPTTLSockopt(fsm.conn.(*net.TCPConn), ttl); err != nil { 709 return fmt.Errorf("failed to set TTL %d: %w", ttl, err) 710 } 711 } 712 if ttlMin != 0 { 713 if err := setTCPMinTTLSockopt(fsm.conn.(*net.TCPConn), ttlMin); err != nil { 714 return fmt.Errorf("failed to set minimal TTL %d: %w", ttlMin, err) 715 } 716 } 717 return nil 718 } 719 720 func setPeerConnMSS(fsm *fsm) error { 721 mss := fsm.pConf.Transport.Config.TcpMss 722 if mss == 0 { 723 return nil 724 } 725 if err := setTCPMSSSockopt(fsm.conn.(*net.TCPConn), mss); err != nil { 726 return fmt.Errorf("failed to set MSS %d: %w", mss, err) 727 } 728 return nil 729 } 730 731 func capAddPathFromConfig(pConf *oc.Neighbor) bgp.ParameterCapabilityInterface { 732 tuples := make([]*bgp.CapAddPathTuple, 0, len(pConf.AfiSafis)) 733 for _, af := range pConf.AfiSafis { 734 var mode bgp.BGPAddPathMode 735 if af.AddPaths.State.Receive { 736 mode |= bgp.BGP_ADD_PATH_RECEIVE 737 } 738 if af.AddPaths.State.SendMax > 0 { 739 mode |= bgp.BGP_ADD_PATH_SEND 740 } 741 if mode > 0 { 742 tuples = append(tuples, bgp.NewCapAddPathTuple(af.State.Family, mode)) 743 } 744 } 745 if len(tuples) == 0 { 746 return nil 747 } 748 return bgp.NewCapAddPath(tuples) 749 } 750 751 func capabilitiesFromConfig(pConf *oc.Neighbor) []bgp.ParameterCapabilityInterface { 752 fqdn, _ := os.Hostname() 753 caps := make([]bgp.ParameterCapabilityInterface, 0, 4) 754 caps = append(caps, bgp.NewCapRouteRefresh()) 755 caps = append(caps, bgp.NewCapFQDN(fqdn, "")) 756 757 if pConf.Config.SendSoftwareVersion || pConf.Config.PeerType == oc.PEER_TYPE_INTERNAL { 758 softwareVersion := fmt.Sprintf("GoBGP/%s", version.Version()) 759 caps = append(caps, bgp.NewCapSoftwareVersion(softwareVersion)) 760 } 761 762 for _, af := range pConf.AfiSafis { 763 caps = append(caps, bgp.NewCapMultiProtocol(af.State.Family)) 764 } 765 caps = append(caps, bgp.NewCapFourOctetASNumber(pConf.Config.LocalAs)) 766 767 if c := pConf.GracefulRestart.Config; c.Enabled { 768 tuples := []*bgp.CapGracefulRestartTuple{} 769 ltuples := []*bgp.CapLongLivedGracefulRestartTuple{} 770 771 // RFC 4724 4.1 772 // To re-establish the session with its peer, the Restarting Speaker 773 // MUST set the "Restart State" bit in the Graceful Restart Capability 774 // of the OPEN message. 775 restarting := pConf.GracefulRestart.State.LocalRestarting 776 777 if !c.HelperOnly { 778 for i, rf := range pConf.AfiSafis { 779 if m := rf.MpGracefulRestart.Config; m.Enabled { 780 // When restarting, always flag forwaring bit. 781 // This can be a lie, depending on how gobgpd is used. 782 // For a route-server use-case, since a route-server 783 // itself doesn't forward packets, and the dataplane 784 // is a l2 switch which continues to work with no 785 // relation to bgpd, this behavior is ok. 786 // TODO consideration of other use-cases 787 tuples = append(tuples, bgp.NewCapGracefulRestartTuple(rf.State.Family, restarting)) 788 pConf.AfiSafis[i].MpGracefulRestart.State.Advertised = true 789 } 790 if m := rf.LongLivedGracefulRestart.Config; m.Enabled { 791 ltuples = append(ltuples, bgp.NewCapLongLivedGracefulRestartTuple(rf.State.Family, restarting, m.RestartTime)) 792 } 793 } 794 } 795 restartTime := c.RestartTime 796 notification := c.NotificationEnabled 797 caps = append(caps, bgp.NewCapGracefulRestart(restarting, notification, restartTime, tuples)) 798 if c.LongLivedEnabled { 799 caps = append(caps, bgp.NewCapLongLivedGracefulRestart(ltuples)) 800 } 801 } 802 803 // Extended Nexthop Capability (Code 5) 804 tuples := []*bgp.CapExtendedNexthopTuple{} 805 families, _ := oc.AfiSafis(pConf.AfiSafis).ToRfList() 806 for _, family := range families { 807 if family == bgp.RF_IPv6_UC { 808 continue 809 } 810 tuple := bgp.NewCapExtendedNexthopTuple(family, bgp.AFI_IP6) 811 tuples = append(tuples, tuple) 812 } 813 if len(tuples) != 0 { 814 caps = append(caps, bgp.NewCapExtendedNexthop(tuples)) 815 } 816 817 // ADD-PATH Capability 818 if c := capAddPathFromConfig(pConf); c != nil { 819 caps = append(caps, capAddPathFromConfig(pConf)) 820 } 821 822 return caps 823 } 824 825 func buildopen(gConf *oc.Global, pConf *oc.Neighbor) *bgp.BGPMessage { 826 caps := capabilitiesFromConfig(pConf) 827 opt := bgp.NewOptionParameterCapability(caps) 828 holdTime := uint16(pConf.Timers.Config.HoldTime) 829 as := pConf.Config.LocalAs 830 if as > (1<<16)-1 { 831 as = bgp.AS_TRANS 832 } 833 return bgp.NewBGPOpenMessage(uint16(as), holdTime, gConf.Config.RouterId, 834 []bgp.OptionParameterInterface{opt}) 835 } 836 837 func readAll(conn net.Conn, length int) ([]byte, error) { 838 buf := make([]byte, length) 839 _, err := io.ReadFull(conn, buf) 840 if err != nil { 841 return nil, err 842 } 843 return buf, nil 844 } 845 846 func getPathAttrFromBGPUpdate(m *bgp.BGPUpdate, typ bgp.BGPAttrType) bgp.PathAttributeInterface { 847 for _, a := range m.PathAttributes { 848 if a.GetType() == typ { 849 return a 850 } 851 } 852 return nil 853 } 854 855 func hasOwnASLoop(ownAS uint32, limit int, asPath *bgp.PathAttributeAsPath) bool { 856 cnt := 0 857 for _, param := range asPath.Value { 858 for _, as := range param.GetAS() { 859 if as == ownAS { 860 cnt++ 861 if cnt > limit { 862 return true 863 } 864 } 865 } 866 } 867 return false 868 } 869 870 func extractRouteFamily(p *bgp.PathAttributeInterface) *bgp.RouteFamily { 871 attr := *p 872 873 var afi uint16 874 var safi uint8 875 876 switch a := attr.(type) { 877 case *bgp.PathAttributeMpReachNLRI: 878 afi = a.AFI 879 safi = a.SAFI 880 case *bgp.PathAttributeMpUnreachNLRI: 881 afi = a.AFI 882 safi = a.SAFI 883 default: 884 return nil 885 } 886 887 rf := bgp.AfiSafiToRouteFamily(afi, safi) 888 return &rf 889 } 890 891 func (h *fsmHandler) afiSafiDisable(rf bgp.RouteFamily) string { 892 h.fsm.lock.Lock() 893 defer h.fsm.lock.Unlock() 894 895 n := bgp.AddressFamilyNameMap[rf] 896 897 for i, a := range h.fsm.pConf.AfiSafis { 898 if string(a.Config.AfiSafiName) == n { 899 h.fsm.pConf.AfiSafis[i].State.Enabled = false 900 break 901 } 902 } 903 newList := make([]bgp.ParameterCapabilityInterface, 0) 904 for _, c := range h.fsm.capMap[bgp.BGP_CAP_MULTIPROTOCOL] { 905 if c.(*bgp.CapMultiProtocol).CapValue == rf { 906 continue 907 } 908 newList = append(newList, c) 909 } 910 h.fsm.capMap[bgp.BGP_CAP_MULTIPROTOCOL] = newList 911 return n 912 } 913 914 func (h *fsmHandler) handlingError(m *bgp.BGPMessage, e error, useRevisedError bool) bgp.ErrorHandling { 915 // ineffectual assignment to handling (ineffassign) 916 var handling bgp.ErrorHandling 917 if m.Header.Type == bgp.BGP_MSG_UPDATE && useRevisedError { 918 factor := e.(*bgp.MessageError) 919 handling = factor.ErrorHandling 920 switch handling { 921 case bgp.ERROR_HANDLING_ATTRIBUTE_DISCARD: 922 h.fsm.lock.RLock() 923 h.fsm.logger.Warn("Some attributes were discarded", 924 log.Fields{ 925 "Topic": "Peer", 926 "Key": h.fsm.pConf.State.NeighborAddress, 927 "State": h.fsm.state.String(), 928 "Error": e}) 929 h.fsm.lock.RUnlock() 930 case bgp.ERROR_HANDLING_TREAT_AS_WITHDRAW: 931 m.Body = bgp.TreatAsWithdraw(m.Body.(*bgp.BGPUpdate)) 932 h.fsm.lock.RLock() 933 h.fsm.logger.Warn("the received Update message was treated as withdraw", 934 log.Fields{ 935 "Topic": "Peer", 936 "Key": h.fsm.pConf.State.NeighborAddress, 937 "State": h.fsm.state.String(), 938 "Error": e}) 939 h.fsm.lock.RUnlock() 940 case bgp.ERROR_HANDLING_AFISAFI_DISABLE: 941 rf := extractRouteFamily(factor.ErrorAttribute) 942 if rf == nil { 943 h.fsm.lock.RLock() 944 h.fsm.logger.Warn("Error occurred during AFI/SAFI disabling", 945 log.Fields{ 946 "Topic": "Peer", 947 "Key": h.fsm.pConf.State.NeighborAddress, 948 "State": h.fsm.state.String()}) 949 h.fsm.lock.RUnlock() 950 } else { 951 n := h.afiSafiDisable(*rf) 952 h.fsm.lock.RLock() 953 h.fsm.logger.Warn("Capability was disabled", 954 log.Fields{ 955 "Topic": "Peer", 956 "Key": h.fsm.pConf.State.NeighborAddress, 957 "State": h.fsm.state.String(), 958 "Error": e, 959 "Cap": n}) 960 h.fsm.lock.RUnlock() 961 } 962 } 963 } else { 964 handling = bgp.ERROR_HANDLING_SESSION_RESET 965 } 966 return handling 967 } 968 969 func (h *fsmHandler) recvMessageWithError() (*fsmMsg, error) { 970 sendToStateReasonCh := func(typ fsmStateReasonType, notif *bgp.BGPMessage) { 971 // probably doesn't happen but be cautious 972 select { 973 case h.stateReasonCh <- *newfsmStateReason(typ, notif, nil): 974 default: 975 } 976 } 977 978 headerBuf, err := readAll(h.conn, bgp.BGP_HEADER_LENGTH) 979 if err != nil { 980 sendToStateReasonCh(fsmReadFailed, nil) 981 return nil, err 982 } 983 984 hd := &bgp.BGPHeader{} 985 err = hd.DecodeFromBytes(headerBuf) 986 if err != nil { 987 h.fsm.bgpMessageStateUpdate(0, true) 988 h.fsm.lock.RLock() 989 h.fsm.logger.Warn("Session will be reset due to malformed BGP Header", 990 log.Fields{ 991 "Topic": "Peer", 992 "Key": h.fsm.pConf.State.NeighborAddress, 993 "State": h.fsm.state.String(), 994 "Error": err}) 995 fmsg := &fsmMsg{ 996 fsm: h.fsm, 997 MsgType: fsmMsgBGPMessage, 998 MsgSrc: h.fsm.pConf.State.NeighborAddress, 999 MsgData: err, 1000 } 1001 h.fsm.lock.RUnlock() 1002 return fmsg, err 1003 } 1004 1005 bodyBuf, err := readAll(h.conn, int(hd.Len)-bgp.BGP_HEADER_LENGTH) 1006 if err != nil { 1007 sendToStateReasonCh(fsmReadFailed, nil) 1008 return nil, err 1009 } 1010 1011 now := time.Now() 1012 handling := bgp.ERROR_HANDLING_NONE 1013 1014 h.fsm.lock.RLock() 1015 useRevisedError := h.fsm.pConf.ErrorHandling.Config.TreatAsWithdraw 1016 options := h.fsm.marshallingOptions 1017 h.fsm.lock.RUnlock() 1018 1019 m, err := bgp.ParseBGPBody(hd, bodyBuf, options) 1020 if err != nil { 1021 handling = h.handlingError(m, err, useRevisedError) 1022 h.fsm.bgpMessageStateUpdate(0, true) 1023 } else { 1024 h.fsm.bgpMessageStateUpdate(m.Header.Type, true) 1025 err = bgp.ValidateBGPMessage(m) 1026 } 1027 h.fsm.lock.RLock() 1028 fmsg := &fsmMsg{ 1029 fsm: h.fsm, 1030 MsgType: fsmMsgBGPMessage, 1031 MsgSrc: h.fsm.pConf.State.NeighborAddress, 1032 timestamp: now, 1033 } 1034 h.fsm.lock.RUnlock() 1035 1036 switch handling { 1037 case bgp.ERROR_HANDLING_AFISAFI_DISABLE: 1038 fmsg.MsgData = m 1039 return fmsg, nil 1040 case bgp.ERROR_HANDLING_SESSION_RESET: 1041 h.fsm.lock.RLock() 1042 h.fsm.logger.Warn("Session will be reset due to malformed BGP message", 1043 log.Fields{ 1044 "Topic": "Peer", 1045 "Key": h.fsm.pConf.State.NeighborAddress, 1046 "State": h.fsm.state.String(), 1047 "Error": err}) 1048 h.fsm.lock.RUnlock() 1049 fmsg.MsgData = err 1050 return fmsg, err 1051 default: 1052 fmsg.MsgData = m 1053 1054 h.fsm.lock.RLock() 1055 establishedState := h.fsm.state == bgp.BGP_FSM_ESTABLISHED 1056 h.fsm.lock.RUnlock() 1057 1058 if establishedState { 1059 switch m.Header.Type { 1060 case bgp.BGP_MSG_ROUTE_REFRESH: 1061 fmsg.MsgType = fsmMsgRouteRefresh 1062 case bgp.BGP_MSG_UPDATE: 1063 // if the length of h.holdTimerResetCh 1064 // isn't zero, the timer will be reset 1065 // soon anyway. 1066 select { 1067 case h.holdTimerResetCh <- true: 1068 default: 1069 } 1070 body := m.Body.(*bgp.BGPUpdate) 1071 isEBGP := h.fsm.pConf.IsEBGPPeer(h.fsm.gConf) 1072 isConfed := h.fsm.pConf.IsConfederationMember(h.fsm.gConf) 1073 1074 fmsg.payload = make([]byte, len(headerBuf)+len(bodyBuf)) 1075 copy(fmsg.payload, headerBuf) 1076 copy(fmsg.payload[len(headerBuf):], bodyBuf) 1077 1078 h.fsm.lock.RLock() 1079 rfMap := h.fsm.rfMap 1080 h.fsm.lock.RUnlock() 1081 1082 // Allow updates from host loopback addresses if the BGP connection 1083 // with the neighbour is both dialed and received on loopback 1084 // addresses. 1085 var allowLoopback bool 1086 if localAddr, peerAddr := h.fsm.peerInfo.LocalAddress, h.fsm.peerInfo.Address; localAddr.To4() != nil && peerAddr.To4() != nil { 1087 allowLoopback = localAddr.IsLoopback() && peerAddr.IsLoopback() 1088 } 1089 ok, err := bgp.ValidateUpdateMsg(body, rfMap, isEBGP, isConfed, allowLoopback) 1090 if !ok { 1091 handling = h.handlingError(m, err, useRevisedError) 1092 } 1093 if handling == bgp.ERROR_HANDLING_SESSION_RESET { 1094 h.fsm.lock.RLock() 1095 h.fsm.logger.Warn("Session will be reset due to malformed BGP update message", 1096 log.Fields{ 1097 "Topic": "Peer", 1098 "Key": h.fsm.pConf.State.NeighborAddress, 1099 "State": h.fsm.state.String(), 1100 "error": err}) 1101 h.fsm.lock.RUnlock() 1102 fmsg.MsgData = err 1103 return fmsg, err 1104 } 1105 1106 if routes := len(body.WithdrawnRoutes); routes > 0 { 1107 h.fsm.bmpStatsUpdate(bmp.BMP_STAT_TYPE_WITHDRAW_UPDATE, 1) 1108 h.fsm.bmpStatsUpdate(bmp.BMP_STAT_TYPE_WITHDRAW_PREFIX, routes) 1109 } else if attr := getPathAttrFromBGPUpdate(body, bgp.BGP_ATTR_TYPE_MP_UNREACH_NLRI); attr != nil { 1110 mpUnreach := attr.(*bgp.PathAttributeMpUnreachNLRI) 1111 if routes = len(mpUnreach.Value); routes > 0 { 1112 h.fsm.bmpStatsUpdate(bmp.BMP_STAT_TYPE_WITHDRAW_UPDATE, 1) 1113 h.fsm.bmpStatsUpdate(bmp.BMP_STAT_TYPE_WITHDRAW_PREFIX, routes) 1114 } 1115 } 1116 1117 if err = table.UpdatePathAttrs4ByteAs(h.fsm.logger, body); err != nil { 1118 fmsg.MsgData = err 1119 return fmsg, err 1120 } 1121 1122 if err = table.UpdatePathAggregator4ByteAs(body); err != nil { 1123 fmsg.MsgData = err 1124 return fmsg, err 1125 } 1126 1127 h.fsm.lock.RLock() 1128 peerInfo := h.fsm.peerInfo 1129 h.fsm.lock.RUnlock() 1130 fmsg.PathList = table.ProcessMessage(m, peerInfo, fmsg.timestamp) 1131 fallthrough 1132 case bgp.BGP_MSG_KEEPALIVE: 1133 // if the length of h.holdTimerResetCh 1134 // isn't zero, the timer will be reset 1135 // soon anyway. 1136 select { 1137 case h.holdTimerResetCh <- true: 1138 default: 1139 } 1140 if m.Header.Type == bgp.BGP_MSG_KEEPALIVE { 1141 return nil, nil 1142 } 1143 case bgp.BGP_MSG_NOTIFICATION: 1144 body := m.Body.(*bgp.BGPNotification) 1145 if body.ErrorCode == bgp.BGP_ERROR_CEASE && (body.ErrorSubcode == bgp.BGP_ERROR_SUB_ADMINISTRATIVE_SHUTDOWN || body.ErrorSubcode == bgp.BGP_ERROR_SUB_ADMINISTRATIVE_RESET) { 1146 communication, rest := decodeAdministrativeCommunication(body.Data) 1147 h.fsm.lock.RLock() 1148 h.fsm.logger.Warn("received notification", 1149 log.Fields{ 1150 "Topic": "Peer", 1151 "Key": h.fsm.pConf.State.NeighborAddress, 1152 "Code": body.ErrorCode, 1153 "Subcode": body.ErrorSubcode, 1154 "Communicated-Reason": communication, 1155 "Data": rest}) 1156 h.fsm.lock.RUnlock() 1157 } else { 1158 h.fsm.lock.RLock() 1159 h.fsm.logger.Warn("received notification", 1160 log.Fields{ 1161 "Topic": "Peer", 1162 "Key": h.fsm.pConf.State.NeighborAddress, 1163 "Code": body.ErrorCode, 1164 "Subcode": body.ErrorSubcode, 1165 "Data": body.Data}) 1166 h.fsm.lock.RUnlock() 1167 } 1168 1169 h.fsm.lock.RLock() 1170 s := h.fsm.pConf.GracefulRestart.State 1171 hardReset := s.Enabled && s.NotificationEnabled && body.ErrorCode == bgp.BGP_ERROR_CEASE && body.ErrorSubcode == bgp.BGP_ERROR_SUB_HARD_RESET 1172 h.fsm.lock.RUnlock() 1173 if hardReset { 1174 sendToStateReasonCh(fsmHardReset, m) 1175 } else { 1176 sendToStateReasonCh(fsmNotificationRecv, m) 1177 } 1178 return nil, nil 1179 } 1180 } 1181 } 1182 return fmsg, nil 1183 } 1184 1185 func (h *fsmHandler) recvMessage(ctx context.Context, wg *sync.WaitGroup) error { 1186 defer func() { 1187 h.msgCh.Close() 1188 wg.Done() 1189 }() 1190 fmsg, _ := h.recvMessageWithError() 1191 if fmsg != nil { 1192 h.msgCh.In() <- fmsg 1193 } 1194 return nil 1195 } 1196 1197 func open2Cap(open *bgp.BGPOpen, n *oc.Neighbor) (map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface, map[bgp.RouteFamily]bgp.BGPAddPathMode) { 1198 capMap := make(map[bgp.BGPCapabilityCode][]bgp.ParameterCapabilityInterface) 1199 for _, p := range open.OptParams { 1200 if paramCap, y := p.(*bgp.OptionParameterCapability); y { 1201 for _, c := range paramCap.Capability { 1202 m, ok := capMap[c.Code()] 1203 if !ok { 1204 m = make([]bgp.ParameterCapabilityInterface, 0, 1) 1205 } 1206 capMap[c.Code()] = append(m, c) 1207 } 1208 } 1209 } 1210 1211 // squash add path cap 1212 if caps, y := capMap[bgp.BGP_CAP_ADD_PATH]; y { 1213 items := make([]*bgp.CapAddPathTuple, 0, len(caps)) 1214 for _, c := range caps { 1215 items = append(items, c.(*bgp.CapAddPath).Tuples...) 1216 } 1217 capMap[bgp.BGP_CAP_ADD_PATH] = []bgp.ParameterCapabilityInterface{bgp.NewCapAddPath(items)} 1218 } 1219 1220 // remote open message may not include multi-protocol capability 1221 if _, y := capMap[bgp.BGP_CAP_MULTIPROTOCOL]; !y { 1222 capMap[bgp.BGP_CAP_MULTIPROTOCOL] = []bgp.ParameterCapabilityInterface{bgp.NewCapMultiProtocol(bgp.RF_IPv4_UC)} 1223 } 1224 1225 local := n.CreateRfMap() 1226 remote := make(map[bgp.RouteFamily]bgp.BGPAddPathMode) 1227 for _, c := range capMap[bgp.BGP_CAP_MULTIPROTOCOL] { 1228 family := c.(*bgp.CapMultiProtocol).CapValue 1229 remote[family] = bgp.BGP_ADD_PATH_NONE 1230 for _, a := range capMap[bgp.BGP_CAP_ADD_PATH] { 1231 for _, i := range a.(*bgp.CapAddPath).Tuples { 1232 if i.RouteFamily == family { 1233 remote[family] = i.Mode 1234 } 1235 } 1236 } 1237 } 1238 negotiated := make(map[bgp.RouteFamily]bgp.BGPAddPathMode) 1239 for family, mode := range local { 1240 if m, y := remote[family]; y { 1241 n := bgp.BGP_ADD_PATH_NONE 1242 if mode&bgp.BGP_ADD_PATH_SEND > 0 && m&bgp.BGP_ADD_PATH_RECEIVE > 0 { 1243 n |= bgp.BGP_ADD_PATH_SEND 1244 } 1245 if mode&bgp.BGP_ADD_PATH_RECEIVE > 0 && m&bgp.BGP_ADD_PATH_SEND > 0 { 1246 n |= bgp.BGP_ADD_PATH_RECEIVE 1247 } 1248 negotiated[family] = n 1249 } 1250 } 1251 return capMap, negotiated 1252 } 1253 1254 func (h *fsmHandler) opensent(ctx context.Context) (bgp.FSMState, *fsmStateReason) { 1255 fsm := h.fsm 1256 1257 fsm.lock.Lock() 1258 m := buildopen(fsm.gConf, fsm.pConf) 1259 fsm.lock.Unlock() 1260 1261 b, _ := m.Serialize() 1262 fsm.conn.Write(b) 1263 fsm.bgpMessageStateUpdate(m.Header.Type, false) 1264 1265 h.msgCh = channels.NewInfiniteChannel() 1266 1267 fsm.lock.RLock() 1268 h.conn = fsm.conn 1269 fsm.lock.RUnlock() 1270 1271 var wg sync.WaitGroup 1272 wg.Add(1) 1273 defer wg.Wait() 1274 go h.recvMessage(ctx, &wg) 1275 1276 // RFC 4271 P.60 1277 // sets its HoldTimer to a large value 1278 // A HoldTimer value of 4 minutes is suggested as a "large value" 1279 // for the HoldTimer 1280 fsm.lock.RLock() 1281 holdTimer := time.NewTimer(time.Second * time.Duration(fsm.opensentHoldTime)) 1282 fsm.lock.RUnlock() 1283 1284 for { 1285 select { 1286 case <-ctx.Done(): 1287 h.conn.Close() 1288 return -1, newfsmStateReason(fsmDying, nil, nil) 1289 case conn, ok := <-fsm.connCh: 1290 if !ok { 1291 break 1292 } 1293 conn.Close() 1294 fsm.lock.RLock() 1295 fsm.logger.Warn("Closed an accepted connection", 1296 log.Fields{ 1297 "Topic": "Peer", 1298 "Key": fsm.pConf.State.NeighborAddress, 1299 "State": fsm.state.String()}) 1300 fsm.lock.RUnlock() 1301 case <-fsm.gracefulRestartTimer.C: 1302 fsm.lock.RLock() 1303 restarting := fsm.pConf.GracefulRestart.State.PeerRestarting 1304 fsm.lock.RUnlock() 1305 if restarting { 1306 fsm.lock.RLock() 1307 fsm.logger.Warn("graceful restart timer expired", 1308 log.Fields{ 1309 "Topic": "Peer", 1310 "Key": fsm.pConf.State.NeighborAddress, 1311 "State": fsm.state.String()}) 1312 fsm.lock.RUnlock() 1313 h.conn.Close() 1314 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmRestartTimerExpired, nil, nil) 1315 } 1316 case i, ok := <-h.msgCh.Out(): 1317 if !ok { 1318 continue 1319 } 1320 e := i.(*fsmMsg) 1321 switch m := e.MsgData.(type) { 1322 case *bgp.BGPMessage: 1323 if m.Header.Type == bgp.BGP_MSG_OPEN { 1324 fsm.lock.Lock() 1325 fsm.recvOpen = m 1326 fsm.lock.Unlock() 1327 1328 body := m.Body.(*bgp.BGPOpen) 1329 1330 fsm.lock.RLock() 1331 fsmPeerAS := fsm.pConf.Config.PeerAs 1332 fsm.lock.RUnlock() 1333 peerAs, err := bgp.ValidateOpenMsg(body, fsmPeerAS, fsm.peerInfo.LocalAS, net.ParseIP(fsm.gConf.Config.RouterId)) 1334 if err != nil { 1335 m, _ := fsm.sendNotificationFromErrorMsg(err.(*bgp.MessageError)) 1336 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmInvalidMsg, m, nil) 1337 } 1338 1339 // ASN negotiation was skipped 1340 fsm.lock.RLock() 1341 asnNegotiationSkipped := fsm.pConf.Config.PeerAs == 0 1342 fsm.lock.RUnlock() 1343 if asnNegotiationSkipped { 1344 fsm.lock.Lock() 1345 typ := oc.PEER_TYPE_EXTERNAL 1346 if fsm.peerInfo.LocalAS == peerAs { 1347 typ = oc.PEER_TYPE_INTERNAL 1348 } 1349 fsm.pConf.State.PeerType = typ 1350 fsm.logger.Info("skipped asn negotiation", 1351 log.Fields{ 1352 "Topic": "Peer", 1353 "Key": fsm.pConf.State.NeighborAddress, 1354 "State": fsm.state.String(), 1355 "Asn": peerAs, 1356 "PeerType": typ}) 1357 fsm.lock.Unlock() 1358 } else { 1359 fsm.lock.Lock() 1360 fsm.pConf.State.PeerType = fsm.pConf.Config.PeerType 1361 fsm.lock.Unlock() 1362 } 1363 fsm.lock.Lock() 1364 fsm.pConf.State.PeerAs = peerAs 1365 fsm.peerInfo.AS = peerAs 1366 fsm.peerInfo.ID = body.ID 1367 fsm.capMap, fsm.rfMap = open2Cap(body, fsm.pConf) 1368 1369 if _, y := fsm.capMap[bgp.BGP_CAP_ADD_PATH]; y { 1370 fsm.marshallingOptions = &bgp.MarshallingOption{ 1371 AddPath: fsm.rfMap, 1372 } 1373 } else { 1374 fsm.marshallingOptions = nil 1375 } 1376 1377 // calculate HoldTime 1378 // RFC 4271 P.13 1379 // a BGP speaker MUST calculate the value of the Hold Timer 1380 // by using the smaller of its configured Hold Time and the Hold Time 1381 // received in the OPEN message. 1382 holdTime := float64(body.HoldTime) 1383 myHoldTime := fsm.pConf.Timers.Config.HoldTime 1384 if holdTime > myHoldTime { 1385 fsm.pConf.Timers.State.NegotiatedHoldTime = myHoldTime 1386 } else { 1387 fsm.pConf.Timers.State.NegotiatedHoldTime = holdTime 1388 } 1389 1390 keepalive := fsm.pConf.Timers.Config.KeepaliveInterval 1391 if n := fsm.pConf.Timers.State.NegotiatedHoldTime; n < myHoldTime { 1392 keepalive = n / 3 1393 } 1394 fsm.pConf.Timers.State.KeepaliveInterval = keepalive 1395 1396 gr, ok := fsm.capMap[bgp.BGP_CAP_GRACEFUL_RESTART] 1397 if fsm.pConf.GracefulRestart.Config.Enabled && ok { 1398 state := &fsm.pConf.GracefulRestart.State 1399 state.Enabled = true 1400 cap := gr[len(gr)-1].(*bgp.CapGracefulRestart) 1401 state.PeerRestartTime = uint16(cap.Time) 1402 1403 for _, t := range cap.Tuples { 1404 n := bgp.AddressFamilyNameMap[bgp.AfiSafiToRouteFamily(t.AFI, t.SAFI)] 1405 for i, a := range fsm.pConf.AfiSafis { 1406 if string(a.Config.AfiSafiName) == n { 1407 fsm.pConf.AfiSafis[i].MpGracefulRestart.State.Enabled = true 1408 fsm.pConf.AfiSafis[i].MpGracefulRestart.State.Received = true 1409 break 1410 } 1411 } 1412 } 1413 1414 // RFC 4724 4.1 1415 // To re-establish the session with its peer, the Restarting Speaker 1416 // MUST set the "Restart State" bit in the Graceful Restart Capability 1417 // of the OPEN message. 1418 if fsm.pConf.GracefulRestart.State.PeerRestarting && cap.Flags&0x08 == 0 { 1419 fsm.logger.Warn("restart flag is not set", 1420 log.Fields{ 1421 "Topic": "Peer", 1422 "Key": fsm.pConf.State.NeighborAddress, 1423 "State": fsm.state.String()}) 1424 // just ignore 1425 } 1426 1427 // RFC 4724 3 1428 // The most significant bit is defined as the Restart State (R) 1429 // bit, ...(snip)... When set (value 1), this bit 1430 // indicates that the BGP speaker has restarted, and its peer MUST 1431 // NOT wait for the End-of-RIB marker from the speaker before 1432 // advertising routing information to the speaker. 1433 if fsm.pConf.GracefulRestart.State.LocalRestarting && cap.Flags&0x08 != 0 { 1434 fsm.logger.Debug("peer has restarted, skipping wait for EOR", 1435 log.Fields{ 1436 "Topic": "Peer", 1437 "Key": fsm.pConf.State.NeighborAddress, 1438 "State": fsm.state.String()}) 1439 for i := range fsm.pConf.AfiSafis { 1440 fsm.pConf.AfiSafis[i].MpGracefulRestart.State.EndOfRibReceived = true 1441 } 1442 } 1443 if fsm.pConf.GracefulRestart.Config.NotificationEnabled && cap.Flags&0x04 > 0 { 1444 fsm.pConf.GracefulRestart.State.NotificationEnabled = true 1445 } 1446 } 1447 llgr, ok2 := fsm.capMap[bgp.BGP_CAP_LONG_LIVED_GRACEFUL_RESTART] 1448 if fsm.pConf.GracefulRestart.Config.LongLivedEnabled && ok && ok2 { 1449 fsm.pConf.GracefulRestart.State.LongLivedEnabled = true 1450 cap := llgr[len(llgr)-1].(*bgp.CapLongLivedGracefulRestart) 1451 for _, t := range cap.Tuples { 1452 n := bgp.AddressFamilyNameMap[bgp.AfiSafiToRouteFamily(t.AFI, t.SAFI)] 1453 for i, a := range fsm.pConf.AfiSafis { 1454 if string(a.Config.AfiSafiName) == n { 1455 fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Enabled = true 1456 fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.Received = true 1457 fsm.pConf.AfiSafis[i].LongLivedGracefulRestart.State.PeerRestartTime = t.RestartTime 1458 break 1459 } 1460 } 1461 } 1462 } 1463 1464 fsm.lock.Unlock() 1465 msg := bgp.NewBGPKeepAliveMessage() 1466 b, _ := msg.Serialize() 1467 fsm.conn.Write(b) 1468 fsm.bgpMessageStateUpdate(msg.Header.Type, false) 1469 return bgp.BGP_FSM_OPENCONFIRM, newfsmStateReason(fsmOpenMsgReceived, nil, nil) 1470 } else { 1471 // send notification? 1472 h.conn.Close() 1473 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmInvalidMsg, nil, nil) 1474 } 1475 case *bgp.MessageError: 1476 msg, _ := fsm.sendNotificationFromErrorMsg(m) 1477 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmInvalidMsg, msg, nil) 1478 default: 1479 h.fsm.logger.Panic("unknown msg type", 1480 log.Fields{ 1481 "Topic": "Peer", 1482 "Key": fsm.pConf.State.NeighborAddress, 1483 "State": fsm.state.String(), 1484 "Data": e.MsgData}) 1485 } 1486 case err := <-h.stateReasonCh: 1487 h.conn.Close() 1488 return bgp.BGP_FSM_IDLE, &err 1489 case <-holdTimer.C: 1490 m, _ := fsm.sendNotification(bgp.BGP_ERROR_HOLD_TIMER_EXPIRED, 0, nil, "hold timer expired") 1491 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmHoldTimerExpired, m, nil) 1492 case stateOp := <-fsm.adminStateCh: 1493 err := h.changeadminState(stateOp.State) 1494 if err == nil { 1495 switch stateOp.State { 1496 case adminStateDown: 1497 h.conn.Close() 1498 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmAdminDown, m, nil) 1499 case adminStateUp: 1500 h.fsm.logger.Panic("code logic bug", 1501 log.Fields{ 1502 "Topic": "Peer", 1503 "Key": fsm.pConf.State.NeighborAddress, 1504 "State": fsm.state.String(), 1505 "AdminState": stateOp.State.String()}) 1506 } 1507 } 1508 } 1509 } 1510 } 1511 1512 func keepaliveTicker(fsm *fsm) *time.Ticker { 1513 fsm.lock.RLock() 1514 defer fsm.lock.RUnlock() 1515 1516 negotiatedTime := fsm.pConf.Timers.State.NegotiatedHoldTime 1517 if negotiatedTime == 0 { 1518 return &time.Ticker{} 1519 } 1520 sec := time.Second * time.Duration(fsm.pConf.Timers.State.KeepaliveInterval) 1521 if sec == 0 { 1522 sec = time.Second 1523 } 1524 return time.NewTicker(sec) 1525 } 1526 1527 func (h *fsmHandler) openconfirm(ctx context.Context) (bgp.FSMState, *fsmStateReason) { 1528 fsm := h.fsm 1529 ticker := keepaliveTicker(fsm) 1530 h.msgCh = channels.NewInfiniteChannel() 1531 fsm.lock.RLock() 1532 h.conn = fsm.conn 1533 1534 var wg sync.WaitGroup 1535 defer wg.Wait() 1536 wg.Add(1) 1537 go h.recvMessage(ctx, &wg) 1538 1539 var holdTimer *time.Timer 1540 if fsm.pConf.Timers.State.NegotiatedHoldTime == 0 { 1541 holdTimer = &time.Timer{} 1542 } else { 1543 // RFC 4271 P.65 1544 // sets the HoldTimer according to the negotiated value 1545 holdTimer = time.NewTimer(time.Second * time.Duration(fsm.pConf.Timers.State.NegotiatedHoldTime)) 1546 } 1547 fsm.lock.RUnlock() 1548 1549 for { 1550 select { 1551 case <-ctx.Done(): 1552 h.conn.Close() 1553 return -1, newfsmStateReason(fsmDying, nil, nil) 1554 case conn, ok := <-fsm.connCh: 1555 if !ok { 1556 break 1557 } 1558 conn.Close() 1559 fsm.lock.RLock() 1560 fsm.logger.Warn("Closed an accepted connection", 1561 log.Fields{ 1562 "Topic": "Peer", 1563 "Key": fsm.pConf.State.NeighborAddress, 1564 "State": fsm.state.String()}) 1565 fsm.lock.RUnlock() 1566 case <-fsm.gracefulRestartTimer.C: 1567 fsm.lock.RLock() 1568 restarting := fsm.pConf.GracefulRestart.State.PeerRestarting 1569 fsm.lock.RUnlock() 1570 if restarting { 1571 fsm.lock.RLock() 1572 fsm.logger.Warn("graceful restart timer expired", 1573 log.Fields{ 1574 "Topic": "Peer", 1575 "Key": fsm.pConf.State.NeighborAddress, 1576 "State": fsm.state.String()}) 1577 fsm.lock.RUnlock() 1578 h.conn.Close() 1579 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmRestartTimerExpired, nil, nil) 1580 } 1581 case <-ticker.C: 1582 m := bgp.NewBGPKeepAliveMessage() 1583 b, _ := m.Serialize() 1584 // TODO: check error 1585 fsm.conn.Write(b) 1586 fsm.bgpMessageStateUpdate(m.Header.Type, false) 1587 case i, ok := <-h.msgCh.Out(): 1588 if !ok { 1589 continue 1590 } 1591 e := i.(*fsmMsg) 1592 switch m := e.MsgData.(type) { 1593 case *bgp.BGPMessage: 1594 if m.Header.Type == bgp.BGP_MSG_KEEPALIVE { 1595 return bgp.BGP_FSM_ESTABLISHED, newfsmStateReason(fsmOpenMsgNegotiated, nil, nil) 1596 } 1597 // send notification ? 1598 h.conn.Close() 1599 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmInvalidMsg, nil, nil) 1600 case *bgp.MessageError: 1601 msg, _ := fsm.sendNotificationFromErrorMsg(m) 1602 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmInvalidMsg, msg, nil) 1603 default: 1604 fsm.logger.Panic("unknown msg type", 1605 log.Fields{ 1606 "Topic": "Peer", 1607 "Key": fsm.pConf.State.NeighborAddress, 1608 "State": fsm.state.String(), 1609 "Data": e.MsgData}) 1610 } 1611 case err := <-h.stateReasonCh: 1612 h.conn.Close() 1613 return bgp.BGP_FSM_IDLE, &err 1614 case <-holdTimer.C: 1615 m, _ := fsm.sendNotification(bgp.BGP_ERROR_HOLD_TIMER_EXPIRED, 0, nil, "hold timer expired") 1616 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmHoldTimerExpired, m, nil) 1617 case stateOp := <-fsm.adminStateCh: 1618 err := h.changeadminState(stateOp.State) 1619 if err == nil { 1620 switch stateOp.State { 1621 case adminStateDown: 1622 h.conn.Close() 1623 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmAdminDown, nil, nil) 1624 case adminStateUp: 1625 fsm.logger.Panic("code logic bug", 1626 log.Fields{ 1627 "Topic": "Peer", 1628 "Key": fsm.pConf.State.NeighborAddress, 1629 "State": fsm.state.String(), 1630 "adminState": stateOp.State.String()}) 1631 } 1632 } 1633 } 1634 } 1635 } 1636 1637 func (h *fsmHandler) sendMessageloop(ctx context.Context, wg *sync.WaitGroup) error { 1638 sendToStateReasonCh := func(typ fsmStateReasonType, notif *bgp.BGPMessage) { 1639 // probably doesn't happen but be cautious 1640 select { 1641 case h.stateReasonCh <- *newfsmStateReason(typ, notif, nil): 1642 default: 1643 } 1644 } 1645 1646 defer wg.Done() 1647 conn := h.conn 1648 fsm := h.fsm 1649 ticker := keepaliveTicker(fsm) 1650 send := func(m *bgp.BGPMessage) error { 1651 fsm.lock.RLock() 1652 if fsm.twoByteAsTrans && m.Header.Type == bgp.BGP_MSG_UPDATE { 1653 fsm.logger.Debug("update for 2byte AS peer", 1654 log.Fields{ 1655 "Topic": "Peer", 1656 "Key": fsm.pConf.State.NeighborAddress, 1657 "State": fsm.state.String(), 1658 "Data": m}) 1659 table.UpdatePathAttrs2ByteAs(m.Body.(*bgp.BGPUpdate)) 1660 table.UpdatePathAggregator2ByteAs(m.Body.(*bgp.BGPUpdate)) 1661 } 1662 1663 // RFC8538 defines a Hard Reset notification subcode which 1664 // indicates that the BGP speaker wants to reset the session 1665 // without triggering graceful restart procedures. Here we map 1666 // notification subcodes to the Hard Reset subcode following 1667 // the RFC8538 suggestion. 1668 // 1669 // We check Status instead of Config because RFC8538 states 1670 // that A BGP speaker SHOULD NOT send a Hard Reset to a peer 1671 // from which it has not received the "N" bit. 1672 if fsm.pConf.GracefulRestart.State.NotificationEnabled && m.Header.Type == bgp.BGP_MSG_NOTIFICATION { 1673 if body := m.Body.(*bgp.BGPNotification); body.ErrorCode == bgp.BGP_ERROR_CEASE && bgp.ShouldHardReset(body.ErrorSubcode, false) { 1674 body.ErrorSubcode = bgp.BGP_ERROR_SUB_HARD_RESET 1675 } 1676 } 1677 1678 b, err := m.Serialize(h.fsm.marshallingOptions) 1679 fsm.lock.RUnlock() 1680 if err != nil { 1681 fsm.lock.RLock() 1682 fsm.logger.Warn("failed to serialize", 1683 log.Fields{ 1684 "Topic": "Peer", 1685 "Key": fsm.pConf.State.NeighborAddress, 1686 "State": fsm.state.String(), 1687 "Data": err}) 1688 fsm.lock.RUnlock() 1689 fsm.bgpMessageStateUpdate(0, false) 1690 return nil 1691 } 1692 fsm.lock.RLock() 1693 err = conn.SetWriteDeadline(time.Now().Add(time.Second * time.Duration(fsm.pConf.Timers.State.NegotiatedHoldTime))) 1694 fsm.lock.RUnlock() 1695 if err != nil { 1696 sendToStateReasonCh(fsmWriteFailed, nil) 1697 conn.Close() 1698 return fmt.Errorf("failed to set write deadline") 1699 } 1700 _, err = conn.Write(b) 1701 if err != nil { 1702 fsm.lock.RLock() 1703 fsm.logger.Warn("failed to send", 1704 log.Fields{ 1705 "Topic": "Peer", 1706 "Key": fsm.pConf.State.NeighborAddress, 1707 "State": fsm.state.String(), 1708 "Data": err}) 1709 fsm.lock.RUnlock() 1710 sendToStateReasonCh(fsmWriteFailed, nil) 1711 conn.Close() 1712 return fmt.Errorf("closed") 1713 } 1714 fsm.bgpMessageStateUpdate(m.Header.Type, false) 1715 1716 switch m.Header.Type { 1717 case bgp.BGP_MSG_NOTIFICATION: 1718 body := m.Body.(*bgp.BGPNotification) 1719 if body.ErrorCode == bgp.BGP_ERROR_CEASE && (body.ErrorSubcode == bgp.BGP_ERROR_SUB_ADMINISTRATIVE_SHUTDOWN || body.ErrorSubcode == bgp.BGP_ERROR_SUB_ADMINISTRATIVE_RESET) { 1720 communication, rest := decodeAdministrativeCommunication(body.Data) 1721 fsm.lock.RLock() 1722 fsm.logger.Warn("sent notification", 1723 log.Fields{ 1724 "Topic": "Peer", 1725 "Key": fsm.pConf.State.NeighborAddress, 1726 "State": fsm.state.String(), 1727 "Code": body.ErrorCode, 1728 "Subcode": body.ErrorSubcode, 1729 "Communicated-Reason": communication, 1730 "Data": rest}) 1731 fsm.lock.RUnlock() 1732 } else { 1733 fsm.lock.RLock() 1734 fsm.logger.Warn("sent notification", 1735 log.Fields{ 1736 "Topic": "Peer", 1737 "Key": fsm.pConf.State.NeighborAddress, 1738 "State": fsm.state.String(), 1739 "Code": body.ErrorCode, 1740 "Subcode": body.ErrorSubcode, 1741 "Data": body.Data}) 1742 fsm.lock.RUnlock() 1743 } 1744 sendToStateReasonCh(fsmNotificationSent, m) 1745 conn.Close() 1746 return fmt.Errorf("closed") 1747 case bgp.BGP_MSG_UPDATE: 1748 update := m.Body.(*bgp.BGPUpdate) 1749 if fsm.logger.GetLevel() >= log.DebugLevel { 1750 fsm.lock.RLock() 1751 fsm.logger.Debug("sent update", 1752 log.Fields{ 1753 "Topic": "Peer", 1754 "Key": fsm.pConf.State.NeighborAddress, 1755 "State": fsm.state.String(), 1756 "nlri": update.NLRI, 1757 "withdrawals": update.WithdrawnRoutes, 1758 "attributes": update.PathAttributes}) 1759 fsm.lock.RUnlock() 1760 } 1761 default: 1762 fsm.lock.RLock() 1763 fsm.logger.Debug("sent", 1764 log.Fields{ 1765 "Topic": "Peer", 1766 "Key": fsm.pConf.State.NeighborAddress, 1767 "State": fsm.state.String(), 1768 "data": m}) 1769 fsm.lock.RUnlock() 1770 } 1771 return nil 1772 } 1773 1774 for { 1775 select { 1776 case <-ctx.Done(): 1777 return nil 1778 case o := <-h.outgoing.Out(): 1779 switch m := o.(type) { 1780 case *fsmOutgoingMsg: 1781 h.fsm.lock.RLock() 1782 options := h.fsm.marshallingOptions 1783 h.fsm.lock.RUnlock() 1784 for _, msg := range table.CreateUpdateMsgFromPaths(m.Paths, options) { 1785 if err := send(msg); err != nil { 1786 return nil 1787 } 1788 } 1789 if m.Notification != nil { 1790 if m.StayIdle { 1791 // current user is only prefix-limit 1792 // fix me if this is not the case 1793 h.changeadminState(adminStatePfxCt) 1794 } 1795 if err := send(m.Notification); err != nil { 1796 return nil 1797 } 1798 } 1799 default: 1800 return nil 1801 } 1802 case <-ticker.C: 1803 if err := send(bgp.NewBGPKeepAliveMessage()); err != nil { 1804 return nil 1805 } 1806 } 1807 } 1808 } 1809 1810 func (h *fsmHandler) recvMessageloop(ctx context.Context, wg *sync.WaitGroup) error { 1811 defer wg.Done() 1812 for { 1813 fmsg, err := h.recvMessageWithError() 1814 if fmsg != nil { 1815 h.msgCh.In() <- fmsg 1816 } 1817 if err != nil { 1818 return nil 1819 } 1820 } 1821 } 1822 1823 func (h *fsmHandler) established(ctx context.Context) (bgp.FSMState, *fsmStateReason) { 1824 var wg sync.WaitGroup 1825 fsm := h.fsm 1826 fsm.lock.Lock() 1827 h.conn = fsm.conn 1828 fsm.lock.Unlock() 1829 1830 defer wg.Wait() 1831 wg.Add(2) 1832 1833 go h.sendMessageloop(ctx, &wg) 1834 h.msgCh = h.incoming 1835 go h.recvMessageloop(ctx, &wg) 1836 1837 var holdTimer *time.Timer 1838 if fsm.pConf.Timers.State.NegotiatedHoldTime == 0 { 1839 holdTimer = &time.Timer{} 1840 } else { 1841 fsm.lock.RLock() 1842 holdTimer = time.NewTimer(time.Second * time.Duration(fsm.pConf.Timers.State.NegotiatedHoldTime)) 1843 fsm.lock.RUnlock() 1844 } 1845 1846 fsm.gracefulRestartTimer.Stop() 1847 1848 for { 1849 select { 1850 case <-ctx.Done(): 1851 select { 1852 case m := <-fsm.notification: 1853 // RFC8538 defines a Hard Reset notification subcode which 1854 // indicates that the BGP speaker wants to reset the session 1855 // without triggering graceful restart procedures. Here we map 1856 // notification subcodes to the Hard Reset subcode following 1857 // the RFC8538 suggestion. 1858 // 1859 // We check Status instead of Config because RFC8538 states 1860 // that A BGP speaker SHOULD NOT send a Hard Reset to a peer 1861 // from which it has not received the "N" bit. 1862 if fsm.pConf.GracefulRestart.State.NotificationEnabled { 1863 if body := m.Body.(*bgp.BGPNotification); body.ErrorCode == bgp.BGP_ERROR_CEASE && bgp.ShouldHardReset(body.ErrorSubcode, false) { 1864 body.ErrorSubcode = bgp.BGP_ERROR_SUB_HARD_RESET 1865 } 1866 } 1867 b, _ := m.Serialize(h.fsm.marshallingOptions) 1868 h.conn.Write(b) 1869 default: 1870 // nothing to do 1871 } 1872 h.conn.Close() 1873 return -1, newfsmStateReason(fsmDying, nil, nil) 1874 case conn, ok := <-fsm.connCh: 1875 if !ok { 1876 break 1877 } 1878 conn.Close() 1879 fsm.lock.RLock() 1880 fsm.logger.Warn("Closed an accepted connection", 1881 log.Fields{ 1882 "Topic": "Peer", 1883 "Key": fsm.pConf.State.NeighborAddress, 1884 "State": fsm.state.String()}) 1885 fsm.lock.RUnlock() 1886 case err := <-h.stateReasonCh: 1887 h.conn.Close() 1888 // if recv goroutine hit an error and sent to 1889 // stateReasonCh, then tx goroutine might take 1890 // long until it exits because it waits for 1891 // ctx.Done() or keepalive timer. So let kill 1892 // it now. 1893 h.outgoing.In() <- err 1894 fsm.lock.RLock() 1895 if s := fsm.pConf.GracefulRestart.State; s.Enabled { 1896 if (s.NotificationEnabled && err.Type == fsmNotificationRecv) || 1897 (err.Type == fsmNotificationSent && 1898 err.BGPNotification.Body.(*bgp.BGPNotification).ErrorCode == bgp.BGP_ERROR_HOLD_TIMER_EXPIRED) || 1899 err.Type == fsmReadFailed || 1900 err.Type == fsmWriteFailed { 1901 err = *newfsmStateReason(fsmGracefulRestart, nil, nil) 1902 fsm.logger.Info("peer graceful restart", 1903 log.Fields{ 1904 "Topic": "Peer", 1905 "Key": fsm.pConf.State.NeighborAddress, 1906 "State": fsm.state.String()}) 1907 fsm.gracefulRestartTimer.Reset(time.Duration(fsm.pConf.GracefulRestart.State.PeerRestartTime) * time.Second) 1908 } 1909 } 1910 fsm.lock.RUnlock() 1911 return bgp.BGP_FSM_IDLE, &err 1912 case <-holdTimer.C: 1913 fsm.lock.RLock() 1914 fsm.logger.Warn("hold timer expired", 1915 log.Fields{ 1916 "Topic": "Peer", 1917 "Key": fsm.pConf.State.NeighborAddress, 1918 "State": fsm.state.String()}) 1919 fsm.lock.RUnlock() 1920 m := bgp.NewBGPNotificationMessage(bgp.BGP_ERROR_HOLD_TIMER_EXPIRED, 0, nil) 1921 h.outgoing.In() <- &fsmOutgoingMsg{Notification: m} 1922 fsm.lock.RLock() 1923 s := fsm.pConf.GracefulRestart.State 1924 fsm.lock.RUnlock() 1925 // Do not return hold timer expired to server if graceful restart is enabled 1926 // Let it fallback to read/write error or fsmNotificationSent handled above 1927 // Reference: https://github.com/osrg/gobgp/issues/2174 1928 if !s.Enabled { 1929 return bgp.BGP_FSM_IDLE, newfsmStateReason(fsmHoldTimerExpired, m, nil) 1930 } 1931 case <-h.holdTimerResetCh: 1932 fsm.lock.RLock() 1933 if fsm.pConf.Timers.State.NegotiatedHoldTime != 0 { 1934 holdTimer.Reset(time.Second * time.Duration(fsm.pConf.Timers.State.NegotiatedHoldTime)) 1935 } 1936 fsm.lock.RUnlock() 1937 case stateOp := <-fsm.adminStateCh: 1938 err := h.changeadminState(stateOp.State) 1939 if err == nil { 1940 switch stateOp.State { 1941 case adminStateDown: 1942 m := bgp.NewBGPNotificationMessage(bgp.BGP_ERROR_CEASE, bgp.BGP_ERROR_SUB_ADMINISTRATIVE_SHUTDOWN, stateOp.Communication) 1943 h.outgoing.In() <- &fsmOutgoingMsg{Notification: m} 1944 } 1945 } 1946 } 1947 } 1948 } 1949 1950 func (h *fsmHandler) loop(ctx context.Context, wg *sync.WaitGroup) error { 1951 defer wg.Done() 1952 1953 fsm := h.fsm 1954 fsm.lock.RLock() 1955 oldState := fsm.state 1956 fsm.lock.RUnlock() 1957 1958 var reason *fsmStateReason 1959 nextState := bgp.FSMState(-1) 1960 fsm.lock.RLock() 1961 fsmState := fsm.state 1962 fsm.lock.RUnlock() 1963 1964 switch fsmState { 1965 case bgp.BGP_FSM_IDLE: 1966 nextState, reason = h.idle(ctx) 1967 // case bgp.BGP_FSM_CONNECT: 1968 // nextState = h.connect() 1969 case bgp.BGP_FSM_ACTIVE: 1970 nextState, reason = h.active(ctx) 1971 case bgp.BGP_FSM_OPENSENT: 1972 nextState, reason = h.opensent(ctx) 1973 case bgp.BGP_FSM_OPENCONFIRM: 1974 nextState, reason = h.openconfirm(ctx) 1975 case bgp.BGP_FSM_ESTABLISHED: 1976 nextState, reason = h.established(ctx) 1977 } 1978 1979 fsm.lock.RLock() 1980 fsm.reason = reason 1981 1982 if nextState == bgp.BGP_FSM_ESTABLISHED && oldState == bgp.BGP_FSM_OPENCONFIRM { 1983 fsm.logger.Info("Peer Up", 1984 log.Fields{ 1985 "Topic": "Peer", 1986 "Key": fsm.pConf.State.NeighborAddress, 1987 "State": fsm.state.String()}) 1988 } 1989 1990 if oldState == bgp.BGP_FSM_ESTABLISHED { 1991 // The main goroutine sent the notification due to 1992 // deconfiguration or something. 1993 reason := fsm.reason 1994 if fsm.h.sentNotification != nil { 1995 reason.Type = fsmNotificationSent 1996 reason.BGPNotification = fsm.h.sentNotification 1997 } 1998 fsm.logger.Info("Peer Down", 1999 log.Fields{ 2000 "Topic": "Peer", 2001 "Key": fsm.pConf.State.NeighborAddress, 2002 "State": fsm.state.String(), 2003 "Reason": reason.String()}) 2004 } 2005 fsm.lock.RUnlock() 2006 2007 fsm.lock.RLock() 2008 h.incoming.In() <- &fsmMsg{ 2009 fsm: fsm, 2010 MsgType: fsmMsgStateChange, 2011 MsgSrc: fsm.pConf.State.NeighborAddress, 2012 MsgData: nextState, 2013 StateReason: reason, 2014 } 2015 fsm.lock.RUnlock() 2016 return nil 2017 } 2018 2019 func (h *fsmHandler) changeadminState(s adminState) error { 2020 h.fsm.lock.Lock() 2021 defer h.fsm.lock.Unlock() 2022 2023 fsm := h.fsm 2024 if fsm.adminState != s { 2025 fsm.logger.Debug("admin state changed", 2026 log.Fields{ 2027 "Topic": "Peer", 2028 "Key": fsm.pConf.State.NeighborAddress, 2029 "State": fsm.state.String(), 2030 "adminState": s.String()}) 2031 fsm.adminState = s 2032 fsm.pConf.State.AdminDown = !fsm.pConf.State.AdminDown 2033 2034 switch s { 2035 case adminStateUp: 2036 fsm.logger.Info("Administrative start", 2037 log.Fields{ 2038 "Topic": "Peer", 2039 "Key": fsm.pConf.State.NeighborAddress, 2040 "State": fsm.state.String()}) 2041 case adminStateDown: 2042 fsm.logger.Info("Administrative shutdown", 2043 log.Fields{ 2044 "Topic": "Peer", 2045 "Key": fsm.pConf.State.NeighborAddress, 2046 "State": fsm.state.String()}) 2047 case adminStatePfxCt: 2048 fsm.logger.Info("Administrative shutdown(Prefix limit reached)", 2049 log.Fields{ 2050 "Topic": "Peer", 2051 "Key": fsm.pConf.State.NeighborAddress, 2052 "State": fsm.state.String()}) 2053 } 2054 } else { 2055 fsm.logger.Warn("cannot change to the same state", 2056 log.Fields{ 2057 "Topic": "Peer", 2058 "Key": fsm.pConf.State.NeighborAddress, 2059 "State": fsm.state.String()}) 2060 return fmt.Errorf("cannot change to the same state") 2061 } 2062 return nil 2063 }