github.com/slackhq/nebula@v1.9.0/lighthouse.go (about) 1 package nebula 2 3 import ( 4 "context" 5 "encoding/binary" 6 "errors" 7 "fmt" 8 "net" 9 "net/netip" 10 "sync" 11 "sync/atomic" 12 "time" 13 14 "github.com/rcrowley/go-metrics" 15 "github.com/sirupsen/logrus" 16 "github.com/slackhq/nebula/cidr" 17 "github.com/slackhq/nebula/config" 18 "github.com/slackhq/nebula/header" 19 "github.com/slackhq/nebula/iputil" 20 "github.com/slackhq/nebula/udp" 21 "github.com/slackhq/nebula/util" 22 ) 23 24 //TODO: if a lighthouse doesn't have an answer, clients AGGRESSIVELY REQUERY.. why? handshake manager and/or getOrHandshake? 25 //TODO: nodes are roaming lighthouses, this is bad. How are they learning? 26 27 var ErrHostNotKnown = errors.New("host not known") 28 29 type netIpAndPort struct { 30 ip net.IP 31 port uint16 32 } 33 34 type LightHouse struct { 35 //TODO: We need a timer wheel to kick out vpnIps that haven't reported in a long time 36 sync.RWMutex //Because we concurrently read and write to our maps 37 ctx context.Context 38 amLighthouse bool 39 myVpnIp iputil.VpnIp 40 myVpnZeros iputil.VpnIp 41 myVpnNet *net.IPNet 42 punchConn udp.Conn 43 punchy *Punchy 44 45 // Local cache of answers from light houses 46 // map of vpn Ip to answers 47 addrMap map[iputil.VpnIp]*RemoteList 48 49 // filters remote addresses allowed for each host 50 // - When we are a lighthouse, this filters what addresses we store and 51 // respond with. 52 // - When we are not a lighthouse, this filters which addresses we accept 53 // from lighthouses. 54 remoteAllowList atomic.Pointer[RemoteAllowList] 55 56 // filters local addresses that we advertise to lighthouses 57 localAllowList atomic.Pointer[LocalAllowList] 58 59 // used to trigger the HandshakeManager when we receive HostQueryReply 60 handshakeTrigger chan<- iputil.VpnIp 61 62 // staticList exists to avoid having a bool in each addrMap entry 63 // since static should be rare 64 staticList atomic.Pointer[map[iputil.VpnIp]struct{}] 65 lighthouses atomic.Pointer[map[iputil.VpnIp]struct{}] 66 67 interval atomic.Int64 68 updateCancel context.CancelFunc 69 ifce EncWriter 70 nebulaPort uint32 // 32 bits because protobuf does not have a uint16 71 72 advertiseAddrs atomic.Pointer[[]netIpAndPort] 73 74 // IP's of relays that can be used by peers to access me 75 relaysForMe atomic.Pointer[[]iputil.VpnIp] 76 77 queryChan chan iputil.VpnIp 78 79 calculatedRemotes atomic.Pointer[cidr.Tree4[[]*calculatedRemote]] // Maps VpnIp to []*calculatedRemote 80 81 metrics *MessageMetrics 82 metricHolepunchTx metrics.Counter 83 l *logrus.Logger 84 } 85 86 // NewLightHouseFromConfig will build a Lighthouse struct from the values provided in the config object 87 // addrMap should be nil unless this is during a config reload 88 func NewLightHouseFromConfig(ctx context.Context, l *logrus.Logger, c *config.C, myVpnNet *net.IPNet, pc udp.Conn, p *Punchy) (*LightHouse, error) { 89 amLighthouse := c.GetBool("lighthouse.am_lighthouse", false) 90 nebulaPort := uint32(c.GetInt("listen.port", 0)) 91 if amLighthouse && nebulaPort == 0 { 92 return nil, util.NewContextualError("lighthouse.am_lighthouse enabled on node but no port number is set in config", nil, nil) 93 } 94 95 // If port is dynamic, discover it 96 if nebulaPort == 0 && pc != nil { 97 uPort, err := pc.LocalAddr() 98 if err != nil { 99 return nil, util.NewContextualError("Failed to get listening port", nil, err) 100 } 101 nebulaPort = uint32(uPort.Port) 102 } 103 104 ones, _ := myVpnNet.Mask.Size() 105 h := LightHouse{ 106 ctx: ctx, 107 amLighthouse: amLighthouse, 108 myVpnIp: iputil.Ip2VpnIp(myVpnNet.IP), 109 myVpnZeros: iputil.VpnIp(32 - ones), 110 myVpnNet: myVpnNet, 111 addrMap: make(map[iputil.VpnIp]*RemoteList), 112 nebulaPort: nebulaPort, 113 punchConn: pc, 114 punchy: p, 115 queryChan: make(chan iputil.VpnIp, c.GetUint32("handshakes.query_buffer", 64)), 116 l: l, 117 } 118 lighthouses := make(map[iputil.VpnIp]struct{}) 119 h.lighthouses.Store(&lighthouses) 120 staticList := make(map[iputil.VpnIp]struct{}) 121 h.staticList.Store(&staticList) 122 123 if c.GetBool("stats.lighthouse_metrics", false) { 124 h.metrics = newLighthouseMetrics() 125 h.metricHolepunchTx = metrics.GetOrRegisterCounter("messages.tx.holepunch", nil) 126 } else { 127 h.metricHolepunchTx = metrics.NilCounter{} 128 } 129 130 err := h.reload(c, true) 131 if err != nil { 132 return nil, err 133 } 134 135 c.RegisterReloadCallback(func(c *config.C) { 136 err := h.reload(c, false) 137 switch v := err.(type) { 138 case *util.ContextualError: 139 v.Log(l) 140 case error: 141 l.WithError(err).Error("failed to reload lighthouse") 142 } 143 }) 144 145 h.startQueryWorker() 146 147 return &h, nil 148 } 149 150 func (lh *LightHouse) GetStaticHostList() map[iputil.VpnIp]struct{} { 151 return *lh.staticList.Load() 152 } 153 154 func (lh *LightHouse) GetLighthouses() map[iputil.VpnIp]struct{} { 155 return *lh.lighthouses.Load() 156 } 157 158 func (lh *LightHouse) GetRemoteAllowList() *RemoteAllowList { 159 return lh.remoteAllowList.Load() 160 } 161 162 func (lh *LightHouse) GetLocalAllowList() *LocalAllowList { 163 return lh.localAllowList.Load() 164 } 165 166 func (lh *LightHouse) GetAdvertiseAddrs() []netIpAndPort { 167 return *lh.advertiseAddrs.Load() 168 } 169 170 func (lh *LightHouse) GetRelaysForMe() []iputil.VpnIp { 171 return *lh.relaysForMe.Load() 172 } 173 174 func (lh *LightHouse) getCalculatedRemotes() *cidr.Tree4[[]*calculatedRemote] { 175 return lh.calculatedRemotes.Load() 176 } 177 178 func (lh *LightHouse) GetUpdateInterval() int64 { 179 return lh.interval.Load() 180 } 181 182 func (lh *LightHouse) reload(c *config.C, initial bool) error { 183 if initial || c.HasChanged("lighthouse.advertise_addrs") { 184 rawAdvAddrs := c.GetStringSlice("lighthouse.advertise_addrs", []string{}) 185 advAddrs := make([]netIpAndPort, 0) 186 187 for i, rawAddr := range rawAdvAddrs { 188 fIp, fPort, err := udp.ParseIPAndPort(rawAddr) 189 if err != nil { 190 return util.NewContextualError("Unable to parse lighthouse.advertise_addrs entry", m{"addr": rawAddr, "entry": i + 1}, err) 191 } 192 193 if fPort == 0 { 194 fPort = uint16(lh.nebulaPort) 195 } 196 197 if ip4 := fIp.To4(); ip4 != nil && lh.myVpnNet.Contains(fIp) { 198 lh.l.WithField("addr", rawAddr).WithField("entry", i+1). 199 Warn("Ignoring lighthouse.advertise_addrs report because it is within the nebula network range") 200 continue 201 } 202 203 advAddrs = append(advAddrs, netIpAndPort{ip: fIp, port: fPort}) 204 } 205 206 lh.advertiseAddrs.Store(&advAddrs) 207 208 if !initial { 209 lh.l.Info("lighthouse.advertise_addrs has changed") 210 } 211 } 212 213 if initial || c.HasChanged("lighthouse.interval") { 214 lh.interval.Store(int64(c.GetInt("lighthouse.interval", 10))) 215 216 if !initial { 217 lh.l.Infof("lighthouse.interval changed to %v", lh.interval.Load()) 218 219 if lh.updateCancel != nil { 220 // May not always have a running routine 221 lh.updateCancel() 222 } 223 224 lh.StartUpdateWorker() 225 } 226 } 227 228 if initial || c.HasChanged("lighthouse.remote_allow_list") || c.HasChanged("lighthouse.remote_allow_ranges") { 229 ral, err := NewRemoteAllowListFromConfig(c, "lighthouse.remote_allow_list", "lighthouse.remote_allow_ranges") 230 if err != nil { 231 return util.NewContextualError("Invalid lighthouse.remote_allow_list", nil, err) 232 } 233 234 lh.remoteAllowList.Store(ral) 235 if !initial { 236 //TODO: a diff will be annoyingly difficult 237 lh.l.Info("lighthouse.remote_allow_list and/or lighthouse.remote_allow_ranges has changed") 238 } 239 } 240 241 if initial || c.HasChanged("lighthouse.local_allow_list") { 242 lal, err := NewLocalAllowListFromConfig(c, "lighthouse.local_allow_list") 243 if err != nil { 244 return util.NewContextualError("Invalid lighthouse.local_allow_list", nil, err) 245 } 246 247 lh.localAllowList.Store(lal) 248 if !initial { 249 //TODO: a diff will be annoyingly difficult 250 lh.l.Info("lighthouse.local_allow_list has changed") 251 } 252 } 253 254 if initial || c.HasChanged("lighthouse.calculated_remotes") { 255 cr, err := NewCalculatedRemotesFromConfig(c, "lighthouse.calculated_remotes") 256 if err != nil { 257 return util.NewContextualError("Invalid lighthouse.calculated_remotes", nil, err) 258 } 259 260 lh.calculatedRemotes.Store(cr) 261 if !initial { 262 //TODO: a diff will be annoyingly difficult 263 lh.l.Info("lighthouse.calculated_remotes has changed") 264 } 265 } 266 267 //NOTE: many things will get much simpler when we combine static_host_map and lighthouse.hosts in config 268 if initial || c.HasChanged("static_host_map") || c.HasChanged("static_map.cadence") || c.HasChanged("static_map.network") || c.HasChanged("static_map.lookup_timeout") { 269 // Clean up. Entries still in the static_host_map will be re-built. 270 // Entries no longer present must have their (possible) background DNS goroutines stopped. 271 if existingStaticList := lh.staticList.Load(); existingStaticList != nil { 272 lh.RLock() 273 for staticVpnIp := range *existingStaticList { 274 if am, ok := lh.addrMap[staticVpnIp]; ok && am != nil { 275 am.hr.Cancel() 276 } 277 } 278 lh.RUnlock() 279 } 280 // Build a new list based on current config. 281 staticList := make(map[iputil.VpnIp]struct{}) 282 err := lh.loadStaticMap(c, lh.myVpnNet, staticList) 283 if err != nil { 284 return err 285 } 286 287 lh.staticList.Store(&staticList) 288 if !initial { 289 //TODO: we should remove any remote list entries for static hosts that were removed/modified? 290 if c.HasChanged("static_host_map") { 291 lh.l.Info("static_host_map has changed") 292 } 293 if c.HasChanged("static_map.cadence") { 294 lh.l.Info("static_map.cadence has changed") 295 } 296 if c.HasChanged("static_map.network") { 297 lh.l.Info("static_map.network has changed") 298 } 299 if c.HasChanged("static_map.lookup_timeout") { 300 lh.l.Info("static_map.lookup_timeout has changed") 301 } 302 } 303 } 304 305 if initial || c.HasChanged("lighthouse.hosts") { 306 lhMap := make(map[iputil.VpnIp]struct{}) 307 err := lh.parseLighthouses(c, lh.myVpnNet, lhMap) 308 if err != nil { 309 return err 310 } 311 312 lh.lighthouses.Store(&lhMap) 313 if !initial { 314 //NOTE: we are not tearing down existing lighthouse connections because they might be used for non lighthouse traffic 315 lh.l.Info("lighthouse.hosts has changed") 316 } 317 } 318 319 if initial || c.HasChanged("relay.relays") { 320 switch c.GetBool("relay.am_relay", false) { 321 case true: 322 // Relays aren't allowed to specify other relays 323 if len(c.GetStringSlice("relay.relays", nil)) > 0 { 324 lh.l.Info("Ignoring relays from config because am_relay is true") 325 } 326 relaysForMe := []iputil.VpnIp{} 327 lh.relaysForMe.Store(&relaysForMe) 328 case false: 329 relaysForMe := []iputil.VpnIp{} 330 for _, v := range c.GetStringSlice("relay.relays", nil) { 331 lh.l.WithField("relay", v).Info("Read relay from config") 332 333 configRIP := net.ParseIP(v) 334 if configRIP != nil { 335 relaysForMe = append(relaysForMe, iputil.Ip2VpnIp(configRIP)) 336 } 337 } 338 lh.relaysForMe.Store(&relaysForMe) 339 } 340 } 341 342 return nil 343 } 344 345 func (lh *LightHouse) parseLighthouses(c *config.C, tunCidr *net.IPNet, lhMap map[iputil.VpnIp]struct{}) error { 346 lhs := c.GetStringSlice("lighthouse.hosts", []string{}) 347 if lh.amLighthouse && len(lhs) != 0 { 348 lh.l.Warn("lighthouse.am_lighthouse enabled on node but upstream lighthouses exist in config") 349 } 350 351 for i, host := range lhs { 352 ip := net.ParseIP(host) 353 if ip == nil { 354 return util.NewContextualError("Unable to parse lighthouse host entry", m{"host": host, "entry": i + 1}, nil) 355 } 356 if !tunCidr.Contains(ip) { 357 return util.NewContextualError("lighthouse host is not in our subnet, invalid", m{"vpnIp": ip, "network": tunCidr.String()}, nil) 358 } 359 lhMap[iputil.Ip2VpnIp(ip)] = struct{}{} 360 } 361 362 if !lh.amLighthouse && len(lhMap) == 0 { 363 lh.l.Warn("No lighthouse.hosts configured, this host will only be able to initiate tunnels with static_host_map entries") 364 } 365 366 staticList := lh.GetStaticHostList() 367 for lhIP, _ := range lhMap { 368 if _, ok := staticList[lhIP]; !ok { 369 return fmt.Errorf("lighthouse %s does not have a static_host_map entry", lhIP) 370 } 371 } 372 373 return nil 374 } 375 376 func getStaticMapCadence(c *config.C) (time.Duration, error) { 377 cadence := c.GetString("static_map.cadence", "30s") 378 d, err := time.ParseDuration(cadence) 379 if err != nil { 380 return 0, err 381 } 382 return d, nil 383 } 384 385 func getStaticMapLookupTimeout(c *config.C) (time.Duration, error) { 386 lookupTimeout := c.GetString("static_map.lookup_timeout", "250ms") 387 d, err := time.ParseDuration(lookupTimeout) 388 if err != nil { 389 return 0, err 390 } 391 return d, nil 392 } 393 394 func getStaticMapNetwork(c *config.C) (string, error) { 395 network := c.GetString("static_map.network", "ip4") 396 if network != "ip" && network != "ip4" && network != "ip6" { 397 return "", fmt.Errorf("static_map.network must be one of ip, ip4, or ip6") 398 } 399 return network, nil 400 } 401 402 func (lh *LightHouse) loadStaticMap(c *config.C, tunCidr *net.IPNet, staticList map[iputil.VpnIp]struct{}) error { 403 d, err := getStaticMapCadence(c) 404 if err != nil { 405 return err 406 } 407 408 network, err := getStaticMapNetwork(c) 409 if err != nil { 410 return err 411 } 412 413 lookup_timeout, err := getStaticMapLookupTimeout(c) 414 if err != nil { 415 return err 416 } 417 418 shm := c.GetMap("static_host_map", map[interface{}]interface{}{}) 419 i := 0 420 421 for k, v := range shm { 422 rip := net.ParseIP(fmt.Sprintf("%v", k)) 423 if rip == nil { 424 return util.NewContextualError("Unable to parse static_host_map entry", m{"host": k, "entry": i + 1}, nil) 425 } 426 427 if !tunCidr.Contains(rip) { 428 return util.NewContextualError("static_host_map key is not in our subnet, invalid", m{"vpnIp": rip, "network": tunCidr.String(), "entry": i + 1}, nil) 429 } 430 431 vpnIp := iputil.Ip2VpnIp(rip) 432 vals, ok := v.([]interface{}) 433 if !ok { 434 vals = []interface{}{v} 435 } 436 remoteAddrs := []string{} 437 for _, v := range vals { 438 remoteAddrs = append(remoteAddrs, fmt.Sprintf("%v", v)) 439 } 440 441 err := lh.addStaticRemotes(i, d, network, lookup_timeout, vpnIp, remoteAddrs, staticList) 442 if err != nil { 443 return err 444 } 445 i++ 446 } 447 448 return nil 449 } 450 451 func (lh *LightHouse) Query(ip iputil.VpnIp) *RemoteList { 452 if !lh.IsLighthouseIP(ip) { 453 lh.QueryServer(ip) 454 } 455 lh.RLock() 456 if v, ok := lh.addrMap[ip]; ok { 457 lh.RUnlock() 458 return v 459 } 460 lh.RUnlock() 461 return nil 462 } 463 464 // QueryServer is asynchronous so no reply should be expected 465 func (lh *LightHouse) QueryServer(ip iputil.VpnIp) { 466 // Don't put lighthouse ips in the query channel because we can't query lighthouses about lighthouses 467 if lh.amLighthouse || lh.IsLighthouseIP(ip) { 468 return 469 } 470 471 lh.queryChan <- ip 472 } 473 474 func (lh *LightHouse) QueryCache(ip iputil.VpnIp) *RemoteList { 475 lh.RLock() 476 if v, ok := lh.addrMap[ip]; ok { 477 lh.RUnlock() 478 return v 479 } 480 lh.RUnlock() 481 482 lh.Lock() 483 defer lh.Unlock() 484 // Add an entry if we don't already have one 485 return lh.unlockedGetRemoteList(ip) 486 } 487 488 // queryAndPrepMessage is a lock helper on RemoteList, assisting the caller to build a lighthouse message containing 489 // details from the remote list. It looks for a hit in the addrMap and a hit in the RemoteList under the owner vpnIp 490 // If one is found then f() is called with proper locking, f() must return result of n.MarshalTo() 491 func (lh *LightHouse) queryAndPrepMessage(vpnIp iputil.VpnIp, f func(*cache) (int, error)) (bool, int, error) { 492 lh.RLock() 493 // Do we have an entry in the main cache? 494 if v, ok := lh.addrMap[vpnIp]; ok { 495 // Swap lh lock for remote list lock 496 v.RLock() 497 defer v.RUnlock() 498 499 lh.RUnlock() 500 501 // vpnIp should also be the owner here since we are a lighthouse. 502 c := v.cache[vpnIp] 503 // Make sure we have 504 if c != nil { 505 n, err := f(c) 506 return true, n, err 507 } 508 return false, 0, nil 509 } 510 lh.RUnlock() 511 return false, 0, nil 512 } 513 514 func (lh *LightHouse) DeleteVpnIp(vpnIp iputil.VpnIp) { 515 // First we check the static mapping 516 // and do nothing if it is there 517 if _, ok := lh.GetStaticHostList()[vpnIp]; ok { 518 return 519 } 520 lh.Lock() 521 //l.Debugln(lh.addrMap) 522 delete(lh.addrMap, vpnIp) 523 524 if lh.l.Level >= logrus.DebugLevel { 525 lh.l.Debugf("deleting %s from lighthouse.", vpnIp) 526 } 527 528 lh.Unlock() 529 } 530 531 // AddStaticRemote adds a static host entry for vpnIp as ourselves as the owner 532 // We are the owner because we don't want a lighthouse server to advertise for static hosts it was configured with 533 // And we don't want a lighthouse query reply to interfere with our learned cache if we are a client 534 // NOTE: this function should not interact with any hot path objects, like lh.staticList, the caller should handle it 535 func (lh *LightHouse) addStaticRemotes(i int, d time.Duration, network string, timeout time.Duration, vpnIp iputil.VpnIp, toAddrs []string, staticList map[iputil.VpnIp]struct{}) error { 536 lh.Lock() 537 am := lh.unlockedGetRemoteList(vpnIp) 538 am.Lock() 539 defer am.Unlock() 540 ctx := lh.ctx 541 lh.Unlock() 542 543 hr, err := NewHostnameResults(ctx, lh.l, d, network, timeout, toAddrs, func() { 544 // This callback runs whenever the DNS hostname resolver finds a different set of IP's 545 // in its resolution for hostnames. 546 am.Lock() 547 defer am.Unlock() 548 am.shouldRebuild = true 549 }) 550 if err != nil { 551 return util.NewContextualError("Static host address could not be parsed", m{"vpnIp": vpnIp, "entry": i + 1}, err) 552 } 553 am.unlockedSetHostnamesResults(hr) 554 555 for _, addrPort := range hr.GetIPs() { 556 557 switch { 558 case addrPort.Addr().Is4(): 559 to := NewIp4AndPortFromNetIP(addrPort.Addr(), addrPort.Port()) 560 if !lh.unlockedShouldAddV4(vpnIp, to) { 561 continue 562 } 563 am.unlockedPrependV4(lh.myVpnIp, to) 564 case addrPort.Addr().Is6(): 565 to := NewIp6AndPortFromNetIP(addrPort.Addr(), addrPort.Port()) 566 if !lh.unlockedShouldAddV6(vpnIp, to) { 567 continue 568 } 569 am.unlockedPrependV6(lh.myVpnIp, to) 570 } 571 } 572 573 // Mark it as static in the caller provided map 574 staticList[vpnIp] = struct{}{} 575 return nil 576 } 577 578 // addCalculatedRemotes adds any calculated remotes based on the 579 // lighthouse.calculated_remotes configuration. It returns true if any 580 // calculated remotes were added 581 func (lh *LightHouse) addCalculatedRemotes(vpnIp iputil.VpnIp) bool { 582 tree := lh.getCalculatedRemotes() 583 if tree == nil { 584 return false 585 } 586 ok, calculatedRemotes := tree.MostSpecificContains(vpnIp) 587 if !ok { 588 return false 589 } 590 591 var calculated []*Ip4AndPort 592 for _, cr := range calculatedRemotes { 593 c := cr.Apply(vpnIp) 594 if c != nil { 595 calculated = append(calculated, c) 596 } 597 } 598 599 lh.Lock() 600 am := lh.unlockedGetRemoteList(vpnIp) 601 am.Lock() 602 defer am.Unlock() 603 lh.Unlock() 604 605 am.unlockedSetV4(lh.myVpnIp, vpnIp, calculated, lh.unlockedShouldAddV4) 606 607 return len(calculated) > 0 608 } 609 610 // unlockedGetRemoteList assumes you have the lh lock 611 func (lh *LightHouse) unlockedGetRemoteList(vpnIp iputil.VpnIp) *RemoteList { 612 am, ok := lh.addrMap[vpnIp] 613 if !ok { 614 am = NewRemoteList(func(a netip.Addr) bool { return lh.shouldAdd(vpnIp, a) }) 615 lh.addrMap[vpnIp] = am 616 } 617 return am 618 } 619 620 func (lh *LightHouse) shouldAdd(vpnIp iputil.VpnIp, to netip.Addr) bool { 621 switch { 622 case to.Is4(): 623 ipBytes := to.As4() 624 ip := iputil.Ip2VpnIp(ipBytes[:]) 625 allow := lh.GetRemoteAllowList().AllowIpV4(vpnIp, ip) 626 if lh.l.Level >= logrus.TraceLevel { 627 lh.l.WithField("remoteIp", vpnIp).WithField("allow", allow).Trace("remoteAllowList.Allow") 628 } 629 if !allow || ipMaskContains(lh.myVpnIp, lh.myVpnZeros, ip) { 630 return false 631 } 632 case to.Is6(): 633 ipBytes := to.As16() 634 635 hi := binary.BigEndian.Uint64(ipBytes[:8]) 636 lo := binary.BigEndian.Uint64(ipBytes[8:]) 637 allow := lh.GetRemoteAllowList().AllowIpV6(vpnIp, hi, lo) 638 if lh.l.Level >= logrus.TraceLevel { 639 lh.l.WithField("remoteIp", to).WithField("allow", allow).Trace("remoteAllowList.Allow") 640 } 641 642 // We don't check our vpn network here because nebula does not support ipv6 on the inside 643 if !allow { 644 return false 645 } 646 } 647 return true 648 } 649 650 // unlockedShouldAddV4 checks if to is allowed by our allow list 651 func (lh *LightHouse) unlockedShouldAddV4(vpnIp iputil.VpnIp, to *Ip4AndPort) bool { 652 allow := lh.GetRemoteAllowList().AllowIpV4(vpnIp, iputil.VpnIp(to.Ip)) 653 if lh.l.Level >= logrus.TraceLevel { 654 lh.l.WithField("remoteIp", vpnIp).WithField("allow", allow).Trace("remoteAllowList.Allow") 655 } 656 657 if !allow || ipMaskContains(lh.myVpnIp, lh.myVpnZeros, iputil.VpnIp(to.Ip)) { 658 return false 659 } 660 661 return true 662 } 663 664 // unlockedShouldAddV6 checks if to is allowed by our allow list 665 func (lh *LightHouse) unlockedShouldAddV6(vpnIp iputil.VpnIp, to *Ip6AndPort) bool { 666 allow := lh.GetRemoteAllowList().AllowIpV6(vpnIp, to.Hi, to.Lo) 667 if lh.l.Level >= logrus.TraceLevel { 668 lh.l.WithField("remoteIp", lhIp6ToIp(to)).WithField("allow", allow).Trace("remoteAllowList.Allow") 669 } 670 671 // We don't check our vpn network here because nebula does not support ipv6 on the inside 672 if !allow { 673 return false 674 } 675 676 return true 677 } 678 679 func lhIp6ToIp(v *Ip6AndPort) net.IP { 680 ip := make(net.IP, 16) 681 binary.BigEndian.PutUint64(ip[:8], v.Hi) 682 binary.BigEndian.PutUint64(ip[8:], v.Lo) 683 return ip 684 } 685 686 func (lh *LightHouse) IsLighthouseIP(vpnIp iputil.VpnIp) bool { 687 if _, ok := lh.GetLighthouses()[vpnIp]; ok { 688 return true 689 } 690 return false 691 } 692 693 func NewLhQueryByInt(VpnIp iputil.VpnIp) *NebulaMeta { 694 return &NebulaMeta{ 695 Type: NebulaMeta_HostQuery, 696 Details: &NebulaMetaDetails{ 697 VpnIp: uint32(VpnIp), 698 }, 699 } 700 } 701 702 func NewIp4AndPort(ip net.IP, port uint32) *Ip4AndPort { 703 ipp := Ip4AndPort{Port: port} 704 ipp.Ip = uint32(iputil.Ip2VpnIp(ip)) 705 return &ipp 706 } 707 708 func NewIp4AndPortFromNetIP(ip netip.Addr, port uint16) *Ip4AndPort { 709 v4Addr := ip.As4() 710 return &Ip4AndPort{ 711 Ip: binary.BigEndian.Uint32(v4Addr[:]), 712 Port: uint32(port), 713 } 714 } 715 716 func NewIp6AndPort(ip net.IP, port uint32) *Ip6AndPort { 717 return &Ip6AndPort{ 718 Hi: binary.BigEndian.Uint64(ip[:8]), 719 Lo: binary.BigEndian.Uint64(ip[8:]), 720 Port: port, 721 } 722 } 723 724 func NewIp6AndPortFromNetIP(ip netip.Addr, port uint16) *Ip6AndPort { 725 ip6Addr := ip.As16() 726 return &Ip6AndPort{ 727 Hi: binary.BigEndian.Uint64(ip6Addr[:8]), 728 Lo: binary.BigEndian.Uint64(ip6Addr[8:]), 729 Port: uint32(port), 730 } 731 } 732 func NewUDPAddrFromLH4(ipp *Ip4AndPort) *udp.Addr { 733 ip := ipp.Ip 734 return udp.NewAddr( 735 net.IPv4(byte(ip&0xff000000>>24), byte(ip&0x00ff0000>>16), byte(ip&0x0000ff00>>8), byte(ip&0x000000ff)), 736 uint16(ipp.Port), 737 ) 738 } 739 740 func NewUDPAddrFromLH6(ipp *Ip6AndPort) *udp.Addr { 741 return udp.NewAddr(lhIp6ToIp(ipp), uint16(ipp.Port)) 742 } 743 744 func (lh *LightHouse) startQueryWorker() { 745 if lh.amLighthouse { 746 return 747 } 748 749 go func() { 750 nb := make([]byte, 12, 12) 751 out := make([]byte, mtu) 752 753 for { 754 select { 755 case <-lh.ctx.Done(): 756 return 757 case ip := <-lh.queryChan: 758 lh.innerQueryServer(ip, nb, out) 759 } 760 } 761 }() 762 } 763 764 func (lh *LightHouse) innerQueryServer(ip iputil.VpnIp, nb, out []byte) { 765 if lh.IsLighthouseIP(ip) { 766 return 767 } 768 769 // Send a query to the lighthouses and hope for the best next time 770 query, err := NewLhQueryByInt(ip).Marshal() 771 if err != nil { 772 lh.l.WithError(err).WithField("vpnIp", ip).Error("Failed to marshal lighthouse query payload") 773 return 774 } 775 776 lighthouses := lh.GetLighthouses() 777 lh.metricTx(NebulaMeta_HostQuery, int64(len(lighthouses))) 778 779 for n := range lighthouses { 780 lh.ifce.SendMessageToVpnIp(header.LightHouse, 0, n, query, nb, out) 781 } 782 } 783 784 func (lh *LightHouse) StartUpdateWorker() { 785 interval := lh.GetUpdateInterval() 786 if lh.amLighthouse || interval == 0 { 787 return 788 } 789 790 clockSource := time.NewTicker(time.Second * time.Duration(interval)) 791 updateCtx, cancel := context.WithCancel(lh.ctx) 792 lh.updateCancel = cancel 793 794 go func() { 795 defer clockSource.Stop() 796 797 for { 798 lh.SendUpdate() 799 800 select { 801 case <-updateCtx.Done(): 802 return 803 case <-clockSource.C: 804 continue 805 } 806 } 807 }() 808 } 809 810 func (lh *LightHouse) SendUpdate() { 811 var v4 []*Ip4AndPort 812 var v6 []*Ip6AndPort 813 814 for _, e := range lh.GetAdvertiseAddrs() { 815 if ip := e.ip.To4(); ip != nil { 816 v4 = append(v4, NewIp4AndPort(e.ip, uint32(e.port))) 817 } else { 818 v6 = append(v6, NewIp6AndPort(e.ip, uint32(e.port))) 819 } 820 } 821 822 lal := lh.GetLocalAllowList() 823 for _, e := range *localIps(lh.l, lal) { 824 if ip4 := e.To4(); ip4 != nil && ipMaskContains(lh.myVpnIp, lh.myVpnZeros, iputil.Ip2VpnIp(ip4)) { 825 continue 826 } 827 828 // Only add IPs that aren't my VPN/tun IP 829 if ip := e.To4(); ip != nil { 830 v4 = append(v4, NewIp4AndPort(e, lh.nebulaPort)) 831 } else { 832 v6 = append(v6, NewIp6AndPort(e, lh.nebulaPort)) 833 } 834 } 835 836 var relays []uint32 837 for _, r := range lh.GetRelaysForMe() { 838 relays = append(relays, (uint32)(r)) 839 } 840 841 m := &NebulaMeta{ 842 Type: NebulaMeta_HostUpdateNotification, 843 Details: &NebulaMetaDetails{ 844 VpnIp: uint32(lh.myVpnIp), 845 Ip4AndPorts: v4, 846 Ip6AndPorts: v6, 847 RelayVpnIp: relays, 848 }, 849 } 850 851 lighthouses := lh.GetLighthouses() 852 lh.metricTx(NebulaMeta_HostUpdateNotification, int64(len(lighthouses))) 853 nb := make([]byte, 12, 12) 854 out := make([]byte, mtu) 855 856 mm, err := m.Marshal() 857 if err != nil { 858 lh.l.WithError(err).Error("Error while marshaling for lighthouse update") 859 return 860 } 861 862 for vpnIp := range lighthouses { 863 lh.ifce.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, mm, nb, out) 864 } 865 } 866 867 type LightHouseHandler struct { 868 lh *LightHouse 869 nb []byte 870 out []byte 871 pb []byte 872 meta *NebulaMeta 873 l *logrus.Logger 874 } 875 876 func (lh *LightHouse) NewRequestHandler() *LightHouseHandler { 877 lhh := &LightHouseHandler{ 878 lh: lh, 879 nb: make([]byte, 12, 12), 880 out: make([]byte, mtu), 881 l: lh.l, 882 pb: make([]byte, mtu), 883 884 meta: &NebulaMeta{ 885 Details: &NebulaMetaDetails{}, 886 }, 887 } 888 889 return lhh 890 } 891 892 func (lh *LightHouse) metricRx(t NebulaMeta_MessageType, i int64) { 893 lh.metrics.Rx(header.MessageType(t), 0, i) 894 } 895 896 func (lh *LightHouse) metricTx(t NebulaMeta_MessageType, i int64) { 897 lh.metrics.Tx(header.MessageType(t), 0, i) 898 } 899 900 // This method is similar to Reset(), but it re-uses the pointer structs 901 // so that we don't have to re-allocate them 902 func (lhh *LightHouseHandler) resetMeta() *NebulaMeta { 903 details := lhh.meta.Details 904 lhh.meta.Reset() 905 906 // Keep the array memory around 907 details.Ip4AndPorts = details.Ip4AndPorts[:0] 908 details.Ip6AndPorts = details.Ip6AndPorts[:0] 909 details.RelayVpnIp = details.RelayVpnIp[:0] 910 lhh.meta.Details = details 911 912 return lhh.meta 913 } 914 915 func lhHandleRequest(lhh *LightHouseHandler, f *Interface) udp.LightHouseHandlerFunc { 916 return func(rAddr *udp.Addr, vpnIp iputil.VpnIp, p []byte) { 917 lhh.HandleRequest(rAddr, vpnIp, p, f) 918 } 919 } 920 921 func (lhh *LightHouseHandler) HandleRequest(rAddr *udp.Addr, vpnIp iputil.VpnIp, p []byte, w EncWriter) { 922 n := lhh.resetMeta() 923 err := n.Unmarshal(p) 924 if err != nil { 925 lhh.l.WithError(err).WithField("vpnIp", vpnIp).WithField("udpAddr", rAddr). 926 Error("Failed to unmarshal lighthouse packet") 927 //TODO: send recv_error? 928 return 929 } 930 931 if n.Details == nil { 932 lhh.l.WithField("vpnIp", vpnIp).WithField("udpAddr", rAddr). 933 Error("Invalid lighthouse update") 934 //TODO: send recv_error? 935 return 936 } 937 938 lhh.lh.metricRx(n.Type, 1) 939 940 switch n.Type { 941 case NebulaMeta_HostQuery: 942 lhh.handleHostQuery(n, vpnIp, rAddr, w) 943 944 case NebulaMeta_HostQueryReply: 945 lhh.handleHostQueryReply(n, vpnIp) 946 947 case NebulaMeta_HostUpdateNotification: 948 lhh.handleHostUpdateNotification(n, vpnIp, w) 949 950 case NebulaMeta_HostMovedNotification: 951 case NebulaMeta_HostPunchNotification: 952 lhh.handleHostPunchNotification(n, vpnIp, w) 953 954 case NebulaMeta_HostUpdateNotificationAck: 955 // noop 956 } 957 } 958 959 func (lhh *LightHouseHandler) handleHostQuery(n *NebulaMeta, vpnIp iputil.VpnIp, addr *udp.Addr, w EncWriter) { 960 // Exit if we don't answer queries 961 if !lhh.lh.amLighthouse { 962 if lhh.l.Level >= logrus.DebugLevel { 963 lhh.l.Debugln("I don't answer queries, but received from: ", addr) 964 } 965 return 966 } 967 968 //TODO: we can DRY this further 969 reqVpnIp := n.Details.VpnIp 970 //TODO: Maybe instead of marshalling into n we marshal into a new `r` to not nuke our current request data 971 found, ln, err := lhh.lh.queryAndPrepMessage(iputil.VpnIp(n.Details.VpnIp), func(c *cache) (int, error) { 972 n = lhh.resetMeta() 973 n.Type = NebulaMeta_HostQueryReply 974 n.Details.VpnIp = reqVpnIp 975 976 lhh.coalesceAnswers(c, n) 977 978 return n.MarshalTo(lhh.pb) 979 }) 980 981 if !found { 982 return 983 } 984 985 if err != nil { 986 lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host query reply") 987 return 988 } 989 990 lhh.lh.metricTx(NebulaMeta_HostQueryReply, 1) 991 w.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, lhh.pb[:ln], lhh.nb, lhh.out[:0]) 992 993 // This signals the other side to punch some zero byte udp packets 994 found, ln, err = lhh.lh.queryAndPrepMessage(vpnIp, func(c *cache) (int, error) { 995 n = lhh.resetMeta() 996 n.Type = NebulaMeta_HostPunchNotification 997 n.Details.VpnIp = uint32(vpnIp) 998 999 lhh.coalesceAnswers(c, n) 1000 1001 return n.MarshalTo(lhh.pb) 1002 }) 1003 1004 if !found { 1005 return 1006 } 1007 1008 if err != nil { 1009 lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host was queried for") 1010 return 1011 } 1012 1013 lhh.lh.metricTx(NebulaMeta_HostPunchNotification, 1) 1014 w.SendMessageToVpnIp(header.LightHouse, 0, iputil.VpnIp(reqVpnIp), lhh.pb[:ln], lhh.nb, lhh.out[:0]) 1015 } 1016 1017 func (lhh *LightHouseHandler) coalesceAnswers(c *cache, n *NebulaMeta) { 1018 if c.v4 != nil { 1019 if c.v4.learned != nil { 1020 n.Details.Ip4AndPorts = append(n.Details.Ip4AndPorts, c.v4.learned) 1021 } 1022 if c.v4.reported != nil && len(c.v4.reported) > 0 { 1023 n.Details.Ip4AndPorts = append(n.Details.Ip4AndPorts, c.v4.reported...) 1024 } 1025 } 1026 1027 if c.v6 != nil { 1028 if c.v6.learned != nil { 1029 n.Details.Ip6AndPorts = append(n.Details.Ip6AndPorts, c.v6.learned) 1030 } 1031 if c.v6.reported != nil && len(c.v6.reported) > 0 { 1032 n.Details.Ip6AndPorts = append(n.Details.Ip6AndPorts, c.v6.reported...) 1033 } 1034 } 1035 1036 if c.relay != nil { 1037 n.Details.RelayVpnIp = append(n.Details.RelayVpnIp, c.relay.relay...) 1038 } 1039 } 1040 1041 func (lhh *LightHouseHandler) handleHostQueryReply(n *NebulaMeta, vpnIp iputil.VpnIp) { 1042 if !lhh.lh.IsLighthouseIP(vpnIp) { 1043 return 1044 } 1045 1046 lhh.lh.Lock() 1047 am := lhh.lh.unlockedGetRemoteList(iputil.VpnIp(n.Details.VpnIp)) 1048 am.Lock() 1049 lhh.lh.Unlock() 1050 1051 certVpnIp := iputil.VpnIp(n.Details.VpnIp) 1052 am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4) 1053 am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6) 1054 am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp) 1055 am.Unlock() 1056 1057 // Non-blocking attempt to trigger, skip if it would block 1058 select { 1059 case lhh.lh.handshakeTrigger <- iputil.VpnIp(n.Details.VpnIp): 1060 default: 1061 } 1062 } 1063 1064 func (lhh *LightHouseHandler) handleHostUpdateNotification(n *NebulaMeta, vpnIp iputil.VpnIp, w EncWriter) { 1065 if !lhh.lh.amLighthouse { 1066 if lhh.l.Level >= logrus.DebugLevel { 1067 lhh.l.Debugln("I am not a lighthouse, do not take host updates: ", vpnIp) 1068 } 1069 return 1070 } 1071 1072 //Simple check that the host sent this not someone else 1073 if n.Details.VpnIp != uint32(vpnIp) { 1074 if lhh.l.Level >= logrus.DebugLevel { 1075 lhh.l.WithField("vpnIp", vpnIp).WithField("answer", iputil.VpnIp(n.Details.VpnIp)).Debugln("Host sent invalid update") 1076 } 1077 return 1078 } 1079 1080 lhh.lh.Lock() 1081 am := lhh.lh.unlockedGetRemoteList(vpnIp) 1082 am.Lock() 1083 lhh.lh.Unlock() 1084 1085 certVpnIp := iputil.VpnIp(n.Details.VpnIp) 1086 am.unlockedSetV4(vpnIp, certVpnIp, n.Details.Ip4AndPorts, lhh.lh.unlockedShouldAddV4) 1087 am.unlockedSetV6(vpnIp, certVpnIp, n.Details.Ip6AndPorts, lhh.lh.unlockedShouldAddV6) 1088 am.unlockedSetRelay(vpnIp, certVpnIp, n.Details.RelayVpnIp) 1089 am.Unlock() 1090 1091 n = lhh.resetMeta() 1092 n.Type = NebulaMeta_HostUpdateNotificationAck 1093 n.Details.VpnIp = uint32(vpnIp) 1094 ln, err := n.MarshalTo(lhh.pb) 1095 1096 if err != nil { 1097 lhh.l.WithError(err).WithField("vpnIp", vpnIp).Error("Failed to marshal lighthouse host update ack") 1098 return 1099 } 1100 1101 lhh.lh.metricTx(NebulaMeta_HostUpdateNotificationAck, 1) 1102 w.SendMessageToVpnIp(header.LightHouse, 0, vpnIp, lhh.pb[:ln], lhh.nb, lhh.out[:0]) 1103 } 1104 1105 func (lhh *LightHouseHandler) handleHostPunchNotification(n *NebulaMeta, vpnIp iputil.VpnIp, w EncWriter) { 1106 if !lhh.lh.IsLighthouseIP(vpnIp) { 1107 return 1108 } 1109 1110 empty := []byte{0} 1111 punch := func(vpnPeer *udp.Addr) { 1112 if vpnPeer == nil { 1113 return 1114 } 1115 1116 go func() { 1117 time.Sleep(lhh.lh.punchy.GetDelay()) 1118 lhh.lh.metricHolepunchTx.Inc(1) 1119 lhh.lh.punchConn.WriteTo(empty, vpnPeer) 1120 }() 1121 1122 if lhh.l.Level >= logrus.DebugLevel { 1123 //TODO: lacking the ip we are actually punching on, old: l.Debugf("Punching %s on %d for %s", IntIp(a.Ip), a.Port, IntIp(n.Details.VpnIp)) 1124 lhh.l.Debugf("Punching on %d for %s", vpnPeer.Port, iputil.VpnIp(n.Details.VpnIp)) 1125 } 1126 } 1127 1128 for _, a := range n.Details.Ip4AndPorts { 1129 punch(NewUDPAddrFromLH4(a)) 1130 } 1131 1132 for _, a := range n.Details.Ip6AndPorts { 1133 punch(NewUDPAddrFromLH6(a)) 1134 } 1135 1136 // This sends a nebula test packet to the host trying to contact us. In the case 1137 // of a double nat or other difficult scenario, this may help establish 1138 // a tunnel. 1139 if lhh.lh.punchy.GetRespond() { 1140 queryVpnIp := iputil.VpnIp(n.Details.VpnIp) 1141 go func() { 1142 time.Sleep(lhh.lh.punchy.GetRespondDelay()) 1143 if lhh.l.Level >= logrus.DebugLevel { 1144 lhh.l.Debugf("Sending a nebula test packet to vpn ip %s", queryVpnIp) 1145 } 1146 //NOTE: we have to allocate a new output buffer here since we are spawning a new goroutine 1147 // for each punchBack packet. We should move this into a timerwheel or a single goroutine 1148 // managed by a channel. 1149 w.SendMessageToVpnIp(header.Test, header.TestRequest, queryVpnIp, []byte(""), make([]byte, 12, 12), make([]byte, mtu)) 1150 }() 1151 } 1152 } 1153 1154 // ipMaskContains checks if testIp is contained by ip after applying a cidr 1155 // zeros is 32 - bits from net.IPMask.Size() 1156 func ipMaskContains(ip iputil.VpnIp, zeros iputil.VpnIp, testIp iputil.VpnIp) bool { 1157 return (testIp^ip)>>zeros == 0 1158 }