github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/libnetwork/drivers/overlay/peerdb.go (about) 1 package overlay 2 3 import ( 4 "context" 5 "fmt" 6 "net" 7 "sync" 8 "syscall" 9 10 "github.com/docker/libnetwork/internal/caller" 11 "github.com/docker/libnetwork/internal/setmatrix" 12 "github.com/docker/libnetwork/osl" 13 "github.com/sirupsen/logrus" 14 ) 15 16 const ovPeerTable = "overlay_peer_table" 17 18 type peerKey struct { 19 peerIP net.IP 20 peerMac net.HardwareAddr 21 } 22 23 type peerEntry struct { 24 eid string 25 vtep net.IP 26 peerIPMask net.IPMask 27 isLocal bool 28 } 29 30 func (p *peerEntry) MarshalDB() peerEntryDB { 31 ones, bits := p.peerIPMask.Size() 32 return peerEntryDB{ 33 eid: p.eid, 34 vtep: p.vtep.String(), 35 peerIPMaskOnes: ones, 36 peerIPMaskBits: bits, 37 isLocal: p.isLocal, 38 } 39 } 40 41 // This the structure saved into the set (SetMatrix), due to the implementation of it 42 // the value inserted in the set has to be Hashable so the []byte had to be converted into 43 // strings 44 type peerEntryDB struct { 45 eid string 46 vtep string 47 peerIPMaskOnes int 48 peerIPMaskBits int 49 isLocal bool 50 } 51 52 func (p *peerEntryDB) UnMarshalDB() peerEntry { 53 return peerEntry{ 54 eid: p.eid, 55 vtep: net.ParseIP(p.vtep), 56 peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), 57 isLocal: p.isLocal, 58 } 59 } 60 61 type peerMap struct { 62 // set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks 63 mp setmatrix.SetMatrix 64 sync.Mutex 65 } 66 67 type peerNetworkMap struct { 68 // map with key peerKey 69 mp map[string]*peerMap 70 sync.Mutex 71 } 72 73 func (pKey peerKey) String() string { 74 return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac) 75 } 76 77 func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { 78 ipB, err := state.Token(true, nil) 79 if err != nil { 80 return err 81 } 82 83 pKey.peerIP = net.ParseIP(string(ipB)) 84 85 macB, err := state.Token(true, nil) 86 if err != nil { 87 return err 88 } 89 90 pKey.peerMac, err = net.ParseMAC(string(macB)) 91 return err 92 } 93 94 func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { 95 d.peerDb.Lock() 96 nids := []string{} 97 for nid := range d.peerDb.mp { 98 nids = append(nids, nid) 99 } 100 d.peerDb.Unlock() 101 102 for _, nid := range nids { 103 d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 104 return f(nid, pKey, pEntry) 105 }) 106 } 107 return nil 108 } 109 110 func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error { 111 d.peerDb.Lock() 112 pMap, ok := d.peerDb.mp[nid] 113 d.peerDb.Unlock() 114 115 if !ok { 116 return nil 117 } 118 119 mp := map[string]peerEntry{} 120 pMap.Lock() 121 for _, pKeyStr := range pMap.mp.Keys() { 122 entryDBList, ok := pMap.mp.Get(pKeyStr) 123 if ok { 124 peerEntryDB := entryDBList[0].(peerEntryDB) 125 mp[pKeyStr] = peerEntryDB.UnMarshalDB() 126 } 127 } 128 pMap.Unlock() 129 130 for pKeyStr, pEntry := range mp { 131 var pKey peerKey 132 if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { 133 logrus.Warnf("Peer key scan on network %s failed: %v", nid, err) 134 } 135 if f(&pKey, &pEntry) { 136 return nil 137 } 138 } 139 140 return nil 141 } 142 143 func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { 144 var pKeyMatched *peerKey 145 var pEntryMatched *peerEntry 146 err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 147 if pKey.peerIP.Equal(peerIP) { 148 pKeyMatched = pKey 149 pEntryMatched = pEntry 150 return true 151 } 152 153 return false 154 }) 155 156 if err != nil { 157 return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) 158 } 159 160 if pKeyMatched == nil || pEntryMatched == nil { 161 return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) 162 } 163 164 return pKeyMatched, pEntryMatched, nil 165 } 166 167 func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 168 peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 169 170 d.peerDb.Lock() 171 pMap, ok := d.peerDb.mp[nid] 172 if !ok { 173 d.peerDb.mp[nid] = &peerMap{ 174 mp: setmatrix.NewSetMatrix(), 175 } 176 177 pMap = d.peerDb.mp[nid] 178 } 179 d.peerDb.Unlock() 180 181 pKey := peerKey{ 182 peerIP: peerIP, 183 peerMac: peerMac, 184 } 185 186 pEntry := peerEntry{ 187 eid: eid, 188 vtep: vtep, 189 peerIPMask: peerIPMask, 190 isLocal: isLocal, 191 } 192 193 pMap.Lock() 194 defer pMap.Unlock() 195 b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) 196 if i != 1 { 197 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 198 s, _ := pMap.mp.String(pKey.String()) 199 logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 200 } 201 return b, i 202 } 203 204 func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 205 peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 206 207 d.peerDb.Lock() 208 pMap, ok := d.peerDb.mp[nid] 209 if !ok { 210 d.peerDb.Unlock() 211 return false, 0 212 } 213 d.peerDb.Unlock() 214 215 pKey := peerKey{ 216 peerIP: peerIP, 217 peerMac: peerMac, 218 } 219 220 pEntry := peerEntry{ 221 eid: eid, 222 vtep: vtep, 223 peerIPMask: peerIPMask, 224 isLocal: isLocal, 225 } 226 227 pMap.Lock() 228 defer pMap.Unlock() 229 b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) 230 if i != 0 { 231 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 232 s, _ := pMap.mp.String(pKey.String()) 233 logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 234 } 235 return b, i 236 } 237 238 // The overlay uses a lazy initialization approach, this means that when a network is created 239 // and the driver registered the overlay does not allocate resources till the moment that a 240 // sandbox is actually created. 241 // At the moment of this call, that happens when a sandbox is initialized, is possible that 242 // networkDB has already delivered some events of peers already available on remote nodes, 243 // these peers are saved into the peerDB and this function is used to properly configure 244 // the network sandbox with all those peers that got previously notified. 245 // Note also that this method sends a single message on the channel and the go routine on the 246 // other side, will atomically loop on the whole table of peers and will program their state 247 // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that 248 // new peerAdd or peerDelete gets reordered during the sandbox init. 249 func (d *driver) initSandboxPeerDB(nid string) { 250 d.peerInit(nid) 251 } 252 253 type peerOperationType int32 254 255 const ( 256 peerOperationINIT peerOperationType = iota 257 peerOperationADD 258 peerOperationDELETE 259 peerOperationFLUSH 260 ) 261 262 type peerOperation struct { 263 opType peerOperationType 264 networkID string 265 endpointID string 266 peerIP net.IP 267 peerIPMask net.IPMask 268 peerMac net.HardwareAddr 269 vtepIP net.IP 270 l2Miss bool 271 l3Miss bool 272 localPeer bool 273 callerName string 274 } 275 276 func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { 277 var err error 278 for { 279 select { 280 case <-ctx.Done(): 281 return 282 case op := <-ch: 283 switch op.opType { 284 case peerOperationINIT: 285 err = d.peerInitOp(op.networkID) 286 case peerOperationADD: 287 err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer) 288 case peerOperationDELETE: 289 err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) 290 case peerOperationFLUSH: 291 err = d.peerFlushOp(op.networkID) 292 } 293 if err != nil { 294 logrus.Warnf("Peer operation failed:%s op:%v", err, op) 295 } 296 } 297 } 298 } 299 300 func (d *driver) peerInit(nid string) { 301 callerName := caller.Name(1) 302 d.peerOpCh <- &peerOperation{ 303 opType: peerOperationINIT, 304 networkID: nid, 305 callerName: callerName, 306 } 307 } 308 309 func (d *driver) peerInitOp(nid string) error { 310 return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 311 // Local entries do not need to be added 312 if pEntry.isLocal { 313 return false 314 } 315 316 d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal) 317 // return false to loop on all entries 318 return false 319 }) 320 } 321 322 func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 323 peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { 324 d.peerOpCh <- &peerOperation{ 325 opType: peerOperationADD, 326 networkID: nid, 327 endpointID: eid, 328 peerIP: peerIP, 329 peerIPMask: peerIPMask, 330 peerMac: peerMac, 331 vtepIP: vtep, 332 l2Miss: l2Miss, 333 l3Miss: l3Miss, 334 localPeer: localPeer, 335 callerName: caller.Name(1), 336 } 337 } 338 339 func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 340 peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { 341 342 if err := validateID(nid, eid); err != nil { 343 return err 344 } 345 346 var dbEntries int 347 var inserted bool 348 if updateDB { 349 inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 350 if !inserted { 351 logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 352 nid, eid, peerIP, peerMac, localPeer, vtep) 353 } 354 } 355 356 // Local peers do not need any further configuration 357 if localPeer { 358 return nil 359 } 360 361 n := d.network(nid) 362 if n == nil { 363 return nil 364 } 365 366 sbox := n.sandbox() 367 if sbox == nil { 368 // We are hitting this case for all the events that are arriving before that the sandbox 369 // is being created. The peer got already added into the database and the sanbox init will 370 // call the peerDbUpdateSandbox that will configure all these peers from the database 371 return nil 372 } 373 374 IP := &net.IPNet{ 375 IP: peerIP, 376 Mask: peerIPMask, 377 } 378 379 s := n.getSubnetforIP(IP) 380 if s == nil { 381 return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id) 382 } 383 384 if err := n.obtainVxlanID(s); err != nil { 385 return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err) 386 } 387 388 if err := n.joinSandbox(s, false, false); err != nil { 389 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) 390 } 391 392 if err := d.checkEncryption(nid, vtep, false, true); err != nil { 393 logrus.Warn(err) 394 } 395 396 // Add neighbor entry for the peer IP 397 if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { 398 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { 399 // We are in the transient case so only the first configuration is programmed into the kernel 400 // Upon deletion if the active configuration is deleted the next one from the database will be restored 401 // Note we are skipping also the next configuration 402 return nil 403 } 404 return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 405 } 406 407 // Add fdb entry to the bridge for the peer mac 408 if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName), 409 sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil { 410 return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 411 } 412 413 return nil 414 } 415 416 func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 417 peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { 418 d.peerOpCh <- &peerOperation{ 419 opType: peerOperationDELETE, 420 networkID: nid, 421 endpointID: eid, 422 peerIP: peerIP, 423 peerIPMask: peerIPMask, 424 peerMac: peerMac, 425 vtepIP: vtep, 426 callerName: caller.Name(1), 427 localPeer: localPeer, 428 } 429 } 430 431 func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 432 peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { 433 434 if err := validateID(nid, eid); err != nil { 435 return err 436 } 437 438 deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 439 if !deleted { 440 logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 441 nid, eid, peerIP, peerMac, localPeer, vtep) 442 } 443 444 n := d.network(nid) 445 if n == nil { 446 return nil 447 } 448 449 sbox := n.sandbox() 450 if sbox == nil { 451 return nil 452 } 453 454 if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil { 455 logrus.Warn(err) 456 } 457 458 // Local peers do not have any local configuration to delete 459 if !localPeer { 460 // Remove fdb entry to the bridge for the peer mac 461 if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil { 462 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { 463 // We fall in here if there is a transient state and if the neighbor that is being deleted 464 // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping) 465 return nil 466 } 467 return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 468 } 469 470 // Delete neighbor entry for the peer IP 471 if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { 472 return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 473 } 474 } 475 476 if dbEntries == 0 { 477 return nil 478 } 479 480 // If there is still an entry into the database and the deletion went through without errors means that there is now no 481 // configuration active in the kernel. 482 // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one 483 peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) 484 if err != nil { 485 logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) 486 return err 487 } 488 return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) 489 } 490 491 func (d *driver) peerFlush(nid string) { 492 d.peerOpCh <- &peerOperation{ 493 opType: peerOperationFLUSH, 494 networkID: nid, 495 callerName: caller.Name(1), 496 } 497 } 498 499 func (d *driver) peerFlushOp(nid string) error { 500 d.peerDb.Lock() 501 defer d.peerDb.Unlock() 502 _, ok := d.peerDb.mp[nid] 503 if !ok { 504 return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) 505 } 506 delete(d.peerDb.mp, nid) 507 return nil 508 } 509 510 func (d *driver) pushLocalDb() { 511 d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool { 512 if pEntry.isLocal { 513 d.pushLocalEndpointEvent("join", nid, pEntry.eid) 514 } 515 return false 516 }) 517 } 518 519 func (d *driver) peerDBUpdateSelf() { 520 d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { 521 if pEntry.isLocal { 522 pEntry.vtep = net.ParseIP(d.advertiseAddress) 523 } 524 return false 525 }) 526 }