github.com/adityamillind98/moby@v23.0.0-rc.4+incompatible/libnetwork/drivers/overlay/peerdb.go (about) 1 //go:build linux 2 // +build linux 3 4 package overlay 5 6 import ( 7 "context" 8 "fmt" 9 "net" 10 "sync" 11 "syscall" 12 13 "github.com/docker/docker/libnetwork/internal/caller" 14 "github.com/docker/docker/libnetwork/internal/setmatrix" 15 "github.com/docker/docker/libnetwork/osl" 16 "github.com/sirupsen/logrus" 17 ) 18 19 const ovPeerTable = "overlay_peer_table" 20 21 type peerKey struct { 22 peerIP net.IP 23 peerMac net.HardwareAddr 24 } 25 26 type peerEntry struct { 27 eid string 28 vtep net.IP 29 peerIPMask net.IPMask 30 isLocal bool 31 } 32 33 func (p *peerEntry) MarshalDB() peerEntryDB { 34 ones, bits := p.peerIPMask.Size() 35 return peerEntryDB{ 36 eid: p.eid, 37 vtep: p.vtep.String(), 38 peerIPMaskOnes: ones, 39 peerIPMaskBits: bits, 40 isLocal: p.isLocal, 41 } 42 } 43 44 // This the structure saved into the set (SetMatrix), due to the implementation of it 45 // the value inserted in the set has to be Hashable so the []byte had to be converted into 46 // strings 47 type peerEntryDB struct { 48 eid string 49 vtep string 50 peerIPMaskOnes int 51 peerIPMaskBits int 52 isLocal bool 53 } 54 55 func (p *peerEntryDB) UnMarshalDB() peerEntry { 56 return peerEntry{ 57 eid: p.eid, 58 vtep: net.ParseIP(p.vtep), 59 peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), 60 isLocal: p.isLocal, 61 } 62 } 63 64 type peerMap struct { 65 // set of peerEntry, note they have to be objects and not pointers to maintain the proper equality checks 66 mp setmatrix.SetMatrix 67 sync.Mutex 68 } 69 70 type peerNetworkMap struct { 71 // map with key peerKey 72 mp map[string]*peerMap 73 sync.Mutex 74 } 75 76 func (pKey peerKey) String() string { 77 return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac) 78 } 79 80 func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { 81 ipB, err := state.Token(true, nil) 82 if err != nil { 83 return err 84 } 85 86 pKey.peerIP = net.ParseIP(string(ipB)) 87 88 macB, err := state.Token(true, nil) 89 if err != nil { 90 return err 91 } 92 93 pKey.peerMac, err = net.ParseMAC(string(macB)) 94 return err 95 } 96 97 func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { 98 d.peerDb.Lock() 99 nids := []string{} 100 for nid := range d.peerDb.mp { 101 nids = append(nids, nid) 102 } 103 d.peerDb.Unlock() 104 105 for _, nid := range nids { 106 d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 107 return f(nid, pKey, pEntry) 108 }) 109 } 110 return nil 111 } 112 113 func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error { 114 d.peerDb.Lock() 115 pMap, ok := d.peerDb.mp[nid] 116 d.peerDb.Unlock() 117 118 if !ok { 119 return nil 120 } 121 122 mp := map[string]peerEntry{} 123 pMap.Lock() 124 for _, pKeyStr := range pMap.mp.Keys() { 125 entryDBList, ok := pMap.mp.Get(pKeyStr) 126 if ok { 127 peerEntryDB := entryDBList[0].(peerEntryDB) 128 mp[pKeyStr] = peerEntryDB.UnMarshalDB() 129 } 130 } 131 pMap.Unlock() 132 133 for pKeyStr, pEntry := range mp { 134 var pKey peerKey 135 pEntry := pEntry 136 if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { 137 logrus.Warnf("Peer key scan on network %s failed: %v", nid, err) 138 } 139 if f(&pKey, &pEntry) { 140 return nil 141 } 142 } 143 144 return nil 145 } 146 147 func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { 148 var pKeyMatched *peerKey 149 var pEntryMatched *peerEntry 150 err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 151 if pKey.peerIP.Equal(peerIP) { 152 pKeyMatched = pKey 153 pEntryMatched = pEntry 154 return true 155 } 156 157 return false 158 }) 159 160 if err != nil { 161 return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) 162 } 163 164 if pKeyMatched == nil || pEntryMatched == nil { 165 return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) 166 } 167 168 return pKeyMatched, pEntryMatched, nil 169 } 170 171 func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 172 d.peerDb.Lock() 173 pMap, ok := d.peerDb.mp[nid] 174 if !ok { 175 d.peerDb.mp[nid] = &peerMap{ 176 mp: setmatrix.NewSetMatrix(), 177 } 178 179 pMap = d.peerDb.mp[nid] 180 } 181 d.peerDb.Unlock() 182 183 pKey := peerKey{ 184 peerIP: peerIP, 185 peerMac: peerMac, 186 } 187 188 pEntry := peerEntry{ 189 eid: eid, 190 vtep: vtep, 191 peerIPMask: peerIPMask, 192 isLocal: isLocal, 193 } 194 195 pMap.Lock() 196 defer pMap.Unlock() 197 b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) 198 if i != 1 { 199 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 200 s, _ := pMap.mp.String(pKey.String()) 201 logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 202 } 203 return b, i 204 } 205 206 func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 207 d.peerDb.Lock() 208 pMap, ok := d.peerDb.mp[nid] 209 if !ok { 210 d.peerDb.Unlock() 211 return false, 0 212 } 213 d.peerDb.Unlock() 214 215 pKey := peerKey{ 216 peerIP: peerIP, 217 peerMac: peerMac, 218 } 219 220 pEntry := peerEntry{ 221 eid: eid, 222 vtep: vtep, 223 peerIPMask: peerIPMask, 224 isLocal: isLocal, 225 } 226 227 pMap.Lock() 228 defer pMap.Unlock() 229 b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) 230 if i != 0 { 231 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 232 s, _ := pMap.mp.String(pKey.String()) 233 logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 234 } 235 return b, i 236 } 237 238 // The overlay uses a lazy initialization approach, this means that when a network is created 239 // and the driver registered the overlay does not allocate resources till the moment that a 240 // sandbox is actually created. 241 // At the moment of this call, that happens when a sandbox is initialized, is possible that 242 // networkDB has already delivered some events of peers already available on remote nodes, 243 // these peers are saved into the peerDB and this function is used to properly configure 244 // the network sandbox with all those peers that got previously notified. 245 // Note also that this method sends a single message on the channel and the go routine on the 246 // other side, will atomically loop on the whole table of peers and will program their state 247 // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that 248 // new peerAdd or peerDelete gets reordered during the sandbox init. 249 func (d *driver) initSandboxPeerDB(nid string) { 250 d.peerInit(nid) 251 } 252 253 type peerOperationType int32 254 255 const ( 256 peerOperationINIT peerOperationType = iota 257 peerOperationADD 258 peerOperationDELETE 259 peerOperationFLUSH 260 ) 261 262 type peerOperation struct { 263 opType peerOperationType 264 networkID string 265 endpointID string 266 peerIP net.IP 267 peerIPMask net.IPMask 268 peerMac net.HardwareAddr 269 vtepIP net.IP 270 l2Miss bool 271 l3Miss bool 272 localPeer bool 273 callerName string 274 } 275 276 func (d *driver) peerOpRoutine(ctx context.Context, ch chan *peerOperation) { 277 var err error 278 for { 279 select { 280 case <-ctx.Done(): 281 return 282 case op := <-ch: 283 switch op.opType { 284 case peerOperationINIT: 285 err = d.peerInitOp(op.networkID) 286 case peerOperationADD: 287 err = d.peerAddOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.l2Miss, op.l3Miss, true, op.localPeer) 288 case peerOperationDELETE: 289 err = d.peerDeleteOp(op.networkID, op.endpointID, op.peerIP, op.peerIPMask, op.peerMac, op.vtepIP, op.localPeer) 290 case peerOperationFLUSH: 291 err = d.peerFlushOp(op.networkID) 292 } 293 if err != nil { 294 logrus.Warnf("Peer operation failed:%s op:%v", err, op) 295 } 296 } 297 } 298 } 299 300 func (d *driver) peerInit(nid string) { 301 callerName := caller.Name(1) 302 d.peerOpCh <- &peerOperation{ 303 opType: peerOperationINIT, 304 networkID: nid, 305 callerName: callerName, 306 } 307 } 308 309 func (d *driver) peerInitOp(nid string) error { 310 return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 311 // Local entries do not need to be added 312 if pEntry.isLocal { 313 return false 314 } 315 316 d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal) 317 // return false to loop on all entries 318 return false 319 }) 320 } 321 322 func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 323 peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { 324 d.peerOpCh <- &peerOperation{ 325 opType: peerOperationADD, 326 networkID: nid, 327 endpointID: eid, 328 peerIP: peerIP, 329 peerIPMask: peerIPMask, 330 peerMac: peerMac, 331 vtepIP: vtep, 332 l2Miss: l2Miss, 333 l3Miss: l3Miss, 334 localPeer: localPeer, 335 callerName: caller.Name(1), 336 } 337 } 338 339 func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { 340 if err := validateID(nid, eid); err != nil { 341 return err 342 } 343 344 var dbEntries int 345 var inserted bool 346 if updateDB { 347 inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 348 if !inserted { 349 logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 350 nid, eid, peerIP, peerMac, localPeer, vtep) 351 } 352 } 353 354 // Local peers do not need any further configuration 355 if localPeer { 356 return nil 357 } 358 359 n := d.network(nid) 360 if n == nil { 361 return nil 362 } 363 364 sbox := n.sandbox() 365 if sbox == nil { 366 // We are hitting this case for all the events that are arriving before that the sandbox 367 // is being created. The peer got already added into the database and the sanbox init will 368 // call the peerDbUpdateSandbox that will configure all these peers from the database 369 return nil 370 } 371 372 IP := &net.IPNet{ 373 IP: peerIP, 374 Mask: peerIPMask, 375 } 376 377 s := n.getSubnetforIP(IP) 378 if s == nil { 379 return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id) 380 } 381 382 if err := n.obtainVxlanID(s); err != nil { 383 return fmt.Errorf("couldn't get vxlan id for %q: %v", s.subnetIP.String(), err) 384 } 385 386 if err := n.joinSandbox(s, false, false); err != nil { 387 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) 388 } 389 390 if err := d.checkEncryption(nid, vtep, n.vxlanID(s), false, true); err != nil { 391 logrus.Warn(err) 392 } 393 394 // Add neighbor entry for the peer IP 395 if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { 396 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { 397 // We are in the transient case so only the first configuration is programmed into the kernel 398 // Upon deletion if the active configuration is deleted the next one from the database will be restored 399 // Note we are skipping also the next configuration 400 return nil 401 } 402 return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 403 } 404 405 // Add fdb entry to the bridge for the peer mac 406 if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName), 407 sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil { 408 return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 409 } 410 411 return nil 412 } 413 414 func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 415 peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { 416 d.peerOpCh <- &peerOperation{ 417 opType: peerOperationDELETE, 418 networkID: nid, 419 endpointID: eid, 420 peerIP: peerIP, 421 peerIPMask: peerIPMask, 422 peerMac: peerMac, 423 vtepIP: vtep, 424 callerName: caller.Name(1), 425 localPeer: localPeer, 426 } 427 } 428 429 func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { 430 if err := validateID(nid, eid); err != nil { 431 return err 432 } 433 434 deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 435 if !deleted { 436 logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 437 nid, eid, peerIP, peerMac, localPeer, vtep) 438 } 439 440 n := d.network(nid) 441 if n == nil { 442 return nil 443 } 444 445 sbox := n.sandbox() 446 if sbox == nil { 447 return nil 448 } 449 450 if err := d.checkEncryption(nid, vtep, 0, localPeer, false); err != nil { 451 logrus.Warn(err) 452 } 453 454 // Local peers do not have any local configuration to delete 455 if !localPeer { 456 // Remove fdb entry to the bridge for the peer mac 457 if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil { 458 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { 459 // We fall in here if there is a transient state and if the neighbor that is being deleted 460 // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping) 461 return nil 462 } 463 return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 464 } 465 466 // Delete neighbor entry for the peer IP 467 if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { 468 return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 469 } 470 } 471 472 if dbEntries == 0 { 473 return nil 474 } 475 476 // If there is still an entry into the database and the deletion went through without errors means that there is now no 477 // configuration active in the kernel. 478 // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one 479 peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) 480 if err != nil { 481 logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) 482 return err 483 } 484 return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) 485 } 486 487 func (d *driver) peerFlush(nid string) { 488 d.peerOpCh <- &peerOperation{ 489 opType: peerOperationFLUSH, 490 networkID: nid, 491 callerName: caller.Name(1), 492 } 493 } 494 495 func (d *driver) peerFlushOp(nid string) error { 496 d.peerDb.Lock() 497 defer d.peerDb.Unlock() 498 _, ok := d.peerDb.mp[nid] 499 if !ok { 500 return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) 501 } 502 delete(d.peerDb.mp, nid) 503 return nil 504 } 505 506 func (d *driver) pushLocalDb() { 507 d.peerDbWalk(func(nid string, pKey *peerKey, pEntry *peerEntry) bool { 508 if pEntry.isLocal { 509 d.pushLocalEndpointEvent("join", nid, pEntry.eid) 510 } 511 return false 512 }) 513 } 514 515 func (d *driver) peerDBUpdateSelf() { 516 d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { 517 if pEntry.isLocal { 518 pEntry.vtep = net.ParseIP(d.advertiseAddress) 519 } 520 return false 521 }) 522 }