github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/drivers/overlay/peerdb.go (about) 1 //go:build linux 2 3 package overlay 4 5 import ( 6 "context" 7 "fmt" 8 "net" 9 "sync" 10 "syscall" 11 12 "github.com/containerd/log" 13 "github.com/Prakhar-Agarwal-byte/moby/libnetwork/internal/setmatrix" 14 "github.com/Prakhar-Agarwal-byte/moby/libnetwork/osl" 15 ) 16 17 const ovPeerTable = "overlay_peer_table" 18 19 type peerKey struct { 20 peerIP net.IP 21 peerMac net.HardwareAddr 22 } 23 24 type peerEntry struct { 25 eid string 26 vtep net.IP 27 peerIPMask net.IPMask 28 isLocal bool 29 } 30 31 func (p *peerEntry) MarshalDB() peerEntryDB { 32 ones, bits := p.peerIPMask.Size() 33 return peerEntryDB{ 34 eid: p.eid, 35 vtep: p.vtep.String(), 36 peerIPMaskOnes: ones, 37 peerIPMaskBits: bits, 38 isLocal: p.isLocal, 39 } 40 } 41 42 // This the structure saved into the set (SetMatrix), due to the implementation of it 43 // the value inserted in the set has to be Hashable so the []byte had to be converted into 44 // strings 45 type peerEntryDB struct { 46 eid string 47 vtep string 48 peerIPMaskOnes int 49 peerIPMaskBits int 50 isLocal bool 51 } 52 53 func (p *peerEntryDB) UnMarshalDB() peerEntry { 54 return peerEntry{ 55 eid: p.eid, 56 vtep: net.ParseIP(p.vtep), 57 peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), 58 isLocal: p.isLocal, 59 } 60 } 61 62 type peerMap struct { 63 // set of peerEntry, note the values have to be objects and not pointers to maintain the proper equality checks 64 mp setmatrix.SetMatrix[peerEntryDB] 65 sync.Mutex 66 } 67 68 type peerNetworkMap struct { 69 // map with key peerKey 70 mp map[string]*peerMap 71 sync.Mutex 72 } 73 74 func (pKey peerKey) String() string { 75 return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac) 76 } 77 78 func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { 79 ipB, err := state.Token(true, nil) 80 if err != nil { 81 return err 82 } 83 84 pKey.peerIP = net.ParseIP(string(ipB)) 85 86 macB, err := state.Token(true, nil) 87 if err != nil { 88 return err 89 } 90 91 pKey.peerMac, err = net.ParseMAC(string(macB)) 92 return err 93 } 94 95 func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { 96 d.peerDb.Lock() 97 nids := []string{} 98 for nid := range d.peerDb.mp { 99 nids = append(nids, nid) 100 } 101 d.peerDb.Unlock() 102 103 for _, nid := range nids { 104 d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 105 return f(nid, pKey, pEntry) 106 }) 107 } 108 return nil 109 } 110 111 func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error { 112 d.peerDb.Lock() 113 pMap, ok := d.peerDb.mp[nid] 114 d.peerDb.Unlock() 115 116 if !ok { 117 return nil 118 } 119 120 mp := map[string]peerEntry{} 121 pMap.Lock() 122 for _, pKeyStr := range pMap.mp.Keys() { 123 entryDBList, ok := pMap.mp.Get(pKeyStr) 124 if ok { 125 peerEntryDB := entryDBList[0] 126 mp[pKeyStr] = peerEntryDB.UnMarshalDB() 127 } 128 } 129 pMap.Unlock() 130 131 for pKeyStr, pEntry := range mp { 132 var pKey peerKey 133 pEntry := pEntry 134 if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { 135 log.G(context.TODO()).Warnf("Peer key scan on network %s failed: %v", nid, err) 136 } 137 if f(&pKey, &pEntry) { 138 return nil 139 } 140 } 141 142 return nil 143 } 144 145 func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { 146 var pKeyMatched *peerKey 147 var pEntryMatched *peerEntry 148 err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 149 if pKey.peerIP.Equal(peerIP) { 150 pKeyMatched = pKey 151 pEntryMatched = pEntry 152 return true 153 } 154 155 return false 156 }) 157 if err != nil { 158 return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) 159 } 160 161 if pKeyMatched == nil || pEntryMatched == nil { 162 return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) 163 } 164 165 return pKeyMatched, pEntryMatched, nil 166 } 167 168 func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 169 d.peerDb.Lock() 170 pMap, ok := d.peerDb.mp[nid] 171 if !ok { 172 pMap = &peerMap{} 173 d.peerDb.mp[nid] = pMap 174 } 175 d.peerDb.Unlock() 176 177 pKey := peerKey{ 178 peerIP: peerIP, 179 peerMac: peerMac, 180 } 181 182 pEntry := peerEntry{ 183 eid: eid, 184 vtep: vtep, 185 peerIPMask: peerIPMask, 186 isLocal: isLocal, 187 } 188 189 pMap.Lock() 190 defer pMap.Unlock() 191 b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) 192 if i != 1 { 193 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 194 s, _ := pMap.mp.String(pKey.String()) 195 log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 196 } 197 return b, i 198 } 199 200 func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 201 d.peerDb.Lock() 202 pMap, ok := d.peerDb.mp[nid] 203 if !ok { 204 d.peerDb.Unlock() 205 return false, 0 206 } 207 d.peerDb.Unlock() 208 209 pKey := peerKey{ 210 peerIP: peerIP, 211 peerMac: peerMac, 212 } 213 214 pEntry := peerEntry{ 215 eid: eid, 216 vtep: vtep, 217 peerIPMask: peerIPMask, 218 isLocal: isLocal, 219 } 220 221 pMap.Lock() 222 defer pMap.Unlock() 223 b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) 224 if i != 0 { 225 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 226 s, _ := pMap.mp.String(pKey.String()) 227 log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 228 } 229 return b, i 230 } 231 232 // The overlay uses a lazy initialization approach, this means that when a network is created 233 // and the driver registered the overlay does not allocate resources till the moment that a 234 // sandbox is actually created. 235 // At the moment of this call, that happens when a sandbox is initialized, is possible that 236 // networkDB has already delivered some events of peers already available on remote nodes, 237 // these peers are saved into the peerDB and this function is used to properly configure 238 // the network sandbox with all those peers that got previously notified. 239 // Note also that this method sends a single message on the channel and the go routine on the 240 // other side, will atomically loop on the whole table of peers and will program their state 241 // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that 242 // new peerAdd or peerDelete gets reordered during the sandbox init. 243 func (d *driver) initSandboxPeerDB(nid string) { 244 d.peerOpMu.Lock() 245 defer d.peerOpMu.Unlock() 246 if err := d.peerInitOp(nid); err != nil { 247 log.G(context.TODO()).WithError(err).Warn("Peer init operation failed") 248 } 249 } 250 251 func (d *driver) peerInitOp(nid string) error { 252 return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 253 // Local entries do not need to be added 254 if pEntry.isLocal { 255 return false 256 } 257 258 d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal) 259 // return false to loop on all entries 260 return false 261 }) 262 } 263 264 func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { 265 d.peerOpMu.Lock() 266 defer d.peerOpMu.Unlock() 267 err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer) 268 if err != nil { 269 log.G(context.TODO()).WithError(err).Warn("Peer add operation failed") 270 } 271 } 272 273 func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { 274 if err := validateID(nid, eid); err != nil { 275 return err 276 } 277 278 var dbEntries int 279 var inserted bool 280 if updateDB { 281 inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 282 if !inserted { 283 log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 284 nid, eid, peerIP, peerMac, localPeer, vtep) 285 } 286 } 287 288 // Local peers do not need any further configuration 289 if localPeer { 290 return nil 291 } 292 293 n := d.network(nid) 294 if n == nil { 295 return nil 296 } 297 298 sbox := n.sandbox() 299 if sbox == nil { 300 // We are hitting this case for all the events that are arriving before that the sandbox 301 // is being created. The peer got already added into the database and the sanbox init will 302 // call the peerDbUpdateSandbox that will configure all these peers from the database 303 return nil 304 } 305 306 IP := &net.IPNet{ 307 IP: peerIP, 308 Mask: peerIPMask, 309 } 310 311 s := n.getSubnetforIP(IP) 312 if s == nil { 313 return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id) 314 } 315 316 if err := n.joinSandbox(s, false); err != nil { 317 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) 318 } 319 320 if err := d.checkEncryption(nid, vtep, false, true); err != nil { 321 log.G(context.TODO()).Warn(err) 322 } 323 324 // Add neighbor entry for the peer IP 325 if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, osl.WithLinkName(s.vxlanName)); err != nil { 326 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { 327 // We are in the transient case so only the first configuration is programmed into the kernel 328 // Upon deletion if the active configuration is deleted the next one from the database will be restored 329 // Note we are skipping also the next configuration 330 return nil 331 } 332 return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 333 } 334 335 // Add fdb entry to the bridge for the peer mac 336 if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil { 337 return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 338 } 339 340 return nil 341 } 342 343 func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { 344 d.peerOpMu.Lock() 345 defer d.peerOpMu.Unlock() 346 err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 347 if err != nil { 348 log.G(context.TODO()).WithError(err).Warn("Peer delete operation failed") 349 } 350 } 351 352 func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { 353 if err := validateID(nid, eid); err != nil { 354 return err 355 } 356 357 deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 358 if !deleted { 359 log.G(context.TODO()).Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 360 nid, eid, peerIP, peerMac, localPeer, vtep) 361 } 362 363 n := d.network(nid) 364 if n == nil { 365 return nil 366 } 367 368 sbox := n.sandbox() 369 if sbox == nil { 370 return nil 371 } 372 373 if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil { 374 log.G(context.TODO()).Warn(err) 375 } 376 377 // Local peers do not have any local configuration to delete 378 if !localPeer { 379 // Remove fdb entry to the bridge for the peer mac 380 if err := sbox.DeleteNeighbor(vtep, peerMac); err != nil { 381 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { 382 // We fall in here if there is a transient state and if the neighbor that is being deleted 383 // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping) 384 return nil 385 } 386 return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 387 } 388 389 // Delete neighbor entry for the peer IP 390 if err := sbox.DeleteNeighbor(peerIP, peerMac); err != nil { 391 return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 392 } 393 } 394 395 if dbEntries == 0 { 396 return nil 397 } 398 399 // If there is still an entry into the database and the deletion went through without errors means that there is now no 400 // configuration active in the kernel. 401 // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one 402 peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) 403 if err != nil { 404 log.G(context.TODO()).Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) 405 return err 406 } 407 return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) 408 } 409 410 func (d *driver) peerFlush(nid string) { 411 d.peerOpMu.Lock() 412 defer d.peerOpMu.Unlock() 413 if err := d.peerFlushOp(nid); err != nil { 414 log.G(context.TODO()).WithError(err).Warn("Peer flush operation failed") 415 } 416 } 417 418 func (d *driver) peerFlushOp(nid string) error { 419 d.peerDb.Lock() 420 defer d.peerDb.Unlock() 421 _, ok := d.peerDb.mp[nid] 422 if !ok { 423 return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) 424 } 425 delete(d.peerDb.mp, nid) 426 return nil 427 } 428 429 func (d *driver) peerDBUpdateSelf() { 430 d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { 431 if pEntry.isLocal { 432 pEntry.vtep = net.ParseIP(d.advertiseAddress) 433 } 434 return false 435 }) 436 }