github.com/rish1988/moby@v25.0.2+incompatible/libnetwork/drivers/overlay/peerdb.go (about) 1 // FIXME(thaJeztah): remove once we are a module; the go:build directive prevents go from downgrading language version to go1.16: 2 //go:build go1.19 && linux 3 4 package overlay 5 6 import ( 7 "context" 8 "fmt" 9 "net" 10 "sync" 11 "syscall" 12 13 "github.com/containerd/log" 14 "github.com/docker/docker/libnetwork/internal/setmatrix" 15 "github.com/docker/docker/libnetwork/osl" 16 ) 17 18 const ovPeerTable = "overlay_peer_table" 19 20 type peerKey struct { 21 peerIP net.IP 22 peerMac net.HardwareAddr 23 } 24 25 type peerEntry struct { 26 eid string 27 vtep net.IP 28 peerIPMask net.IPMask 29 isLocal bool 30 } 31 32 func (p *peerEntry) MarshalDB() peerEntryDB { 33 ones, bits := p.peerIPMask.Size() 34 return peerEntryDB{ 35 eid: p.eid, 36 vtep: p.vtep.String(), 37 peerIPMaskOnes: ones, 38 peerIPMaskBits: bits, 39 isLocal: p.isLocal, 40 } 41 } 42 43 // This the structure saved into the set (SetMatrix), due to the implementation of it 44 // the value inserted in the set has to be Hashable so the []byte had to be converted into 45 // strings 46 type peerEntryDB struct { 47 eid string 48 vtep string 49 peerIPMaskOnes int 50 peerIPMaskBits int 51 isLocal bool 52 } 53 54 func (p *peerEntryDB) UnMarshalDB() peerEntry { 55 return peerEntry{ 56 eid: p.eid, 57 vtep: net.ParseIP(p.vtep), 58 peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), 59 isLocal: p.isLocal, 60 } 61 } 62 63 type peerMap struct { 64 // set of peerEntry, note the values have to be objects and not pointers to maintain the proper equality checks 65 mp setmatrix.SetMatrix[peerEntryDB] 66 sync.Mutex 67 } 68 69 type peerNetworkMap struct { 70 // map with key peerKey 71 mp map[string]*peerMap 72 sync.Mutex 73 } 74 75 func (pKey peerKey) String() string { 76 return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac) 77 } 78 79 func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { 80 ipB, err := state.Token(true, nil) 81 if err != nil { 82 return err 83 } 84 85 pKey.peerIP = net.ParseIP(string(ipB)) 86 87 macB, err := state.Token(true, nil) 88 if err != nil { 89 return err 90 } 91 92 pKey.peerMac, err = net.ParseMAC(string(macB)) 93 return err 94 } 95 96 func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { 97 d.peerDb.Lock() 98 nids := []string{} 99 for nid := range d.peerDb.mp { 100 nids = append(nids, nid) 101 } 102 d.peerDb.Unlock() 103 104 for _, nid := range nids { 105 d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 106 return f(nid, pKey, pEntry) 107 }) 108 } 109 return nil 110 } 111 112 func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error { 113 d.peerDb.Lock() 114 pMap, ok := d.peerDb.mp[nid] 115 d.peerDb.Unlock() 116 117 if !ok { 118 return nil 119 } 120 121 mp := map[string]peerEntry{} 122 pMap.Lock() 123 for _, pKeyStr := range pMap.mp.Keys() { 124 entryDBList, ok := pMap.mp.Get(pKeyStr) 125 if ok { 126 peerEntryDB := entryDBList[0] 127 mp[pKeyStr] = peerEntryDB.UnMarshalDB() 128 } 129 } 130 pMap.Unlock() 131 132 for pKeyStr, pEntry := range mp { 133 var pKey peerKey 134 pEntry := pEntry 135 if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { 136 log.G(context.TODO()).Warnf("Peer key scan on network %s failed: %v", nid, err) 137 } 138 if f(&pKey, &pEntry) { 139 return nil 140 } 141 } 142 143 return nil 144 } 145 146 func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { 147 var pKeyMatched *peerKey 148 var pEntryMatched *peerEntry 149 err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 150 if pKey.peerIP.Equal(peerIP) { 151 pKeyMatched = pKey 152 pEntryMatched = pEntry 153 return true 154 } 155 156 return false 157 }) 158 if err != nil { 159 return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) 160 } 161 162 if pKeyMatched == nil || pEntryMatched == nil { 163 return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) 164 } 165 166 return pKeyMatched, pEntryMatched, nil 167 } 168 169 func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 170 d.peerDb.Lock() 171 pMap, ok := d.peerDb.mp[nid] 172 if !ok { 173 pMap = &peerMap{} 174 d.peerDb.mp[nid] = pMap 175 } 176 d.peerDb.Unlock() 177 178 pKey := peerKey{ 179 peerIP: peerIP, 180 peerMac: peerMac, 181 } 182 183 pEntry := peerEntry{ 184 eid: eid, 185 vtep: vtep, 186 peerIPMask: peerIPMask, 187 isLocal: isLocal, 188 } 189 190 pMap.Lock() 191 defer pMap.Unlock() 192 b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) 193 if i != 1 { 194 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 195 s, _ := pMap.mp.String(pKey.String()) 196 log.G(context.TODO()).Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 197 } 198 return b, i 199 } 200 201 func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 202 d.peerDb.Lock() 203 pMap, ok := d.peerDb.mp[nid] 204 if !ok { 205 d.peerDb.Unlock() 206 return false, 0 207 } 208 d.peerDb.Unlock() 209 210 pKey := peerKey{ 211 peerIP: peerIP, 212 peerMac: peerMac, 213 } 214 215 pEntry := peerEntry{ 216 eid: eid, 217 vtep: vtep, 218 peerIPMask: peerIPMask, 219 isLocal: isLocal, 220 } 221 222 pMap.Lock() 223 defer pMap.Unlock() 224 b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) 225 if i != 0 { 226 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 227 s, _ := pMap.mp.String(pKey.String()) 228 log.G(context.TODO()).Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 229 } 230 return b, i 231 } 232 233 // The overlay uses a lazy initialization approach, this means that when a network is created 234 // and the driver registered the overlay does not allocate resources till the moment that a 235 // sandbox is actually created. 236 // At the moment of this call, that happens when a sandbox is initialized, is possible that 237 // networkDB has already delivered some events of peers already available on remote nodes, 238 // these peers are saved into the peerDB and this function is used to properly configure 239 // the network sandbox with all those peers that got previously notified. 240 // Note also that this method sends a single message on the channel and the go routine on the 241 // other side, will atomically loop on the whole table of peers and will program their state 242 // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that 243 // new peerAdd or peerDelete gets reordered during the sandbox init. 244 func (d *driver) initSandboxPeerDB(nid string) { 245 d.peerOpMu.Lock() 246 defer d.peerOpMu.Unlock() 247 if err := d.peerInitOp(nid); err != nil { 248 log.G(context.TODO()).WithError(err).Warn("Peer init operation failed") 249 } 250 } 251 252 func (d *driver) peerInitOp(nid string) error { 253 return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 254 // Local entries do not need to be added 255 if pEntry.isLocal { 256 return false 257 } 258 259 d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal) 260 // return false to loop on all entries 261 return false 262 }) 263 } 264 265 func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { 266 d.peerOpMu.Lock() 267 defer d.peerOpMu.Unlock() 268 err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer) 269 if err != nil { 270 log.G(context.TODO()).WithError(err).Warn("Peer add operation failed") 271 } 272 } 273 274 func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { 275 if err := validateID(nid, eid); err != nil { 276 return err 277 } 278 279 var dbEntries int 280 var inserted bool 281 if updateDB { 282 inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 283 if !inserted { 284 log.G(context.TODO()).Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 285 nid, eid, peerIP, peerMac, localPeer, vtep) 286 } 287 } 288 289 // Local peers do not need any further configuration 290 if localPeer { 291 return nil 292 } 293 294 n := d.network(nid) 295 if n == nil { 296 return nil 297 } 298 299 sbox := n.sandbox() 300 if sbox == nil { 301 // We are hitting this case for all the events that are arriving before that the sandbox 302 // is being created. The peer got already added into the database and the sanbox init will 303 // call the peerDbUpdateSandbox that will configure all these peers from the database 304 return nil 305 } 306 307 IP := &net.IPNet{ 308 IP: peerIP, 309 Mask: peerIPMask, 310 } 311 312 s := n.getSubnetforIP(IP) 313 if s == nil { 314 return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id) 315 } 316 317 if err := n.joinSandbox(s, false); err != nil { 318 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) 319 } 320 321 if err := d.checkEncryption(nid, vtep, false, true); err != nil { 322 log.G(context.TODO()).Warn(err) 323 } 324 325 // Add neighbor entry for the peer IP 326 if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, osl.WithLinkName(s.vxlanName)); err != nil { 327 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { 328 // We are in the transient case so only the first configuration is programmed into the kernel 329 // Upon deletion if the active configuration is deleted the next one from the database will be restored 330 // Note we are skipping also the next configuration 331 return nil 332 } 333 return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 334 } 335 336 // Add fdb entry to the bridge for the peer mac 337 if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, osl.WithLinkName(s.vxlanName), osl.WithFamily(syscall.AF_BRIDGE)); err != nil { 338 return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 339 } 340 341 return nil 342 } 343 344 func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { 345 d.peerOpMu.Lock() 346 defer d.peerOpMu.Unlock() 347 err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 348 if err != nil { 349 log.G(context.TODO()).WithError(err).Warn("Peer delete operation failed") 350 } 351 } 352 353 func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { 354 if err := validateID(nid, eid); err != nil { 355 return err 356 } 357 358 deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 359 if !deleted { 360 log.G(context.TODO()).Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 361 nid, eid, peerIP, peerMac, localPeer, vtep) 362 } 363 364 n := d.network(nid) 365 if n == nil { 366 return nil 367 } 368 369 sbox := n.sandbox() 370 if sbox == nil { 371 return nil 372 } 373 374 if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil { 375 log.G(context.TODO()).Warn(err) 376 } 377 378 // Local peers do not have any local configuration to delete 379 if !localPeer { 380 // Remove fdb entry to the bridge for the peer mac 381 if err := sbox.DeleteNeighbor(vtep, peerMac); err != nil { 382 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { 383 // We fall in here if there is a transient state and if the neighbor that is being deleted 384 // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping) 385 return nil 386 } 387 return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 388 } 389 390 // Delete neighbor entry for the peer IP 391 if err := sbox.DeleteNeighbor(peerIP, peerMac); err != nil { 392 return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 393 } 394 } 395 396 if dbEntries == 0 { 397 return nil 398 } 399 400 // If there is still an entry into the database and the deletion went through without errors means that there is now no 401 // configuration active in the kernel. 402 // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one 403 peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) 404 if err != nil { 405 log.G(context.TODO()).Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) 406 return err 407 } 408 return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) 409 } 410 411 func (d *driver) peerFlush(nid string) { 412 d.peerOpMu.Lock() 413 defer d.peerOpMu.Unlock() 414 if err := d.peerFlushOp(nid); err != nil { 415 log.G(context.TODO()).WithError(err).Warn("Peer flush operation failed") 416 } 417 } 418 419 func (d *driver) peerFlushOp(nid string) error { 420 d.peerDb.Lock() 421 defer d.peerDb.Unlock() 422 _, ok := d.peerDb.mp[nid] 423 if !ok { 424 return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) 425 } 426 delete(d.peerDb.mp, nid) 427 return nil 428 } 429 430 func (d *driver) peerDBUpdateSelf() { 431 d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { 432 if pEntry.isLocal { 433 pEntry.vtep = d.advertiseAddress 434 } 435 return false 436 }) 437 }