github.com/rawahars/moby@v24.0.4+incompatible/libnetwork/drivers/overlay/peerdb.go (about) 1 //go:build linux 2 // +build linux 3 4 package overlay 5 6 import ( 7 "fmt" 8 "net" 9 "sync" 10 "syscall" 11 12 "github.com/docker/docker/libnetwork/internal/setmatrix" 13 "github.com/docker/docker/libnetwork/osl" 14 "github.com/sirupsen/logrus" 15 ) 16 17 const ovPeerTable = "overlay_peer_table" 18 19 type peerKey struct { 20 peerIP net.IP 21 peerMac net.HardwareAddr 22 } 23 24 type peerEntry struct { 25 eid string 26 vtep net.IP 27 peerIPMask net.IPMask 28 isLocal bool 29 } 30 31 func (p *peerEntry) MarshalDB() peerEntryDB { 32 ones, bits := p.peerIPMask.Size() 33 return peerEntryDB{ 34 eid: p.eid, 35 vtep: p.vtep.String(), 36 peerIPMaskOnes: ones, 37 peerIPMaskBits: bits, 38 isLocal: p.isLocal, 39 } 40 } 41 42 // This the structure saved into the set (SetMatrix), due to the implementation of it 43 // the value inserted in the set has to be Hashable so the []byte had to be converted into 44 // strings 45 type peerEntryDB struct { 46 eid string 47 vtep string 48 peerIPMaskOnes int 49 peerIPMaskBits int 50 isLocal bool 51 } 52 53 func (p *peerEntryDB) UnMarshalDB() peerEntry { 54 return peerEntry{ 55 eid: p.eid, 56 vtep: net.ParseIP(p.vtep), 57 peerIPMask: net.CIDRMask(p.peerIPMaskOnes, p.peerIPMaskBits), 58 isLocal: p.isLocal, 59 } 60 } 61 62 type peerMap struct { 63 // set of peerEntry, note the values have to be objects and not pointers to maintain the proper equality checks 64 mp setmatrix.SetMatrix[peerEntryDB] 65 sync.Mutex 66 } 67 68 type peerNetworkMap struct { 69 // map with key peerKey 70 mp map[string]*peerMap 71 sync.Mutex 72 } 73 74 func (pKey peerKey) String() string { 75 return fmt.Sprintf("%s %s", pKey.peerIP, pKey.peerMac) 76 } 77 78 func (pKey *peerKey) Scan(state fmt.ScanState, verb rune) error { 79 ipB, err := state.Token(true, nil) 80 if err != nil { 81 return err 82 } 83 84 pKey.peerIP = net.ParseIP(string(ipB)) 85 86 macB, err := state.Token(true, nil) 87 if err != nil { 88 return err 89 } 90 91 pKey.peerMac, err = net.ParseMAC(string(macB)) 92 return err 93 } 94 95 func (d *driver) peerDbWalk(f func(string, *peerKey, *peerEntry) bool) error { 96 d.peerDb.Lock() 97 nids := []string{} 98 for nid := range d.peerDb.mp { 99 nids = append(nids, nid) 100 } 101 d.peerDb.Unlock() 102 103 for _, nid := range nids { 104 d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 105 return f(nid, pKey, pEntry) 106 }) 107 } 108 return nil 109 } 110 111 func (d *driver) peerDbNetworkWalk(nid string, f func(*peerKey, *peerEntry) bool) error { 112 d.peerDb.Lock() 113 pMap, ok := d.peerDb.mp[nid] 114 d.peerDb.Unlock() 115 116 if !ok { 117 return nil 118 } 119 120 mp := map[string]peerEntry{} 121 pMap.Lock() 122 for _, pKeyStr := range pMap.mp.Keys() { 123 entryDBList, ok := pMap.mp.Get(pKeyStr) 124 if ok { 125 peerEntryDB := entryDBList[0] 126 mp[pKeyStr] = peerEntryDB.UnMarshalDB() 127 } 128 } 129 pMap.Unlock() 130 131 for pKeyStr, pEntry := range mp { 132 var pKey peerKey 133 pEntry := pEntry 134 if _, err := fmt.Sscan(pKeyStr, &pKey); err != nil { 135 logrus.Warnf("Peer key scan on network %s failed: %v", nid, err) 136 } 137 if f(&pKey, &pEntry) { 138 return nil 139 } 140 } 141 142 return nil 143 } 144 145 func (d *driver) peerDbSearch(nid string, peerIP net.IP) (*peerKey, *peerEntry, error) { 146 var pKeyMatched *peerKey 147 var pEntryMatched *peerEntry 148 err := d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 149 if pKey.peerIP.Equal(peerIP) { 150 pKeyMatched = pKey 151 pEntryMatched = pEntry 152 return true 153 } 154 155 return false 156 }) 157 158 if err != nil { 159 return nil, nil, fmt.Errorf("peerdb search for peer ip %q failed: %v", peerIP, err) 160 } 161 162 if pKeyMatched == nil || pEntryMatched == nil { 163 return nil, nil, fmt.Errorf("peer ip %q not found in peerdb", peerIP) 164 } 165 166 return pKeyMatched, pEntryMatched, nil 167 } 168 169 func (d *driver) peerDbAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 170 d.peerDb.Lock() 171 pMap, ok := d.peerDb.mp[nid] 172 if !ok { 173 pMap = &peerMap{} 174 d.peerDb.mp[nid] = pMap 175 } 176 d.peerDb.Unlock() 177 178 pKey := peerKey{ 179 peerIP: peerIP, 180 peerMac: peerMac, 181 } 182 183 pEntry := peerEntry{ 184 eid: eid, 185 vtep: vtep, 186 peerIPMask: peerIPMask, 187 isLocal: isLocal, 188 } 189 190 pMap.Lock() 191 defer pMap.Unlock() 192 b, i := pMap.mp.Insert(pKey.String(), pEntry.MarshalDB()) 193 if i != 1 { 194 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 195 s, _ := pMap.mp.String(pKey.String()) 196 logrus.Warnf("peerDbAdd transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 197 } 198 return b, i 199 } 200 201 func (d *driver) peerDbDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, isLocal bool) (bool, int) { 202 d.peerDb.Lock() 203 pMap, ok := d.peerDb.mp[nid] 204 if !ok { 205 d.peerDb.Unlock() 206 return false, 0 207 } 208 d.peerDb.Unlock() 209 210 pKey := peerKey{ 211 peerIP: peerIP, 212 peerMac: peerMac, 213 } 214 215 pEntry := peerEntry{ 216 eid: eid, 217 vtep: vtep, 218 peerIPMask: peerIPMask, 219 isLocal: isLocal, 220 } 221 222 pMap.Lock() 223 defer pMap.Unlock() 224 b, i := pMap.mp.Remove(pKey.String(), pEntry.MarshalDB()) 225 if i != 0 { 226 // Transient case, there is more than one endpoint that is using the same IP,MAC pair 227 s, _ := pMap.mp.String(pKey.String()) 228 logrus.Warnf("peerDbDelete transient condition - Key:%s cardinality:%d db state:%s", pKey.String(), i, s) 229 } 230 return b, i 231 } 232 233 // The overlay uses a lazy initialization approach, this means that when a network is created 234 // and the driver registered the overlay does not allocate resources till the moment that a 235 // sandbox is actually created. 236 // At the moment of this call, that happens when a sandbox is initialized, is possible that 237 // networkDB has already delivered some events of peers already available on remote nodes, 238 // these peers are saved into the peerDB and this function is used to properly configure 239 // the network sandbox with all those peers that got previously notified. 240 // Note also that this method sends a single message on the channel and the go routine on the 241 // other side, will atomically loop on the whole table of peers and will program their state 242 // in one single atomic operation. This is fundamental to guarantee consistency, and avoid that 243 // new peerAdd or peerDelete gets reordered during the sandbox init. 244 func (d *driver) initSandboxPeerDB(nid string) { 245 d.peerOpMu.Lock() 246 defer d.peerOpMu.Unlock() 247 if err := d.peerInitOp(nid); err != nil { 248 logrus.WithError(err).Warn("Peer init operation failed") 249 } 250 } 251 252 func (d *driver) peerInitOp(nid string) error { 253 return d.peerDbNetworkWalk(nid, func(pKey *peerKey, pEntry *peerEntry) bool { 254 // Local entries do not need to be added 255 if pEntry.isLocal { 256 return false 257 } 258 259 d.peerAddOp(nid, pEntry.eid, pKey.peerIP, pEntry.peerIPMask, pKey.peerMac, pEntry.vtep, false, false, false, pEntry.isLocal) 260 // return false to loop on all entries 261 return false 262 }) 263 } 264 265 func (d *driver) peerAdd(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 266 peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, localPeer bool) { 267 d.peerOpMu.Lock() 268 defer d.peerOpMu.Unlock() 269 err := d.peerAddOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, l2Miss, l3Miss, true, localPeer) 270 if err != nil { 271 logrus.WithError(err).Warn("Peer add operation failed") 272 } 273 } 274 275 func (d *driver) peerAddOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, l2Miss, l3Miss, updateDB, localPeer bool) error { 276 if err := validateID(nid, eid); err != nil { 277 return err 278 } 279 280 var dbEntries int 281 var inserted bool 282 if updateDB { 283 inserted, dbEntries = d.peerDbAdd(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 284 if !inserted { 285 logrus.Warnf("Entry already present in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 286 nid, eid, peerIP, peerMac, localPeer, vtep) 287 } 288 } 289 290 // Local peers do not need any further configuration 291 if localPeer { 292 return nil 293 } 294 295 n := d.network(nid) 296 if n == nil { 297 return nil 298 } 299 300 sbox := n.sandbox() 301 if sbox == nil { 302 // We are hitting this case for all the events that are arriving before that the sandbox 303 // is being created. The peer got already added into the database and the sanbox init will 304 // call the peerDbUpdateSandbox that will configure all these peers from the database 305 return nil 306 } 307 308 IP := &net.IPNet{ 309 IP: peerIP, 310 Mask: peerIPMask, 311 } 312 313 s := n.getSubnetforIP(IP) 314 if s == nil { 315 return fmt.Errorf("couldn't find the subnet %q in network %q", IP.String(), n.id) 316 } 317 318 if err := n.joinSandbox(s, false); err != nil { 319 return fmt.Errorf("subnet sandbox join failed for %q: %v", s.subnetIP.String(), err) 320 } 321 322 if err := d.checkEncryption(nid, vtep, false, true); err != nil { 323 logrus.Warn(err) 324 } 325 326 // Add neighbor entry for the peer IP 327 if err := sbox.AddNeighbor(peerIP, peerMac, l3Miss, sbox.NeighborOptions().LinkName(s.vxlanName)); err != nil { 328 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 1 { 329 // We are in the transient case so only the first configuration is programmed into the kernel 330 // Upon deletion if the active configuration is deleted the next one from the database will be restored 331 // Note we are skipping also the next configuration 332 return nil 333 } 334 return fmt.Errorf("could not add neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 335 } 336 337 // Add fdb entry to the bridge for the peer mac 338 if err := sbox.AddNeighbor(vtep, peerMac, l2Miss, sbox.NeighborOptions().LinkName(s.vxlanName), 339 sbox.NeighborOptions().Family(syscall.AF_BRIDGE)); err != nil { 340 return fmt.Errorf("could not add fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 341 } 342 343 return nil 344 } 345 346 func (d *driver) peerDelete(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, 347 peerMac net.HardwareAddr, vtep net.IP, localPeer bool) { 348 d.peerOpMu.Lock() 349 defer d.peerOpMu.Unlock() 350 err := d.peerDeleteOp(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 351 if err != nil { 352 logrus.WithError(err).Warn("Peer delete operation failed") 353 } 354 } 355 356 func (d *driver) peerDeleteOp(nid, eid string, peerIP net.IP, peerIPMask net.IPMask, peerMac net.HardwareAddr, vtep net.IP, localPeer bool) error { 357 if err := validateID(nid, eid); err != nil { 358 return err 359 } 360 361 deleted, dbEntries := d.peerDbDelete(nid, eid, peerIP, peerIPMask, peerMac, vtep, localPeer) 362 if !deleted { 363 logrus.Warnf("Entry was not in db: nid:%s eid:%s peerIP:%v peerMac:%v isLocal:%t vtep:%v", 364 nid, eid, peerIP, peerMac, localPeer, vtep) 365 } 366 367 n := d.network(nid) 368 if n == nil { 369 return nil 370 } 371 372 sbox := n.sandbox() 373 if sbox == nil { 374 return nil 375 } 376 377 if err := d.checkEncryption(nid, vtep, localPeer, false); err != nil { 378 logrus.Warn(err) 379 } 380 381 // Local peers do not have any local configuration to delete 382 if !localPeer { 383 // Remove fdb entry to the bridge for the peer mac 384 if err := sbox.DeleteNeighbor(vtep, peerMac, true); err != nil { 385 if _, ok := err.(osl.NeighborSearchError); ok && dbEntries > 0 { 386 // We fall in here if there is a transient state and if the neighbor that is being deleted 387 // was never been configured into the kernel (we allow only 1 configuration at the time per <ip,mac> mapping) 388 return nil 389 } 390 return fmt.Errorf("could not delete fdb entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 391 } 392 393 // Delete neighbor entry for the peer IP 394 if err := sbox.DeleteNeighbor(peerIP, peerMac, true); err != nil { 395 return fmt.Errorf("could not delete neighbor entry for nid:%s eid:%s into the sandbox:%v", nid, eid, err) 396 } 397 } 398 399 if dbEntries == 0 { 400 return nil 401 } 402 403 // If there is still an entry into the database and the deletion went through without errors means that there is now no 404 // configuration active in the kernel. 405 // Restore one configuration for the <ip,mac> directly from the database, note that is guaranteed that there is one 406 peerKey, peerEntry, err := d.peerDbSearch(nid, peerIP) 407 if err != nil { 408 logrus.Errorf("peerDeleteOp unable to restore a configuration for nid:%s ip:%v mac:%v err:%s", nid, peerIP, peerMac, err) 409 return err 410 } 411 return d.peerAddOp(nid, peerEntry.eid, peerIP, peerEntry.peerIPMask, peerKey.peerMac, peerEntry.vtep, false, false, false, peerEntry.isLocal) 412 } 413 414 func (d *driver) peerFlush(nid string) { 415 d.peerOpMu.Lock() 416 defer d.peerOpMu.Unlock() 417 if err := d.peerFlushOp(nid); err != nil { 418 logrus.WithError(err).Warn("Peer flush operation failed") 419 } 420 } 421 422 func (d *driver) peerFlushOp(nid string) error { 423 d.peerDb.Lock() 424 defer d.peerDb.Unlock() 425 _, ok := d.peerDb.mp[nid] 426 if !ok { 427 return fmt.Errorf("Unable to find the peerDB for nid:%s", nid) 428 } 429 delete(d.peerDb.mp, nid) 430 return nil 431 } 432 433 func (d *driver) peerDBUpdateSelf() { 434 d.peerDbWalk(func(nid string, pkey *peerKey, pEntry *peerEntry) bool { 435 if pEntry.isLocal { 436 pEntry.vtep = net.ParseIP(d.advertiseAddress) 437 } 438 return false 439 }) 440 }