github.com/Prakhar-Agarwal-byte/moby@v0.0.0-20231027092010-a14e3e8ab87e/libnetwork/networkdb/delegate.go

package networkdb

import (
	"context"
	"net"
	"time"

	"github.com/containerd/log"
	"github.com/gogo/protobuf/proto"
)

type delegate struct {
	nDB *NetworkDB
}

func (d *delegate) NodeMeta(limit int) []byte {
	return []byte{}
}

func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool {
	// Update our local clock if the received message has a newer time.
	nDB.networkClock.Witness(nEvent.LTime)

	nDB.Lock()
	defer nDB.Unlock()

	// Check if the node exists.
	n, _, _ := nDB.findNode(nEvent.NodeName)
	if n == nil {
		return false
	}

	// Check if the event is fresh.
	if n.ltime >= nEvent.LTime {
		return false
	}

	// If we get here, the event is fresher and the node is known. Update the
	// Lamport time.
	n.ltime = nEvent.LTime

	// If the node is not known to memberlist we must not save any state for it:
	// if it later dies we would receive no notification and would remain stuck
	// with the stale state.
	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
		log.G(context.TODO()).Errorf("node: %s is unknown to memberlist", nEvent.NodeName)
		return false
	}

	switch nEvent.Type {
	case NodeEventTypeJoin:
		moved, err := nDB.changeNodeState(n.Name, nodeActiveState)
		if err != nil {
			log.G(context.TODO()).WithError(err).Error("unable to find the node to move")
			return false
		}
		if moved {
			log.G(context.TODO()).Infof("%v(%v): Node join event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
		}
		return moved
	case NodeEventTypeLeave:
		moved, err := nDB.changeNodeState(n.Name, nodeLeftState)
		if err != nil {
			log.G(context.TODO()).WithError(err).Error("unable to find the node to move")
			return false
		}
		if moved {
			log.G(context.TODO()).Infof("%v(%v): Node leave event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr)
		}
		return moved
	}

	return false
}
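// handleNetworkEvent applies a remote node's network join/leave event to the
// local state. It returns true when the event changed local state and should
// therefore be rebroadcast to the rest of the cluster.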
func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool {
	// Update our local clock if the received message has a newer time.
	nDB.networkClock.Witness(nEvent.LTime)

	nDB.Lock()
	defer nDB.Unlock()

	if nEvent.NodeName == nDB.config.NodeID {
		return false
	}

	nodeNetworks, ok := nDB.networks[nEvent.NodeName]
	if !ok {
		// We haven't heard about this node at all. Ignore the leave.
		if nEvent.Type == NetworkEventTypeLeave {
			return false
		}

		nodeNetworks = make(map[string]*network)
		nDB.networks[nEvent.NodeName] = nodeNetworks
	}

	if n, ok := nodeNetworks[nEvent.NetworkID]; ok {
		// We have the latest state. Ignore the event
		// since it is stale.
		if n.ltime >= nEvent.LTime {
			return false
		}

		n.ltime = nEvent.LTime
		n.leaving = nEvent.Type == NetworkEventTypeLeave
		if n.leaving {
			n.reapTime = nDB.config.reapNetworkInterval

			// The remote node is leaving the network, but not the gossip cluster.
			// Mark all its entries as deleted; this guarantees that if some node
			// bulk syncs with us, the deleted state of these entries will be
			// propagated.
			nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName)
		}

		if nEvent.Type == NetworkEventTypeLeave {
			nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName)
		} else {
			nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
		}

		return true
	}

	if nEvent.Type == NetworkEventTypeLeave {
		return false
	}

	// If the node is not known to memberlist we must not save any state for it:
	// if it later dies we would receive no notification and would remain stuck
	// with the stale state.
	if _, ok := nDB.nodes[nEvent.NodeName]; !ok {
		return false
	}

	// This remote network join is being seen for the first time.
	nodeNetworks[nEvent.NetworkID] = &network{
		id:    nEvent.NetworkID,
		ltime: nEvent.LTime,
	}

	nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName)
	return true
}
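// handleTableEvent applies a remote create/update/delete event for a table
// entry. It returns true when the caller should rebroadcast the event, which
// additionally requires that this node has already bulk synced the network
// (network.inSync).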
func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool {
	// Update our local clock if the received message has a newer time.
	nDB.tableClock.Witness(tEvent.LTime)

	// Ignore table events for networks that are in the process of going away.
	nDB.RLock()
	networks := nDB.networks[nDB.config.NodeID]
	network, ok := networks[tEvent.NetworkID]
	// Check if the owner of the event is still part of the network.
	nodes := nDB.networkNodes[tEvent.NetworkID]
	var nodePresent bool
	for _, node := range nodes {
		if node == tEvent.NodeName {
			nodePresent = true
			break
		}
	}
	nDB.RUnlock()

	if !ok || network.leaving || !nodePresent {
		// We are out of the network OR the event owner is no longer part of
		// the network, so do not propagate.
		return false
	}

	nDB.Lock()
	e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key)
	if err == nil {
		// We have the latest state. Ignore the event
		// since it is stale.
		if e.ltime >= tEvent.LTime {
			nDB.Unlock()
			return false
		}
	} else if tEvent.Type == TableEventTypeDelete && !isBulkSync {
		nDB.Unlock()
		// We don't know the entry, the entry is being deleted, and the message
		// is an async one. In this case the safest approach is to ignore it: it
		// is possible that the queue grew so much as to exceed the garbage
		// collection time (the residual reap time carried in the message is not
		// updated, to avoid inserting too many messages in the queue).
		// Messages coming from a TCP bulk sync, by contrast, are safe because
		// they carry the latest value of the garbage collection time.
		return false
	}

	e = &entry{
		ltime:    tEvent.LTime,
		node:     tEvent.NodeName,
		value:    tEvent.Value,
		deleting: tEvent.Type == TableEventTypeDelete,
		reapTime: time.Duration(tEvent.ResidualReapTime) * time.Second,
	}

	// All entries marked for deletion should have a reapTime greater than 0.
	// This case can happen if the cluster is running different versions of the
	// engine where the old version does not have the field.
	// If that is not the case, this can be a BUG.
	if e.deleting && e.reapTime == 0 {
		log.G(context.TODO()).Warnf("%v(%v) handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?",
			nDB.config.Hostname, nDB.config.NodeID, tEvent)
		e.reapTime = nDB.config.reapEntryInterval
	}
	nDB.createOrUpdateEntry(tEvent.NetworkID, tEvent.TableName, tEvent.Key, e)
	nDB.Unlock()

	if err != nil && tEvent.Type == TableEventTypeDelete {
		// Again, we don't know the entry, but this is coming from a TCP sync so
		// the message body is up to date. We have saved the state anyway, to
		// speed up convergence and to be able to avoid accepting create events
		// for an entry that is being deleted.
		// Now we rebroadcast the message if two conditions are met:
		// 1) we had already synced this network (during the network join);
		// 2) the residual reapTime is higher than 1/6 of the total reapTime.
		// If the residual reapTime is lower than or equal to 1/6 of the total
		// reapTime, don't bother broadcasting it around; most likely the
		// cluster is already aware of it. This also reduces the chance that
		// deletions of entries close to their garbage collection end up
		// circling around forever.
		// log.G(ctx).Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync)
		return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6
	}

	var op opType
	switch tEvent.Type {
	case TableEventTypeCreate:
		op = opCreate
	case TableEventTypeUpdate:
		op = opUpdate
	case TableEventTypeDelete:
		op = opDelete
	}

	nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value))
	return network.inSync
}

func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) {
	// Decode the parts.
	parts, err := decodeCompoundMessage(buf)
	if err != nil {
		log.G(context.TODO()).Errorf("Failed to decode compound request: %v", err)
		return
	}

	// Handle each message.
	for _, part := range parts {
		nDB.handleMessage(part, isBulkSync)
	}
}
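// handleTableMessage decodes a gossiped table event, hands it to
// handleTableEvent, and, when that reports the event is worth propagating,
// re-queues the raw message on the owning network's broadcast queue.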
func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) {
	var tEvent TableEvent
	if err := proto.Unmarshal(buf, &tEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding table event message: %v", err)
		return
	}

	// Ignore messages that this node generated.
	if tEvent.NodeName == nDB.config.NodeID {
		return
	}

	if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeTableEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for table event rebroadcast: %v", err)
			return
		}

		nDB.RLock()
		n, ok := nDB.networks[nDB.config.NodeID][tEvent.NetworkID]
		nDB.RUnlock()

		// If the network is not there anymore, OR we are leaving the network,
		// OR the broadcast queue is not present, don't rebroadcast.
		if !ok || n.leaving || n.tableBroadcasts == nil {
			return
		}

		// If the queue is over the threshold, avoid distributing information
		// coming from TCP sync.
		if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync {
			return
		}

		n.tableBroadcasts.QueueBroadcast(&tableEventMessage{
			msg:   buf,
			id:    tEvent.NetworkID,
			tname: tEvent.TableName,
			key:   tEvent.Key,
		})
	}
}

func (nDB *NetworkDB) handleNodeMessage(buf []byte) {
	var nEvent NodeEvent
	if err := proto.Unmarshal(buf, &nEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding node event message: %v", err)
		return
	}

	if rebroadcast := nDB.handleNodeEvent(&nEvent); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeNodeEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for node event rebroadcast: %v", err)
			return
		}

		nDB.nodeBroadcasts.QueueBroadcast(&nodeEventMessage{
			msg: buf,
		})
	}
}

func (nDB *NetworkDB) handleNetworkMessage(buf []byte) {
	var nEvent NetworkEvent
	if err := proto.Unmarshal(buf, &nEvent); err != nil {
		log.G(context.TODO()).Errorf("Error decoding network event message: %v", err)
		return
	}

	if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast {
		var err error
		buf, err = encodeRawMessage(MessageTypeNetworkEvent, buf)
		if err != nil {
			log.G(context.TODO()).Errorf("Error marshalling gossip message for network event rebroadcast: %v", err)
			return
		}

		nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{
			msg:  buf,
			id:   nEvent.NetworkID,
			node: nEvent.NodeName,
		})
	}
}

func (nDB *NetworkDB) handleBulkSync(buf []byte) {
	var bsm BulkSyncMessage
	if err := proto.Unmarshal(buf, &bsm); err != nil {
		log.G(context.TODO()).Errorf("Error decoding bulk sync message: %v", err)
		return
	}

	if bsm.LTime > 0 {
		nDB.tableClock.Witness(bsm.LTime)
	}

	nDB.handleMessage(bsm.Payload, true)

	// Only respond to unsolicited bulk syncs. A solicited one is the response
	// to our own request, so just signal the goroutine waiting for the ack.
	if !bsm.Unsolicited {
		nDB.Lock()
		ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName]
		if ok {
			close(ch)
			delete(nDB.bulkSyncAckTbl, bsm.NodeName)
		}
		nDB.Unlock()

		return
	}

	var nodeAddr net.IP
	nDB.RLock()
	if node, ok := nDB.nodes[bsm.NodeName]; ok {
		nodeAddr = node.Addr
	}
	nDB.RUnlock()

	if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil {
		log.G(context.TODO()).Errorf("Error in responding to bulk sync from node %s: %v", nodeAddr, err)
	}
}
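// handleMessage dispatches a decoded gossip message to the handler for its
// type. isBulkSync is true when the payload arrived over a TCP bulk sync
// rather than as async gossip.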
func (nDB *NetworkDB) handleMessage(buf []byte, isBulkSync bool) {
	mType, data, err := decodeMessage(buf)
	if err != nil {
		log.G(context.TODO()).Errorf("Error decoding gossip message to get message type: %v", err)
		return
	}

	switch mType {
	case MessageTypeNodeEvent:
		nDB.handleNodeMessage(data)
	case MessageTypeNetworkEvent:
		nDB.handleNetworkMessage(data)
	case MessageTypeTableEvent:
		nDB.handleTableMessage(data, isBulkSync)
	case MessageTypeBulkSync:
		nDB.handleBulkSync(data)
	case MessageTypeCompound:
		nDB.handleCompound(data, isBulkSync)
	default:
		log.G(context.TODO()).Errorf("%v(%v): unknown message type %d", nDB.config.Hostname, nDB.config.NodeID, mType)
	}
}

func (d *delegate) NotifyMsg(buf []byte) {
	if len(buf) == 0 {
		return
	}

	d.nDB.handleMessage(buf, false)
}

func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte {
	msgs := d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit)
	msgs = append(msgs, d.nDB.nodeBroadcasts.GetBroadcasts(overhead, limit)...)
	return msgs
}

func (d *delegate) LocalState(join bool) []byte {
	if join {
		// Update all the local node/network state to a new time to force an
		// update on the node we are trying to rejoin, just in case that node
		// still has these in the leaving state. This is to facilitate fast
		// convergence after recovering from a gossip failure.
		d.nDB.updateLocalNetworkTime()
	}

	d.nDB.RLock()
	defer d.nDB.RUnlock()

	pp := NetworkPushPull{
		LTime:    d.nDB.networkClock.Time(),
		NodeName: d.nDB.config.NodeID,
	}

	for name, nn := range d.nDB.networks {
		for _, n := range nn {
			pp.Networks = append(pp.Networks, &NetworkEntry{
				LTime:     n.ltime,
				NetworkID: n.id,
				NodeName:  name,
				Leaving:   n.leaving,
			})
		}
	}

	buf, err := encodeMessage(MessageTypePushPull, &pp)
	if err != nil {
		log.G(context.TODO()).Errorf("Failed to encode local network state: %v", err)
		return nil
	}

	return buf
}

func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) {
	if len(buf) == 0 {
		log.G(context.TODO()).Error("zero byte remote network state received")
		return
	}

	var gMsg GossipMessage
	err := proto.Unmarshal(buf, &gMsg)
	if err != nil {
		log.G(context.TODO()).Errorf("Error unmarshalling push pull message: %v", err)
		return
	}

	if gMsg.Type != MessageTypePushPull {
		log.G(context.TODO()).Errorf("Invalid message type %v received from remote", gMsg.Type)
		return
	}

	pp := NetworkPushPull{}
	if err := proto.Unmarshal(gMsg.Data, &pp); err != nil {
		log.G(context.TODO()).Errorf("Failed to decode remote network state: %v", err)
		return
	}

	nodeEvent := &NodeEvent{
		LTime:    pp.LTime,
		NodeName: pp.NodeName,
		Type:     NodeEventTypeJoin,
	}
	d.nDB.handleNodeEvent(nodeEvent)

	for _, n := range pp.Networks {
		nEvent := &NetworkEvent{
			LTime:     n.LTime,
			NodeName:  n.NodeName,
			NetworkID: n.NetworkID,
			Type:      NetworkEventTypeJoin,
		}

		if n.Leaving {
			nEvent.Type = NetworkEventTypeLeave
		}

		d.nDB.handleNetworkEvent(nEvent)
	}
}
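// The delegate type above satisfies the Delegate interface of
// hashicorp/memberlist (NodeMeta, NotifyMsg, GetBroadcasts, LocalState and
// MergeRemoteState). A minimal wiring sketch, assuming memberlist's public
// API and an already-constructed NetworkDB value nDB (illustration only, not
// code from this package):
//
//	cfg := memberlist.DefaultLANConfig()
//	cfg.Delegate = &delegate{nDB: nDB} // gossip payloads now flow through nDB.handleMessage
//	ml, err := memberlist.Create(cfg)
//	if err != nil {
//		// handle setup failure
//	}
//	_ = ml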