github.com/Heebron/moby@v0.0.0-20221111184709-6eab4f55faf7/libnetwork/networkdb/delegate.go (about) 1 package networkdb 2 3 import ( 4 "net" 5 "time" 6 7 "github.com/gogo/protobuf/proto" 8 "github.com/sirupsen/logrus" 9 ) 10 11 type delegate struct { 12 nDB *NetworkDB 13 } 14 15 func (d *delegate) NodeMeta(limit int) []byte { 16 return []byte{} 17 } 18 19 func (nDB *NetworkDB) handleNodeEvent(nEvent *NodeEvent) bool { 20 // Update our local clock if the received messages has newer 21 // time. 22 nDB.networkClock.Witness(nEvent.LTime) 23 24 nDB.Lock() 25 defer nDB.Unlock() 26 27 // check if the node exists 28 n, _, _ := nDB.findNode(nEvent.NodeName) 29 if n == nil { 30 return false 31 } 32 33 // check if the event is fresh 34 if n.ltime >= nEvent.LTime { 35 return false 36 } 37 38 // If we are here means that the event is fresher and the node is known. Update the laport time 39 n.ltime = nEvent.LTime 40 41 // If the node is not known from memberlist we cannot process save any state of it else if it actually 42 // dies we won't receive any notification and we will remain stuck with it 43 if _, ok := nDB.nodes[nEvent.NodeName]; !ok { 44 logrus.Errorf("node: %s is unknown to memberlist", nEvent.NodeName) 45 return false 46 } 47 48 switch nEvent.Type { 49 case NodeEventTypeJoin: 50 moved, err := nDB.changeNodeState(n.Name, nodeActiveState) 51 if err != nil { 52 logrus.WithError(err).Error("unable to find the node to move") 53 return false 54 } 55 if moved { 56 logrus.Infof("%v(%v): Node join event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr) 57 } 58 return moved 59 case NodeEventTypeLeave: 60 moved, err := nDB.changeNodeState(n.Name, nodeLeftState) 61 if err != nil { 62 logrus.WithError(err).Error("unable to find the node to move") 63 return false 64 } 65 if moved { 66 logrus.Infof("%v(%v): Node leave event for %s/%s", nDB.config.Hostname, nDB.config.NodeID, n.Name, n.Addr) 67 } 68 return moved 69 } 70 71 return false 72 } 73 74 func (nDB *NetworkDB) handleNetworkEvent(nEvent *NetworkEvent) bool { 75 // Update our local clock if the received messages has newer 76 // time. 77 nDB.networkClock.Witness(nEvent.LTime) 78 79 nDB.Lock() 80 defer nDB.Unlock() 81 82 if nEvent.NodeName == nDB.config.NodeID { 83 return false 84 } 85 86 nodeNetworks, ok := nDB.networks[nEvent.NodeName] 87 if !ok { 88 // We haven't heard about this node at all. Ignore the leave 89 if nEvent.Type == NetworkEventTypeLeave { 90 return false 91 } 92 93 nodeNetworks = make(map[string]*network) 94 nDB.networks[nEvent.NodeName] = nodeNetworks 95 } 96 97 if n, ok := nodeNetworks[nEvent.NetworkID]; ok { 98 // We have the latest state. Ignore the event 99 // since it is stale. 100 if n.ltime >= nEvent.LTime { 101 return false 102 } 103 104 n.ltime = nEvent.LTime 105 n.leaving = nEvent.Type == NetworkEventTypeLeave 106 if n.leaving { 107 n.reapTime = nDB.config.reapNetworkInterval 108 109 // The remote node is leaving the network, but not the gossip cluster. 110 // Mark all its entries in deleted state, this will guarantee that 111 // if some node bulk sync with us, the deleted state of 112 // these entries will be propagated. 113 nDB.deleteNodeNetworkEntries(nEvent.NetworkID, nEvent.NodeName) 114 } 115 116 if nEvent.Type == NetworkEventTypeLeave { 117 nDB.deleteNetworkNode(nEvent.NetworkID, nEvent.NodeName) 118 } else { 119 nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName) 120 } 121 122 return true 123 } 124 125 if nEvent.Type == NetworkEventTypeLeave { 126 return false 127 } 128 129 // If the node is not known from memberlist we cannot process save any state of it else if it actually 130 // dies we won't receive any notification and we will remain stuck with it 131 if _, ok := nDB.nodes[nEvent.NodeName]; !ok { 132 return false 133 } 134 135 // This remote network join is being seen the first time. 136 nodeNetworks[nEvent.NetworkID] = &network{ 137 id: nEvent.NetworkID, 138 ltime: nEvent.LTime, 139 } 140 141 nDB.addNetworkNode(nEvent.NetworkID, nEvent.NodeName) 142 return true 143 } 144 145 func (nDB *NetworkDB) handleTableEvent(tEvent *TableEvent, isBulkSync bool) bool { 146 // Update our local clock if the received messages has newer time. 147 nDB.tableClock.Witness(tEvent.LTime) 148 149 // Ignore the table events for networks that are in the process of going away 150 nDB.RLock() 151 networks := nDB.networks[nDB.config.NodeID] 152 network, ok := networks[tEvent.NetworkID] 153 // Check if the owner of the event is still part of the network 154 nodes := nDB.networkNodes[tEvent.NetworkID] 155 var nodePresent bool 156 for _, node := range nodes { 157 if node == tEvent.NodeName { 158 nodePresent = true 159 break 160 } 161 } 162 nDB.RUnlock() 163 164 if !ok || network.leaving || !nodePresent { 165 // I'm out of the network OR the event owner is not anymore part of the network so do not propagate 166 return false 167 } 168 169 nDB.Lock() 170 e, err := nDB.getEntry(tEvent.TableName, tEvent.NetworkID, tEvent.Key) 171 if err == nil { 172 // We have the latest state. Ignore the event 173 // since it is stale. 174 if e.ltime >= tEvent.LTime { 175 nDB.Unlock() 176 return false 177 } 178 } else if tEvent.Type == TableEventTypeDelete && !isBulkSync { 179 nDB.Unlock() 180 // We don't know the entry, the entry is being deleted and the message is an async message 181 // In this case the safest approach is to ignore it, it is possible that the queue grew so much to 182 // exceed the garbage collection time (the residual reap time that is in the message is not being 183 // updated, to avoid inserting too many messages in the queue). 184 // Instead the messages coming from TCP bulk sync are safe with the latest value for the garbage collection time 185 return false 186 } 187 188 e = &entry{ 189 ltime: tEvent.LTime, 190 node: tEvent.NodeName, 191 value: tEvent.Value, 192 deleting: tEvent.Type == TableEventTypeDelete, 193 reapTime: time.Duration(tEvent.ResidualReapTime) * time.Second, 194 } 195 196 // All the entries marked for deletion should have a reapTime set greater than 0 197 // This case can happen if the cluster is running different versions of the engine where the old version does not have the 198 // field. If that is not the case, this can be a BUG 199 if e.deleting && e.reapTime == 0 { 200 logrus.Warnf("%v(%v) handleTableEvent object %+v has a 0 reapTime, is the cluster running the same docker engine version?", 201 nDB.config.Hostname, nDB.config.NodeID, tEvent) 202 e.reapTime = nDB.config.reapEntryInterval 203 } 204 nDB.createOrUpdateEntry(tEvent.NetworkID, tEvent.TableName, tEvent.Key, e) 205 nDB.Unlock() 206 207 if err != nil && tEvent.Type == TableEventTypeDelete { 208 // Again we don't know the entry but this is coming from a TCP sync so the message body is up to date. 209 // We had saved the state so to speed up convergence and be able to avoid accepting create events. 210 // Now we will rebroadcast the message if 2 conditions are met: 211 // 1) we had already synced this network (during the network join) 212 // 2) the residual reapTime is higher than 1/6 of the total reapTime. 213 // If the residual reapTime is lower or equal to 1/6 of the total reapTime don't bother broadcasting it around 214 // most likely the cluster is already aware of it 215 // This also reduce the possibility that deletion of entries close to their garbage collection ends up circuling around 216 // forever 217 //logrus.Infof("exiting on delete not knowing the obj with rebroadcast:%t", network.inSync) 218 return network.inSync && e.reapTime > nDB.config.reapEntryInterval/6 219 } 220 221 var op opType 222 switch tEvent.Type { 223 case TableEventTypeCreate: 224 op = opCreate 225 case TableEventTypeUpdate: 226 op = opUpdate 227 case TableEventTypeDelete: 228 op = opDelete 229 } 230 231 nDB.broadcaster.Write(makeEvent(op, tEvent.TableName, tEvent.NetworkID, tEvent.Key, tEvent.Value)) 232 return network.inSync 233 } 234 235 func (nDB *NetworkDB) handleCompound(buf []byte, isBulkSync bool) { 236 // Decode the parts 237 parts, err := decodeCompoundMessage(buf) 238 if err != nil { 239 logrus.Errorf("Failed to decode compound request: %v", err) 240 return 241 } 242 243 // Handle each message 244 for _, part := range parts { 245 nDB.handleMessage(part, isBulkSync) 246 } 247 } 248 249 func (nDB *NetworkDB) handleTableMessage(buf []byte, isBulkSync bool) { 250 var tEvent TableEvent 251 if err := proto.Unmarshal(buf, &tEvent); err != nil { 252 logrus.Errorf("Error decoding table event message: %v", err) 253 return 254 } 255 256 // Ignore messages that this node generated. 257 if tEvent.NodeName == nDB.config.NodeID { 258 return 259 } 260 261 if rebroadcast := nDB.handleTableEvent(&tEvent, isBulkSync); rebroadcast { 262 var err error 263 buf, err = encodeRawMessage(MessageTypeTableEvent, buf) 264 if err != nil { 265 logrus.Errorf("Error marshalling gossip message for network event rebroadcast: %v", err) 266 return 267 } 268 269 nDB.RLock() 270 n, ok := nDB.networks[nDB.config.NodeID][tEvent.NetworkID] 271 nDB.RUnlock() 272 273 // if the network is not there anymore, OR we are leaving the network OR the broadcast queue is not present 274 if !ok || n.leaving || n.tableBroadcasts == nil { 275 return 276 } 277 278 // if the queue is over the threshold, avoid distributing information coming from TCP sync 279 if isBulkSync && n.tableBroadcasts.NumQueued() > maxQueueLenBroadcastOnSync { 280 return 281 } 282 283 n.tableBroadcasts.QueueBroadcast(&tableEventMessage{ 284 msg: buf, 285 id: tEvent.NetworkID, 286 tname: tEvent.TableName, 287 key: tEvent.Key, 288 }) 289 } 290 } 291 292 func (nDB *NetworkDB) handleNodeMessage(buf []byte) { 293 var nEvent NodeEvent 294 if err := proto.Unmarshal(buf, &nEvent); err != nil { 295 logrus.Errorf("Error decoding node event message: %v", err) 296 return 297 } 298 299 if rebroadcast := nDB.handleNodeEvent(&nEvent); rebroadcast { 300 var err error 301 buf, err = encodeRawMessage(MessageTypeNodeEvent, buf) 302 if err != nil { 303 logrus.Errorf("Error marshalling gossip message for node event rebroadcast: %v", err) 304 return 305 } 306 307 nDB.nodeBroadcasts.QueueBroadcast(&nodeEventMessage{ 308 msg: buf, 309 }) 310 } 311 } 312 313 func (nDB *NetworkDB) handleNetworkMessage(buf []byte) { 314 var nEvent NetworkEvent 315 if err := proto.Unmarshal(buf, &nEvent); err != nil { 316 logrus.Errorf("Error decoding network event message: %v", err) 317 return 318 } 319 320 if rebroadcast := nDB.handleNetworkEvent(&nEvent); rebroadcast { 321 var err error 322 buf, err = encodeRawMessage(MessageTypeNetworkEvent, buf) 323 if err != nil { 324 logrus.Errorf("Error marshalling gossip message for network event rebroadcast: %v", err) 325 return 326 } 327 328 nDB.networkBroadcasts.QueueBroadcast(&networkEventMessage{ 329 msg: buf, 330 id: nEvent.NetworkID, 331 node: nEvent.NodeName, 332 }) 333 } 334 } 335 336 func (nDB *NetworkDB) handleBulkSync(buf []byte) { 337 var bsm BulkSyncMessage 338 if err := proto.Unmarshal(buf, &bsm); err != nil { 339 logrus.Errorf("Error decoding bulk sync message: %v", err) 340 return 341 } 342 343 if bsm.LTime > 0 { 344 nDB.tableClock.Witness(bsm.LTime) 345 } 346 347 nDB.handleMessage(bsm.Payload, true) 348 349 // Don't respond to a bulk sync which was not unsolicited 350 if !bsm.Unsolicited { 351 nDB.Lock() 352 ch, ok := nDB.bulkSyncAckTbl[bsm.NodeName] 353 if ok { 354 close(ch) 355 delete(nDB.bulkSyncAckTbl, bsm.NodeName) 356 } 357 nDB.Unlock() 358 359 return 360 } 361 362 var nodeAddr net.IP 363 nDB.RLock() 364 if node, ok := nDB.nodes[bsm.NodeName]; ok { 365 nodeAddr = node.Addr 366 } 367 nDB.RUnlock() 368 369 if err := nDB.bulkSyncNode(bsm.Networks, bsm.NodeName, false); err != nil { 370 logrus.Errorf("Error in responding to bulk sync from node %s: %v", nodeAddr, err) 371 } 372 } 373 374 func (nDB *NetworkDB) handleMessage(buf []byte, isBulkSync bool) { 375 mType, data, err := decodeMessage(buf) 376 if err != nil { 377 logrus.Errorf("Error decoding gossip message to get message type: %v", err) 378 return 379 } 380 381 switch mType { 382 case MessageTypeNodeEvent: 383 nDB.handleNodeMessage(data) 384 case MessageTypeNetworkEvent: 385 nDB.handleNetworkMessage(data) 386 case MessageTypeTableEvent: 387 nDB.handleTableMessage(data, isBulkSync) 388 case MessageTypeBulkSync: 389 nDB.handleBulkSync(data) 390 case MessageTypeCompound: 391 nDB.handleCompound(data, isBulkSync) 392 default: 393 logrus.Errorf("%v(%v): unknown message type %d", nDB.config.Hostname, nDB.config.NodeID, mType) 394 } 395 } 396 397 func (d *delegate) NotifyMsg(buf []byte) { 398 if len(buf) == 0 { 399 return 400 } 401 402 d.nDB.handleMessage(buf, false) 403 } 404 405 func (d *delegate) GetBroadcasts(overhead, limit int) [][]byte { 406 msgs := d.nDB.networkBroadcasts.GetBroadcasts(overhead, limit) 407 msgs = append(msgs, d.nDB.nodeBroadcasts.GetBroadcasts(overhead, limit)...) 408 return msgs 409 } 410 411 func (d *delegate) LocalState(join bool) []byte { 412 if join { 413 // Update all the local node/network state to a new time to 414 // force update on the node we are trying to rejoin, just in 415 // case that node has these in leaving state still. This is 416 // facilitate fast convergence after recovering from a gossip 417 // failure. 418 d.nDB.updateLocalNetworkTime() 419 } 420 421 d.nDB.RLock() 422 defer d.nDB.RUnlock() 423 424 pp := NetworkPushPull{ 425 LTime: d.nDB.networkClock.Time(), 426 NodeName: d.nDB.config.NodeID, 427 } 428 429 for name, nn := range d.nDB.networks { 430 for _, n := range nn { 431 pp.Networks = append(pp.Networks, &NetworkEntry{ 432 LTime: n.ltime, 433 NetworkID: n.id, 434 NodeName: name, 435 Leaving: n.leaving, 436 }) 437 } 438 } 439 440 buf, err := encodeMessage(MessageTypePushPull, &pp) 441 if err != nil { 442 logrus.Errorf("Failed to encode local network state: %v", err) 443 return nil 444 } 445 446 return buf 447 } 448 449 func (d *delegate) MergeRemoteState(buf []byte, isJoin bool) { 450 if len(buf) == 0 { 451 logrus.Error("zero byte remote network state received") 452 return 453 } 454 455 var gMsg GossipMessage 456 err := proto.Unmarshal(buf, &gMsg) 457 if err != nil { 458 logrus.Errorf("Error unmarshalling push pull message: %v", err) 459 return 460 } 461 462 if gMsg.Type != MessageTypePushPull { 463 logrus.Errorf("Invalid message type %v received from remote", buf[0]) 464 } 465 466 pp := NetworkPushPull{} 467 if err := proto.Unmarshal(gMsg.Data, &pp); err != nil { 468 logrus.Errorf("Failed to decode remote network state: %v", err) 469 return 470 } 471 472 nodeEvent := &NodeEvent{ 473 LTime: pp.LTime, 474 NodeName: pp.NodeName, 475 Type: NodeEventTypeJoin, 476 } 477 d.nDB.handleNodeEvent(nodeEvent) 478 479 for _, n := range pp.Networks { 480 nEvent := &NetworkEvent{ 481 LTime: n.LTime, 482 NodeName: n.NodeName, 483 NetworkID: n.NetworkID, 484 Type: NetworkEventTypeJoin, 485 } 486 487 if n.Leaving { 488 nEvent.Type = NetworkEventTypeLeave 489 } 490 491 d.nDB.handleNetworkEvent(nEvent) 492 } 493 494 }