// Origin: github.com/zhuohuang-hust/src-cbuild@v0.0.0-20230105071821-c7aab3e7c840/mergeCode/libnetwork/networkdb/networkdb.go

// Package networkdb implements NetworkDB, a store of (table, network,
// key) entries whose changes are propagated to the other members of
// the cluster.
package networkdb

//go:generate protoc -I.:../Godeps/_workspace/src/github.com/gogo/protobuf --gogo_out=import_path=github.com/docker/libnetwork/networkdb,Mgogoproto/gogo.proto=github.com/gogo/protobuf/gogoproto:. networkdb.proto

import (
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/Sirupsen/logrus"
	"github.com/armon/go-radix"
	"github.com/docker/go-events"
	"github.com/hashicorp/memberlist"
	"github.com/hashicorp/serf/serf"
)

// Keys into NetworkDB.indexes selecting one of the two radix trees.
// The byTable tree is keyed by "/<table>/<network>/<key>" and the
// byNetwork tree by "/<network>/<table>/<key>".
const (
	byTable int = 1 + iota
	byNetwork
)

// NetworkDB instance drives the networkdb cluster and acts as the
// broker for cluster-scoped and network-scoped gossip and watches.
// The embedded RWMutex guards the maps and indexes below.
type NetworkDB struct {
	sync.RWMutex

	// NetworkDB configuration.
	config *Config

	// Local copy of the memberlist config that we use to drive
	// network scoped gossip and bulk sync.
	mConfig *memberlist.Config

	// All the tree indexes (byTable, byNetwork) that we maintain
	// for the db.
	indexes map[int]*radix.Tree

	// Memberlist we use to drive the cluster.
	memberlist *memberlist.Memberlist

	// List of all peer nodes in the cluster, not limited to any
	// network.
	nodes map[string]*node

	// List of all peer nodes which have failed.
	failedNodes map[string]*node

	// List of all peer nodes which have left.
	leftNodes map[string]*node

	// A multi-dimensional map of network/node attachments. The
	// first key is a node name and the second key is a network ID
	// for the network that node is participating in.
	networks map[string]map[string]*network

	// A map of nodes which are participating in a given
	// network. The key is a network ID and the value is the list
	// of node names.

	networkNodes map[string][]string

	// A table of ack channels for every node from which we are
	// waiting for an ack.
	bulkSyncAckTbl map[string]chan struct{}

	// Global lamport clock for node network attach events.
	networkClock serf.LamportClock

	// Global lamport clock for table events.
	tableClock serf.LamportClock

	// Broadcast queue for network event gossip.
	networkBroadcasts *memberlist.TransmitLimitedQueue

	// Broadcast queue for node event gossip.
	nodeBroadcasts *memberlist.TransmitLimitedQueue

	// A central stop channel to stop all go routines running on
	// behalf of the NetworkDB instance.
	stopCh chan struct{}

	// A central broadcaster for all local watchers watching table
	// events.
	broadcaster *events.Broadcaster

	// List of all tickers which need to be stopped when
	// cleaning up.
	tickers []*time.Ticker

	// Reference to the memberlist's keyring to add & remove keys.
	keyring *memberlist.Keyring
}

// node is a cluster member as reported by memberlist, together with
// a lamport time associated with it.
type node struct {
	memberlist.Node
	ltime serf.LamportTime
}

// network describes the node/network attachment.
type network struct {
	// Network ID
	id string

	// Lamport time for the latest state of the entry.
	ltime serf.LamportTime

	// Node leave is in progress.
	leaving bool

	// The time this node knew about the node's network leave.
	leaveTime time.Time

	// The broadcast queue for table event gossip. This is only
	// initialized for this node's network attachment entries.
	tableBroadcasts *memberlist.TransmitLimitedQueue
}

// Config represents the configuration of the networkdb instance and
// can be passed by the caller.
type Config struct {
	// NodeName is the cluster wide unique name for this node.
	NodeName string

	// BindAddr is the IP on which networkdb listens. It can be
	// 0.0.0.0 to listen on all addresses on the host.
	BindAddr string

	// AdvertiseAddr is the node's IP address that we advertise for
	// cluster communication.
	AdvertiseAddr string

	// BindPort is the local node's port to which we bind to for
	// cluster communication.
	BindPort int

	// Keys to be added to the Keyring of the memberlist. Key at index
	// 0 is the primary key.
	Keys [][]byte
}

// entry defines a table entry.
type entry struct {
	// node from which this entry was learned.
	node string

	// Lamport time for the most recent update to the entry.
	ltime serf.LamportTime

	// Opaque value stored in the entry.
	value []byte

	// Deleting the entry is in progress. All entries linger in
	// the cluster for certain amount of time after deletion.
	deleting bool

	// The wall clock time when this node learned about this deletion.
	deleteTime time.Time
}

// New creates a new instance of NetworkDB using the Config passed by
// the caller. It allocates the bookkeeping maps, the event broadcaster
// and both radix indexes, then calls clusterInit; an error from
// clusterInit is returned and no instance is handed back.
func New(c *Config) (*NetworkDB, error) {
	nDB := &NetworkDB{
		config:         c,
		indexes:        make(map[int]*radix.Tree),
		networks:       make(map[string]map[string]*network),
		nodes:          make(map[string]*node),
		failedNodes:    make(map[string]*node),
		leftNodes:      make(map[string]*node),
		networkNodes:   make(map[string][]string),
		bulkSyncAckTbl: make(map[string]chan struct{}),
		broadcaster:    events.NewBroadcaster(),
	}

	nDB.indexes[byTable] = radix.New()
	nDB.indexes[byNetwork] = radix.New()

	if err := nDB.clusterInit(); err != nil {
		return nil, err
	}

	return nDB, nil
}

// Join joins this NetworkDB instance with a list of peer NetworkDB
// instances passed by the caller in the form of addr:port. It returns
// whatever clusterJoin returns.
func (nDB *NetworkDB) Join(members []string) error {
	return nDB.clusterJoin(members)
}

// Close destroys this NetworkDB instance by leaving the cluster,
// stopping timers, canceling goroutines etc.
193 func (nDB *NetworkDB) Close() { 194 if err := nDB.clusterLeave(); err != nil { 195 logrus.Errorf("Could not close DB %s: %v", nDB.config.NodeName, err) 196 } 197 } 198 199 // GetEntry retrieves the value of a table entry in a given (network, 200 // table, key) tuple 201 func (nDB *NetworkDB) GetEntry(tname, nid, key string) ([]byte, error) { 202 entry, err := nDB.getEntry(tname, nid, key) 203 if err != nil { 204 return nil, err 205 } 206 207 return entry.value, nil 208 } 209 210 func (nDB *NetworkDB) getEntry(tname, nid, key string) (*entry, error) { 211 nDB.RLock() 212 defer nDB.RUnlock() 213 214 e, ok := nDB.indexes[byTable].Get(fmt.Sprintf("/%s/%s/%s", tname, nid, key)) 215 if !ok { 216 return nil, fmt.Errorf("could not get entry in table %s with network id %s and key %s", tname, nid, key) 217 } 218 219 return e.(*entry), nil 220 } 221 222 // CreateEntry creates a table entry in NetworkDB for given (network, 223 // table, key) tuple and if the NetworkDB is part of the cluster 224 // propogates this event to the cluster. It is an error to create an 225 // entry for the same tuple for which there is already an existing 226 // entry. 
227 func (nDB *NetworkDB) CreateEntry(tname, nid, key string, value []byte) error { 228 if _, err := nDB.GetEntry(tname, nid, key); err == nil { 229 return fmt.Errorf("cannot create entry as the entry in table %s with network id %s and key %s already exists", tname, nid, key) 230 } 231 232 entry := &entry{ 233 ltime: nDB.tableClock.Increment(), 234 node: nDB.config.NodeName, 235 value: value, 236 } 237 238 if err := nDB.sendTableEvent(TableEventTypeCreate, nid, tname, key, entry); err != nil { 239 return fmt.Errorf("cannot send table create event: %v", err) 240 } 241 242 nDB.Lock() 243 nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) 244 nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) 245 nDB.Unlock() 246 247 nDB.broadcaster.Write(makeEvent(opCreate, tname, nid, key, value)) 248 return nil 249 } 250 251 // UpdateEntry updates a table entry in NetworkDB for given (network, 252 // table, key) tuple and if the NetworkDB is part of the cluster 253 // propogates this event to the cluster. It is an error to update a 254 // non-existent entry. 
255 func (nDB *NetworkDB) UpdateEntry(tname, nid, key string, value []byte) error { 256 if _, err := nDB.GetEntry(tname, nid, key); err != nil { 257 return fmt.Errorf("cannot update entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key) 258 } 259 260 entry := &entry{ 261 ltime: nDB.tableClock.Increment(), 262 node: nDB.config.NodeName, 263 value: value, 264 } 265 266 if err := nDB.sendTableEvent(TableEventTypeUpdate, nid, tname, key, entry); err != nil { 267 return fmt.Errorf("cannot send table update event: %v", err) 268 } 269 270 nDB.Lock() 271 nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) 272 nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) 273 nDB.Unlock() 274 275 nDB.broadcaster.Write(makeEvent(opUpdate, tname, nid, key, value)) 276 return nil 277 } 278 279 // DeleteEntry deletes a table entry in NetworkDB for given (network, 280 // table, key) tuple and if the NetworkDB is part of the cluster 281 // propogates this event to the cluster. 
282 func (nDB *NetworkDB) DeleteEntry(tname, nid, key string) error { 283 value, err := nDB.GetEntry(tname, nid, key) 284 if err != nil { 285 return fmt.Errorf("cannot delete entry as the entry in table %s with network id %s and key %s does not exist", tname, nid, key) 286 } 287 288 entry := &entry{ 289 ltime: nDB.tableClock.Increment(), 290 node: nDB.config.NodeName, 291 value: value, 292 deleting: true, 293 deleteTime: time.Now(), 294 } 295 296 if err := nDB.sendTableEvent(TableEventTypeDelete, nid, tname, key, entry); err != nil { 297 return fmt.Errorf("cannot send table delete event: %v", err) 298 } 299 300 nDB.Lock() 301 nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) 302 nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) 303 nDB.Unlock() 304 305 nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, value)) 306 return nil 307 } 308 309 func (nDB *NetworkDB) deleteNetworkEntriesForNode(deletedNode string) { 310 nDB.Lock() 311 for nid, nodes := range nDB.networkNodes { 312 updatedNodes := make([]string, 0, len(nodes)) 313 for _, node := range nodes { 314 if node == deletedNode { 315 continue 316 } 317 318 updatedNodes = append(updatedNodes, node) 319 } 320 321 nDB.networkNodes[nid] = updatedNodes 322 } 323 324 delete(nDB.networks, deletedNode) 325 nDB.Unlock() 326 } 327 328 func (nDB *NetworkDB) deleteNodeTableEntries(node string) { 329 nDB.Lock() 330 nDB.indexes[byTable].Walk(func(path string, v interface{}) bool { 331 oldEntry := v.(*entry) 332 if oldEntry.node != node { 333 return false 334 } 335 336 params := strings.Split(path[1:], "/") 337 tname := params[0] 338 nid := params[1] 339 key := params[2] 340 341 entry := &entry{ 342 ltime: oldEntry.ltime, 343 node: node, 344 value: oldEntry.value, 345 deleting: true, 346 deleteTime: time.Now(), 347 } 348 349 nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) 350 nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, 
tname, key), entry) 351 352 nDB.broadcaster.Write(makeEvent(opDelete, tname, nid, key, entry.value)) 353 return false 354 }) 355 nDB.Unlock() 356 } 357 358 // WalkTable walks a single table in NetworkDB and invokes the passed 359 // function for each entry in the table passing the network, key, 360 // value. The walk stops if the passed function returns a true. 361 func (nDB *NetworkDB) WalkTable(tname string, fn func(string, string, []byte) bool) error { 362 nDB.RLock() 363 values := make(map[string]interface{}) 364 nDB.indexes[byTable].WalkPrefix(fmt.Sprintf("/%s", tname), func(path string, v interface{}) bool { 365 values[path] = v 366 return false 367 }) 368 nDB.RUnlock() 369 370 for k, v := range values { 371 params := strings.Split(k[1:], "/") 372 nid := params[1] 373 key := params[2] 374 if fn(nid, key, v.(*entry).value) { 375 return nil 376 } 377 } 378 379 return nil 380 } 381 382 // JoinNetwork joins this node to a given network and propogates this 383 // event across the cluster. This triggers this node joining the 384 // sub-cluster of this network and participates in the network-scoped 385 // gossip and bulk sync for this network. 
386 func (nDB *NetworkDB) JoinNetwork(nid string) error { 387 ltime := nDB.networkClock.Increment() 388 389 nDB.Lock() 390 nodeNetworks, ok := nDB.networks[nDB.config.NodeName] 391 if !ok { 392 nodeNetworks = make(map[string]*network) 393 nDB.networks[nDB.config.NodeName] = nodeNetworks 394 } 395 nodeNetworks[nid] = &network{id: nid, ltime: ltime} 396 nodeNetworks[nid].tableBroadcasts = &memberlist.TransmitLimitedQueue{ 397 NumNodes: func() int { 398 nDB.RLock() 399 num := len(nDB.networkNodes[nid]) 400 nDB.RUnlock() 401 return num 402 }, 403 RetransmitMult: 4, 404 } 405 nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nDB.config.NodeName) 406 networkNodes := nDB.networkNodes[nid] 407 nDB.Unlock() 408 409 if err := nDB.sendNetworkEvent(nid, NetworkEventTypeJoin, ltime); err != nil { 410 return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) 411 } 412 413 logrus.Debugf("%s: joined network %s", nDB.config.NodeName, nid) 414 if _, err := nDB.bulkSync(networkNodes, true); err != nil { 415 logrus.Errorf("Error bulk syncing while joining network %s: %v", nid, err) 416 } 417 418 return nil 419 } 420 421 // LeaveNetwork leaves this node from a given network and propogates 422 // this event across the cluster. This triggers this node leaving the 423 // sub-cluster of this network and as a result will no longer 424 // participate in the network-scoped gossip and bulk sync for this 425 // network. 
Also remove all the table entries for this network from 426 // networkdb 427 func (nDB *NetworkDB) LeaveNetwork(nid string) error { 428 ltime := nDB.networkClock.Increment() 429 if err := nDB.sendNetworkEvent(nid, NetworkEventTypeLeave, ltime); err != nil { 430 return fmt.Errorf("failed to send leave network event for %s: %v", nid, err) 431 } 432 433 nDB.Lock() 434 defer nDB.Unlock() 435 var ( 436 paths []string 437 entries []*entry 438 ) 439 440 nwWalker := func(path string, v interface{}) bool { 441 entry, ok := v.(*entry) 442 if !ok { 443 return false 444 } 445 paths = append(paths, path) 446 entries = append(entries, entry) 447 return false 448 } 449 450 nDB.indexes[byNetwork].WalkPrefix(fmt.Sprintf("/%s", nid), nwWalker) 451 for _, path := range paths { 452 params := strings.Split(path[1:], "/") 453 tname := params[1] 454 key := params[2] 455 456 if _, ok := nDB.indexes[byTable].Delete(fmt.Sprintf("/%s/%s/%s", tname, nid, key)); !ok { 457 logrus.Errorf("Could not delete entry in table %s with network id %s and key %s as it does not exist", tname, nid, key) 458 } 459 460 if _, ok := nDB.indexes[byNetwork].Delete(fmt.Sprintf("/%s/%s/%s", nid, tname, key)); !ok { 461 logrus.Errorf("Could not delete entry in network %s with table name %s and key %s as it does not exist", nid, tname, key) 462 } 463 } 464 465 nodeNetworks, ok := nDB.networks[nDB.config.NodeName] 466 if !ok { 467 return fmt.Errorf("could not find self node for network %s while trying to leave", nid) 468 } 469 470 n, ok := nodeNetworks[nid] 471 if !ok { 472 return fmt.Errorf("could not find network %s while trying to leave", nid) 473 } 474 475 n.ltime = ltime 476 n.leaving = true 477 return nil 478 } 479 480 // addNetworkNode adds the node to the list of nodes which participate 481 // in the passed network only if it is not already present. 
Caller 482 // should hold the NetworkDB lock while calling this 483 func (nDB *NetworkDB) addNetworkNode(nid string, nodeName string) { 484 nodes := nDB.networkNodes[nid] 485 for _, node := range nodes { 486 if node == nodeName { 487 return 488 } 489 } 490 491 nDB.networkNodes[nid] = append(nDB.networkNodes[nid], nodeName) 492 } 493 494 // Deletes the node from the list of nodes which participate in the 495 // passed network. Caller should hold the NetworkDB lock while calling 496 // this 497 func (nDB *NetworkDB) deleteNetworkNode(nid string, nodeName string) { 498 nodes := nDB.networkNodes[nid] 499 newNodes := make([]string, 0, len(nodes)-1) 500 for _, name := range nodes { 501 if name == nodeName { 502 continue 503 } 504 newNodes = append(newNodes, name) 505 } 506 nDB.networkNodes[nid] = newNodes 507 } 508 509 // findCommonnetworks find the networks that both this node and the 510 // passed node have joined. 511 func (nDB *NetworkDB) findCommonNetworks(nodeName string) []string { 512 nDB.RLock() 513 defer nDB.RUnlock() 514 515 var networks []string 516 for nid := range nDB.networks[nDB.config.NodeName] { 517 if n, ok := nDB.networks[nodeName][nid]; ok { 518 if !n.leaving { 519 networks = append(networks, nid) 520 } 521 } 522 } 523 524 return networks 525 } 526 527 func (nDB *NetworkDB) updateLocalNetworkTime() { 528 nDB.Lock() 529 defer nDB.Unlock() 530 531 ltime := nDB.networkClock.Increment() 532 for _, n := range nDB.networks[nDB.config.NodeName] { 533 n.ltime = ltime 534 } 535 } 536 537 func (nDB *NetworkDB) updateLocalTableTime() { 538 nDB.Lock() 539 defer nDB.Unlock() 540 541 ltime := nDB.tableClock.Increment() 542 nDB.indexes[byTable].Walk(func(path string, v interface{}) bool { 543 entry := v.(*entry) 544 if entry.node != nDB.config.NodeName { 545 return false 546 } 547 548 params := strings.Split(path[1:], "/") 549 tname := params[0] 550 nid := params[1] 551 key := params[2] 552 entry.ltime = ltime 553 554 
nDB.indexes[byTable].Insert(fmt.Sprintf("/%s/%s/%s", tname, nid, key), entry) 555 nDB.indexes[byNetwork].Insert(fmt.Sprintf("/%s/%s/%s", nid, tname, key), entry) 556 557 return false 558 }) 559 }