github.com/criteo-forks/consul@v1.4.5-criteonogrpc/agent/consul/state/catalog.go (about) 1 package state 2 3 import ( 4 "fmt" 5 "strings" 6 7 "github.com/hashicorp/consul/agent/structs" 8 "github.com/hashicorp/consul/api" 9 "github.com/hashicorp/consul/types" 10 "github.com/hashicorp/go-memdb" 11 uuid "github.com/hashicorp/go-uuid" 12 ) 13 14 const ( 15 servicesTableName = "services" 16 17 // serviceLastExtinctionIndexName keeps track of the last raft index when the last instance 18 // of any service was unregistered. This is used by blocking queries on missing services. 19 serviceLastExtinctionIndexName = "service_last_extinction" 20 ) 21 22 // nodesTableSchema returns a new table schema used for storing node 23 // information. 24 func nodesTableSchema() *memdb.TableSchema { 25 return &memdb.TableSchema{ 26 Name: "nodes", 27 Indexes: map[string]*memdb.IndexSchema{ 28 "id": &memdb.IndexSchema{ 29 Name: "id", 30 AllowMissing: false, 31 Unique: true, 32 Indexer: &memdb.StringFieldIndex{ 33 Field: "Node", 34 Lowercase: true, 35 }, 36 }, 37 "uuid": &memdb.IndexSchema{ 38 Name: "uuid", 39 AllowMissing: true, 40 Unique: true, 41 Indexer: &memdb.UUIDFieldIndex{ 42 Field: "ID", 43 }, 44 }, 45 "meta": &memdb.IndexSchema{ 46 Name: "meta", 47 AllowMissing: true, 48 Unique: false, 49 Indexer: &memdb.StringMapFieldIndex{ 50 Field: "Meta", 51 Lowercase: false, 52 }, 53 }, 54 }, 55 } 56 } 57 58 // servicesTableSchema returns a new table schema used to store information 59 // about services. 60 func servicesTableSchema() *memdb.TableSchema { 61 return &memdb.TableSchema{ 62 Name: "services", 63 Indexes: map[string]*memdb.IndexSchema{ 64 "id": &memdb.IndexSchema{ 65 Name: "id", 66 AllowMissing: false, 67 Unique: true, 68 Indexer: &memdb.CompoundIndex{ 69 Indexes: []memdb.Indexer{ 70 &memdb.StringFieldIndex{ 71 Field: "Node", 72 Lowercase: true, 73 }, 74 &memdb.StringFieldIndex{ 75 Field: "ServiceID", 76 Lowercase: true, 77 }, 78 }, 79 }, 80 }, 81 "node": &memdb.IndexSchema{ 82 Name: "node", 83 AllowMissing: false, 84 Unique: false, 85 Indexer: &memdb.StringFieldIndex{ 86 Field: "Node", 87 Lowercase: true, 88 }, 89 }, 90 "service": &memdb.IndexSchema{ 91 Name: "service", 92 AllowMissing: true, 93 Unique: false, 94 Indexer: &memdb.StringFieldIndex{ 95 Field: "ServiceName", 96 Lowercase: true, 97 }, 98 }, 99 "connect": &memdb.IndexSchema{ 100 Name: "connect", 101 AllowMissing: true, 102 Unique: false, 103 Indexer: &IndexConnectService{}, 104 }, 105 }, 106 } 107 } 108 109 // checksTableSchema returns a new table schema used for storing and indexing 110 // health check information. Health checks have a number of different attributes 111 // we want to filter by, so this table is a bit more complex. 112 func checksTableSchema() *memdb.TableSchema { 113 return &memdb.TableSchema{ 114 Name: "checks", 115 Indexes: map[string]*memdb.IndexSchema{ 116 "id": &memdb.IndexSchema{ 117 Name: "id", 118 AllowMissing: false, 119 Unique: true, 120 Indexer: &memdb.CompoundIndex{ 121 Indexes: []memdb.Indexer{ 122 &memdb.StringFieldIndex{ 123 Field: "Node", 124 Lowercase: true, 125 }, 126 &memdb.StringFieldIndex{ 127 Field: "CheckID", 128 Lowercase: true, 129 }, 130 }, 131 }, 132 }, 133 "status": &memdb.IndexSchema{ 134 Name: "status", 135 AllowMissing: false, 136 Unique: false, 137 Indexer: &memdb.StringFieldIndex{ 138 Field: "Status", 139 Lowercase: false, 140 }, 141 }, 142 "service": &memdb.IndexSchema{ 143 Name: "service", 144 AllowMissing: true, 145 Unique: false, 146 Indexer: &memdb.StringFieldIndex{ 147 Field: "ServiceName", 148 Lowercase: true, 149 }, 150 }, 151 "node": &memdb.IndexSchema{ 152 Name: "node", 153 AllowMissing: true, 154 Unique: false, 155 Indexer: &memdb.StringFieldIndex{ 156 Field: "Node", 157 Lowercase: true, 158 }, 159 }, 160 "node_service_check": &memdb.IndexSchema{ 161 Name: "node_service_check", 162 AllowMissing: true, 163 Unique: false, 164 Indexer: &memdb.CompoundIndex{ 165 Indexes: []memdb.Indexer{ 166 &memdb.StringFieldIndex{ 167 Field: "Node", 168 Lowercase: true, 169 }, 170 &memdb.FieldSetIndex{ 171 Field: "ServiceID", 172 }, 173 }, 174 }, 175 }, 176 "node_service": &memdb.IndexSchema{ 177 Name: "node_service", 178 AllowMissing: true, 179 Unique: false, 180 Indexer: &memdb.CompoundIndex{ 181 Indexes: []memdb.Indexer{ 182 &memdb.StringFieldIndex{ 183 Field: "Node", 184 Lowercase: true, 185 }, 186 &memdb.StringFieldIndex{ 187 Field: "ServiceID", 188 Lowercase: true, 189 }, 190 }, 191 }, 192 }, 193 }, 194 } 195 } 196 197 func init() { 198 registerSchema(nodesTableSchema) 199 registerSchema(servicesTableSchema) 200 registerSchema(checksTableSchema) 201 } 202 203 const ( 204 // minUUIDLookupLen is used as a minimum length of a node name required before 205 // we test to see if the name is actually a UUID and perform an ID-based node 206 // lookup. 207 minUUIDLookupLen = 2 208 ) 209 210 func resizeNodeLookupKey(s string) string { 211 l := len(s) 212 213 if l%2 != 0 { 214 return s[0 : l-1] 215 } 216 217 return s 218 } 219 220 // Nodes is used to pull the full list of nodes for use during snapshots. 221 func (s *Snapshot) Nodes() (memdb.ResultIterator, error) { 222 iter, err := s.tx.Get("nodes", "id") 223 if err != nil { 224 return nil, err 225 } 226 return iter, nil 227 } 228 229 // Services is used to pull the full list of services for a given node for use 230 // during snapshots. 231 func (s *Snapshot) Services(node string) (memdb.ResultIterator, error) { 232 iter, err := s.tx.Get("services", "node", node) 233 if err != nil { 234 return nil, err 235 } 236 return iter, nil 237 } 238 239 // Checks is used to pull the full list of checks for a given node for use 240 // during snapshots. 241 func (s *Snapshot) Checks(node string) (memdb.ResultIterator, error) { 242 iter, err := s.tx.Get("checks", "node", node) 243 if err != nil { 244 return nil, err 245 } 246 return iter, nil 247 } 248 249 // Registration is used to make sure a node, service, and check registration is 250 // performed within a single transaction to avoid race conditions on state 251 // updates. 252 func (s *Restore) Registration(idx uint64, req *structs.RegisterRequest) error { 253 if err := s.store.ensureRegistrationTxn(s.tx, idx, req); err != nil { 254 return err 255 } 256 return nil 257 } 258 259 // EnsureRegistration is used to make sure a node, service, and check 260 // registration is performed within a single transaction to avoid race 261 // conditions on state updates. 262 func (s *Store) EnsureRegistration(idx uint64, req *structs.RegisterRequest) error { 263 tx := s.db.Txn(true) 264 defer tx.Abort() 265 266 if err := s.ensureRegistrationTxn(tx, idx, req); err != nil { 267 return err 268 } 269 270 tx.Commit() 271 return nil 272 } 273 274 func (s *Store) ensureCheckIfNodeMatches(tx *memdb.Txn, idx uint64, node string, check *structs.HealthCheck) error { 275 if check.Node != node { 276 return fmt.Errorf("check node %q does not match node %q", 277 check.Node, node) 278 } 279 if err := s.ensureCheckTxn(tx, idx, check); err != nil { 280 return fmt.Errorf("failed inserting check: %s on node %q", err, check.Node) 281 } 282 return nil 283 } 284 285 // ensureRegistrationTxn is used to make sure a node, service, and check 286 // registration is performed within a single transaction to avoid race 287 // conditions on state updates. 288 func (s *Store) ensureRegistrationTxn(tx *memdb.Txn, idx uint64, req *structs.RegisterRequest) error { 289 // Create a node structure. 290 node := &structs.Node{ 291 ID: req.ID, 292 Node: req.Node, 293 Address: req.Address, 294 Datacenter: req.Datacenter, 295 TaggedAddresses: req.TaggedAddresses, 296 Meta: req.NodeMeta, 297 } 298 299 // Since this gets called for all node operations (service and check 300 // updates) and churn on the node itself is basically none after the 301 // node updates itself the first time, it's worth seeing if we need to 302 // modify the node at all so we prevent watch churn and useless writes 303 // and modify index bumps on the node. 304 { 305 existing, err := tx.First("nodes", "id", node.Node) 306 if err != nil { 307 return fmt.Errorf("node lookup failed: %s", err) 308 } 309 if existing == nil || req.ChangesNode(existing.(*structs.Node)) { 310 if err := s.ensureNodeTxn(tx, idx, node); err != nil { 311 return fmt.Errorf("failed inserting node: %s", err) 312 } 313 } 314 } 315 316 // Add the service, if any. We perform a similar check as we do for the 317 // node info above to make sure we actually need to update the service 318 // definition in order to prevent useless churn if nothing has changed. 319 if req.Service != nil { 320 existing, err := tx.First("services", "id", req.Node, req.Service.ID) 321 if err != nil { 322 return fmt.Errorf("failed service lookup: %s", err) 323 } 324 if existing == nil || !(existing.(*structs.ServiceNode).ToNodeService()).IsSame(req.Service) { 325 if err := s.ensureServiceTxn(tx, idx, req.Node, req.Service); err != nil { 326 return fmt.Errorf("failed inserting service: %s", err) 327 328 } 329 } 330 } 331 332 // Add the checks, if any. 333 if req.Check != nil { 334 if err := s.ensureCheckIfNodeMatches(tx, idx, req.Node, req.Check); err != nil { 335 return err 336 } 337 } 338 for _, check := range req.Checks { 339 if err := s.ensureCheckIfNodeMatches(tx, idx, req.Node, check); err != nil { 340 return err 341 } 342 } 343 344 return nil 345 } 346 347 // EnsureNode is used to upsert node registration or modification. 348 func (s *Store) EnsureNode(idx uint64, node *structs.Node) error { 349 tx := s.db.Txn(true) 350 defer tx.Abort() 351 352 // Call the node upsert 353 if err := s.ensureNodeTxn(tx, idx, node); err != nil { 354 return err 355 } 356 357 tx.Commit() 358 return nil 359 } 360 361 // ensureNoNodeWithSimilarNameTxn checks that no other node has conflict in its name 362 // If allowClashWithoutID then, getting a conflict on another node without ID will be allowed 363 func (s *Store) ensureNoNodeWithSimilarNameTxn(tx *memdb.Txn, node *structs.Node, allowClashWithoutID bool) error { 364 // Retrieve all of the nodes 365 enodes, err := tx.Get("nodes", "id") 366 if err != nil { 367 return fmt.Errorf("Cannot lookup all nodes: %s", err) 368 } 369 for nodeIt := enodes.Next(); nodeIt != nil; nodeIt = enodes.Next() { 370 enode := nodeIt.(*structs.Node) 371 if strings.EqualFold(node.Node, enode.Node) && node.ID != enode.ID { 372 if !(enode.ID == "" && allowClashWithoutID) { 373 return fmt.Errorf("Node name %s is reserved by node %s with name %s", node.Node, enode.ID, enode.Node) 374 } 375 } 376 } 377 return nil 378 } 379 380 // ensureNodeCASTxn updates a node only if the existing index matches the given index. 381 // Returns a bool indicating if a write happened and any error. 382 func (s *Store) ensureNodeCASTxn(tx *memdb.Txn, idx uint64, node *structs.Node) (bool, error) { 383 // Retrieve the existing entry. 384 existing, err := getNodeTxn(tx, node.Node) 385 if err != nil { 386 return false, err 387 } 388 389 // Check if the we should do the set. A ModifyIndex of 0 means that 390 // we are doing a set-if-not-exists. 391 if node.ModifyIndex == 0 && existing != nil { 392 return false, nil 393 } 394 if node.ModifyIndex != 0 && existing == nil { 395 return false, nil 396 } 397 if existing != nil && node.ModifyIndex != 0 && node.ModifyIndex != existing.ModifyIndex { 398 return false, nil 399 } 400 401 // Perform the update. 402 if err := s.ensureNodeTxn(tx, idx, node); err != nil { 403 return false, err 404 } 405 406 return true, nil 407 } 408 409 // ensureNodeTxn is the inner function called to actually create a node 410 // registration or modify an existing one in the state store. It allows 411 // passing in a memdb transaction so it may be part of a larger txn. 412 func (s *Store) ensureNodeTxn(tx *memdb.Txn, idx uint64, node *structs.Node) error { 413 // See if there's an existing node with this UUID, and make sure the 414 // name is the same. 415 var n *structs.Node 416 if node.ID != "" { 417 existing, err := getNodeIDTxn(tx, node.ID) 418 if err != nil { 419 return fmt.Errorf("node lookup failed: %s", err) 420 } 421 if existing != nil { 422 n = existing 423 if n.Node != node.Node { 424 // Lets first get all nodes and check whether name do match, we do not allow clash on nodes without ID 425 dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, false) 426 if dupNameError != nil { 427 return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) 428 } 429 // We are actually renaming a node, remove its reference first 430 err := s.deleteNodeTxn(tx, idx, n.Node) 431 if err != nil { 432 return fmt.Errorf("Error while renaming Node ID: %q from %s to %s", 433 node.ID, n.Node, node.Node) 434 } 435 } 436 } else { 437 // We allow to "steal" another node name that would have no ID 438 // It basically means that we allow upgrading a node without ID and add the ID 439 dupNameError := s.ensureNoNodeWithSimilarNameTxn(tx, node, true) 440 if dupNameError != nil { 441 return fmt.Errorf("Error while renaming Node ID: %q: %s", node.ID, dupNameError) 442 } 443 } 444 } 445 // TODO: else Node.ID == "" should be forbidden in future Consul releases 446 // See https://github.com/hashicorp/consul/pull/3983 for context 447 448 // Check for an existing node by name to support nodes with no IDs. 449 if n == nil { 450 existing, err := tx.First("nodes", "id", node.Node) 451 if err != nil { 452 return fmt.Errorf("node name lookup failed: %s", err) 453 } 454 455 if existing != nil { 456 n = existing.(*structs.Node) 457 } 458 // WARNING, for compatibility reasons with tests, we do not check 459 // for case insensitive matches, which may lead to DB corruption 460 // See https://github.com/hashicorp/consul/pull/3983 for context 461 } 462 463 // Get the indexes. 464 if n != nil { 465 node.CreateIndex = n.CreateIndex 466 node.ModifyIndex = n.ModifyIndex 467 // We do not need to update anything 468 if node.IsSame(n) { 469 return nil 470 } 471 node.ModifyIndex = idx 472 } else { 473 node.CreateIndex = idx 474 node.ModifyIndex = idx 475 } 476 477 // Insert the node and update the index. 478 if err := tx.Insert("nodes", node); err != nil { 479 return fmt.Errorf("failed inserting node: %s", err) 480 } 481 if err := tx.Insert("index", &IndexEntry{"nodes", idx}); err != nil { 482 return fmt.Errorf("failed updating index: %s", err) 483 } 484 // Update the node's service indexes as the node information is included 485 // in health queries and we would otherwise miss node updates in some cases 486 // for those queries. 487 if err := s.updateAllServiceIndexesOfNode(tx, idx, node.Node); err != nil { 488 return fmt.Errorf("failed updating index: %s", err) 489 } 490 491 return nil 492 } 493 494 // GetNode is used to retrieve a node registration by node name ID. 495 func (s *Store) GetNode(id string) (uint64, *structs.Node, error) { 496 tx := s.db.Txn(false) 497 defer tx.Abort() 498 499 // Get the table index. 500 idx := maxIndexTxn(tx, "nodes") 501 502 // Retrieve the node from the state store 503 node, err := getNodeTxn(tx, id) 504 if err != nil { 505 return 0, nil, fmt.Errorf("node lookup failed: %s", err) 506 } 507 return idx, node, nil 508 } 509 510 func getNodeTxn(tx *memdb.Txn, nodeName string) (*structs.Node, error) { 511 node, err := tx.First("nodes", "id", nodeName) 512 if err != nil { 513 return nil, fmt.Errorf("node lookup failed: %s", err) 514 } 515 if node != nil { 516 return node.(*structs.Node), nil 517 } 518 return nil, nil 519 } 520 521 func getNodeIDTxn(tx *memdb.Txn, id types.NodeID) (*structs.Node, error) { 522 strnode := string(id) 523 uuidValue, err := uuid.ParseUUID(strnode) 524 if err != nil { 525 return nil, fmt.Errorf("node lookup by ID failed, wrong UUID: %v for '%s'", err, strnode) 526 } 527 528 node, err := tx.First("nodes", "uuid", uuidValue) 529 if err != nil { 530 return nil, fmt.Errorf("node lookup by ID failed: %s", err) 531 } 532 if node != nil { 533 return node.(*structs.Node), nil 534 } 535 return nil, nil 536 } 537 538 // GetNodeID is used to retrieve a node registration by node ID. 539 func (s *Store) GetNodeID(id types.NodeID) (uint64, *structs.Node, error) { 540 tx := s.db.Txn(false) 541 defer tx.Abort() 542 543 // Get the table index. 544 idx := maxIndexTxn(tx, "nodes") 545 546 // Retrieve the node from the state store 547 node, err := getNodeIDTxn(tx, id) 548 return idx, node, err 549 } 550 551 // Nodes is used to return all of the known nodes. 552 func (s *Store) Nodes(ws memdb.WatchSet) (uint64, structs.Nodes, error) { 553 tx := s.db.Txn(false) 554 defer tx.Abort() 555 556 // Get the table index. 557 idx := maxIndexTxn(tx, "nodes") 558 559 // Retrieve all of the nodes 560 nodes, err := tx.Get("nodes", "id") 561 if err != nil { 562 return 0, nil, fmt.Errorf("failed nodes lookup: %s", err) 563 } 564 ws.Add(nodes.WatchCh()) 565 566 // Create and return the nodes list. 567 var results structs.Nodes 568 for node := nodes.Next(); node != nil; node = nodes.Next() { 569 results = append(results, node.(*structs.Node)) 570 } 571 return idx, results, nil 572 } 573 574 // NodesByMeta is used to return all nodes with the given metadata key/value pairs. 575 func (s *Store) NodesByMeta(ws memdb.WatchSet, filters map[string]string) (uint64, structs.Nodes, error) { 576 tx := s.db.Txn(false) 577 defer tx.Abort() 578 579 // Get the table index. 580 idx := maxIndexTxn(tx, "nodes") 581 582 // Retrieve all of the nodes 583 var args []interface{} 584 for key, value := range filters { 585 args = append(args, key, value) 586 break 587 } 588 nodes, err := tx.Get("nodes", "meta", args...) 589 if err != nil { 590 return 0, nil, fmt.Errorf("failed nodes lookup: %s", err) 591 } 592 ws.Add(nodes.WatchCh()) 593 594 // Create and return the nodes list. 595 var results structs.Nodes 596 for node := nodes.Next(); node != nil; node = nodes.Next() { 597 n := node.(*structs.Node) 598 if len(filters) <= 1 || structs.SatisfiesMetaFilters(n.Meta, filters) { 599 results = append(results, n) 600 } 601 } 602 return idx, results, nil 603 } 604 605 // DeleteNode is used to delete a given node by its ID. 606 func (s *Store) DeleteNode(idx uint64, nodeName string) error { 607 tx := s.db.Txn(true) 608 defer tx.Abort() 609 610 // Call the node deletion. 611 if err := s.deleteNodeTxn(tx, idx, nodeName); err != nil { 612 return err 613 } 614 615 tx.Commit() 616 return nil 617 } 618 619 // deleteNodeCASTxn is used to try doing a node delete operation with a given 620 // raft index. If the CAS index specified is not equal to the last observed index for 621 // the given check, then the call is a noop, otherwise a normal check delete is invoked. 622 func (s *Store) deleteNodeCASTxn(tx *memdb.Txn, idx, cidx uint64, nodeName string) (bool, error) { 623 // Look up the node. 624 node, err := getNodeTxn(tx, nodeName) 625 if err != nil { 626 return false, err 627 } 628 if node == nil { 629 return false, nil 630 } 631 632 // If the existing index does not match the provided CAS 633 // index arg, then we shouldn't update anything and can safely 634 // return early here. 635 if node.ModifyIndex != cidx { 636 return false, nil 637 } 638 639 // Call the actual deletion if the above passed. 640 if err := s.deleteNodeTxn(tx, idx, nodeName); err != nil { 641 return false, err 642 } 643 644 return true, nil 645 } 646 647 // deleteNodeTxn is the inner method used for removing a node from 648 // the store within a given transaction. 649 func (s *Store) deleteNodeTxn(tx *memdb.Txn, idx uint64, nodeName string) error { 650 // Look up the node. 651 node, err := tx.First("nodes", "id", nodeName) 652 if err != nil { 653 return fmt.Errorf("node lookup failed: %s", err) 654 } 655 if node == nil { 656 return nil 657 } 658 659 // Delete all services associated with the node and update the service index. 660 services, err := tx.Get("services", "node", nodeName) 661 if err != nil { 662 return fmt.Errorf("failed service lookup: %s", err) 663 } 664 var sids []string 665 for service := services.Next(); service != nil; service = services.Next() { 666 svc := service.(*structs.ServiceNode) 667 sids = append(sids, svc.ServiceID) 668 if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil { 669 return fmt.Errorf("failed updating index: %s", err) 670 } 671 } 672 673 // Do the delete in a separate loop so we don't trash the iterator. 674 for _, sid := range sids { 675 if err := s.deleteServiceTxn(tx, idx, nodeName, sid); err != nil { 676 return err 677 } 678 } 679 680 // Delete all checks associated with the node. This will invalidate 681 // sessions as necessary. 682 checks, err := tx.Get("checks", "node", nodeName) 683 if err != nil { 684 return fmt.Errorf("failed check lookup: %s", err) 685 } 686 var cids []types.CheckID 687 for check := checks.Next(); check != nil; check = checks.Next() { 688 cids = append(cids, check.(*structs.HealthCheck).CheckID) 689 } 690 691 // Do the delete in a separate loop so we don't trash the iterator. 692 for _, cid := range cids { 693 if err := s.deleteCheckTxn(tx, idx, nodeName, cid); err != nil { 694 return err 695 } 696 } 697 698 // Delete any coordinates associated with this node. 699 coords, err := tx.Get("coordinates", "node", nodeName) 700 if err != nil { 701 return fmt.Errorf("failed coordinate lookup: %s", err) 702 } 703 for coord := coords.Next(); coord != nil; coord = coords.Next() { 704 if err := tx.Delete("coordinates", coord); err != nil { 705 return fmt.Errorf("failed deleting coordinate: %s", err) 706 } 707 if err := tx.Insert("index", &IndexEntry{"coordinates", idx}); err != nil { 708 return fmt.Errorf("failed updating index: %s", err) 709 } 710 } 711 712 // Delete the node and update the index. 713 if err := tx.Delete("nodes", node); err != nil { 714 return fmt.Errorf("failed deleting node: %s", err) 715 } 716 if err := tx.Insert("index", &IndexEntry{"nodes", idx}); err != nil { 717 return fmt.Errorf("failed updating index: %s", err) 718 } 719 720 // Invalidate any sessions for this node. 721 sessions, err := tx.Get("sessions", "node", nodeName) 722 if err != nil { 723 return fmt.Errorf("failed session lookup: %s", err) 724 } 725 var ids []string 726 for sess := sessions.Next(); sess != nil; sess = sessions.Next() { 727 ids = append(ids, sess.(*structs.Session).ID) 728 } 729 730 // Do the delete in a separate loop so we don't trash the iterator. 731 for _, id := range ids { 732 if err := s.deleteSessionTxn(tx, idx, id); err != nil { 733 return fmt.Errorf("failed session delete: %s", err) 734 } 735 } 736 737 return nil 738 } 739 740 // EnsureService is called to upsert creation of a given NodeService. 741 func (s *Store) EnsureService(idx uint64, node string, svc *structs.NodeService) error { 742 tx := s.db.Txn(true) 743 defer tx.Abort() 744 745 // Call the service registration upsert 746 if err := s.ensureServiceTxn(tx, idx, node, svc); err != nil { 747 return err 748 } 749 750 tx.Commit() 751 return nil 752 } 753 754 // ensureServiceCASTxn updates a service only if the existing index matches the given index. 755 // Returns a bool indicating if a write happened and any error. 756 func (s *Store) ensureServiceCASTxn(tx *memdb.Txn, idx uint64, node string, svc *structs.NodeService) (bool, error) { 757 // Retrieve the existing service. 758 existing, err := tx.First("services", "id", node, svc.ID) 759 if err != nil { 760 return false, fmt.Errorf("failed service lookup: %s", err) 761 } 762 763 // Check if the we should do the set. A ModifyIndex of 0 means that 764 // we are doing a set-if-not-exists. 765 if svc.ModifyIndex == 0 && existing != nil { 766 return false, nil 767 } 768 if svc.ModifyIndex != 0 && existing == nil { 769 return false, nil 770 } 771 e, ok := existing.(*structs.Node) 772 if ok && svc.ModifyIndex != 0 && svc.ModifyIndex != e.ModifyIndex { 773 return false, nil 774 } 775 776 // Perform the update. 777 if err := s.ensureServiceTxn(tx, idx, node, svc); err != nil { 778 return false, err 779 } 780 781 return true, nil 782 } 783 784 // ensureServiceTxn is used to upsert a service registration within an 785 // existing memdb transaction. 786 func (s *Store) ensureServiceTxn(tx *memdb.Txn, idx uint64, node string, svc *structs.NodeService) error { 787 // Check for existing service 788 existing, err := tx.First("services", "id", node, svc.ID) 789 if err != nil { 790 return fmt.Errorf("failed service lookup: %s", err) 791 } 792 793 if err = structs.ValidateMetadata(svc.Meta, false); err != nil { 794 return fmt.Errorf("Invalid Service Meta for node %s and serviceID %s: %v", node, svc.ID, err) 795 } 796 // Create the service node entry and populate the indexes. Note that 797 // conversion doesn't populate any of the node-specific information. 798 // That's always populated when we read from the state store. 799 entry := svc.ToServiceNode(node) 800 // Get the node 801 n, err := tx.First("nodes", "id", node) 802 if err != nil { 803 return fmt.Errorf("failed node lookup: %s", err) 804 } 805 if n == nil { 806 return ErrMissingNode 807 } 808 if existing != nil { 809 serviceNode := existing.(*structs.ServiceNode) 810 entry.CreateIndex = serviceNode.CreateIndex 811 entry.ModifyIndex = serviceNode.ModifyIndex 812 // We cannot return here because: we want to keep existing behavior (ex: failed node lookup -> ErrMissingNode) 813 // It might be modified in future, but it requires changing many unit tests 814 // Enforcing saving the entry also ensures that if we add default values in .ToServiceNode() 815 // those values will be saved even if node is not really modified for a while. 816 if entry.IsSameService(serviceNode) { 817 return nil 818 } 819 } else { 820 entry.CreateIndex = idx 821 } 822 entry.ModifyIndex = idx 823 824 // Insert the service and update the index 825 if err := tx.Insert("services", entry); err != nil { 826 return fmt.Errorf("failed inserting service: %s", err) 827 } 828 if err := tx.Insert("index", &IndexEntry{"services", idx}); err != nil { 829 return fmt.Errorf("failed updating index: %s", err) 830 } 831 if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil { 832 return fmt.Errorf("failed updating index: %s", err) 833 } 834 835 return nil 836 } 837 838 // Services returns all services along with a list of associated tags. 839 func (s *Store) Services(ws memdb.WatchSet) (uint64, structs.Services, error) { 840 tx := s.db.Txn(false) 841 defer tx.Abort() 842 843 // Get the table index. 844 idx := maxIndexTxn(tx, "services") 845 846 // List all the services. 847 services, err := tx.Get("services", "id") 848 if err != nil { 849 return 0, nil, fmt.Errorf("failed querying services: %s", err) 850 } 851 ws.Add(services.WatchCh()) 852 853 // Rip through the services and enumerate them and their unique set of 854 // tags. 855 unique := make(map[string]map[string]struct{}) 856 for service := services.Next(); service != nil; service = services.Next() { 857 svc := service.(*structs.ServiceNode) 858 tags, ok := unique[svc.ServiceName] 859 if !ok { 860 unique[svc.ServiceName] = make(map[string]struct{}) 861 tags = unique[svc.ServiceName] 862 } 863 for _, tag := range svc.ServiceTags { 864 tags[tag] = struct{}{} 865 } 866 } 867 868 // Generate the output structure. 869 var results = make(structs.Services) 870 for service, tags := range unique { 871 results[service] = make([]string, 0) 872 for tag := range tags { 873 results[service] = append(results[service], tag) 874 } 875 } 876 return idx, results, nil 877 } 878 879 // ServicesByNodeMeta returns all services, filtered by the given node metadata. 880 func (s *Store) ServicesByNodeMeta(ws memdb.WatchSet, filters map[string]string) (uint64, structs.Services, error) { 881 tx := s.db.Txn(false) 882 defer tx.Abort() 883 884 // Get the table index. 885 idx := maxIndexTxn(tx, "services", "nodes") 886 887 // Retrieve all of the nodes with the meta k/v pair 888 var args []interface{} 889 for key, value := range filters { 890 args = append(args, key, value) 891 break 892 } 893 nodes, err := tx.Get("nodes", "meta", args...) 894 if err != nil { 895 return 0, nil, fmt.Errorf("failed nodes lookup: %s", err) 896 } 897 ws.Add(nodes.WatchCh()) 898 899 // We don't want to track an unlimited number of services, so we pull a 900 // top-level watch to use as a fallback. 901 allServices, err := tx.Get("services", "id") 902 if err != nil { 903 return 0, nil, fmt.Errorf("failed services lookup: %s", err) 904 } 905 allServicesCh := allServices.WatchCh() 906 907 // Populate the services map 908 unique := make(map[string]map[string]struct{}) 909 for node := nodes.Next(); node != nil; node = nodes.Next() { 910 n := node.(*structs.Node) 911 if len(filters) > 1 && !structs.SatisfiesMetaFilters(n.Meta, filters) { 912 continue 913 } 914 915 // List all the services on the node 916 services, err := tx.Get("services", "node", n.Node) 917 if err != nil { 918 return 0, nil, fmt.Errorf("failed querying services: %s", err) 919 } 920 ws.AddWithLimit(s.watchLimit, services.WatchCh(), allServicesCh) 921 922 // Rip through the services and enumerate them and their unique set of 923 // tags. 924 for service := services.Next(); service != nil; service = services.Next() { 925 svc := service.(*structs.ServiceNode) 926 tags, ok := unique[svc.ServiceName] 927 if !ok { 928 unique[svc.ServiceName] = make(map[string]struct{}) 929 tags = unique[svc.ServiceName] 930 } 931 for _, tag := range svc.ServiceTags { 932 tags[tag] = struct{}{} 933 } 934 } 935 } 936 937 // Generate the output structure. 938 var results = make(structs.Services) 939 for service, tags := range unique { 940 results[service] = make([]string, 0) 941 for tag := range tags { 942 results[service] = append(results[service], tag) 943 } 944 } 945 946 if len(ws) >= s.watchLimit { 947 s.warnSoftLimitReached("service by node meta") 948 } 949 950 return idx, results, nil 951 } 952 953 // maxIndexForService return the maximum Raft Index for a service 954 // If the index is not set for the service, it will return the missing 955 // service index. 956 // The service_last_extinction is set to the last raft index when a service 957 // was unregistered (or 0 if no services were ever unregistered). This 958 // allows blocking queries to 959 // * return when the last instance of a service is removed 960 // * block until an instance for this service is available, or another 961 // service is unregistered. 962 func maxIndexForService(tx *memdb.Txn, serviceName string, serviceExists, checks bool) uint64 { 963 idx, _ := maxIndexAndWatchChForService(tx, serviceName, serviceExists, checks) 964 return idx 965 } 966 967 // maxIndexAndWatchChForService return the maximum Raft Index for a service. If 968 // the index is not set for the service, it will return the missing service 969 // index. The service_last_extinction is set to the last raft index when a 970 // service was unregistered (or 0 if no services were ever unregistered). This 971 // allows blocking queries to 972 // * return when the last instance of a service is removed 973 // * block until an instance for this service is available, or another 974 // service is unregistered. 975 // 976 // It also _may_ return a watch chan to add to a WatchSet. It will only return 977 // one if the service exists, and has a service index. If it doesn't then nil is 978 // returned for the chan. This allows for blocking watchers to _only_ watch this 979 // one chan in the common case, falling back to watching all touched MemDB 980 // indexes in more complicated cases. 981 func maxIndexAndWatchChForService(tx *memdb.Txn, serviceName string, serviceExists, checks bool) (uint64, <-chan struct{}) { 982 if !serviceExists { 983 res, err := tx.First("index", "id", serviceLastExtinctionIndexName) 984 if missingIdx, ok := res.(*IndexEntry); ok && err == nil { 985 // Not safe to only watch the extinction index as it's not updated when 986 // new instances come along so return nil watchCh. 987 return missingIdx.Value, nil 988 } 989 } 990 991 ch, res, err := tx.FirstWatch("index", "id", serviceIndexName(serviceName)) 992 if idx, ok := res.(*IndexEntry); ok && err == nil { 993 return idx.Value, ch 994 } 995 if checks { 996 return maxIndexTxn(tx, "nodes", "services", "checks"), nil 997 } 998 999 return maxIndexTxn(tx, "nodes", "services"), nil 1000 } 1001 1002 // ConnectServiceNodes returns the nodes associated with a Connect 1003 // compatible destination for the given service name. This will include 1004 // both proxies and native integrations. 1005 func (s *Store) ConnectServiceNodes(ws memdb.WatchSet, serviceName string) (uint64, structs.ServiceNodes, error) { 1006 return s.serviceNodes(ws, serviceName, true) 1007 } 1008 1009 // ServiceNodes returns the nodes associated with a given service name. 1010 func (s *Store) ServiceNodes(ws memdb.WatchSet, serviceName string) (uint64, structs.ServiceNodes, error) { 1011 return s.serviceNodes(ws, serviceName, false) 1012 } 1013 1014 func (s *Store) serviceNodes(ws memdb.WatchSet, serviceName string, connect bool) (uint64, structs.ServiceNodes, error) { 1015 tx := s.db.Txn(false) 1016 defer tx.Abort() 1017 1018 // Function for lookup 1019 var f func() (memdb.ResultIterator, error) 1020 if !connect { 1021 f = func() (memdb.ResultIterator, error) { 1022 return tx.Get("services", "service", serviceName) 1023 } 1024 } else { 1025 f = func() (memdb.ResultIterator, error) { 1026 return tx.Get("services", "connect", serviceName) 1027 } 1028 } 1029 1030 // List all the services. 1031 services, err := f() 1032 if err != nil { 1033 return 0, nil, fmt.Errorf("failed service lookup: %s", err) 1034 } 1035 ws.Add(services.WatchCh()) 1036 1037 var results structs.ServiceNodes 1038 for service := services.Next(); service != nil; service = services.Next() { 1039 results = append(results, service.(*structs.ServiceNode)) 1040 } 1041 1042 // Fill in the node details. 1043 results, err = s.parseServiceNodes(tx, ws, results) 1044 if err != nil { 1045 return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err) 1046 } 1047 1048 // Get the table index. 1049 idx := maxIndexForService(tx, serviceName, len(results) > 0, false) 1050 if len(ws) >= s.watchLimit { 1051 s.warnSoftLimitReached("service %s", serviceName) 1052 } 1053 1054 return idx, results, nil 1055 } 1056 1057 // ServiceTagNodes returns the nodes associated with a given service, filtering 1058 // out services that don't contain the given tags. 1059 func (s *Store) ServiceTagNodes(ws memdb.WatchSet, service string, tags []string) (uint64, structs.ServiceNodes, error) { 1060 tx := s.db.Txn(false) 1061 defer tx.Abort() 1062 1063 // List all the services. 1064 services, err := tx.Get("services", "service", service) 1065 if err != nil { 1066 return 0, nil, fmt.Errorf("failed service lookup: %s", err) 1067 } 1068 ws.Add(services.WatchCh()) 1069 1070 // Gather all the services and apply the tag filter. 1071 serviceExists := false 1072 var results structs.ServiceNodes 1073 for service := services.Next(); service != nil; service = services.Next() { 1074 svc := service.(*structs.ServiceNode) 1075 serviceExists = true 1076 if !serviceTagsFilter(svc, tags) { 1077 results = append(results, svc) 1078 } 1079 } 1080 1081 // Fill in the node details. 1082 results, err = s.parseServiceNodes(tx, ws, results) 1083 if err != nil { 1084 return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err) 1085 } 1086 // Get the table index. 1087 idx := maxIndexForService(tx, service, serviceExists, false) 1088 1089 return idx, results, nil 1090 } 1091 1092 // serviceTagFilter returns true (should filter) if the given service node 1093 // doesn't contain the given tag. 1094 func serviceTagFilter(sn *structs.ServiceNode, tag string) bool { 1095 tag = strings.ToLower(tag) 1096 1097 // Look for the lower cased version of the tag. 1098 for _, t := range sn.ServiceTags { 1099 if strings.ToLower(t) == tag { 1100 return false 1101 } 1102 } 1103 1104 // If we didn't hit the tag above then we should filter. 1105 return true 1106 } 1107 1108 // serviceTagsFilter returns true (should filter) if the given service node 1109 // doesn't contain the given set of tags. 1110 func serviceTagsFilter(sn *structs.ServiceNode, tags []string) bool { 1111 for _, tag := range tags { 1112 if serviceTagFilter(sn, tag) { 1113 // If any one of the expected tags was not found, filter the service 1114 return true 1115 } 1116 } 1117 1118 // If all tags were found, don't filter the service 1119 return false 1120 } 1121 1122 // ServiceAddressNodes returns the nodes associated with a given service, filtering 1123 // out services that don't match the given serviceAddress 1124 func (s *Store) ServiceAddressNodes(ws memdb.WatchSet, address string) (uint64, structs.ServiceNodes, error) { 1125 tx := s.db.Txn(false) 1126 defer tx.Abort() 1127 1128 // List all the services. 1129 services, err := tx.Get("services", "id") 1130 if err != nil { 1131 return 0, nil, fmt.Errorf("failed service lookup: %s", err) 1132 } 1133 ws.Add(services.WatchCh()) 1134 1135 // Gather all the services and apply the tag filter. 1136 var results structs.ServiceNodes 1137 for service := services.Next(); service != nil; service = services.Next() { 1138 svc := service.(*structs.ServiceNode) 1139 if svc.ServiceAddress == address { 1140 results = append(results, svc) 1141 } 1142 } 1143 1144 // Fill in the node details. 1145 results, err = s.parseServiceNodes(tx, ws, results) 1146 if err != nil { 1147 return 0, nil, fmt.Errorf("failed parsing service nodes: %s", err) 1148 } 1149 return 0, results, nil 1150 } 1151 1152 // parseServiceNodes iterates over a services query and fills in the node details, 1153 // returning a ServiceNodes slice. 1154 func (s *Store) parseServiceNodes(tx *memdb.Txn, ws memdb.WatchSet, services structs.ServiceNodes) (structs.ServiceNodes, error) { 1155 // We don't want to track an unlimited number of nodes, so we pull a 1156 // top-level watch to use as a fallback. 1157 allNodes, err := tx.Get("nodes", "id") 1158 if err != nil { 1159 return nil, fmt.Errorf("failed nodes lookup: %s", err) 1160 } 1161 allNodesCh := allNodes.WatchCh() 1162 1163 // Fill in the node data for each service instance. 1164 var results structs.ServiceNodes 1165 for _, sn := range services { 1166 // Note that we have to clone here because we don't want to 1167 // modify the node-related fields on the object in the database, 1168 // which is what we are referencing. 1169 sc := sn.PartialClone() 1170 1171 // Grab the corresponding node record. 1172 watchCh, n, err := tx.FirstWatch("nodes", "id", sn.Node) 1173 if err != nil { 1174 return nil, fmt.Errorf("failed node lookup: %s", err) 1175 } 1176 ws.AddWithLimit(s.watchLimit, watchCh, allNodesCh) 1177 1178 // Populate the node-related fields. The tagged addresses may be 1179 // used by agents to perform address translation if they are 1180 // configured to do that. 1181 node := n.(*structs.Node) 1182 sc.ID = node.ID 1183 sc.Address = node.Address 1184 sc.Datacenter = node.Datacenter 1185 sc.TaggedAddresses = node.TaggedAddresses 1186 sc.NodeMeta = node.Meta 1187 1188 results = append(results, sc) 1189 } 1190 return results, nil 1191 } 1192 1193 // NodeService is used to retrieve a specific service associated with the given 1194 // node. 1195 func (s *Store) NodeService(nodeName string, serviceID string) (uint64, *structs.NodeService, error) { 1196 tx := s.db.Txn(false) 1197 defer tx.Abort() 1198 1199 // Get the table index. 1200 idx := maxIndexTxn(tx, "services") 1201 1202 // Query the service 1203 service, err := s.getNodeServiceTxn(tx, nodeName, serviceID) 1204 if err != nil { 1205 return 0, nil, fmt.Errorf("failed querying service for node %q: %s", nodeName, err) 1206 } 1207 1208 return idx, service, nil 1209 } 1210 1211 func (s *Store) getNodeServiceTxn(tx *memdb.Txn, nodeName, serviceID string) (*structs.NodeService, error) { 1212 // Query the service 1213 service, err := tx.First("services", "id", nodeName, serviceID) 1214 if err != nil { 1215 return nil, fmt.Errorf("failed querying service for node %q: %s", nodeName, err) 1216 } 1217 1218 if service != nil { 1219 return service.(*structs.ServiceNode).ToNodeService(), nil 1220 } 1221 1222 return nil, nil 1223 } 1224 1225 // NodeServices is used to query service registrations by node name or UUID. 1226 func (s *Store) NodeServices(ws memdb.WatchSet, nodeNameOrID string) (uint64, *structs.NodeServices, error) { 1227 tx := s.db.Txn(false) 1228 defer tx.Abort() 1229 1230 // Get the table index. 1231 idx := maxIndexTxn(tx, "nodes", "services") 1232 1233 // Query the node by node name 1234 watchCh, n, err := tx.FirstWatch("nodes", "id", nodeNameOrID) 1235 if err != nil { 1236 return 0, nil, fmt.Errorf("node lookup failed: %s", err) 1237 } 1238 1239 if n != nil { 1240 ws.Add(watchCh) 1241 } else { 1242 if len(nodeNameOrID) < minUUIDLookupLen { 1243 ws.Add(watchCh) 1244 return 0, nil, nil 1245 } 1246 1247 // Attempt to lookup the node by its node ID 1248 iter, err := tx.Get("nodes", "uuid_prefix", resizeNodeLookupKey(nodeNameOrID)) 1249 if err != nil { 1250 ws.Add(watchCh) 1251 // TODO(sean@): We could/should log an error re: the uuid_prefix lookup 1252 // failing once a logger has been introduced to the catalog. 1253 return 0, nil, nil 1254 } 1255 1256 n = iter.Next() 1257 if n == nil { 1258 // No nodes matched, even with the Node ID: add a watch on the node name. 1259 ws.Add(watchCh) 1260 return 0, nil, nil 1261 } 1262 1263 idWatchCh := iter.WatchCh() 1264 if iter.Next() != nil { 1265 // More than one match present: Watch on the node name channel and return 1266 // an empty result (node lookups can not be ambiguous). 1267 ws.Add(watchCh) 1268 return 0, nil, nil 1269 } 1270 1271 ws.Add(idWatchCh) 1272 } 1273 1274 node := n.(*structs.Node) 1275 nodeName := node.Node 1276 1277 // Read all of the services 1278 services, err := tx.Get("services", "node", nodeName) 1279 if err != nil { 1280 return 0, nil, fmt.Errorf("failed querying services for node %q: %s", nodeName, err) 1281 } 1282 ws.Add(services.WatchCh()) 1283 1284 // Initialize the node services struct 1285 ns := &structs.NodeServices{ 1286 Node: node, 1287 Services: make(map[string]*structs.NodeService), 1288 } 1289 1290 // Add all of the services to the map. 1291 for service := services.Next(); service != nil; service = services.Next() { 1292 svc := service.(*structs.ServiceNode).ToNodeService() 1293 ns.Services[svc.ID] = svc 1294 } 1295 1296 return idx, ns, nil 1297 } 1298 1299 // DeleteService is used to delete a given service associated with a node. 1300 func (s *Store) DeleteService(idx uint64, nodeName, serviceID string) error { 1301 tx := s.db.Txn(true) 1302 defer tx.Abort() 1303 1304 // Call the service deletion 1305 if err := s.deleteServiceTxn(tx, idx, nodeName, serviceID); err != nil { 1306 return err 1307 } 1308 1309 tx.Commit() 1310 return nil 1311 } 1312 1313 func serviceIndexName(name string) string { 1314 return fmt.Sprintf("service.%s", name) 1315 } 1316 1317 // deleteServiceCASTxn is used to try doing a service delete operation with a given 1318 // raft index. If the CAS index specified is not equal to the last observed index for 1319 // the given service, then the call is a noop, otherwise a normal delete is invoked. 1320 func (s *Store) deleteServiceCASTxn(tx *memdb.Txn, idx, cidx uint64, nodeName, serviceID string) (bool, error) { 1321 // Look up the service. 1322 service, err := s.getNodeServiceTxn(tx, nodeName, serviceID) 1323 if err != nil { 1324 return false, fmt.Errorf("service lookup failed: %s", err) 1325 } 1326 if service == nil { 1327 return false, nil 1328 } 1329 1330 // If the existing index does not match the provided CAS 1331 // index arg, then we shouldn't update anything and can safely 1332 // return early here. 1333 if service.ModifyIndex != cidx { 1334 return false, nil 1335 } 1336 1337 // Call the actual deletion if the above passed. 1338 if err := s.deleteServiceTxn(tx, idx, nodeName, serviceID); err != nil { 1339 return false, err 1340 } 1341 1342 return true, nil 1343 } 1344 1345 // deleteServiceTxn is the inner method called to remove a service 1346 // registration within an existing transaction. 1347 func (s *Store) deleteServiceTxn(tx *memdb.Txn, idx uint64, nodeName, serviceID string) error { 1348 // Look up the service. 1349 service, err := tx.First("services", "id", nodeName, serviceID) 1350 if err != nil { 1351 return fmt.Errorf("failed service lookup: %s", err) 1352 } 1353 if service == nil { 1354 return nil 1355 } 1356 1357 // Delete any checks associated with the service. This will invalidate 1358 // sessions as necessary. 1359 checks, err := tx.Get("checks", "node_service", nodeName, serviceID) 1360 if err != nil { 1361 return fmt.Errorf("failed service check lookup: %s", err) 1362 } 1363 var cids []types.CheckID 1364 for check := checks.Next(); check != nil; check = checks.Next() { 1365 cids = append(cids, check.(*structs.HealthCheck).CheckID) 1366 } 1367 1368 // Do the delete in a separate loop so we don't trash the iterator. 1369 for _, cid := range cids { 1370 if err := s.deleteCheckTxn(tx, idx, nodeName, cid); err != nil { 1371 return err 1372 } 1373 } 1374 1375 // Update the index. 1376 if err := tx.Insert("index", &IndexEntry{"checks", idx}); err != nil { 1377 return fmt.Errorf("failed updating index: %s", err) 1378 } 1379 1380 // Delete the service and update the index 1381 if err := tx.Delete("services", service); err != nil { 1382 return fmt.Errorf("failed deleting service: %s", err) 1383 } 1384 if err := tx.Insert("index", &IndexEntry{"services", idx}); err != nil { 1385 return fmt.Errorf("failed updating index: %s", err) 1386 } 1387 1388 svc := service.(*structs.ServiceNode) 1389 if remainingService, err := tx.First("services", "service", svc.ServiceName); err == nil { 1390 if remainingService != nil { 1391 // We have at least one remaining service, update the index 1392 if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil { 1393 return fmt.Errorf("failed updating index: %s", err) 1394 } 1395 } else { 1396 // There are no more service instances, cleanup the service.<serviceName> index 1397 serviceIndex, err := tx.First("index", "id", serviceIndexName(svc.ServiceName)) 1398 if err == nil && serviceIndex != nil { 1399 // we found service.<serviceName> index, garbage collect it 1400 if errW := tx.Delete("index", serviceIndex); errW != nil { 1401 return fmt.Errorf("[FAILED] deleting serviceIndex %s: %s", svc.ServiceName, err) 1402 } 1403 } 1404 1405 if err := tx.Insert("index", &IndexEntry{serviceLastExtinctionIndexName, idx}); err != nil { 1406 return fmt.Errorf("failed updating missing service index: %s", err) 1407 } 1408 1409 } 1410 } else { 1411 return fmt.Errorf("Could not find any service %s: %s", svc.ServiceName, err) 1412 } 1413 return nil 1414 } 1415 1416 // EnsureCheck is used to store a check registration in the db. 1417 func (s *Store) EnsureCheck(idx uint64, hc *structs.HealthCheck) error { 1418 tx := s.db.Txn(true) 1419 defer tx.Abort() 1420 1421 // Call the check registration 1422 if err := s.ensureCheckTxn(tx, idx, hc); err != nil { 1423 return err 1424 } 1425 1426 tx.Commit() 1427 return nil 1428 } 1429 1430 // updateAllServiceIndexesOfNode updates the Raft index of all the services associated with this node 1431 func (s *Store) updateAllServiceIndexesOfNode(tx *memdb.Txn, idx uint64, nodeID string) error { 1432 services, err := tx.Get("services", "node", nodeID) 1433 if err != nil { 1434 return fmt.Errorf("failed updating services for node %s: %s", nodeID, err) 1435 } 1436 for service := services.Next(); service != nil; service = services.Next() { 1437 svc := service.(*structs.ServiceNode).ToNodeService() 1438 if err := tx.Insert("index", &IndexEntry{serviceIndexName(svc.Service), idx}); err != nil { 1439 return fmt.Errorf("failed updating index: %s", err) 1440 } 1441 } 1442 return nil 1443 } 1444 1445 // ensureCheckCASTxn updates a check only if the existing index matches the given index. 1446 // Returns a bool indicating if a write happened and any error. 1447 func (s *Store) ensureCheckCASTxn(tx *memdb.Txn, idx uint64, hc *structs.HealthCheck) (bool, error) { 1448 // Retrieve the existing entry. 1449 _, existing, err := s.getNodeCheckTxn(tx, hc.Node, hc.CheckID) 1450 if err != nil { 1451 return false, fmt.Errorf("failed health check lookup: %s", err) 1452 } 1453 1454 // Check if the we should do the set. A ModifyIndex of 0 means that 1455 // we are doing a set-if-not-exists. 1456 if hc.ModifyIndex == 0 && existing != nil { 1457 return false, nil 1458 } 1459 if hc.ModifyIndex != 0 && existing == nil { 1460 return false, nil 1461 } 1462 if existing != nil && hc.ModifyIndex != 0 && hc.ModifyIndex != existing.ModifyIndex { 1463 return false, nil 1464 } 1465 1466 // Perform the update. 1467 if err := s.ensureCheckTxn(tx, idx, hc); err != nil { 1468 return false, err 1469 } 1470 1471 return true, nil 1472 } 1473 1474 // ensureCheckTransaction is used as the inner method to handle inserting 1475 // a health check into the state store. It ensures safety against inserting 1476 // checks with no matching node or service. 1477 func (s *Store) ensureCheckTxn(tx *memdb.Txn, idx uint64, hc *structs.HealthCheck) error { 1478 // Check if we have an existing health check 1479 existing, err := tx.First("checks", "id", hc.Node, string(hc.CheckID)) 1480 if err != nil { 1481 return fmt.Errorf("failed health check lookup: %s", err) 1482 } 1483 1484 // Set the indexes 1485 if existing != nil { 1486 existingCheck := existing.(*structs.HealthCheck) 1487 hc.CreateIndex = existingCheck.CreateIndex 1488 hc.ModifyIndex = existingCheck.ModifyIndex 1489 } else { 1490 hc.CreateIndex = idx 1491 hc.ModifyIndex = idx 1492 } 1493 1494 // Use the default check status if none was provided 1495 if hc.Status == "" { 1496 hc.Status = api.HealthCritical 1497 } 1498 1499 // Get the node 1500 node, err := tx.First("nodes", "id", hc.Node) 1501 if err != nil { 1502 return fmt.Errorf("failed node lookup: %s", err) 1503 } 1504 if node == nil { 1505 return ErrMissingNode 1506 } 1507 1508 modified := true 1509 // If the check is associated with a service, check that we have 1510 // a registration for the service. 1511 if hc.ServiceID != "" { 1512 service, err := tx.First("services", "id", hc.Node, hc.ServiceID) 1513 if err != nil { 1514 return fmt.Errorf("failed service lookup: %s", err) 1515 } 1516 if service == nil { 1517 return ErrMissingService 1518 } 1519 1520 // Copy in the service name and tags 1521 svc := service.(*structs.ServiceNode) 1522 hc.ServiceName = svc.ServiceName 1523 hc.ServiceTags = svc.ServiceTags 1524 if existing != nil && existing.(*structs.HealthCheck).IsSame(hc) { 1525 modified = false 1526 } else { 1527 // Check has been modified, we trigger a index service change 1528 if err = tx.Insert("index", &IndexEntry{serviceIndexName(svc.ServiceName), idx}); err != nil { 1529 return fmt.Errorf("failed updating index: %s", err) 1530 } 1531 } 1532 } else { 1533 if existing != nil && existing.(*structs.HealthCheck).IsSame(hc) { 1534 modified = false 1535 } else { 1536 // Since the check has been modified, it impacts all services of node 1537 // Update the status for all the services associated with this node 1538 err = s.updateAllServiceIndexesOfNode(tx, idx, hc.Node) 1539 if err != nil { 1540 return err 1541 } 1542 } 1543 } 1544 1545 // Delete any sessions for this check if the health is critical. 1546 if hc.Status == api.HealthCritical { 1547 mappings, err := tx.Get("session_checks", "node_check", hc.Node, string(hc.CheckID)) 1548 if err != nil { 1549 return fmt.Errorf("failed session checks lookup: %s", err) 1550 } 1551 1552 var ids []string 1553 for mapping := mappings.Next(); mapping != nil; mapping = mappings.Next() { 1554 ids = append(ids, mapping.(*sessionCheck).Session) 1555 } 1556 1557 // Delete the session in a separate loop so we don't trash the 1558 // iterator. 1559 for _, id := range ids { 1560 if err := s.deleteSessionTxn(tx, idx, id); err != nil { 1561 return fmt.Errorf("failed deleting session: %s", err) 1562 } 1563 } 1564 } 1565 if modified { 1566 // We update the modify index, ONLY if something has changed, thus 1567 // With constant output, no change is seen when watching a service 1568 // With huge number of nodes where anti-entropy updates continuously 1569 // the checks, but not the values within the check 1570 hc.ModifyIndex = idx 1571 } 1572 1573 // Persist the check registration in the db. 1574 if err := tx.Insert("checks", hc); err != nil { 1575 return fmt.Errorf("failed inserting check: %s", err) 1576 } 1577 if err := tx.Insert("index", &IndexEntry{"checks", idx}); err != nil { 1578 return fmt.Errorf("failed updating index: %s", err) 1579 } 1580 1581 return nil 1582 } 1583 1584 // NodeCheck is used to retrieve a specific check associated with the given 1585 // node. 1586 func (s *Store) NodeCheck(nodeName string, checkID types.CheckID) (uint64, *structs.HealthCheck, error) { 1587 tx := s.db.Txn(false) 1588 defer tx.Abort() 1589 1590 return s.getNodeCheckTxn(tx, nodeName, checkID) 1591 } 1592 1593 // nodeCheckTxn is used as the inner method to handle reading a health check 1594 // from the state store. 1595 func (s *Store) getNodeCheckTxn(tx *memdb.Txn, nodeName string, checkID types.CheckID) (uint64, *structs.HealthCheck, error) { 1596 // Get the table index. 1597 idx := maxIndexTxn(tx, "checks") 1598 1599 // Return the check. 1600 check, err := tx.First("checks", "id", nodeName, string(checkID)) 1601 if err != nil { 1602 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1603 } 1604 1605 if check != nil { 1606 return idx, check.(*structs.HealthCheck), nil 1607 } 1608 return idx, nil, nil 1609 } 1610 1611 // NodeChecks is used to retrieve checks associated with the 1612 // given node from the state store. 1613 func (s *Store) NodeChecks(ws memdb.WatchSet, nodeName string) (uint64, structs.HealthChecks, error) { 1614 tx := s.db.Txn(false) 1615 defer tx.Abort() 1616 1617 // Get the table index. 1618 idx := maxIndexTxn(tx, "checks") 1619 1620 // Return the checks. 1621 iter, err := tx.Get("checks", "node", nodeName) 1622 if err != nil { 1623 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1624 } 1625 ws.Add(iter.WatchCh()) 1626 1627 var results structs.HealthChecks 1628 for check := iter.Next(); check != nil; check = iter.Next() { 1629 results = append(results, check.(*structs.HealthCheck)) 1630 } 1631 return idx, results, nil 1632 } 1633 1634 // ServiceChecks is used to get all checks associated with a 1635 // given service ID. The query is performed against a service 1636 // _name_ instead of a service ID. 1637 func (s *Store) ServiceChecks(ws memdb.WatchSet, serviceName string) (uint64, structs.HealthChecks, error) { 1638 tx := s.db.Txn(false) 1639 defer tx.Abort() 1640 1641 // Get the table index. 1642 idx := maxIndexTxn(tx, "checks") 1643 1644 // Return the checks. 1645 iter, err := tx.Get("checks", "service", serviceName) 1646 if err != nil { 1647 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1648 } 1649 ws.Add(iter.WatchCh()) 1650 1651 var results structs.HealthChecks 1652 for check := iter.Next(); check != nil; check = iter.Next() { 1653 results = append(results, check.(*structs.HealthCheck)) 1654 } 1655 return idx, results, nil 1656 } 1657 1658 // ServiceChecksByNodeMeta is used to get all checks associated with a 1659 // given service ID, filtered by the given node metadata values. The query 1660 // is performed against a service _name_ instead of a service ID. 1661 func (s *Store) ServiceChecksByNodeMeta(ws memdb.WatchSet, serviceName string, 1662 filters map[string]string) (uint64, structs.HealthChecks, error) { 1663 1664 tx := s.db.Txn(false) 1665 defer tx.Abort() 1666 1667 // Get the table index. 1668 idx := maxIndexForService(tx, serviceName, true, true) 1669 // Return the checks. 1670 iter, err := tx.Get("checks", "service", serviceName) 1671 if err != nil { 1672 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1673 } 1674 ws.Add(iter.WatchCh()) 1675 1676 idx, checks, err := s.parseChecksByNodeMeta(tx, ws, idx, iter, filters) 1677 if err != nil { 1678 return 0, nil, err 1679 } 1680 1681 if len(ws) >= s.watchLimit { 1682 s.warnSoftLimitReached("service %s", serviceName) 1683 } 1684 1685 return idx, checks, nil 1686 } 1687 1688 // ChecksInState is used to query the state store for all checks 1689 // which are in the provided state. 1690 func (s *Store) ChecksInState(ws memdb.WatchSet, state string) (uint64, structs.HealthChecks, error) { 1691 tx := s.db.Txn(false) 1692 defer tx.Abort() 1693 1694 // Get the table index. 1695 idx := maxIndexTxn(tx, "checks") 1696 1697 // Query all checks if HealthAny is passed, otherwise use the index. 1698 var iter memdb.ResultIterator 1699 var err error 1700 if state == api.HealthAny { 1701 iter, err = tx.Get("checks", "status") 1702 } else { 1703 iter, err = tx.Get("checks", "status", state) 1704 } 1705 if err != nil { 1706 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1707 } 1708 ws.Add(iter.WatchCh()) 1709 1710 var results structs.HealthChecks 1711 for check := iter.Next(); check != nil; check = iter.Next() { 1712 results = append(results, check.(*structs.HealthCheck)) 1713 } 1714 return idx, results, nil 1715 } 1716 1717 // ChecksInStateByNodeMeta is used to query the state store for all checks 1718 // which are in the provided state, filtered by the given node metadata values. 1719 func (s *Store) ChecksInStateByNodeMeta(ws memdb.WatchSet, state string, filters map[string]string) (uint64, structs.HealthChecks, error) { 1720 tx := s.db.Txn(false) 1721 defer tx.Abort() 1722 1723 // Get the table index. 1724 idx := maxIndexTxn(tx, "nodes", "checks") 1725 1726 // Query all checks if HealthAny is passed, otherwise use the index. 1727 var iter memdb.ResultIterator 1728 var err error 1729 if state == api.HealthAny { 1730 iter, err = tx.Get("checks", "status") 1731 if err != nil { 1732 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1733 } 1734 } else { 1735 iter, err = tx.Get("checks", "status", state) 1736 if err != nil { 1737 return 0, nil, fmt.Errorf("failed check lookup: %s", err) 1738 } 1739 } 1740 ws.Add(iter.WatchCh()) 1741 1742 return s.parseChecksByNodeMeta(tx, ws, idx, iter, filters) 1743 } 1744 1745 // parseChecksByNodeMeta is a helper function used to deduplicate some 1746 // repetitive code for returning health checks filtered by node metadata fields. 1747 func (s *Store) parseChecksByNodeMeta(tx *memdb.Txn, ws memdb.WatchSet, 1748 idx uint64, iter memdb.ResultIterator, filters map[string]string) (uint64, structs.HealthChecks, error) { 1749 1750 // We don't want to track an unlimited number of nodes, so we pull a 1751 // top-level watch to use as a fallback. 1752 allNodes, err := tx.Get("nodes", "id") 1753 if err != nil { 1754 return 0, nil, fmt.Errorf("failed nodes lookup: %s", err) 1755 } 1756 allNodesCh := allNodes.WatchCh() 1757 1758 // Only take results for nodes that satisfy the node metadata filters. 1759 var results structs.HealthChecks 1760 for check := iter.Next(); check != nil; check = iter.Next() { 1761 healthCheck := check.(*structs.HealthCheck) 1762 watchCh, node, err := tx.FirstWatch("nodes", "id", healthCheck.Node) 1763 if err != nil { 1764 return 0, nil, fmt.Errorf("failed node lookup: %s", err) 1765 } 1766 if node == nil { 1767 return 0, nil, ErrMissingNode 1768 } 1769 1770 // Add even the filtered nodes so we wake up if the node metadata 1771 // changes. 1772 ws.AddWithLimit(s.watchLimit, watchCh, allNodesCh) 1773 if structs.SatisfiesMetaFilters(node.(*structs.Node).Meta, filters) { 1774 results = append(results, healthCheck) 1775 } 1776 } 1777 return idx, results, nil 1778 } 1779 1780 // DeleteCheck is used to delete a health check registration. 1781 func (s *Store) DeleteCheck(idx uint64, node string, checkID types.CheckID) error { 1782 tx := s.db.Txn(true) 1783 defer tx.Abort() 1784 1785 // Call the check deletion 1786 if err := s.deleteCheckTxn(tx, idx, node, checkID); err != nil { 1787 return err 1788 } 1789 1790 tx.Commit() 1791 return nil 1792 } 1793 1794 // deleteCheckCASTxn is used to try doing a check delete operation with a given 1795 // raft index. If the CAS index specified is not equal to the last observed index for 1796 // the given check, then the call is a noop, otherwise a normal check delete is invoked. 1797 func (s *Store) deleteCheckCASTxn(tx *memdb.Txn, idx, cidx uint64, node string, checkID types.CheckID) (bool, error) { 1798 // Try to retrieve the existing health check. 1799 _, hc, err := s.getNodeCheckTxn(tx, node, checkID) 1800 if err != nil { 1801 return false, fmt.Errorf("check lookup failed: %s", err) 1802 } 1803 if hc == nil { 1804 return false, nil 1805 } 1806 1807 // If the existing index does not match the provided CAS 1808 // index arg, then we shouldn't update anything and can safely 1809 // return early here. 1810 if hc.ModifyIndex != cidx { 1811 return false, nil 1812 } 1813 1814 // Call the actual deletion if the above passed. 1815 if err := s.deleteCheckTxn(tx, idx, node, checkID); err != nil { 1816 return false, err 1817 } 1818 1819 return true, nil 1820 } 1821 1822 // deleteCheckTxn is the inner method used to call a health 1823 // check deletion within an existing transaction. 1824 func (s *Store) deleteCheckTxn(tx *memdb.Txn, idx uint64, node string, checkID types.CheckID) error { 1825 // Try to retrieve the existing health check. 1826 hc, err := tx.First("checks", "id", node, string(checkID)) 1827 if err != nil { 1828 return fmt.Errorf("check lookup failed: %s", err) 1829 } 1830 if hc == nil { 1831 return nil 1832 } 1833 existing := hc.(*structs.HealthCheck) 1834 if existing != nil { 1835 // When no service is linked to this service, update all services of node 1836 if existing.ServiceID != "" { 1837 if err = tx.Insert("index", &IndexEntry{serviceIndexName(existing.ServiceName), idx}); err != nil { 1838 return fmt.Errorf("failed updating index: %s", err) 1839 } 1840 } else { 1841 err = s.updateAllServiceIndexesOfNode(tx, idx, existing.Node) 1842 if err != nil { 1843 return fmt.Errorf("Failed to update services linked to deleted healthcheck: %s", err) 1844 } 1845 if err := tx.Insert("index", &IndexEntry{"services", idx}); err != nil { 1846 return fmt.Errorf("failed updating index: %s", err) 1847 } 1848 } 1849 } 1850 1851 // Delete the check from the DB and update the index. 1852 if err := tx.Delete("checks", hc); err != nil { 1853 return fmt.Errorf("failed removing check: %s", err) 1854 } 1855 if err := tx.Insert("index", &IndexEntry{"checks", idx}); err != nil { 1856 return fmt.Errorf("failed updating index: %s", err) 1857 } 1858 1859 // Delete any sessions for this check. 1860 mappings, err := tx.Get("session_checks", "node_check", node, string(checkID)) 1861 if err != nil { 1862 return fmt.Errorf("failed session checks lookup: %s", err) 1863 } 1864 var ids []string 1865 for mapping := mappings.Next(); mapping != nil; mapping = mappings.Next() { 1866 ids = append(ids, mapping.(*sessionCheck).Session) 1867 } 1868 1869 // Do the delete in a separate loop so we don't trash the iterator. 1870 for _, id := range ids { 1871 if err := s.deleteSessionTxn(tx, idx, id); err != nil { 1872 return fmt.Errorf("failed deleting session: %s", err) 1873 } 1874 } 1875 1876 return nil 1877 } 1878 1879 // CheckServiceNodes is used to query all nodes and checks for a given service. 1880 func (s *Store) CheckServiceNodes(ws memdb.WatchSet, serviceName string) (uint64, structs.CheckServiceNodes, error) { 1881 return s.checkServiceNodes(ws, serviceName, false) 1882 } 1883 1884 // CheckConnectServiceNodes is used to query all nodes and checks for Connect 1885 // compatible endpoints for a given service. 1886 func (s *Store) CheckConnectServiceNodes(ws memdb.WatchSet, serviceName string) (uint64, structs.CheckServiceNodes, error) { 1887 return s.checkServiceNodes(ws, serviceName, true) 1888 } 1889 1890 func (s *Store) checkServiceNodes(ws memdb.WatchSet, serviceName string, connect bool) (uint64, structs.CheckServiceNodes, error) { 1891 tx := s.db.Txn(false) 1892 defer tx.Abort() 1893 1894 // Function for lookup 1895 var f func() (memdb.ResultIterator, error) 1896 if !connect { 1897 f = func() (memdb.ResultIterator, error) { 1898 return tx.Get("services", "service", serviceName) 1899 } 1900 } else { 1901 f = func() (memdb.ResultIterator, error) { 1902 return tx.Get("services", "connect", serviceName) 1903 } 1904 } 1905 1906 // Query the state store for the service. 1907 iter, err := f() 1908 if err != nil { 1909 return 0, nil, fmt.Errorf("failed service lookup: %s", err) 1910 } 1911 // Note we decide if we want to watch this iterator or not down below. We need 1912 // to see if it returned anything first. 1913 1914 // Return the results. 1915 var results structs.ServiceNodes 1916 for service := iter.Next(); service != nil; service = iter.Next() { 1917 results = append(results, service.(*structs.ServiceNode)) 1918 } 1919 1920 // Get the table index. 1921 idx, ch := maxIndexAndWatchChForService(tx, serviceName, len(results) > 0, true) 1922 1923 // Create a nil watchset to pass below, we'll only pass the real one if we 1924 // need to. Nil watchers are safe/allowed and saves some allocation too. 1925 var fallbackWS memdb.WatchSet 1926 if ch == nil { 1927 // There was no explicit channel returned that corresponds to the service 1928 // index. That means we need to fallback to watching everything we touch in 1929 // the DB as normal. We plumb the caller's watchset through (note it's a map 1930 // so this is a by-reference assignment.) 1931 fallbackWS = ws 1932 // We also need to watch the iterator from earlier too. 1933 fallbackWS.Add(iter.WatchCh()) 1934 } else { 1935 // There was a valid service index, and non-empty result. In this case it is 1936 // sufficient just to watch the service index's chan since that _must_ be 1937 // written to if the result of this method is going to change. This saves us 1938 // watching potentially thousands of watch chans for large services which 1939 // may need many goroutines. It also avoid the performance cliff that is hit 1940 // when watchLimit is hit (~682 service instances). See 1941 // https://github.com/hashicorp/consul/issues/4984 1942 ws.Add(ch) 1943 } 1944 1945 return s.parseCheckServiceNodes(tx, fallbackWS, idx, serviceName, results, err) 1946 } 1947 1948 // CheckServiceTagNodes is used to query all nodes and checks for a given 1949 // service, filtering out services that don't contain the given tag. 1950 func (s *Store) CheckServiceTagNodes(ws memdb.WatchSet, serviceName string, tags []string) (uint64, structs.CheckServiceNodes, error) { 1951 tx := s.db.Txn(false) 1952 defer tx.Abort() 1953 1954 // Query the state store for the service. 1955 iter, err := tx.Get("services", "service", serviceName) 1956 if err != nil { 1957 return 0, nil, fmt.Errorf("failed service lookup: %s", err) 1958 } 1959 ws.Add(iter.WatchCh()) 1960 1961 // Return the results, filtering by tag. 1962 serviceExists := false 1963 var results structs.ServiceNodes 1964 for service := iter.Next(); service != nil; service = iter.Next() { 1965 svc := service.(*structs.ServiceNode) 1966 serviceExists = true 1967 if !serviceTagsFilter(svc, tags) { 1968 results = append(results, service.(*structs.ServiceNode)) 1969 } 1970 } 1971 1972 // Get the table index. 1973 idx := maxIndexForService(tx, serviceName, serviceExists, true) 1974 return s.parseCheckServiceNodes(tx, ws, idx, serviceName, results, err) 1975 } 1976 1977 // parseCheckServiceNodes is used to parse through a given set of services, 1978 // and query for an associated node and a set of checks. This is the inner 1979 // method used to return a rich set of results from a more simple query. 1980 func (s *Store) parseCheckServiceNodes( 1981 tx *memdb.Txn, ws memdb.WatchSet, idx uint64, 1982 serviceName string, services structs.ServiceNodes, 1983 err error) (uint64, structs.CheckServiceNodes, error) { 1984 if err != nil { 1985 return 0, nil, err 1986 } 1987 1988 // Special-case the zero return value to nil, since this ends up in 1989 // external APIs. 1990 if len(services) == 0 { 1991 return idx, nil, nil 1992 } 1993 1994 // We don't want to track an unlimited number of nodes, so we pull a 1995 // top-level watch to use as a fallback. 1996 allNodes, err := tx.Get("nodes", "id") 1997 if err != nil { 1998 return 0, nil, fmt.Errorf("failed nodes lookup: %s", err) 1999 } 2000 allNodesCh := allNodes.WatchCh() 2001 2002 // We need a similar fallback for checks. Since services need the 2003 // status of node + service-specific checks, we pull in a top-level 2004 // watch over all checks. 2005 allChecks, err := tx.Get("checks", "id") 2006 if err != nil { 2007 return 0, nil, fmt.Errorf("failed checks lookup: %s", err) 2008 } 2009 allChecksCh := allChecks.WatchCh() 2010 2011 results := make(structs.CheckServiceNodes, 0, len(services)) 2012 for _, sn := range services { 2013 // Retrieve the node. 2014 watchCh, n, err := tx.FirstWatch("nodes", "id", sn.Node) 2015 if err != nil { 2016 return 0, nil, fmt.Errorf("failed node lookup: %s", err) 2017 } 2018 ws.AddWithLimit(s.watchLimit, watchCh, allNodesCh) 2019 2020 if n == nil { 2021 return 0, nil, ErrMissingNode 2022 } 2023 node := n.(*structs.Node) 2024 2025 // First add the node-level checks. These always apply to any 2026 // service on the node. 2027 var checks structs.HealthChecks 2028 iter, err := tx.Get("checks", "node_service_check", sn.Node, false) 2029 if err != nil { 2030 return 0, nil, err 2031 } 2032 ws.AddWithLimit(s.watchLimit, iter.WatchCh(), allChecksCh) 2033 for check := iter.Next(); check != nil; check = iter.Next() { 2034 checks = append(checks, check.(*structs.HealthCheck)) 2035 } 2036 2037 // Now add the service-specific checks. 2038 iter, err = tx.Get("checks", "node_service", sn.Node, sn.ServiceID) 2039 if err != nil { 2040 return 0, nil, err 2041 } 2042 ws.AddWithLimit(s.watchLimit, iter.WatchCh(), allChecksCh) 2043 for check := iter.Next(); check != nil; check = iter.Next() { 2044 checks = append(checks, check.(*structs.HealthCheck)) 2045 } 2046 2047 // Append to the results. 2048 results = append(results, structs.CheckServiceNode{ 2049 Node: node, 2050 Service: sn.ToNodeService(), 2051 Checks: checks, 2052 }) 2053 } 2054 2055 return idx, results, nil 2056 } 2057 2058 // NodeInfo is used to generate a dump of a single node. The dump includes 2059 // all services and checks which are registered against the node. 2060 func (s *Store) NodeInfo(ws memdb.WatchSet, node string) (uint64, structs.NodeDump, error) { 2061 tx := s.db.Txn(false) 2062 defer tx.Abort() 2063 2064 // Get the table index. 2065 idx := maxIndexTxn(tx, "nodes", "services", "checks") 2066 2067 // Query the node by the passed node 2068 nodes, err := tx.Get("nodes", "id", node) 2069 if err != nil { 2070 return 0, nil, fmt.Errorf("failed node lookup: %s", err) 2071 } 2072 ws.Add(nodes.WatchCh()) 2073 2074 idx, nodeDump, err := s.parseNodes(tx, ws, idx, nodes) 2075 if err != nil { 2076 return 0, nil, err 2077 } 2078 2079 if len(ws) >= s.watchLimit { 2080 s.warnSoftLimitReached("node %s", node) 2081 } 2082 2083 return idx, nodeDump, nil 2084 } 2085 2086 // NodeDump is used to generate a dump of all nodes. This call is expensive 2087 // as it has to query every node, service, and check. The response can also 2088 // be quite large since there is currently no filtering applied. 2089 func (s *Store) NodeDump(ws memdb.WatchSet) (uint64, structs.NodeDump, error) { 2090 tx := s.db.Txn(false) 2091 defer tx.Abort() 2092 2093 // Get the table index. 2094 idx := maxIndexTxn(tx, "nodes", "services", "checks") 2095 2096 // Fetch all of the registered nodes 2097 nodes, err := tx.Get("nodes", "id") 2098 if err != nil { 2099 return 0, nil, fmt.Errorf("failed node lookup: %s", err) 2100 } 2101 ws.Add(nodes.WatchCh()) 2102 return s.parseNodes(tx, ws, idx, nodes) 2103 } 2104 2105 // parseNodes takes an iterator over a set of nodes and returns a struct 2106 // containing the nodes along with all of their associated services 2107 // and/or health checks. 2108 func (s *Store) parseNodes(tx *memdb.Txn, ws memdb.WatchSet, idx uint64, 2109 iter memdb.ResultIterator) (uint64, structs.NodeDump, error) { 2110 2111 // We don't want to track an unlimited number of services, so we pull a 2112 // top-level watch to use as a fallback. 2113 allServices, err := tx.Get("services", "id") 2114 if err != nil { 2115 return 0, nil, fmt.Errorf("failed services lookup: %s", err) 2116 } 2117 allServicesCh := allServices.WatchCh() 2118 2119 // We need a similar fallback for checks. 2120 allChecks, err := tx.Get("checks", "id") 2121 if err != nil { 2122 return 0, nil, fmt.Errorf("failed checks lookup: %s", err) 2123 } 2124 allChecksCh := allChecks.WatchCh() 2125 2126 var results structs.NodeDump 2127 for n := iter.Next(); n != nil; n = iter.Next() { 2128 node := n.(*structs.Node) 2129 2130 // Create the wrapped node 2131 dump := &structs.NodeInfo{ 2132 ID: node.ID, 2133 Node: node.Node, 2134 Address: node.Address, 2135 TaggedAddresses: node.TaggedAddresses, 2136 Meta: node.Meta, 2137 } 2138 2139 // Query the node services 2140 services, err := tx.Get("services", "node", node.Node) 2141 if err != nil { 2142 return 0, nil, fmt.Errorf("failed services lookup: %s", err) 2143 } 2144 ws.AddWithLimit(s.watchLimit, services.WatchCh(), allServicesCh) 2145 for service := services.Next(); service != nil; service = services.Next() { 2146 ns := service.(*structs.ServiceNode).ToNodeService() 2147 dump.Services = append(dump.Services, ns) 2148 } 2149 2150 // Query the node checks 2151 checks, err := tx.Get("checks", "node", node.Node) 2152 if err != nil { 2153 return 0, nil, fmt.Errorf("failed node lookup: %s", err) 2154 } 2155 ws.AddWithLimit(s.watchLimit, checks.WatchCh(), allChecksCh) 2156 for check := checks.Next(); check != nil; check = checks.Next() { 2157 hc := check.(*structs.HealthCheck) 2158 dump.Checks = append(dump.Checks, hc) 2159 } 2160 2161 // Add the result to the slice 2162 results = append(results, dump) 2163 } 2164 return idx, results, nil 2165 } 2166 2167 func (s *Store) warnSoftLimitReached(f string, a ...interface{}) { 2168 if s.watchLimitWarnCounter%100000 > 0 { 2169 return 2170 } 2171 2172 s.logger.Printf("[WARN] consul: exceeded soft watch limit of %d for %s, falling back to coarse grained watch", s.watchLimit, fmt.Sprintf(f, a...)) 2173 s.watchLimitWarnCounter++ 2174 }