github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/core/meta/smap.go (about) 1 // Package meta: cluster-level metadata 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package meta 6 7 import ( 8 "errors" 9 "fmt" 10 "net" 11 "net/url" 12 "strconv" 13 "strings" 14 "sync" 15 16 "github.com/NVIDIA/aistore/api/apc" 17 "github.com/NVIDIA/aistore/cmn" 18 "github.com/NVIDIA/aistore/cmn/cos" 19 "github.com/NVIDIA/aistore/cmn/debug" 20 "github.com/NVIDIA/aistore/cmn/nlog" 21 "github.com/OneOfOne/xxhash" 22 ) 23 24 // enum Snode.Flags 25 const ( 26 SnodeNonElectable cos.BitFlags = 1 << iota 27 SnodeIC 28 SnodeMaint 29 SnodeDecomm 30 SnodeMaintPostReb 31 ) 32 33 const SnodeMaintDecomm = SnodeMaint | SnodeDecomm 34 35 // desirable gateway count in the Information Center (IC) 36 const DfltCountIC = 3 37 38 type ( 39 // interface to Get current (immutable, versioned) cluster map (Smap) instance 40 Sowner interface { 41 Get() (smap *Smap) 42 Listeners() SmapListeners 43 } 44 // Smap On-change listeners (see ais/clustermap.go for impl-s) 45 Slistener interface { 46 String() string 47 ListenSmapChanged() 48 } 49 SmapListeners interface { 50 Reg(sl Slistener) 51 Unreg(sl Slistener) 52 } 53 ) 54 55 type ( 56 // Snode's networking info 57 NetInfo struct { 58 Hostname string `json:"node_ip_addr"` 59 Port string `json:"daemon_port"` 60 URL string `json:"direct_url"` 61 tcpEndpoint string 62 } 63 errNetInfoChanged struct { 64 sname string 65 tag string 66 oep, nep string 67 } 68 69 // Snode - a node (gateway or target) in a cluster 70 Snode struct { 71 LocalNet *net.IPNet `json:"-"` 72 PubNet NetInfo `json:"public_net"` // cmn.NetPublic 73 PubExtra []NetInfo `json:"pub_extra,omitempty"` 74 DataNet NetInfo `json:"intra_data_net"` // cmn.NetIntraData 75 ControlNet NetInfo `json:"intra_control_net"` // cmn.NetIntraControl 76 DaeType string `json:"daemon_type"` // "target" or "proxy" 77 DaeID string `json:"daemon_id"` 78 name string 79 Flags cos.BitFlags `json:"flags"` // enum { SnodeNonElectable, SnodeIC, ... } 80 idDigest uint64 81 } 82 83 Nodes []*Snode // slice of Snodes 84 NodeMap map[string]*Snode // map of Snodes indexed by node ID (Pmap & Tmap below) 85 86 // cluster map 87 Smap struct { 88 Ext any `json:"ext,omitempty"` 89 Pmap NodeMap `json:"pmap"` // [pid => Snode] 90 Primary *Snode `json:"proxy_si"` 91 Tmap NodeMap `json:"tmap"` // [tid => Snode] 92 UUID string `json:"uuid"` // is assigned once at creation time, never changes 93 CreationTime string `json:"creation_time"` // creation timestamp 94 Version int64 `json:"version,string"` 95 } 96 ) 97 98 /////////// 99 // Snode // 100 /////////// 101 102 func (d *Snode) Init(id, daeType string) { 103 debug.Assert(d.DaeID == "" && d.DaeType == "") 104 debug.Assert(id != "" && daeType != "") 105 d.DaeID, d.DaeType = id, daeType 106 d.SetName() 107 d.setDigest() 108 } 109 110 func (d *Snode) Digest() uint64 { return d.idDigest } 111 112 func (d *Snode) setDigest() { 113 if d.idDigest == 0 { 114 d.idDigest = xxhash.Checksum64S(cos.UnsafeB(d.ID()), cos.MLCG32) 115 } 116 } 117 118 func (d *Snode) ID() string { return d.DaeID } 119 func (d *Snode) Type() string { return d.DaeType } // enum { apc.Proxy, apc.Target } 120 121 func (d *Snode) Name() string { return d.name } 122 func (d *Snode) String() string { return d.Name() } 123 124 func (d *Snode) SetName() { 125 name := d.StringEx() 126 debug.Assert(d.name == "" || d.name == name, name, d.name) 127 d.name = name 128 } 129 130 const ( 131 PnamePrefix = "p[" 132 TnamePrefix = "t[" 133 SnameSuffix = "]" 134 ) 135 136 func Pname(pid string) string { return PnamePrefix + pid + "]" } 137 func Tname(tid string) string { return TnamePrefix + tid + "]" } 138 139 func N2ID(name string) string { 140 if len(name) > 2 && (name[:2] == TnamePrefix || name[:2] == PnamePrefix) { 141 return name[2 : len(name)-1] 142 } 143 return name 144 } 145 146 func (d *Snode) StringEx() string { 147 if d.IsProxy() { 148 return Pname(d.DaeID) 149 } 150 return Tname(d.DaeID) 151 } 152 153 func (d *Snode) StrURLs() string { 154 if d.PubNet.URL != d.ControlNet.URL || 155 d.PubNet.URL != d.DataNet.URL { 156 return fmt.Sprintf("%s(pub: %s, control: %s, data: %s)", d.Name(), 157 d.PubNet.URL, d.ControlNet.URL, d.DataNet.URL) 158 } 159 return fmt.Sprintf("%s(%s)", d.Name(), d.PubNet.URL) 160 } 161 162 func (d *Snode) URL(network string) (u string) { 163 switch network { 164 case cmn.NetPublic: 165 u = d.PubNet.URL 166 case cmn.NetIntraControl: 167 u = d.ControlNet.URL 168 case cmn.NetIntraData: 169 u = d.DataNet.URL 170 default: // (exclusively via HrwMultiHome) 171 debug.Assert(strings.Contains(network, "://"), network) // "is URI" per rfc2396.txt 172 u = network 173 } 174 return u 175 } 176 177 func (d *Snode) Eq(o *Snode) (eq bool) { 178 if d == nil || o == nil { 179 return 180 } 181 eq = d.ID() == o.ID() 182 if eq { 183 if err := d.NetEq(o); err != nil { 184 nlog.Warningln(err) 185 eq = false 186 } 187 } 188 return eq 189 } 190 191 func (d *Snode) NetEq(o *Snode) error { 192 name := d.StringEx() 193 debug.Assertf(d.DaeType == o.DaeType, "%s: node type %q vs %q", name, d.DaeType, o.DaeType) 194 if !d.PubNet.eq(&o.PubNet) { 195 return &errNetInfoChanged{name, "pub", d.PubNet.TCPEndpoint(), o.PubNet.TCPEndpoint()} 196 } 197 if !d.ControlNet.eq(&o.ControlNet) { 198 return &errNetInfoChanged{name, "control", d.ControlNet.TCPEndpoint(), o.ControlNet.TCPEndpoint()} 199 } 200 if !d.DataNet.eq(&o.DataNet) { 201 return &errNetInfoChanged{name, "data", d.DataNet.TCPEndpoint(), o.DataNet.TCPEndpoint()} 202 } 203 return nil 204 } 205 206 func (d *Snode) Validate() error { 207 if d == nil { 208 return errors.New("invalid Snode: nil") 209 } 210 if d.ID() == "" { 211 return errors.New("invalid Snode: missing node " + d.StrURLs()) 212 } 213 if d.DaeType != apc.Proxy && d.DaeType != apc.Target { 214 cos.Assertf(false, "invalid Snode type %q", d.DaeType) 215 } 216 return nil 217 } 218 219 func (d *Snode) Clone() *Snode { 220 var dst Snode 221 cos.CopyStruct(&dst, d) 222 return &dst 223 } 224 225 func (d *Snode) isDupNet(n *Snode, smap *Smap) error { 226 var ( 227 du = []string{d.PubNet.URL, d.ControlNet.URL, d.DataNet.URL} 228 nu = []string{n.PubNet.URL, n.ControlNet.URL, n.DataNet.URL} 229 ) 230 for _, ni := range nu { 231 np, err := url.Parse(ni) 232 if err != nil { 233 return fmt.Errorf("%s %s: failed to parse %s URL %q: %v", 234 cmn.BadSmapPrefix, smap, n.StringEx(), ni, err) 235 } 236 for _, di := range du { 237 dp, err := url.Parse(di) 238 if err != nil { 239 return fmt.Errorf("%s %s: failed to parse %s URL %q: %v", 240 cmn.BadSmapPrefix, smap, d.StringEx(), di, err) 241 } 242 if np.Host == dp.Host { 243 return fmt.Errorf("duplicate IPs: %s and %s share the same %q, %s", 244 d.StringEx(), n.StringEx(), np.Host, smap.StringEx()) 245 } 246 if ni == di { 247 return fmt.Errorf("duplicate URLs: %s and %s share the same %q, %s", 248 d.StringEx(), n.StringEx(), ni, smap.StringEx()) 249 } 250 } 251 } 252 return nil 253 } 254 255 // NOTE: used only for starting-up proxies and assumes that proxy's listening on a single NIC (no multihoming) 256 func (d *Snode) HasURL(rawURL string) bool { 257 u, err := url.Parse(rawURL) 258 if err != nil { 259 nlog.Errorf("failed to parse raw URL %q: %v", rawURL, err) 260 return false 261 } 262 var ( 263 host, port = u.Hostname(), u.Port() 264 isIP = net.ParseIP(host) != nil 265 nis = []NetInfo{d.PubNet, d.ControlNet, d.DataNet} 266 numIPs int 267 sameHost bool 268 samePort bool 269 ) 270 for _, ni := range nis { 271 if ni.Hostname == host { 272 if ni.Port == port { 273 return true 274 } 275 sameHost = true 276 } else if ni.Port == port { 277 samePort = true 278 } 279 if net.ParseIP(ni.Hostname) != nil { 280 numIPs++ 281 } 282 } 283 if sameHost && samePort { 284 nlog.Warningln("assuming that", d.StrURLs(), "\"contains\"", rawURL) 285 return true 286 } 287 if (numIPs > 0 && isIP) || (numIPs == 0 && !isIP) { 288 return false 289 } 290 291 // slow path: locally resolve (hostname => IPv4) and compare 292 rip, err := cmn.ParseHost2IP(host) 293 if err != nil { 294 nlog.Warningln(host, err) 295 return false 296 } 297 for _, ni := range nis { 298 nip, err := cmn.ParseHost2IP(ni.Hostname) 299 if err != nil { 300 nlog.Warningln(ni.Hostname, err) 301 return false 302 } 303 if rip.Equal(nip) && ni.Port == port { 304 return true 305 } 306 } 307 308 return false 309 } 310 311 func (d *Snode) IsProxy() bool { return d.DaeType == apc.Proxy } 312 func (d *Snode) IsTarget() bool { return d.DaeType == apc.Target } 313 314 // node flags 315 func (d *Snode) InMaintOrDecomm() bool { return d.Flags.IsAnySet(SnodeMaintDecomm) } 316 func (d *Snode) InMaint() bool { return d.Flags.IsAnySet(SnodeMaint) } 317 func (d *Snode) InMaintPostReb() bool { 318 return d.Flags.IsSet(SnodeMaint) && d.Flags.IsSet(SnodeMaintPostReb) 319 } 320 func (d *Snode) nonElectable() bool { return d.Flags.IsSet(SnodeNonElectable) } 321 func (d *Snode) IsIC() bool { return d.Flags.IsSet(SnodeIC) } 322 323 func (d *Snode) Fl2S() string { 324 if d.Flags == 0 { 325 return "none" 326 } 327 var a = make([]string, 0, 2) 328 switch { 329 case d.Flags&SnodeNonElectable != 0: 330 a = append(a, "non-elect") 331 case d.Flags&SnodeIC != 0: 332 a = append(a, "ic") 333 case d.Flags&SnodeMaint != 0: 334 a = append(a, "maintenance-mode") 335 case d.Flags&SnodeDecomm != 0: 336 a = append(a, "decommission") 337 case d.Flags&SnodeMaintPostReb != 0: 338 a = append(a, "post-rebalance") 339 } 340 return strings.Join(a, ",") 341 } 342 343 ///////////// 344 // NetInfo // 345 ///////////// 346 347 func (e *errNetInfoChanged) Error() string { 348 return fmt.Sprintf("%s: %s %s vs %s", e.sname, e.tag, e.nep, e.oep) 349 } 350 351 func _ep(hostname, port string) string { return hostname + ":" + port } 352 353 func (ni *NetInfo) Init(proto, hostname, port string) { 354 ep := _ep(hostname, port) 355 ni.Hostname = hostname 356 ni.Port = port 357 ni.URL = proto + "://" + ep // rfc2396.txt "Uniform Resource Identifiers (URI): Generic Syntax" 358 ni.tcpEndpoint = ep 359 } 360 361 func (ni *NetInfo) TCPEndpoint() string { 362 if ni.tcpEndpoint == "" { 363 ni.tcpEndpoint = _ep(ni.Hostname, ni.Port) 364 } 365 return ni.tcpEndpoint 366 } 367 368 func (ni *NetInfo) String() string { 369 return ni.TCPEndpoint() 370 } 371 372 func (ni *NetInfo) IsEmpty() bool { 373 return ni.Hostname == "" && ni.Port == "" 374 } 375 376 func (ni *NetInfo) eq(o *NetInfo) bool { 377 return ni.Port == o.Port && ni.Hostname == o.Hostname 378 } 379 380 ////////// 381 // Smap // 382 ////////// 383 384 // Cluster map (aks Smap) is a versioned, protected and replicated object 385 // Smap versioning is monotonic and incremental 386 387 func (m *Smap) InitDigests() { 388 for _, node := range m.Tmap { 389 node.setDigest() 390 } 391 for _, node := range m.Pmap { 392 node.setDigest() 393 } 394 } 395 396 func (m *Smap) String() string { 397 if m == nil { 398 return "Smap <nil>" 399 } 400 return "Smap v" + strconv.FormatInt(m.Version, 10) 401 } 402 403 func (m *Smap) StringEx() string { 404 var sb strings.Builder 405 if m == nil { 406 return "Smap <nil>" 407 } 408 sb.WriteString("Smap v") 409 sb.WriteString(strconv.FormatInt(m.Version, 10)) 410 sb.WriteByte('[') 411 sb.WriteString(m.UUID) 412 if m.Primary == nil { 413 sb.WriteString(", nil]") 414 return sb.String() 415 } 416 sb.WriteString(", ") 417 sb.WriteString(m.Primary.StringEx()) 418 sb.WriteString(", t=") 419 _counts(&sb, m.CountTargets(), m.CountActiveTs()) 420 sb.WriteString(", p=") 421 _counts(&sb, m.CountProxies(), m.CountActivePs()) 422 sb.WriteByte(']') 423 return sb.String() 424 } 425 426 func _counts(sb *strings.Builder, all, active int) { 427 if all == active { 428 sb.WriteString(strconv.Itoa(all)) 429 } else { 430 sb.WriteByte('(') 431 sb.WriteString(strconv.Itoa(active)) 432 sb.WriteByte('/') 433 sb.WriteString(strconv.Itoa(all)) 434 sb.WriteByte(')') 435 } 436 } 437 438 func (m *Smap) CountTargets() int { return len(m.Tmap) } 439 func (m *Smap) CountProxies() int { return len(m.Pmap) } 440 func (m *Smap) Count() int { return len(m.Pmap) + len(m.Tmap) } 441 442 func (m *Smap) CountActiveTs() (count int) { 443 for _, t := range m.Tmap { 444 if !t.InMaintOrDecomm() { 445 count++ 446 } 447 } 448 return 449 } 450 451 // whether this target has active peers 452 func (m *Smap) HasActiveTs(except string) bool { 453 for tid, t := range m.Tmap { 454 if tid == except || t.InMaintOrDecomm() { 455 continue 456 } 457 return true 458 } 459 return false 460 } 461 462 func (m *Smap) CountActivePs() (count int) { 463 for _, p := range m.Pmap { 464 if !p.InMaintOrDecomm() { 465 count++ 466 } 467 } 468 return 469 } 470 471 func (m *Smap) CountNonElectable() (count int) { 472 for _, p := range m.Pmap { 473 if p.nonElectable() { 474 count++ 475 } 476 } 477 return 478 } 479 480 func (m *Smap) GetProxy(pid string) *Snode { 481 psi, ok := m.Pmap[pid] 482 if !ok { 483 return nil 484 } 485 return psi 486 } 487 488 func (m *Smap) GetTarget(sid string) *Snode { 489 tsi, ok := m.Tmap[sid] 490 if !ok { 491 return nil 492 } 493 return tsi 494 } 495 496 func (m *Smap) IsPrimary(si *Snode) bool { 497 return m.Primary != nil && m.Primary.ID() == si.ID() 498 } 499 500 func (m *Smap) NewTmap(tids []string) (tmap NodeMap, err error) { 501 for _, tid := range tids { 502 if m.GetTarget(tid) == nil { 503 return nil, cos.NewErrNotFound(nil, "new-tmap: target "+tid) 504 } 505 } 506 tmap = make(NodeMap, len(tids)) 507 for _, tid := range tids { 508 tmap[tid] = m.GetTarget(tid) 509 } 510 return 511 } 512 513 func (m *Smap) GetNode(id string) *Snode { 514 if node := m.GetTarget(id); node != nil { 515 return node 516 } 517 return m.GetProxy(id) 518 } 519 520 // (convenient, slightly redundant) 521 func (m *Smap) GetActiveNode(sid string) (si *Snode) { 522 si = m.GetNode(sid) 523 if si != nil && si.InMaintOrDecomm() { 524 si = nil 525 } 526 return 527 } 528 529 // (random active) 530 func (m *Smap) GetRandTarget() (tsi *Snode, err error) { 531 var cnt int 532 for _, tsi = range m.Tmap { 533 if !tsi.InMaintOrDecomm() { 534 return 535 } 536 cnt++ 537 } 538 err = fmt.Errorf("GetRandTarget failure: %s, in maintenance >= %d", m.StringEx(), cnt) 539 return 540 } 541 542 func (m *Smap) GetRandProxy(excludePrimary bool) (si *Snode, err error) { 543 var cnt int 544 for _, psi := range m.Pmap { 545 if psi.InMaintOrDecomm() { 546 cnt++ 547 continue 548 } 549 if !excludePrimary || !m.IsPrimary(psi) { 550 return psi, nil 551 } 552 } 553 err = fmt.Errorf("GetRandProxy failure: %s, in maintenance >= %d, excl-primary %t", m.StringEx(), cnt, excludePrimary) 554 return 555 } 556 557 // whether IP is in use by a different node 558 func (m *Smap) IsDupNet(nsi *Snode) (osi *Snode, err error) { 559 for _, tsi := range m.Tmap { 560 if tsi.ID() == nsi.ID() { 561 continue 562 } 563 if err = tsi.isDupNet(nsi, m); err != nil { 564 osi = tsi 565 return 566 } 567 } 568 for _, psi := range m.Pmap { 569 if psi.ID() == nsi.ID() { 570 continue 571 } 572 if err = psi.isDupNet(nsi, m); err != nil { 573 osi = psi 574 return 575 } 576 } 577 return 578 } 579 580 func (m *Smap) Compare(other *Smap) (uuid string, sameOrigin, sameVersion, eq bool) { 581 sameOrigin, sameVersion = true, true 582 if m.UUID != "" && other.UUID != "" && m.UUID != other.UUID { 583 sameOrigin = false 584 } else { 585 uuid = m.UUID 586 if uuid == "" { 587 uuid = other.UUID 588 } 589 } 590 if m.Version != other.Version { 591 sameVersion = false 592 } 593 if m.Primary == nil || other.Primary == nil || !m.Primary.Eq(other.Primary) { 594 return // eq == false 595 } 596 eq = mapsEq(m.Tmap, other.Tmap) && mapsEq(m.Pmap, other.Pmap) 597 return 598 } 599 600 func (m *Smap) CompareTargets(other *Smap) (equal bool) { 601 return mapsEq(m.Tmap, other.Tmap) 602 } 603 604 func (m *Smap) NonElectable(psi *Snode) (ok bool) { 605 node := m.GetProxy(psi.ID()) 606 return node != nil && node.nonElectable() 607 } 608 609 // given Snode, check (usually, the current) Smap that it is present _and_ InMaintOrDecomm 610 // (see also GetActiveNode) 611 func (m *Smap) InMaintOrDecomm(si *Snode) bool { 612 node := m.GetNode(si.ID()) 613 return node != nil && node.InMaintOrDecomm() 614 } 615 616 func (m *Smap) InMaint(si *Snode) bool { 617 node := m.GetNode(si.ID()) 618 return node != nil && node.InMaint() 619 } 620 621 func (m *Smap) IsIC(psi *Snode) (ok bool) { 622 node := m.GetProxy(psi.ID()) 623 return node != nil && node.IsIC() 624 } 625 626 func (m *Smap) StrIC(node *Snode) string { 627 all := make([]string, 0, DfltCountIC) 628 for pid, psi := range m.Pmap { 629 if !psi.IsIC() { 630 continue 631 } 632 if node != nil && pid == node.ID() { 633 all = append(all, pid+"(*)") 634 } else { 635 all = append(all, pid) 636 } 637 } 638 return strings.Join(all, ",") 639 } 640 641 func (m *Smap) ICCount() (count int) { 642 for _, psi := range m.Pmap { 643 if psi.IsIC() { 644 count++ 645 } 646 } 647 return count 648 } 649 650 // checking pub net only 651 func (m *Smap) PubNet2Node(hostport string) *Snode { 652 host, port, err := net.SplitHostPort(hostport) 653 if err != nil { 654 return nil 655 } 656 all := []NodeMap{m.Tmap, m.Pmap} 657 for _, mm := range all { 658 for _, si := range mm { 659 if si.PubNet.Hostname == host && si.PubNet.Port == port { 660 return si 661 } 662 } 663 } 664 return nil 665 } 666 667 ///////////// 668 // NodeMap // 669 ///////////// 670 671 func (m NodeMap) Add(snode *Snode) { debug.Assert(m != nil); m[snode.DaeID] = snode } 672 673 func (m NodeMap) ActiveMap() (clone NodeMap) { 674 clone = make(NodeMap, len(m)) 675 for id, node := range m { 676 if node.InMaintOrDecomm() { 677 continue 678 } 679 clone[id] = node 680 } 681 return 682 } 683 684 func (m NodeMap) ActiveNodes() []*Snode { 685 snodes := make([]*Snode, 0, len(m)) 686 for _, node := range m { 687 if node.InMaintOrDecomm() { 688 continue 689 } 690 snodes = append(snodes, node) 691 } 692 return snodes 693 } 694 695 func (m NodeMap) Contains(daeID string) (exists bool) { 696 _, exists = m[daeID] 697 return 698 } 699 700 func mapsEq(a, b NodeMap) bool { 701 if len(a) != len(b) { 702 return false 703 } 704 for id, anode := range a { 705 if bnode, ok := b[id]; !ok { 706 return false 707 } else if !anode.Eq(bnode) { 708 return false 709 } 710 } 711 return true 712 } 713 714 // 715 // mem-pool of Nodes (slices) 716 // 717 718 var nodesPool sync.Pool 719 720 func AllocNodes(capacity int) (nodes Nodes) { 721 if v := nodesPool.Get(); v != nil { 722 pnodes := v.(*Nodes) 723 nodes = *pnodes 724 debug.Assert(nodes != nil && len(nodes) == 0) 725 } else { 726 debug.Assert(capacity > 0) 727 nodes = make(Nodes, 0, capacity) 728 } 729 return 730 } 731 732 func FreeNodes(nodes Nodes) { 733 nodes = nodes[:0] 734 nodesPool.Put(&nodes) 735 }