// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"path/filepath"
	"strconv"
	"sync"
	"time"

	"github.com/NVIDIA/aistore/ais/backend"
	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cifl"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/fname"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/core/meta"
	"github.com/NVIDIA/aistore/ec"
	"github.com/NVIDIA/aistore/ext/etl"
	"github.com/NVIDIA/aistore/fs"
	"github.com/NVIDIA/aistore/ios"
	"github.com/NVIDIA/aistore/nl"
	"github.com/NVIDIA/aistore/reb"
	"github.com/NVIDIA/aistore/res"
	"github.com/NVIDIA/aistore/xact"
	"github.com/NVIDIA/aistore/xact/xreg"
	jsoniter "github.com/json-iterator/go"
)

// provenance tags passed to receiveBMD/applyBMD to indicate how the BMD arrived
// (fixup retry, metasync receive, or cluster-join registration).
const (
	bmdFixup = "fixup"
	bmdRecv  = "receive"
	bmdReg   = "register"
)

// delb is a closure-carrier used when diffing old vs. new BMD: obck is the
// old bucket being looked up; present is set when it is found in the new BMD.
type delb struct {
	obck    *meta.Bck
	present bool
}

// joinCluster contacts the primary (or the given candidate URLs) to join the
// cluster and, when the response carries cluster metadata, applies it locally.
// Returns the HTTP status and error from the join call.
func (t *target) joinCluster(action string, primaryURLs ...string) (status int, err error) {
	res := t.join(nil, t, primaryURLs...)
	defer freeCR(res)
	if res.err != nil {
		status, err = res.status, res.err
		return
	}
	// not being sent at cluster startup and keepalive
	if len(res.bytes) == 0 {
		return
	}
	err = t.recvCluMetaBytes(action, res.bytes, "")
	return
}

const tagCM = "recv-clumeta"

// TODO: unify w/ p.recvCluMeta
// do not receive RMD: `receiveRMD` runs extra jobs and checks specific for metasync.
// recvCluMetaBytes unmarshals cluster metadata (config, BMD, Smap) received
// from the primary and applies each piece in order, collecting non-downgrade
// errors; `caller` identifies the sending node (may be empty on self-join).
func (t *target) recvCluMetaBytes(action string, body []byte, caller string) error {
	var (
		cm   cluMeta
		errs []error
		self = t.String() + ":"
	)
	if err := jsoniter.Unmarshal(body, &cm); err != nil {
		return fmt.Errorf(cmn.FmtErrUnmarshal, t, tagCM, cos.BHead(body), err)
	}
	if cm.PrimeTime == 0 {
		// NOTE(review): error string has an unbalanced '(' — cosmetic only
		err := errors.New(self + " zero prime_time (non-primary responded to an attempt to join?")
		nlog.Errorln(err)
		return err
	}

	xreg.PrimeTime.Store(cm.PrimeTime)
	xreg.MyTime.Store(time.Now().UnixNano())

	msg := t.newAmsgStr(action, cm.BMD)

	// Config
	if cm.Config == nil {
		err := fmt.Errorf(self+" invalid %T (nil config): %+v", cm, cm)
		nlog.Errorln(err)
		return err
	}
	if err := t.receiveConfig(cm.Config, msg, nil, caller); err != nil {
		if !isErrDowngrade(err) {
			errs = append(errs, err)
			nlog.Errorln(err)
		}
	} else {
		nlog.Infoln(self, tagCM, action, cm.Config.String())
	}

	// There's a window of time between:
	// a) target joining existing cluster and b) cluster starting to rebalance itself
	// The latter is driven by regMetasync (see regMetasync.go) distributing updated cluster map.
	// To handle incoming GETs within this window (which would typically take a few seconds or less)
	// we need to have the current cluster-wide regMetadata and the temporary gfn state:
	reb.OnTimedGFN()

	// BMD
	if err := t.receiveBMD(cm.BMD, msg, nil /*ms payload */, bmdReg, caller, true /*silent*/); err != nil {
		if !isErrDowngrade(err) {
			errs = append(errs, err)
			nlog.Errorln(err)
		}
	} else {
		nlog.Infoln(self, tagCM, action, cm.BMD.String())
	}
	// Smap
	if err := t.receiveSmap(cm.Smap, msg, nil /*ms payload*/, caller, t.htrun.smapUpdatedCB); err != nil {
		if !isErrDowngrade(err) {
			errs = append(errs, err)
			nlog.Errorln(cmn.NewErrFailedTo(t, "sync", cm.Smap, err))
		}
	} else if cm.Smap != nil {
		nlog.Infoln(self, tagCM, action, cm.Smap)
	}
	switch {
	case errs == nil:
		return nil
	case len(errs) == 1:
		return errs[0]
	default:
		return cmn.NewErrFailedTo(t, action, "clumeta", errors.Join(errs...))
	}
}

// [METHOD] /v1/daemon
func (t *target) daemonHandler(w http.ResponseWriter, r *http.Request) {
	switch r.Method {
	case http.MethodGet:
		t.httpdaeget(w, r)
	case http.MethodPut:
		t.httpdaeput(w, r)
	case http.MethodPost:
		t.httpdaepost(w, r)
	case http.MethodDelete:
		t.httpdaedelete(w, r)
	default:
		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost, http.MethodPut)
	}
}

// httpdaeput routes PUT /v1/daemon: no path items => JSON action message,
// otherwise query-style operation selected by the first path item.
func (t *target) httpdaeput(w http.ResponseWriter, r *http.Request) {
	apiItems, err := t.parseURL(w, r, apc.URLPathDae.L, 0, true)
	if err != nil {
		return
	}
	if len(apiItems) == 0 {
		t.daeputJSON(w, r)
	} else {
		t.daeputQuery(w, r, apiItems)
	}
}

// daeputJSON handles PUT /v1/daemon with an ActionMsg body (set/reset config,
// log rotation, stats reset, maintenance/shutdown/decommission, marker cleanup).
// Cluster-level actions require the request to originate from the primary.
func (t *target) daeputJSON(w http.ResponseWriter, r *http.Request) {
	msg, err := t.readActionMsg(w, r)
	if err != nil {
		return
	}
	switch msg.Action {
	case apc.ActSetConfig: // set-config #2 - via action message
		t.setDaemonConfigMsg(w, r, msg, r.URL.Query())
	case apc.ActResetConfig:
		if err := t.owner.config.resetDaemonConfig(); err != nil {
			t.writeErr(w, r, err)
		}
	case apc.ActRotateLogs:
		nlog.Flush(nlog.ActRotate)
	case apc.ActResetStats:
		errorsOnly := msg.Value.(bool)
		t.statsT.ResetStats(errorsOnly)

	case apc.ActStartMaintenance:
		if !t.ensureIntraControl(w, r, true /* from primary */) {
			return
		}
		t.termKaliveX(msg.Action, true)
	case apc.ActShutdownCluster, apc.ActShutdownNode:
		if !t.ensureIntraControl(w, r, true /* from primary */) {
			return
		}
		t.termKaliveX(msg.Action, false)
		t.shutdown(msg.Action)
	case apc.ActRmNodeUnsafe:
		if !t.ensureIntraControl(w, r, true /* from primary */) {
			return
		}
		t.termKaliveX(msg.Action, true)
	case apc.ActDecommissionCluster, apc.ActDecommissionNode:
		if !t.ensureIntraControl(w, r, true /* from primary */) {
			return
		}
		var opts apc.ActValRmNode
		if err := cos.MorphMarshal(msg.Value, &opts); err != nil {
			t.writeErr(w, r, err)
			return
		}
		t.termKaliveX(msg.Action, opts.NoShutdown)
		t.decommission(msg.Action, &opts)
	case apc.ActCleanupMarkers:
		if !t.ensureIntraControl(w, r, true /* from primary */) {
			return
		}
		var ctx cleanmark
		if err := cos.MorphMarshal(msg.Value, &ctx); err != nil {
			t.writeErr(w, r, err)
			return
		}
		t.cleanupMark(&ctx)
	default:
		t.writeErrAct(w, r, msg.Action)
	}
}

// daeputQuery dispatches PUT /v1/daemon/<item>: set-primary, force Smap sync,
// mountpath ops, and query-parameter config updates.
func (t *target) daeputQuery(w http.ResponseWriter, r *http.Request, apiItems []string) {
	switch apiItems[0] {
	case apc.Proxy:
		// PUT /v1/daemon/proxy/newprimaryproxyid
		t.daeSetPrimary(w, r, apiItems)
	case apc.SyncSmap:
		newsmap := &smapX{}
		if cmn.ReadJSON(w, r, newsmap) != nil {
			return
		}
		if err := t.owner.smap.synchronize(t.si, newsmap, nil /*ms payload*/, t.htrun.smapUpdatedCB); err != nil {
			t.writeErr(w, r, cmn.NewErrFailedTo(t, "synchronize", newsmap, err))
		}
		// NOTE(review): no early return above - "done" is logged even when
		// synchronize fails; confirm this is intended
		nlog.Infof("%s: %s %s done", t, apc.SyncSmap, newsmap)
	case apc.Mountpaths:
		t.handleMountpathReq(w, r)
	case apc.ActSetConfig: // set-config #1 - via query parameters and "?n1=v1&n2=v2..."
		t.setDaemonConfigQuery(w, r)
	}
}

// daeSetPrimary handles PUT /v1/daemon/proxy/<new-primary-id>; the "prepare"
// phase is a no-op for targets, the commit phase updates the local Smap.
func (t *target) daeSetPrimary(w http.ResponseWriter, r *http.Request, apiItems []string) {
	var (
		err     error
		prepare bool
	)
	if len(apiItems) != 2 {
		t.writeErrf(w, r, "Incorrect number of API items: %d, should be: %d", len(apiItems), 2)
		return
	}

	proxyID := apiItems[1]
	query := r.URL.Query()
	preparestr := query.Get(apc.QparamPrepare)
	if prepare, err = cos.ParseBool(preparestr); err != nil {
		t.writeErrf(w, r, "Failed to parse %s URL Parameter: %v", apc.QparamPrepare, err)
		return
	}

	if prepare {
		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
			nlog.Infoln("Preparation step: do nothing")
		}
		return
	}
	ctx := &smapModifier{pre: t._setPrim, sid: proxyID}
	err = t.owner.smap.modify(ctx)
	if err != nil {
		t.writeErr(w, r, err)
	}
}

// _setPrim is the smapModifier pre-hook: points clone.Primary at ctx.sid
// (no-op if already primary; error if the proxy is not in the clone).
func (t *target) _setPrim(ctx *smapModifier, clone *smapX) (err error) {
	if clone.Primary.ID() == ctx.sid {
		return
	}
	psi := clone.GetProxy(ctx.sid)
	if psi == nil {
		return &errNodeNotFound{"cannot set new primary", ctx.sid, t.si, clone}
	}
	clone.Primary = psi
	return
}

// httpdaeget handles GET /v1/daemon?what=... - node config/stats/mountpaths/
// disk stats/remote-AIS info; unknown "what" falls through to the shared htrun
// handler.
func (t *target) httpdaeget(w http.ResponseWriter, r *http.Request) {
	var (
		query       = r.URL.Query()
		getWhat     = query.Get(apc.QparamWhat)
		httpdaeWhat = "httpdaeget-" + getWhat
	)
	switch getWhat {
	case apc.WhatNodeConfig, apc.WhatSmap, apc.WhatBMD, apc.WhatSmapVote,
		apc.WhatSnode, apc.WhatLog, apc.WhatMetricNames:
		t.htrun.httpdaeget(w, r, query, t /*htext*/)
	case apc.WhatSysInfo:
		tsysinfo := apc.TSysInfo{MemCPUInfo: apc.GetMemCPU(), CapacityInfo: fs.CapStatusGetWhat()}
		t.writeJSON(w, r, tsysinfo, httpdaeWhat)
	case apc.WhatMountpaths:
		t.writeJSON(w, r, fs.MountpathsToLists(), httpdaeWhat)

	case apc.WhatNodeStats:
		ds := t.statsAndStatus()
		daeStats := t.statsT.GetStats()
		ds.Tracker = daeStats.Tracker
		ds.TargetCDF = daeStats.TargetCDF
		t.writeJSON(w, r, ds, httpdaeWhat)
	case apc.WhatNodeStatsV322: // [backward compatibility] v3.22 and prior
		ds := t.statsAndStatusV322()
		daeStats := t.statsT.GetStatsV322()
		ds.Tracker = daeStats.Tracker
		t.writeJSON(w, r, ds, httpdaeWhat)
	case apc.WhatNodeStatsAndStatus:
		ds := t.statsAndStatus()
		ds.RebSnap = _rebSnap()
		daeStats := t.statsT.GetStats()
		ds.Tracker = daeStats.Tracker
		ds.TargetCDF = daeStats.TargetCDF
		t.ciiFill(&ds.Cluster)
		t.writeJSON(w, r, ds, httpdaeWhat)
	case apc.WhatNodeStatsAndStatusV322: // [ditto]
		ds := t.statsAndStatusV322()
		ds.RebSnap = _rebSnap()
		daeStats := t.statsT.GetStatsV322()
		ds.Tracker = daeStats.Tracker
		ds.TargetCDF = daeStats.TargetCDF
		t.writeJSON(w, r, ds, httpdaeWhat)

	case apc.WhatDiskStats:
		diskStats := make(ios.AllDiskStats)
		fs.FillDiskStats(diskStats)
		t.writeJSON(w, r, diskStats, httpdaeWhat)
	case apc.WhatRemoteAIS:
		var (
			aisbp   = t.aisbp()
			refresh = cos.IsParseBool(query.Get(apc.QparamClusterInfo))
		)
		if !refresh {
			t.writeJSON(w, r, aisbp.GetInfoInternal(), httpdaeWhat)
			return
		}

		anyConf := cmn.GCO.Get().Backend.Get(apc.AIS)
		if anyConf == nil {
			t.writeJSON(w, r, meta.RemAisVec{}, httpdaeWhat)
			return
		}
		aisConf, ok := anyConf.(cmn.BackendConfAIS)
		debug.Assert(ok)

		t.writeJSON(w, r, aisbp.GetInfo(aisConf), httpdaeWhat)
	default:
		t.htrun.httpdaeget(w, r, query, t /*htext*/)
	}
}

// _rebSnap returns a snapshot of the most recent rebalance xaction, or nil
// if none is registered.
func _rebSnap() (rebSnap *core.Snap) {
	if entry := xreg.GetLatest(xreg.Flt{Kind: apc.ActRebalance}); entry != nil {
		if xctn := entry.Get(); xctn != nil {
			rebSnap = xctn.Snap()
		}
	}
	return rebSnap
}

// admin-join target | enable/disable mountpath
func (t *target) httpdaepost(w http.ResponseWriter, r *http.Request) {
	apiItems, err := t.parseURL(w, r, apc.URLPathDae.L, 0, true)
	if err != nil {
		return
	}
	if len(apiItems) == 0 {
		t.writeErrURL(w, r)
		return
	}
	apiOp := apiItems[0]
	if apiOp == apc.Mountpaths {
		t.handleMountpathReq(w, r)
		return
	}
	if apiOp != apc.AdminJoin {
		t.writeErrURL(w, r)
		return
	}

	// user request to join cluster (compare with `apc.SelfJoin`)
	if !t.regstate.disabled.Load() {
		if t.keepalive.paused() {
			t.keepalive.ctrl(kaResumeMsg)
		} else {
			nlog.Warningf("%s already joined (\"enabled\")- nothing to do", t)
		}
		return
	}
	if daemon.cli.target.standby {
		nlog.Infof("%s: transitioning standby => join", t)
	}
	t.keepalive.ctrl(kaResumeMsg)
	body, err := cmn.ReadBytes(r)
	if err != nil {
		t.writeErr(w, r, err)
		return
	}

	caller := r.Header.Get(apc.HdrCallerName)
	if err := t.recvCluMetaBytes(apc.ActAdminJoinTarget, body, caller); err != nil {
		t.writeErr(w, r, err)
		return
	}
	if daemon.cli.target.standby {
		if err := t.endStartupStandby(); err != nil {
			nlog.Warningf("%s: err %v ending standby...", t, err)
		}
	}
}

// httpdaedelete handles DELETE /v1/daemon/<item>; only mountpath removal is
// supported.
func (t *target) httpdaedelete(w http.ResponseWriter, r *http.Request) {
	apiItems, err := t.parseURL(w, r, apc.URLPathDae.L, 1, false)
	if err != nil {
		return
	}
	switch apiItems[0] {
	case apc.Mountpaths:
		t.handleMountpathReq(w, r)
	default:
		t.writeErrURL(w, r)
	}
}

// called by p.cleanupMark
// Removes the rebalance and/or node-restarted persistent markers, unless the
// local Smap is already newer than the version the request was issued against.
func (t *target) cleanupMark(ctx *cleanmark) {
	smap := t.owner.smap.get()
	if smap.version() > ctx.NewVer {
		nlog.Warningf("%s: %s is newer - ignoring (and dropping) %v", t, smap, ctx)
		return
	}
	if ctx.Interrupted {
		if err := fs.RemoveMarker(fname.RebalanceMarker); err == nil {
			nlog.Infof("%s: cleanmark 'rebalance', %s", t, smap)
		} else {
			nlog.Errorf("%s: failed to cleanmark 'rebalance': %v, %s", t, err, smap)
		}
	}
	if ctx.Restarted {
		if err := fs.RemoveMarker(fname.NodeRestartedPrev); err == nil {
			nlog.Infof("%s: cleanmark 'restarted', %s", t, smap)
		} else {
			nlog.Errorf("%s: failed to cleanmark 'restarted': %v, %s", t, err, smap)
		}
	}
}

// handleMountpathReq validates the action-message payload (a mountpath string)
// and dispatches enable/attach/disable/detach; disk sizes are recomputed
// unconditionally afterwards.
func (t *target) handleMountpathReq(w http.ResponseWriter, r *http.Request) {
	msg, err := t.readActionMsg(w, r)
	if err != nil {
		return
	}
	mpath, ok := msg.Value.(string)
	if !ok {
		t.writeErrMsg(w, r, "invalid mountpath value in request")
		return
	}
	if mpath == "" {
		t.writeErrMsg(w, r, "mountpath is not defined")
		return
	}
	switch msg.Action {
	case apc.ActMountpathEnable:
		t.enableMpath(w, r, mpath)
	case apc.ActMountpathAttach:
		t.attachMpath(w, r, mpath)
	case apc.ActMountpathDisable:
		t.disableMpath(w, r, mpath)
	case apc.ActMountpathDetach:
		t.detachMpath(w, r, mpath)
	default:
		t.writeErrAct(w, r, msg.Action)
	}

	fs.ComputeDiskSize()
}

// enableMpath re-enables a previously disabled mountpath and recreates any
// missing per-bucket directories on it; 204 when the mountpath was already
// enabled (fsprg returned nil).
func (t *target) enableMpath(w http.ResponseWriter, r *http.Request, mpath string) {
	enabledMi, err := t.fsprg.enableMpath(mpath)
	if err != nil {
		if cmn.IsErrMountpathNotFound(err) {
			t.writeErr(w, r, err, http.StatusNotFound)
		} else {
			// cmn.ErrInvalidMountpath
			t.writeErr(w, r, err)
		}
		return
	}
	if enabledMi == nil {
		w.WriteHeader(http.StatusNoContent)
		return
	}

	// create missing buckets dirs
	bmd := t.owner.bmd.get()
	bmd.Range(nil, nil, func(bck *meta.Bck) bool {
		err = enabledMi.CreateMissingBckDirs(bck.Bucket())
		return err != nil // break on error
	})
	if err != nil {
		t.writeErr(w, r, err)
		return
	}
}

// attachMpath attaches a new mountpath (with optional disk label from query)
// and creates per-bucket directories on it.
func (t *target) attachMpath(w http.ResponseWriter, r *http.Request, mpath string) {
	q := r.URL.Query()
	label := ios.Label(q.Get(apc.QparamMpathLabel))
	addedMi, err := t.fsprg.attachMpath(mpath, label)
	if err != nil {
		t.writeErr(w, r, err)
		return
	}
	if addedMi == nil {
		return
	}
	// create missing buckets dirs, if any
	bmd := t.owner.bmd.get()
	bmd.Range(nil, nil, func(bck *meta.Bck) bool {
		err = addedMi.CreateMissingBckDirs(bck.Bucket())
		return err != nil // break on error
	})
	if err != nil {
		t.writeErr(w, r, err)
		return
	}
}

// disableMpath disables a mountpath; resilvering is skipped when the
// dont-resilver query parameter is set. 204 when already disabled.
func (t *target) disableMpath(w http.ResponseWriter, r *http.Request, mpath string) {
	dontResilver := cos.IsParseBool(r.URL.Query().Get(apc.QparamDontResilver))
	disabledMi, err := t.fsprg.disableMpath(mpath, dontResilver)
	if err != nil {
		if cmn.IsErrMountpathNotFound(err) {
			t.writeErr(w, r, err, http.StatusNotFound)
		} else {
			// cmn.ErrInvalidMountpath
			t.writeErr(w, r, err)
		}
		return
	}
	if disabledMi == nil {
		w.WriteHeader(http.StatusNoContent)
	}
}

// detachMpath permanently detaches a mountpath (optionally without resilver).
func (t *target) detachMpath(w http.ResponseWriter, r *http.Request, mpath string) {
	dontResilver := cos.IsParseBool(r.URL.Query().Get(apc.QparamDontResilver))
	if _, err := t.fsprg.detachMpath(mpath, dontResilver); err != nil {
		t.writeErr(w, r, err)
	}
}

// receiveBMD applies a new BMD. With an empty msg.UUID it is a plain apply;
// otherwise the apply is bracketed by the two-phase-commit hooks
// (commitBefore / commitAfter) of the transaction identified by the UUID.
func (t *target) receiveBMD(newBMD *bucketMD, msg *aisMsg, payload msPayload, tag, caller string, silent bool) (err error) {
	var oldVer int64
	if msg.UUID == "" {
		oldVer, err = t.applyBMD(newBMD, msg, payload, tag)
		if newBMD.Version > oldVer {
			if err == nil {
				logmsync(oldVer, newBMD, msg, caller, newBMD.StringEx())
			}
		}
		return
	}

	// txn [before -- do -- after]
	if errDone := t.transactions.commitBefore(caller, msg); errDone != nil {
		err = fmt.Errorf("%s commit-before %s, errDone: %v", t, newBMD, errDone)
		if !silent {
			nlog.Errorln(err)
		}
		return
	}
	oldVer, err = t.applyBMD(newBMD, msg, payload, tag)
	// log
	switch {
	case err != nil:
		nlog.Errorf("%s: %v (receive %s from %q, action %q, uuid %q)", t, err, newBMD.StringEx(), caller, msg.Action, msg.UUID)
	case newBMD.Version > oldVer:
		logmsync(oldVer, newBMD, msg, caller, newBMD.StringEx())
	case newBMD.Version == oldVer:
		nlog.Warningf("%s (same version w/ txn commit): receive %s from %q (action %q, uuid %q)",
			t, newBMD.StringEx(), caller, msg.Action, msg.UUID)
	}
	// --after]
	if errDone := t.transactions.commitAfter(caller, msg, err, newBMD); errDone != nil {
		err = fmt.Errorf("%s commit-after %s, err: %v, errDone: %v", t, newBMD, err, errDone)
		if !silent {
			nlog.Errorln(err)
		}
	}
	return
}

// applyBMD synchronizes the new BMD under the bmd-owner lock and, on a version
// bump, runs post-apply side effects (_postBMD). Returns the pre-apply version.
func (t *target) applyBMD(newBMD *bucketMD, msg *aisMsg, payload msPayload, tag string) (int64, error) {
	var (
		smap = t.owner.smap.get()
		psi  *meta.Snode
	)
	if smap.validate() == nil {
		psi = smap.Primary // (caller?)
	}

	t.owner.bmd.Lock()
	rmbcks, oldVer, emsg, err := t._syncBMD(newBMD, msg, payload, psi)
	t.owner.bmd.Unlock()

	if err != nil {
		nlog.Errorln(err)
	} else if oldVer < newBMD.Version {
		t.regstate.prevbmd.Store(false)
		t._postBMD(newBMD, tag, rmbcks)
	}
	if emsg != "" {
		nlog.Errorln(emsg)
	}
	return oldVer, err
}

// executes under lock
// _syncBMD: validate UUID (fatal on cluster-integrity error), reject
// downgrades, create newly added buckets on disk, persist the BMD, and destroy
// buckets that disappeared (collected into rmbcks; destroy errors are reported
// via emsg, not err).
func (t *target) _syncBMD(newBMD *bucketMD, msg *aisMsg, payload msPayload, psi *meta.Snode) (rmbcks []*meta.Bck,
	oldVer int64, emsg string, err error) {
	var (
		createErrs  []error
		destroyErrs []error
		bmd         = t.owner.bmd.get()
	)
	if err = bmd.validateUUID(newBMD, t.si, psi, ""); err != nil {
		cos.ExitLog(err) // FATAL: cluster integrity error (cie)
		return
	}
	// check downgrade
	if oldVer = bmd.version(); newBMD.version() <= oldVer {
		if newBMD.version() < oldVer {
			err = newErrDowngrade(t.si, bmd.StringEx(), newBMD.StringEx())
		}
		return
	}
	nilbmd := bmd.version() == 0 || t.regstate.prevbmd.Load()

	// 1. create
	newBMD.Range(nil, nil, func(bck *meta.Bck) bool {
		if _, present := bmd.Get(bck); present {
			return false
		}
		errs := fs.CreateBucket(bck.Bucket(), nilbmd)
		if len(errs) > 0 {
			createErrs = append(createErrs, errs...)
		}
		return false
	})
	if len(createErrs) > 0 {
		err = fmt.Errorf("%s: failed to add new buckets: %s, old/cur %s(%t): %v",
			t, newBMD, bmd, nilbmd, errors.Join(createErrs...))
		return
	}

	// 2. persist
	if err = t.owner.bmd.putPersist(newBMD, payload); err != nil {
		cos.ExitLog(err)
		return
	}

	// 3. delete, ignore errors
	bmd.Range(nil, nil, func(obck *meta.Bck) bool {
		f := &delb{obck: obck}
		newBMD.Range(nil, nil, f.do)
		if !f.present {
			rmbcks = append(rmbcks, obck)
			if errD := fs.DestroyBucket("recv-bmd-"+msg.Action, obck.Bucket(), obck.Props.BID); errD != nil {
				destroyErrs = append(destroyErrs, errD)
			}
		}
		return false
	})
	if len(destroyErrs) > 0 {
		emsg = fmt.Sprintf("%s: failed to cleanup destroyed buckets: %s, old/cur %s(%t): %v",
			t, newBMD, bmd, nilbmd, errors.Join(destroyErrs...))
	}
	return
}

// do is the new-BMD range callback: marks f.obck present when matched and
// aborts per-bucket xactions whose enabling prop was just switched off.
func (f *delb) do(nbck *meta.Bck) bool {
	if !f.obck.Equal(nbck, false /*ignore BID*/, false /* ignore backend */) {
		return false // keep going
	}
	f.present = true

	// assorted props changed?
	if f.obck.Props.Mirror.Enabled && !nbck.Props.Mirror.Enabled {
		flt := xreg.Flt{Kind: apc.ActPutCopies, Bck: nbck}
		xreg.DoAbort(flt, errors.New("apply-bmd"))
		// NOTE: apc.ActMakeNCopies takes care of itself
	}
	if f.obck.Props.EC.Enabled && !nbck.Props.EC.Enabled {
		flt := xreg.Flt{Kind: apc.ActECEncode, Bck: nbck}
		xreg.DoAbort(flt, errors.New("apply-bmd"))
	}
	return true // break
}

// _postBMD runs after a successful BMD version bump: aborts xactions and
// evicts LOM caches for removed buckets, notifies the EC manager (except at
// registration time), and triggers OOS handling if capacity is in error state.
func (t *target) _postBMD(newBMD *bucketMD, tag string, rmbcks []*meta.Bck) {
	// evict LOM cache
	if len(rmbcks) > 0 {
		errV := fmt.Errorf("[post-bmd] %s %s: remove bucket%s", tag, newBMD, cos.Plural(len(rmbcks)))
		xreg.AbortAllBuckets(errV, rmbcks...)
		go func(bcks ...*meta.Bck) {
			for _, b := range bcks {
				core.UncacheBck(b)
			}
		}(rmbcks...)
	}
	if tag != bmdReg {
		if err := ec.ECM.BMDChanged(); err != nil {
			nlog.Errorf("Failed to initialize EC manager: %v", err)
		}
	}
	// since some buckets may have been destroyed
	cs := fs.Cap()
	if cs.Err() != nil {
		_ = t.OOS(nil)
	}
}

// is called under lock
// receiveRMD applies a new rebalance MD: rejects downgrades, validates Smap
// membership, then kicks off rebalance (and optionally resilver) — or, for a
// standby target being admin-joined, joins the cluster instead.
func (t *target) receiveRMD(newRMD *rebMD, msg *aisMsg) (err error) {
	rmd := t.owner.rmd.get()
	if newRMD.Version <= rmd.Version {
		if newRMD.Version < rmd.Version {
			err = newErrDowngrade(t.si, rmd.String(), newRMD.String())
		}
		return
	}
	smap := t.owner.smap.get()
	if err = smap.validate(); err != nil {
		return
	}
	if smap.GetNode(t.SID()) == nil {
		err = fmt.Errorf(fmtSelfNotPresent, t, smap.StringEx())
		return
	}
	for _, tsi := range rmd.TargetIDs {
		if smap.GetNode(tsi) == nil {
			nlog.Warningf("%s: %s (target_id) not present in %s (old %s, new %s)",
				t.si, tsi, smap.StringEx(), rmd, newRMD)
		}
	}
	if !t.regstate.disabled.Load() {
		//
		// run rebalance
		//
		notif := &xact.NotifXact{
			Base: nl.Base{When: core.UponTerm, Dsts: []string{equalIC}, F: t.notifyTerm},
		}
		if msg.Action == apc.ActRebalance {
			// NOTE(review): this early return skips t.owner.rmd.put(newRMD)
			// below — confirm intended for user-requested rebalance
			nlog.Infof("%s: starting user-requested rebalance[%s]", t, msg.UUID)
			go t.reb.RunRebalance(&smap.Smap, newRMD.Version, notif)
			return
		}

		switch msg.Action {
		case apc.ActStartMaintenance, apc.ActDecommissionNode, apc.ActShutdownNode, apc.ActRmNodeUnsafe:
			var opts apc.ActValRmNode
			if err := cos.MorphMarshal(msg.Value, &opts); err != nil {
				debug.AssertNoErr(err) // unlikely
			} else {
				var s string
				if opts.DaemonID == t.SID() {
					s = " (to subsequently deactivate or remove _this_ target)"
				}
				nlog.Infof("%s: starting '%s' triggered rebalance[%s]%s: %+v",
					t, msg.Action, xact.RebID2S(newRMD.Version), s, opts)
			}
		default:
			nlog.Infoln(t.String() + ": starting rebalance[" + xact.RebID2S(newRMD.Version) + "]")
		}
		go t.reb.RunRebalance(&smap.Smap, newRMD.Version, notif)

		if newRMD.Resilver != "" {
			nlog.Infoln(t.String() + ": ... and resilver")
			go t.runResilver(res.Args{UUID: newRMD.Resilver, SkipGlobMisplaced: true}, nil /*wg*/)
		}
		t.owner.rmd.put(newRMD)
		// TODO: move and refactor
	} else if msg.Action == apc.ActAdminJoinTarget && daemon.cli.target.standby && msg.Name == t.SID() {
		nlog.Warningln(t.String()+": standby => join", msg.String())
		if _, err = t.joinCluster(msg.Action); err == nil {
			err = t.endStartupStandby()
		}
		t.owner.rmd.put(newRMD)
	}
	return
}

// ensureLatestBMD compares the local BMD version with the one carried by the
// message and triggers a fixup (fetch from primary) when the local copy is
// stale.
func (t *target) ensureLatestBMD(msg *aisMsg, r *http.Request) {
	bmd, bmdVersion := t.owner.bmd.Get(), msg.BMDVersion
	if bmd.Version < bmdVersion {
		nlog.Errorf("%s: local %s < v%d %s - running fixup...", t, bmd, bmdVersion, msg)
		t.BMDVersionFixup(r)
	} else if bmd.Version > bmdVersion {
		// If metasync outraces the request, we end up here, just log it and continue.
		nlog.Warningf("%s: local %s > v%d %s", t, bmd, bmdVersion, msg)
	}
}

// getPrimaryBMD fetches the BMD directly from the primary proxy, retrying
// once (after half the control-plane timeout) on failure.
func (t *target) getPrimaryBMD(renamed string) (bmd *bucketMD, err error) {
	smap := t.owner.smap.get()
	if err = smap.validate(); err != nil {
		return nil, cmn.NewErrFailedTo(t, "get-primary-bmd", smap, err)
	}
	var (
		what    = apc.WhatBMD
		q       = url.Values{apc.QparamWhat: []string{what}}
		psi     = smap.Primary
		path    = apc.URLPathDae.S
		url     = psi.URL(cmn.NetIntraControl) // NOTE: shadows package "net/url" below this point
		timeout = cmn.Rom.CplaneOperation()
	)
	if renamed != "" {
		q.Set(whatRenamedLB, renamed)
	}
	cargs := allocCargs()
	{
		cargs.si = psi
		cargs.req = cmn.HreqArgs{Method: http.MethodGet, Base: url, Path: path, Query: q}
		cargs.timeout = timeout
		cargs.cresv = cresBM{}
	}
	res := t.call(cargs, smap)
	if res.err != nil {
		time.Sleep(timeout / 2)
		smap = t.owner.smap.get()
		res = t.call(cargs, smap)
		if res.err != nil {
			err = res.errorf("%s: failed to GET(%q)", t.si, what)
		}
	}
	if err == nil {
		bmd = res.v.(*bucketMD)
	}
	freeCargs(cargs)
	freeCR(res)
	return
}

// BMDVersionFixup pulls the latest BMD from the primary and applies it locally
// (under regstate lock, skipped when the node is stopping). The optional bck
// carries a rename-in-progress hint.
func (t *target) BMDVersionFixup(r *http.Request, bcks ...cmn.Bck) {
	var (
		caller string
		bck    cmn.Bck
	)
	if len(bcks) > 0 {
		bck = bcks[0]
	}
	time.Sleep(200 * time.Millisecond)
	newBucketMD, err := t.getPrimaryBMD(bck.Name)
	if err != nil {
		nlog.Errorln(err)
		return
	}
	msg := t.newAmsgStr("get-what="+apc.WhatBMD, newBucketMD)
	if r != nil {
		caller = r.Header.Get(apc.HdrCallerName)
	}
	t.regstate.mu.Lock()
	if nlog.Stopping() {
		t.regstate.mu.Unlock()
		return
	}
	err = t.receiveBMD(newBucketMD, msg, nil, bmdFixup, caller, true /*silent*/)
	t.regstate.mu.Unlock()
	if err != nil && !isErrDowngrade(err) {
		nlog.Errorln(err)
	}
}

// [METHOD] /v1/metasync
func (t *target) metasyncHandler(w http.ResponseWriter, r *http.Request) {
	if nlog.Stopping() {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}
	switch r.Method {
	case http.MethodPut:
		t.regstate.mu.Lock()
		// re-check under the lock: stopping may have been set meanwhile
		if nlog.Stopping() {
			w.WriteHeader(http.StatusServiceUnavailable)
		} else {
			t.metasyncPut(w, r)
		}
		t.regstate.mu.Unlock()
	case http.MethodPost:
		t.metasyncPost(w, r)
	default:
		cmn.WriteErr405(w, r, http.MethodPost, http.MethodPut)
	}
}

// PUT /v1/metasync
// compare w/ p.metasyncHandler (NOTE: executes under regstate lock)
func (t *target) metasyncPut(w http.ResponseWriter, r *http.Request) {
	var (
		err = &errMsync{}
		cii = &err.Cii
	)
	if r.Method != http.MethodPut {
		cmn.WriteErr405(w, r, http.MethodPut)
		return
	}
	payload := make(msPayload)
	if errP := payload.unmarshal(r.Body, "metasync put"); errP != nil {
		cmn.WriteErr(w, r, errP)
		return
	}
	// 1. extract
	var (
		caller                       = r.Header.Get(apc.HdrCallerName)
		newConf, msgConf, errConf    = t.extractConfig(payload, caller)
		newSmap, msgSmap, errSmap    = t.extractSmap(payload, caller, false /*skip validation*/)
		newBMD, msgBMD, errBMD       = t.extractBMD(payload, caller)
		newRMD, msgRMD, errRMD       = t.extractRMD(payload, caller)
		newEtlMD, msgEtlMD, errEtlMD = t.extractEtlMD(payload, caller)
	)
	// 2. apply
	if errConf == nil && newConf != nil {
		errConf = t.receiveConfig(newConf, msgConf, payload, caller)
	}
	if errSmap == nil && newSmap != nil {
		errSmap = t.receiveSmap(newSmap, msgSmap, payload, caller, t.htrun.smapUpdatedCB)
	}
	if errBMD == nil && newBMD != nil {
		errBMD = t.receiveBMD(newBMD, msgBMD, payload, bmdRecv, caller, false /*silent*/)
	}
	if errRMD == nil && newRMD != nil {
		rmd := t.owner.rmd.get()
		logmsync(rmd.Version, newRMD, msgRMD, caller)

		t.owner.rmd.Lock()
		errRMD = t.receiveRMD(newRMD, msgRMD)
		t.owner.rmd.Unlock()
	}
	if errEtlMD == nil && newEtlMD != nil {
		errEtlMD = t.receiveEtlMD(newEtlMD, msgEtlMD, payload, caller, _stopETLs)
	}
	// 3. respond
	if errConf == nil && errSmap == nil && errBMD == nil && errRMD == nil && errEtlMD == nil {
		return
	}
	t.ciiFill(cii)
	retErr := err.message(errConf, errSmap, errBMD, errRMD, errEtlMD, nil)
	t.writeErr(w, r, retErr, http.StatusConflict)
}

// _stopETLs stops every ETL that is present in the old MD but missing from
// the new one.
func _stopETLs(newEtlMD, oldEtlMD *etlMD) {
	for id := range oldEtlMD.ETLs {
		if _, ok := newEtlMD.ETLs[id]; ok {
			continue
		}
		// TODO: stop only when running
		nlog.Infof("stopping (removed from md) etl[%s] (old md v%d, new v%d)", id, oldEtlMD.Version, newEtlMD.Version)
		etl.Stop(id, nil)
	}
}

// compare w/ p.receiveConfig
// receiveConfig persists the new global config and, once the node has
// started, applies remote-AIS backend attach/detach changes it implies.
func (t *target) receiveConfig(newConfig *globalConfig, msg *aisMsg, payload msPayload, caller string) (err error) {
	oldConfig := cmn.GCO.Get()
	logmsync(oldConfig.Version, newConfig, msg, caller)

	t.owner.config.Lock()
	err = t._recvCfg(newConfig, payload)
	t.owner.config.Unlock()
	if err != nil {
		return
	}

	if !t.NodeStarted() {
		if msg.Action == apc.ActAttachRemAis || msg.Action == apc.ActDetachRemAis {
			nlog.Errorf("%s: cannot handle %s (%s => %s) - starting up...", t, msg, oldConfig, newConfig)
		}
		return
	}

	// special: remais update
	if msg.Action == apc.ActAttachRemAis || msg.Action == apc.ActDetachRemAis {
		return t.attachDetachRemAis(newConfig, msg)
	}

	if !newConfig.Backend.EqualRemAIS(&oldConfig.Backend, t.String()) {
		if aisConf := newConfig.Backend.Get(apc.AIS); aisConf != nil {
			err = t.attachDetachRemAis(newConfig, msg)
		} else {
			// remote-AIS section removed: reset to a fresh backend
			t.backend[apc.AIS] = backend.NewAIS(t)
		}
	}
	return
}

// NOTE: apply the entire config: add new and update existing entries (remote clusters)
func (t *target) attachDetachRemAis(newConfig *globalConfig, msg *aisMsg) (err error) {
	var (
		aisbp   *backend.AISbp
		aisConf = newConfig.Backend.Get(apc.AIS)
	)
	debug.Assert(aisConf != nil)
	aisbp = t.aisbp()
	return aisbp.Apply(aisConf, msg.Action, &newConfig.ClusterConfig)
}

// POST /v1/metasync
// metasyncPost handles GFN (get-from-neighbor) start/stop notifications that
// accompany a node join/leave.
func (t *target) metasyncPost(w http.ResponseWriter, r *http.Request) {
	payload := make(msPayload)
	if err := payload.unmarshal(r.Body, "metasync post"); err != nil {
		cmn.WriteErr(w, r, err)
		return
	}
	caller := r.Header.Get(apc.HdrCallerName)
	newSmap, msg, err := t.extractSmap(payload, caller, true /*skip validation*/)
	if err != nil {
		t.writeErr(w, r, err)
		return
	}
	ntid := msg.UUID
	if cmn.Rom.FastV(4, cos.SmoduleAIS) {
		nlog.Infof("%s %s: %s, join %s", t, msg, newSmap, meta.Tname(ntid)) // "start-gfn" | "stop-gfn"
	}
	switch msg.Action {
	case apc.ActStartGFN:
		reb.OnTimedGFN()
	case apc.ActStopGFN:
		detail := meta.Tname(ntid) + " " + newSmap.String()
		reb.OffTimedGFN(detail)
	default:
		debug.Assert(false, msg.String())
		t.writeErrAct(w, r, msg.Action)
	}
}

// GET /v1/health (apc.Health)
// healthHandler responds to keepalive/health pings; optionally piggybacks
// cluster info or rebalance status, and may locally abort a running rebalance
// when the caller's Smap is newer.
func (t *target) healthHandler(w http.ResponseWriter, r *http.Request) {
	if t.regstate.disabled.Load() && daemon.cli.target.standby {
		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
			nlog.Warningf("[health] %s: standing by...", t)
		}
	} else if !t.NodeStarted() {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}
	if responded := t.externalWD(w, r); responded {
		return
	}

	t.uptime2hdr(w.Header())

	var (
		getCii, getRebStatus bool
	)
	if r.URL.RawQuery != "" {
		query := r.URL.Query()
		getCii = cos.IsParseBool(query.Get(apc.QparamClusterInfo))
		getRebStatus = cos.IsParseBool(query.Get(apc.QparamRebStatus))
	}

	// piggyback [cluster info]
	if getCii {
		cii := &cifl.Info{}
		t.ciiFill(cii)
		t.writeJSON(w, r, cii, "cluster-info")
		return
	}
	// valid?
	smap := t.owner.smap.get()
	if !smap.isValid() {
		w.WriteHeader(http.StatusServiceUnavailable)
		return
	}

	// return ok plus optional reb info
	var (
		err              error
		callerID         = r.Header.Get(apc.HdrCallerID)
		caller           = r.Header.Get(apc.HdrCallerName)
		callerSmapVer, _ = strconv.ParseInt(r.Header.Get(apc.HdrCallerSmapVer), 10, 64)
	)
	if smap.version() != callerSmapVer {
		s := "older"
		if smap.version() < callerSmapVer {
			s = "newer"
		}
		err = fmt.Errorf("health-ping from (%s, %s) with %s Smap v%d", callerID, caller, s, callerSmapVer)
		nlog.Warningf("%s[%s]: %v", t, smap.StringEx(), err)
	}
	if getRebStatus {
		status := &reb.Status{}
		t.reb.RebStatus(status)
		if !t.writeJS(w, r, status, "rebalance-status") {
			return
		}
		if smap.version() < callerSmapVer && status.Running {
			// NOTE: abort right away but don't broadcast
			t.reb.AbortLocal(smap.version(), err)
		}
	}
	if smap.GetProxy(callerID) != nil {
		t.keepalive.heardFrom(callerID)
	}
}

// unregisters the target and marks it as disabled by an internal event
func (t *target) disable(msg string) {
	t.regstate.mu.Lock()

	if t.regstate.disabled.Load() {
		t.regstate.mu.Unlock()
		return // nothing to do
	}
	if err := t.unregisterSelf(false); err != nil {
		t.regstate.mu.Unlock()
		nlog.Errorf("%s but failed to remove self from Smap: %v", msg, err)
		return
	}
	t.regstate.disabled.Store(true)
	t.regstate.mu.Unlock()
	nlog.Errorf("Warning: %s => disabled and removed self from Smap", msg)
}

// registers the target again if it was disabled by and internal event
func (t *target) enable() error {
	t.regstate.mu.Lock()

	if !t.regstate.disabled.Load() {
		t.regstate.mu.Unlock()
		return nil
	}
	if _, err := t.joinCluster(apc.ActSelfJoinTarget); err != nil {
		t.regstate.mu.Unlock()
		nlog.Infof("%s failed to re-join: %v", t, err)
		return err
	}
	t.regstate.disabled.Store(false)
	t.regstate.mu.Unlock()
	nlog.Infof("%s is now active", t)
	return nil
}

// checks with a given target to see if it has the object.
// target acts as a client - compare with api.HeadObject
func (t *target) headt2t(lom *core.LOM, tsi *meta.Snode, smap *smapX) (ok bool) {
	q := lom.Bck().NewQuery()
	q.Set(apc.QparamSilent, "true")
	q.Set(apc.QparamFltPresence, strconv.Itoa(apc.FltPresent))
	cargs := allocCargs()
	{
		cargs.si = tsi
		cargs.req = cmn.HreqArgs{
			Method: http.MethodHead,
			Header: http.Header{
				apc.HdrCallerID:   []string{t.SID()},
				apc.HdrCallerName: []string{t.callerName()},
			},
			Base:  tsi.URL(cmn.NetIntraControl),
			Path:  apc.URLPathObjects.Join(lom.Bck().Name, lom.ObjName),
			Query: q,
		}
		cargs.timeout = cmn.Rom.CplaneOperation()
	}
	res := t.call(cargs, smap)
	ok = res.err == nil
	freeCargs(cargs)
	freeCR(res)
	return
}

// headObjBcast broadcasts to all targets to find out if anyone has the specified object.
// NOTE: 1) apc.QparamCheckExistsAny to make an extra effort, 2) `ignoreMaintenance`
// Returns the first responding node that has the object, or nil if none does.
func (t *target) headObjBcast(lom *core.LOM, smap *smapX) *meta.Snode {
	q := lom.Bck().NewQuery()
	q.Set(apc.QparamSilent, "true")
	// lookup across all mountpaths and copy (ie., restore) if misplaced
	q.Set(apc.QparamFltPresence, strconv.Itoa(apc.FltPresentCluster))
	args := allocBcArgs()
	args.req = cmn.HreqArgs{
		Method: http.MethodHead,
		Header: http.Header{
			apc.HdrCallerID:   []string{t.SID()},
			apc.HdrCallerName: []string{t.callerName()},
		},
		Path:  apc.URLPathObjects.Join(lom.Bck().Name, lom.ObjName),
		Query: q,
	}
	args.ignoreMaintenance = true
	args.smap = smap
	results := t.bcastGroup(args)
	freeBcArgs(args)
	// first target that answered HEAD without error has the object
	for _, res := range results {
		if res.err == nil {
			si := res.si
			freeBcastRes(results)
			return si
		}
	}
	freeBcastRes(results)
	return nil
}

//
// termination(s)
//

// termKaliveX suspends keepalives and, if `abort`, aborts all running xactions.
func (t *target) termKaliveX(action string, abort bool) {
	t.keepalive.ctrl(kaSuspendMsg)

	if abort {
		err := fmt.Errorf("%s: term-kalive by %q", t, action)
		xreg.AbortAll(err) // all xactions
	}
}

// shutdown marks the node as stopping and stops it without self-removal from Smap
// (see errNoUnregister)
func (t *target) shutdown(action string) {
	// regstate.mu serializes the stopping transition vs concurrent metasync handling
	t.regstate.mu.Lock()
	nlog.SetStopping()
	t.regstate.mu.Unlock()

	t.Stop(&errNoUnregister{action})
}

// decommission permanently removes this target's on-disk state: ais metadata
// (and, optionally, user data), config dir, and the kvdb - then optionally shuts down.
func (t *target) decommission(action string, opts *apc.ActValRmNode) {
	t.regstate.mu.Lock()
	nlog.SetStopping()
	t.regstate.mu.Unlock()

	nlog.Infof("%s: %s %v", t, action, opts)
	fs.Decommission(!opts.RmUserData /*ais metadata only*/)
	cleanupConfigDir(t.Name(), opts.KeepInitialConfig)

	fpath := filepath.Join(cmn.GCO.Get().ConfigDir, dbName)
	if err := cos.RemoveFile(fpath); err != nil { // delete kvdb
		nlog.Errorf("failed to delete kvdb: %v", err)
	}
	if !opts.NoShutdown {
		t.Stop(&errNoUnregister{action})
	}
}

// stop gracefully, return from rungroup.run
func (t *target) Stop(err error) {
	if !nlog.Stopping() {
		// vs metasync
		t.regstate.mu.Lock()
		nlog.SetStopping()
		t.regstate.mu.Unlock()
	}
	if err == nil {
		nlog.Infoln("Stopping", t.String())
	} else {
		nlog.Warningln("Stopping", t.String()+":", err)
	}

	// terminate the core concurrently with the abort/stop sequence below
	wg := &sync.WaitGroup{}
	wg.Add(1)
	go func() {
		core.Term()
		wg.Done()
	}()

	xreg.AbortAll(err)

	// remove self from Smap unless the public server is gone or the cause is errNoUnregister
	t.htrun.stop(wg, g.netServ.pub.s != nil && !isErrNoUnregister(err) /*rm from Smap*/)
}