github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/vote.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"net/url"
    11  	"runtime"
    12  	"strconv"
    13  	"time"
    14  
    15  	"github.com/NVIDIA/aistore/api/apc"
    16  	"github.com/NVIDIA/aistore/cmn"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  	"github.com/NVIDIA/aistore/cmn/nlog"
    20  	"github.com/NVIDIA/aistore/core"
    21  	"github.com/NVIDIA/aistore/core/meta"
    22  	"github.com/NVIDIA/aistore/xact/xreg"
    23  	"github.com/NVIDIA/aistore/xact/xs"
    24  )
    25  
    26  const (
    27  	VoteYes Vote = "YES"
    28  	VoteNo  Vote = "NO"
    29  )
    30  
    31  const maxRetryElectReq = 3
    32  
    33  type (
    34  	Vote string
    35  
    36  	VoteRecord struct {
    37  		Candidate string    `json:"candidate"`
    38  		Primary   string    `json:"primary"`
    39  		Smap      *smapX    `json:"smap"`
    40  		StartTime time.Time `json:"start_time"`
    41  		Initiator string    `json:"initiator"`
    42  	}
    43  
    44  	VoteInitiation VoteRecord
    45  	VoteResult     VoteRecord
    46  
    47  	VoteMessage struct {
    48  		Record VoteRecord `json:"vote_record"`
    49  	}
    50  
    51  	VoteInitiationMessage struct {
    52  		Request VoteInitiation `json:"vote_initiation"`
    53  	}
    54  
    55  	VoteResultMessage struct {
    56  		Result VoteResult `json:"vote_result"`
    57  	}
    58  
    59  	voteResult struct {
    60  		yes      bool
    61  		daemonID string
    62  		err      error
    63  	}
    64  )
    65  
    66  func voteInProgress() (xele core.Xact) {
    67  	if e := xreg.GetRunning(xreg.Flt{Kind: apc.ActElection}); e != nil {
    68  		xele = e.Get()
    69  	}
    70  	return
    71  }
    72  
    73  //
    74  // voting: proxy
    75  //
    76  
    77  // [METHOD] /v1/vote
    78  func (p *proxy) voteHandler(w http.ResponseWriter, r *http.Request) {
    79  	if r.Method != http.MethodGet && r.Method != http.MethodPut {
    80  		cmn.WriteErr405(w, r, http.MethodGet, http.MethodPut)
    81  		return
    82  	}
    83  	apiItems, err := p.parseURL(w, r, apc.URLPathVote.L, 1, false)
    84  	if err != nil {
    85  		return
    86  	}
    87  	item := apiItems[0]
    88  	if !p.NodeStarted() {
    89  		w.WriteHeader(http.StatusServiceUnavailable)
    90  		return
    91  	}
    92  	// MethodGet
    93  	if r.Method == http.MethodGet {
    94  		if item != apc.Proxy {
    95  			p.writeErrURL(w, r)
    96  			return
    97  		}
    98  		p.httpgetvote(w, r)
    99  		return
   100  	}
   101  	// MethodPut
   102  	switch item {
   103  	case apc.Voteres:
   104  		p.httpsetprimary(w, r)
   105  	case apc.VoteInit:
   106  		p.httpelect(w, r)
   107  	case apc.PriStop:
   108  		callerID := r.Header.Get(apc.HdrCallerID)
   109  		p.onPrimaryDown(p, callerID)
   110  	default:
   111  		p.writeErrURL(w, r)
   112  	}
   113  }
   114  
   115  // PUT /v1/vote/init (via sendElectionRequest)
   116  func (p *proxy) httpelect(w http.ResponseWriter, r *http.Request) {
   117  	if _, err := p.parseURL(w, r, apc.URLPathVoteInit.L, 0, false); err != nil {
   118  		return
   119  	}
   120  	msg := VoteInitiationMessage{}
   121  	if err := cmn.ReadJSON(w, r, &msg); err != nil {
   122  		return
   123  	}
   124  	newSmap := msg.Request.Smap
   125  	if err := newSmap.validate(); err != nil {
   126  		p.writeErrf(w, r, "%s: invalid %s in the Vote Request, err: %v", p.si, newSmap, err)
   127  		return
   128  	}
   129  	smap := p.owner.smap.get()
   130  	caller := r.Header.Get(apc.HdrCallerName)
   131  	nlog.Infof("[vote] receive %s from %q (local: %s)", newSmap.StringEx(), caller, smap.StringEx())
   132  
   133  	if !newSmap.isPresent(p.si) {
   134  		p.writeErrf(w, r, "%s: not present in the Vote Request, %s", p.si, newSmap)
   135  		return
   136  	}
   137  	debug.Assert(!newSmap.isPrimary(p.si))
   138  
   139  	if err := p.owner.smap.synchronize(p.si, newSmap, nil /*ms payload*/, p.htrun.smapUpdatedCB); err != nil {
   140  		if isErrDowngrade(err) {
   141  			psi := newSmap.GetProxy(msg.Request.Candidate)
   142  			psi2 := p.owner.smap.get().GetProxy(msg.Request.Candidate)
   143  			if psi2.Eq(psi) {
   144  				err = nil
   145  			}
   146  		}
   147  		if err != nil {
   148  			p.writeErr(w, r, cmn.NewErrFailedTo(p, "synchronize", newSmap, err))
   149  			return
   150  		}
   151  	}
   152  
   153  	smap = p.owner.smap.get()
   154  	psi, err := smap.HrwProxy(smap.Primary.ID())
   155  	if err != nil {
   156  		p.writeErr(w, r, err)
   157  		return
   158  	}
   159  
   160  	// proceed with election iff:
   161  	if psi.ID() != p.SID() {
   162  		nlog.Warningf("%s: not next in line %s", p, psi)
   163  		return
   164  	}
   165  	if !p.ClusterStarted() {
   166  		nlog.Warningf("%s: not ready yet to be elected - starting up", p)
   167  		w.WriteHeader(http.StatusServiceUnavailable)
   168  		return
   169  	}
   170  
   171  	vr := &VoteRecord{
   172  		Candidate: msg.Request.Candidate,
   173  		Primary:   msg.Request.Primary,
   174  		StartTime: time.Now(),
   175  		Initiator: p.SID(),
   176  	}
   177  	// include resulting Smap in the response
   178  	vr.Smap = p.owner.smap.get()
   179  
   180  	// xaction (minimal and, unlike target xactions, not visible via API (TODO))
   181  	go p.startElection(vr)
   182  }
   183  
   184  // Election Functions
   185  
   186  func (p *proxy) startElection(vr *VoteRecord) {
   187  	if p.owner.smap.get().isPrimary(p.si) {
   188  		nlog.Infof("%s: already in primary state", p)
   189  		return
   190  	}
   191  	rns := xreg.RenewElection()
   192  	if rns.Err != nil {
   193  		nlog.Errorf("%s: %+v %v", p, vr, rns.Err)
   194  		debug.AssertNoErr(rns.Err)
   195  		return
   196  	}
   197  	if rns.IsRunning() {
   198  		return
   199  	}
   200  	xctn := rns.Entry.Get()
   201  	xele, ok := xctn.(*xs.Election)
   202  	debug.Assert(ok)
   203  	nlog.Infoln(xele.Name())
   204  	p.elect(vr, xele)
   205  	xele.Finish()
   206  }
   207  
   208  func (p *proxy) elect(vr *VoteRecord, xele *xs.Election) {
   209  	var (
   210  		smap       *smapX
   211  		err        error
   212  		curPrimary = vr.Smap.Primary
   213  		config     = cmn.GCO.Get()
   214  		timeout    = config.Timeout.CplaneOperation.D() / 2
   215  	)
   216  	// 1. ping the current primary (not using apc.QparamAskPrimary as it might be transitioning)
   217  	for i := range 2 {
   218  		if i > 0 {
   219  			runtime.Gosched()
   220  		}
   221  		smap = p.owner.smap.get()
   222  		if smap.version() > vr.Smap.version() {
   223  			nlog.Warningf("%s: %s updated from %s, moving back to idle", p, smap, vr.Smap)
   224  			return
   225  		}
   226  		_, _, err = p.reqHealth(curPrimary, timeout, nil /*ask primary*/, smap)
   227  		if err == nil {
   228  			break
   229  		}
   230  		timeout = config.Timeout.CplaneOperation.D()
   231  	}
   232  	if err == nil {
   233  		// move back to idle
   234  		query := url.Values{apc.QparamAskPrimary: []string{"true"}}
   235  		_, _, err = p.reqHealth(curPrimary, timeout, query /*ask primary*/, smap)
   236  		if err == nil {
   237  			nlog.Infof("%s: current primary %s is up, moving back to idle", p, curPrimary)
   238  		} else {
   239  			errV := fmt.Errorf("%s: current primary(?) %s responds but does not consider itself primary",
   240  				p, curPrimary.StringEx())
   241  			xele.AddErr(errV, 0)
   242  		}
   243  		return
   244  	}
   245  	nlog.Infof("%s: primary %s is confirmed down: [%v] - moving to election state phase 1 (prepare)",
   246  		p, curPrimary.StringEx(), err)
   247  
   248  	// 2. election phase 1
   249  	elected, votingErrors := p.electPhase1(vr)
   250  	if !elected {
   251  		errV := fmt.Errorf("%s: election phase 1 (prepare) failed: primary still %s w/ status unknown",
   252  			p, curPrimary.StringEx())
   253  		xele.AddErr(errV, 0)
   254  
   255  		smap = p.owner.smap.get()
   256  		if smap.version() > vr.Smap.version() {
   257  			nlog.Warningf("%s: %s updated from %s, moving back to idle", p, smap, vr.Smap)
   258  			return
   259  		}
   260  
   261  		// best-effort
   262  		svm, _, slowp := p.bcastMaxVer(smap, nil, nil)
   263  		if svm.Smap != nil && !slowp {
   264  			if svm.Smap.UUID == smap.UUID && svm.Smap.version() > smap.version() && svm.Smap.validate() == nil {
   265  				nlog.Warningf("%s: upgrading local %s to cluster max-ver %s",
   266  					p, smap.StringEx(), svm.Smap.StringEx())
   267  				if svm.Smap.Primary.ID() != smap.Primary.ID() {
   268  					nlog.Warningf("%s: new primary %s is already elected ...",
   269  						p, svm.Smap.Primary.StringEx())
   270  				}
   271  				errV := p.owner.smap.synchronize(p.si, svm.Smap, nil /*ms payload*/, p.smapUpdatedCB)
   272  				if errV != nil {
   273  					cos.ExitLog(errV)
   274  				}
   275  			}
   276  		}
   277  
   278  		return
   279  	}
   280  
   281  	// 3. election phase 2
   282  	nlog.Infoln(p.String()+":", "moving to election state phase 2 (commit)")
   283  	confirmationErrors := p.electPhase2(vr)
   284  	for sid := range confirmationErrors {
   285  		if !votingErrors.Contains(sid) {
   286  			errV := fmt.Errorf("%s: error confirming the election: %s was healthy when voting", p, sid)
   287  			xele.AddErr(errV, 0)
   288  		}
   289  	}
   290  
   291  	// 4. become!
   292  	nlog.Infof("%s: becoming primary", p)
   293  	p.becomeNewPrimary(vr.Primary /*proxyIDToRemove*/)
   294  }
   295  
   296  // phase 1: prepare (via simple majority voting)
   297  func (p *proxy) electPhase1(vr *VoteRecord) (winner bool, errors cos.StrSet) {
   298  	var (
   299  		resCh = p.requestVotes(vr)
   300  		y, n  int
   301  	)
   302  	for res := range resCh {
   303  		if res.err != nil {
   304  			if errors == nil {
   305  				errors = cos.NewStrSet(res.daemonID)
   306  			} else {
   307  				errors.Set(res.daemonID)
   308  			}
   309  			n++
   310  		} else {
   311  			if cmn.Rom.FastV(4, cos.SmoduleAIS) {
   312  				nlog.Infof("Node %s responded with (winner: %t)", res.daemonID, res.yes)
   313  			}
   314  			if res.yes {
   315  				y++
   316  			} else {
   317  				n++
   318  			}
   319  		}
   320  	}
   321  
   322  	winner = y > n || (y+n == 0) // No Votes: Default Winner
   323  	nlog.Infof("Vote Results:\n Y: %d, N: %d\n Victory: %t\n", y, n, winner)
   324  	return
   325  }
   326  
   327  func (p *proxy) requestVotes(vr *VoteRecord) chan voteResult {
   328  	var (
   329  		msg = VoteMessage{Record: *vr}
   330  		q   = url.Values{}
   331  	)
   332  	q.Set(apc.QparamPrimaryCandidate, p.SID())
   333  	args := allocBcArgs()
   334  	args.req = cmn.HreqArgs{
   335  		Method: http.MethodGet,
   336  		Path:   apc.URLPathVoteProxy.S,
   337  		Body:   cos.MustMarshal(&msg),
   338  		Query:  q,
   339  	}
   340  	args.to = core.AllNodes
   341  	results := p.bcastGroup(args)
   342  	freeBcArgs(args)
   343  	resCh := make(chan voteResult, len(results))
   344  	for _, res := range results {
   345  		if res.err != nil {
   346  			resCh <- voteResult{
   347  				yes:      false,
   348  				daemonID: res.si.ID(),
   349  				err:      res.err,
   350  			}
   351  		} else {
   352  			resCh <- voteResult{
   353  				yes:      VoteYes == Vote(res.bytes),
   354  				daemonID: res.si.ID(),
   355  				err:      nil,
   356  			}
   357  		}
   358  	}
   359  	freeBcastRes(results)
   360  	close(resCh)
   361  	return resCh
   362  }
   363  
   364  // phase 2: confirm and commit
   365  func (p *proxy) electPhase2(vr *VoteRecord) cos.StrSet {
   366  	var (
   367  		errors = cos.StrSet{}
   368  		msg    = &VoteResultMessage{
   369  			VoteResult{
   370  				Candidate: vr.Candidate,
   371  				Primary:   vr.Primary,
   372  				Smap:      vr.Smap,
   373  				StartTime: time.Now(),
   374  				Initiator: p.SID(),
   375  			},
   376  		}
   377  	)
   378  	args := allocBcArgs()
   379  	args.req = cmn.HreqArgs{Method: http.MethodPut, Path: apc.URLPathVoteVoteres.S, Body: cos.MustMarshal(msg)}
   380  	args.to = core.AllNodes
   381  	results := p.bcastGroup(args)
   382  	freeBcArgs(args)
   383  	for _, res := range results {
   384  		if res.err == nil {
   385  			continue
   386  		}
   387  		nlog.Warningf("%s: failed to confirm election with %s: %v", p, res.si.StringEx(), res.err)
   388  		errors.Set(res.si.ID())
   389  	}
   390  	freeBcastRes(results)
   391  	return errors
   392  }
   393  
   394  //
   395  // voting: target
   396  //
   397  
   398  // [METHOD] /v1/vote
   399  func (t *target) voteHandler(w http.ResponseWriter, r *http.Request) {
   400  	if r.Method != http.MethodGet && r.Method != http.MethodPut {
   401  		cmn.WriteErr405(w, r, http.MethodGet, http.MethodPut)
   402  		return
   403  	}
   404  	apiItems, err := t.parseURL(w, r, apc.URLPathVote.L, 1, false)
   405  	if err != nil {
   406  		return
   407  	}
   408  	switch {
   409  	case r.Method == http.MethodGet && apiItems[0] == apc.Proxy:
   410  		t.httpgetvote(w, r)
   411  	case r.Method == http.MethodPut && apiItems[0] == apc.Voteres:
   412  		t.httpsetprimary(w, r)
   413  	default:
   414  		t.writeErrURL(w, r)
   415  	}
   416  }
   417  
   418  //
   419  // voting: common methods
   420  //
   421  
   422  func (h *htrun) onPrimaryDown(self *proxy, callerID string) {
   423  	smap := h.owner.smap.get()
   424  	if smap.validate() != nil {
   425  		return
   426  	}
   427  	clone := smap.clone()
   428  	s := "via keepalive"
   429  	if callerID != "" {
   430  		s = "via direct call"
   431  		if callerID != clone.Primary.ID() {
   432  			nlog.Errorf("%s (%s): non-primary caller reporting primary down (%s, %s, %s)",
   433  				h, s, callerID, clone.Primary.StringEx(), smap)
   434  			return
   435  		}
   436  	}
   437  	nlog.Infof("%s (%s): primary %s is no longer online and must be reelected", h, s, clone.Primary.StringEx())
   438  
   439  	for {
   440  		if nlog.Stopping() {
   441  			return
   442  		}
   443  		// use HRW ordering
   444  		nextPrimaryProxy, err := clone.HrwProxy(clone.Primary.ID())
   445  		if err != nil {
   446  			if !nlog.Stopping() {
   447  				nlog.Errorf("%s failed to execute HRW selection: %v", h, err)
   448  			}
   449  			return
   450  		}
   451  
   452  		// If this proxy is the next primary proxy candidate, it starts the election directly.
   453  		if nextPrimaryProxy.ID() == h.si.ID() {
   454  			debug.Assert(h.si.IsProxy())
   455  			debug.Assert(h.SID() == self.SID())
   456  			nlog.Infof("%s: starting election (candidate = self)", h)
   457  			vr := &VoteRecord{
   458  				Candidate: nextPrimaryProxy.ID(),
   459  				Primary:   clone.Primary.ID(),
   460  				StartTime: time.Now(),
   461  				Initiator: h.si.ID(),
   462  			}
   463  			vr.Smap = clone
   464  			self.startElection(vr)
   465  			return
   466  		}
   467  
   468  		nlog.Infof("%s: trying %s as the new primary candidate", h, meta.Pname(nextPrimaryProxy.ID()))
   469  
   470  		// ask the candidate to start election
   471  		vr := &VoteInitiation{
   472  			Candidate: nextPrimaryProxy.ID(),
   473  			Primary:   clone.Primary.ID(),
   474  			StartTime: time.Now(),
   475  			Initiator: h.si.ID(),
   476  		}
   477  		vr.Smap = clone
   478  		if h.sendElectionRequest(vr, nextPrimaryProxy) == nil {
   479  			return // the candidate has accepted the request and started election
   480  		}
   481  
   482  		// No response from the candidate (or it failed to start election) - remove
   483  		// it from the Smap and try the next candidate
   484  		// TODO: handle http.StatusServiceUnavailable from the candidate that is currently starting up
   485  		// (see httpelect)
   486  		if clone.GetProxy(nextPrimaryProxy.ID()) != nil {
   487  			clone.delProxy(nextPrimaryProxy.ID())
   488  		}
   489  	}
   490  }
   491  
   492  // GET /v1/vote/proxy
   493  func (h *htrun) httpgetvote(w http.ResponseWriter, r *http.Request) {
   494  	if _, err := h.parseURL(w, r, apc.URLPathVoteProxy.L, 0, false); err != nil {
   495  		return
   496  	}
   497  	msg := VoteMessage{}
   498  	if err := cmn.ReadJSON(w, r, &msg); err != nil {
   499  		return
   500  	}
   501  	candidate := msg.Record.Candidate
   502  	if candidate == "" {
   503  		h.writeErrf(w, r, "%s: unexpected: empty candidate field [%v]", h, msg.Record)
   504  		return
   505  	}
   506  	smap := h.owner.smap.get()
   507  	if smap.Primary == nil {
   508  		h.writeErrf(w, r, "%s: current primary undefined, %s", h, smap)
   509  		return
   510  	}
   511  	currPrimaryID := smap.Primary.ID()
   512  	if candidate == currPrimaryID {
   513  		h.writeErrf(w, r, "%s: candidate %q _is_ the current primary, %s", h, candidate, smap)
   514  		return
   515  	}
   516  	newSmap := msg.Record.Smap
   517  	psi := newSmap.GetProxy(candidate)
   518  	if psi == nil {
   519  		h.writeErrf(w, r, "%s: candidate %q not present in the VoteRecord %s", h, candidate, newSmap)
   520  		return
   521  	}
   522  	if !newSmap.isPresent(h.si) {
   523  		h.writeErrf(w, r, "%s: not present in the VoteRecord %s", h, newSmap)
   524  		return
   525  	}
   526  
   527  	if err := h.owner.smap.synchronize(h.si, newSmap, nil /*ms payload*/, h.smapUpdatedCB); err != nil {
   528  		// double-checking errDowngrade
   529  		if isErrDowngrade(err) {
   530  			newSmap2 := h.owner.smap.get()
   531  			psi2 := newSmap2.GetProxy(candidate)
   532  			if psi2.Eq(psi) {
   533  				err = nil // not an error - can vote Yes
   534  			}
   535  		}
   536  		if err != nil {
   537  			nlog.Errorf("%s: failed to synch %s, err %v - voting No", h, newSmap, err)
   538  			w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(VoteNo)))
   539  			_, err := w.Write([]byte(VoteNo))
   540  			debug.AssertNoErr(err)
   541  			return
   542  		}
   543  	}
   544  
   545  	vote, err := h.voteOnProxy(psi.ID(), currPrimaryID)
   546  	if err != nil {
   547  		h.writeErr(w, r, err)
   548  		return
   549  	}
   550  	if vote {
   551  		w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(VoteYes)))
   552  		_, err = w.Write([]byte(VoteYes))
   553  	} else {
   554  		w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(VoteNo)))
   555  		_, err = w.Write([]byte(VoteNo))
   556  	}
   557  	debug.AssertNoErr(err)
   558  }
   559  
   560  // PUT /v1/vote/result
   561  func (h *htrun) httpsetprimary(w http.ResponseWriter, r *http.Request) {
   562  	if _, err := h.parseURL(w, r, apc.URLPathVoteVoteres.L, 0, false); err != nil {
   563  		return
   564  	}
   565  	msg := VoteResultMessage{}
   566  	if err := cmn.ReadJSON(w, r, &msg); err != nil {
   567  		return
   568  	}
   569  	vr := msg.Result
   570  	nlog.Infof("%s: received vote result: new primary %s (old %s)", h.si, vr.Candidate, vr.Primary)
   571  
   572  	ctx := &smapModifier{
   573  		pre: h._votedPrimary,
   574  		nid: vr.Candidate,
   575  		sid: vr.Primary,
   576  	}
   577  	err := h.owner.smap.modify(ctx)
   578  	if err != nil {
   579  		h.writeErr(w, r, err)
   580  	}
   581  }
   582  
   583  func (h *htrun) _votedPrimary(ctx *smapModifier, clone *smapX) error {
   584  	newPrimary, oldPrimary := ctx.nid, ctx.sid
   585  	psi := clone.GetProxy(newPrimary)
   586  	if psi == nil {
   587  		return &errNodeNotFound{"cannot accept new primary election:", newPrimary, h.si, clone}
   588  	}
   589  	clone.Primary = psi
   590  	if oldPrimary != "" && clone.GetProxy(oldPrimary) != nil {
   591  		clone.delProxy(oldPrimary)
   592  	}
   593  	nlog.Infof("%s: voted-primary result: %s", h.si, clone)
   594  	return nil
   595  }
   596  
   597  func (h *htrun) sendElectionRequest(vr *VoteInitiation, nextPrimaryProxy *meta.Snode) (err error) {
   598  	var (
   599  		msg   = VoteInitiationMessage{Request: *vr}
   600  		body  = cos.MustMarshal(&msg)
   601  		cargs = allocCargs()
   602  	)
   603  	{
   604  		cargs.si = nextPrimaryProxy
   605  		cargs.req = cmn.HreqArgs{
   606  			Method: http.MethodPut,
   607  			Base:   nextPrimaryProxy.ControlNet.URL,
   608  			Path:   apc.URLPathVoteInit.S,
   609  			Body:   body,
   610  		}
   611  		cargs.timeout = apc.DefaultTimeout
   612  	}
   613  	res := h.call(cargs, vr.Smap)
   614  	err = res.err
   615  	freeCR(res)
   616  	defer freeCargs(cargs)
   617  	if err == nil || !cos.IsRetriableConnErr(err) {
   618  		return
   619  	}
   620  	// retry
   621  	sleep := cmn.Rom.CplaneOperation() / 2
   622  	for range maxRetryElectReq {
   623  		time.Sleep(sleep)
   624  		res = h.call(cargs, vr.Smap)
   625  		err = res.err
   626  		freeCR(res)
   627  		if err == nil {
   628  			return
   629  		}
   630  		if !cos.IsRetriableConnErr(err) {
   631  			break
   632  		}
   633  		sleep += sleep / 2
   634  	}
   635  	if !nlog.Stopping() {
   636  		nlog.Errorf("%s: failed to request election from the _next_ primary %s: %v",
   637  			h.si, nextPrimaryProxy.StringEx(), err)
   638  	}
   639  	return
   640  }
   641  
   642  func (h *htrun) voteOnProxy(daemonID, currPrimaryID string) (bool, error) {
   643  	// First: Check last keepalive timestamp. If the proxy was recently successfully reached,
   644  	// this will always vote no, as we believe the original proxy is still alive.
   645  	if !h.keepalive.timeToPing(currPrimaryID) {
   646  		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
   647  			nlog.Warningf("Primary %s is still alive", currPrimaryID)
   648  		}
   649  		return false, nil
   650  	}
   651  
   652  	// Second: Vote according to whether or not the candidate is the Highest Random Weight remaining
   653  	// in the Smap
   654  	smap := h.owner.smap.get()
   655  	nextPrimaryProxy, err := smap.HrwProxy(currPrimaryID)
   656  	if err != nil {
   657  		return false, fmt.Errorf("error executing HRW: %v", err)
   658  	}
   659  
   660  	vote := nextPrimaryProxy.ID() == daemonID
   661  	if cmn.Rom.FastV(4, cos.SmoduleAIS) {
   662  		nlog.Infof("%s: voting '%t' for %s", h, vote, daemonID)
   663  	}
   664  	return vote, nil
   665  }