
     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     7  import (
     8  	"context"
     9  	"crypto/tls"
    10  	"crypto/x509"
    11  	"errors"
    12  	"fmt"
    13  	"io"
    14  	"log"
    15  	"net"
    16  	"net/http"
    17  	"net/url"
    18  	"os"
    19  	rdebug "runtime/debug"
    20  	"strings"
    21  	"sync"
    22  	"syscall"
    23  	"time"
    25  	""
    26  	""
    27  	""
    28  	""
    29  	""
    30  	""
    31  	""
    32  	""
    33  	""
    34  	""
    35  	""
    36  	jsoniter ""
    37  	""
    38  )
    40  const ua = "aisnode"
    42  const unknownDaemonID = "unknown"
    44  const whatRenamedLB = "renamedlb"
    46  // common error formats
    47  const (
    48  	fmtErrInsuffMpaths1 = "%s: not enough mountpaths (%d) to configure %s as %d-way mirror"
    49  	fmtErrInsuffMpaths2 = "%s: not enough mountpaths (%d) to replicate %s (configured) %d times"
    50  	fmtErrInvaldAction  = "invalid action %q (expected one of %v)"
    51  	fmtUnknownQue       = "unexpected query [what=%s]"
    52  	fmtNested           = "%s: nested (%v): failed to %s %q: %v"
    53  	fmtOutside          = "%s is present (vs requested 'flt-outside'(%d))"
    54  	fmtFailedRejoin     = "%s failed to rejoin cluster: %v(%d)"
    55  	fmtSelfNotPresent   = "%s (self) not present in %s"
    56  )
// intra-cluster control messages
type (
	// cluster-wide control information - replicated, versioned, and synchronized
	// usage: elect new primary, join cluster, ...
	//
	// cluMeta bundles pointers to the individual metadata objects (Smap, BMD,
	// RMD, EtlMD, global config) together with the sender's node descriptor (SI),
	// a PrimeTime timestamp value, and cluster-info flags. Each field is
	// serialized under the JSON key given in its tag.
	cluMeta struct {
		Smap      *smapX        `json:"smap"`
		BMD       *bucketMD     `json:"bmd"`
		RMD       *rebMD        `json:"rmd"`
		EtlMD     *etlMD        `json:"etlMD"`
		Config    *globalConfig `json:"config"`
		SI        *meta.Snode   `json:"si"`
		PrimeTime int64         `json:"prime_time"`
		Flags     cifl.Flags    `json:"flags"`
	}
	// extend control msg: ActionMsg with an extra information for node <=> node control plane communications
	//
	// The two version fields carry the `,string` JSON option, i.e. they are
	// encoded as quoted decimal strings rather than JSON numbers.
	aisMsg struct {
		apc.ActMsg
		UUID       string `json:"uuid"` // cluster-wide ID of this action (operation, transaction)
		BMDVersion int64  `json:"bmdversion,string"`
		RMDVersion int64  `json:"rmdversion,string"`
	}
	// cleanmark is a small JSON-serializable record holding a pair of versions
	// (old and new, both encoded as strings) plus `interrupted` and `restarted`
	// flags. NOTE(review): which metadata the versions refer to is not visible
	// in this part of the file — confirm against the code that produces it.
	cleanmark struct {
		OldVer      int64 `json:"oldver,string"`
		NewVer      int64 `json:"newver,string"`
		Interrupted bool  `json:"interrupted"`
		Restarted   bool  `json:"restarted"`
	}
)
// Types used by the intra-cluster call machinery (unicast and broadcast),
// the HTTP server wrapper, and related helpers.
type (
	// byteRanges pairs a raw range specification with the total size it applies to.
	byteRanges struct {
		Range string // value of the cos.HdrRange header (the original reference link is missing from this copy)
		Size  int64  // size, in bytes
	}
	// callResult contains HTTP response.
	// Instances are recycled through a sync.Pool (see allocCR/freeCR).
	callResult struct {
		v       any // unmarshalled value (only when requested via `callArgs.v`)
		err     error
		si      *meta.Snode
		header  http.Header
		details string
		bytes   []byte // response bytes (raw)
		status  int
	}
	// sliceResults is a list of call results; bcastResults adds a mutex next to
	// such a list (presumably to serialize appends from concurrent calls — confirm at usage).
	sliceResults []*callResult
	bcastResults struct {
		s  sliceResults
		mu sync.Mutex
	}
	// cresv: call result value factory and result-type specific decoder
	// (used in both callArgs and bcastArgs)
	//
	// newV returns a fresh, empty value of the result type; read stores such a
	// value in the call result and decodes the response body into it.
	cresv interface {
		newV() any
		read(*callResult, io.Reader)
	}
	// callArgs: unicast control-plane call arguments
	callArgs struct {
		cresv   cresv
		si      *meta.Snode
		req     cmn.HreqArgs
		timeout time.Duration
	}
	// bcastArgs: intra-cluster broadcast call args
	bcastArgs struct {
		cresv             cresv          // call result value (comment above)
		smap              *smapX         // Smap to use
		network           string         // one of the cmn.KnownNetworks
		req               cmn.HreqArgs   // args
		nodes             []meta.NodeMap // broadcast destinations - map(s)
		selected          meta.Nodes     // broadcast destinations - slice of selected few
		timeout           time.Duration  // call timeout
		to                int            // (all targets, all proxies, all nodes) enum
		nodeCount         int            // m.b. greater or equal destination count
		ignoreMaintenance bool           // do not skip nodes in maintenance mode
		async             bool           // ignore results
	}
	// networkHandler binds a resource to its HTTP handler and to the network
	// access setting under which the handler is exposed.
	networkHandler struct {
		r   string           // resource
		h   http.HandlerFunc // handler
		net netAccess        // handler network access
	}
	// nodeRegPool is a list of cluMeta values.
	nodeRegPool []cluMeta
	// what data to omit when sending request/response (join-cluster, kalive)
	cmetaFillOpt struct {
		htext         htext
		skipSmap      bool
		skipBMD       bool
		skipRMD       bool
		skipConfig    bool
		skipEtlMD     bool
		fillRebMarker bool
		skipPrimeTime bool
	}
	// getMaxCii collects cluster-info responses from other nodes (see its `do`
	// method) and tracks the one with the greatest Smap version, the greatest
	// config version seen, and a confirmation counter.
	getMaxCii struct {
		h          *htrun
		maxCii     *cifl.Info
		query      url.Values
		maxConfVer int64
		timeout    time.Duration
		mu         sync.Mutex
		cnt        int
		checkAll   bool
	}
	httpMuxers map[string]*mux.ServeMux // by http.Method
	// http server and http runner (common for proxy and target)
	// The embedded mutex is held while the underlying http.Server is being
	// set up (listen) and shut down (shutdown).
	netServer struct {
		sync.Mutex
		s             *http.Server
		muxers        httpMuxers
		sndRcvBufSize int
	}
	// nlogWriter is a stateless io.Writer that forwards what it receives to nlog
	// (see its Write method).
	nlogWriter struct{}
)
// error types
//
// The four `detail`-only types report metadata UUID disagreements; their
// Error methods return `detail` verbatim. The remaining types carry the
// context their Error methods format.
type (
	errTgtBmdUUIDDiffer struct{ detail string } // BMD & its uuid
	errPrxBmdUUIDDiffer struct{ detail string }
	errBmdUUIDSplit     struct{ detail string }
	errSmapUUIDDiffer   struct{ detail string } // ditto Smap
	// errNodeNotFound: node `id` is not present in `smap`; `si` is the reporting
	// node and `msg` is free text that precedes the word "node" in the message.
	errNodeNotFound     struct {
		msg  string
		id   string
		si   *meta.Snode
		smap *smapX
	}
	// errNotEnoughTargets: `smap` has fewer active targets than `required`.
	errNotEnoughTargets struct {
		si       *meta.Snode
		smap     *smapX
		required int // should at least contain
	}
	// errDowngrade: an attempt to move from `from` to `to` that constitutes a downgrade.
	errDowngrade struct {
		si       *meta.Snode
		from, to string
	}
	// errNotPrimary: node `si` is not the primary according to `smap`;
	// `detail` is optional extra text appended to the message.
	errNotPrimary struct {
		si     *meta.Snode
		smap   *smapX
		detail string
	}
	// errNoUnregister: the error text is the `action` string itself.
	errNoUnregister struct {
		action string
	}
)
   217  var allHTTPverbs = []string{
   218  	http.MethodGet, http.MethodHead, http.MethodPost, http.MethodPut, http.MethodPatch,
   219  	http.MethodDelete, http.MethodConnect, http.MethodOptions, http.MethodTrace,
   220  }
   222  var (
   223  	errRebalanceDisabled = errors.New("rebalance is disabled")
   224  	errForwarded         = errors.New("forwarded")
   225  	errSendingResp       = errors.New("err-sending-resp")
   226  	errFastKalive        = errors.New("cannot fast-keepalive")
   227  )
   229  // BMD uuid errs
   230  var errNoBMD = errors.New("no bucket metadata")
   232  func (e *errTgtBmdUUIDDiffer) Error() string { return e.detail }
   233  func (e *errBmdUUIDSplit) Error() string     { return e.detail }
   234  func (e *errPrxBmdUUIDDiffer) Error() string { return e.detail }
   235  func (e *errSmapUUIDDiffer) Error() string   { return e.detail }
   236  func (e *errNodeNotFound) Error() string {
   237  	return fmt.Sprintf("%s: %s node %s not present in the %s",, e.msg,, e.smap)
   238  }
   240  /////////////////////
   241  // errNoUnregister //
   242  /////////////////////
   244  func (e *errNoUnregister) Error() string { return e.action }
   246  func isErrNoUnregister(err error) (ok bool) {
   247  	_, ok = err.(*errNoUnregister)
   248  	return
   249  }
   251  //////////////////
   252  // errDowngrade //
   253  //////////////////
   255  func newErrDowngrade(si *meta.Snode, from, to string) *errDowngrade {
   256  	return &errDowngrade{si, from, to}
   257  }
   259  func (e *errDowngrade) Error() string {
   260  	return fmt.Sprintf("%s: attempt to downgrade %s to %s",, e.from,
   261  }
   263  func isErrDowngrade(err error) bool {
   264  	if _, ok := err.(*errDowngrade); ok {
   265  		return true
   266  	}
   267  	erd := &errDowngrade{}
   268  	return errors.As(err, &erd)
   269  }
   271  /////////////////////////
   272  // errNotEnoughTargets //
   273  /////////////////////////
   275  func (e *errNotEnoughTargets) Error() string {
   276  	return fmt.Sprintf("%s: not enough targets: %s, need %d, have %d",
   277, e.smap, e.required, e.smap.CountActiveTs())
   278  }
   280  ///////////////////
   281  // errNotPrimary //
   282  ///////////////////
   284  func newErrNotPrimary(si *meta.Snode, smap *smapX, detail ...string) *errNotPrimary {
   285  	if len(detail) == 0 {
   286  		return &errNotPrimary{si, smap, ""}
   287  	}
   288  	return &errNotPrimary{si, smap, detail[0]}
   289  }
   291  func (e *errNotPrimary) Error() string {
   292  	var present, detail string
   293  	if !e.smap.isPresent( {
   294  		present = "not present in the "
   295  	}
   296  	if e.detail != "" {
   297  		detail = ": " + e.detail
   298  	}
   299  	return fmt.Sprintf("%s is not primary [%s%s]%s",, present, e.smap.StringEx(), detail)
   300  }
   302  ///////////////
   303  // bargsPool & callArgsPool
   304  ///////////////
   306  var (
   307  	bargsPool, cargsPool sync.Pool
   308  	bargs0               bcastArgs
   309  	cargs0               callArgs
   310  )
   312  func allocBcArgs() (a *bcastArgs) {
   313  	if v := bargsPool.Get(); v != nil {
   314  		a = v.(*bcastArgs)
   315  		return
   316  	}
   317  	return &bcastArgs{}
   318  }
   320  func freeBcArgs(a *bcastArgs) {
   321  	sel := a.selected
   322  	*a = bargs0
   323  	if sel != nil {
   324  		a.selected = sel[:0]
   325  	}
   326  	bargsPool.Put(a)
   327  }
   329  func allocCargs() (a *callArgs) {
   330  	if v := cargsPool.Get(); v != nil {
   331  		a = v.(*callArgs)
   332  		return
   333  	}
   334  	return &callArgs{}
   335  }
   337  func freeCargs(a *callArgs) {
   338  	*a = cargs0
   339  	cargsPool.Put(a)
   340  }
   342  ///////////////////////
   343  // call result pools //
   344  ///////////////////////
   346  var (
   347  	resultsPool sync.Pool
   348  	callResPool sync.Pool
   349  	callRes0    callResult
   350  )
   352  func allocCR() (a *callResult) {
   353  	if v := callResPool.Get(); v != nil {
   354  		a = v.(*callResult)
   355  		debug.Assert( == nil)
   356  		return
   357  	}
   358  	return &callResult{}
   359  }
   361  func freeCR(res *callResult) {
   362  	*res = callRes0
   363  	callResPool.Put(res)
   364  }
   366  func allocBcastRes(n int) sliceResults {
   367  	if v := resultsPool.Get(); v != nil {
   368  		a := v.(*sliceResults)
   369  		return *a
   370  	}
   371  	return make(sliceResults, 0, n)
   372  }
   374  func freeBcastRes(results sliceResults) {
   375  	for _, res := range results {
   376  		freeCR(res)
   377  	}
   378  	results = results[:0]
   379  	resultsPool.Put(&results)
   380  }
   382  //
   383  // all `cresv` implementations
   384  // and common read-body methods w/ optional value-unmarshaling
   385  //
// One empty struct per result type; each implements `cresv`. The arrow in the
// trailing comment names the type that the corresponding newV returns.
type (
	cresCM struct{} // -> cluMeta; selectively and alternatively, via `recvCluMetaBytes`
	cresSM struct{} // -> smapX
	cresND struct{} // -> meta.Snode
	cresBA struct{} // -> meta.RemAisVec
	cresEI struct{} // -> etl.InfoList
	cresEL struct{} // -> etl.Logs
	cresEM struct{} // -> etl.CPUMemUsed
	cresIC struct{} // -> icBundle
	cresBM struct{} // -> bucketMD
	cresLso   struct{} // -> cmn.LsoRes
	cresBsumm struct{} // -> cmn.AllBsummResults
)
// interface guards: compile-time checks that every type above implements cresv
var (
	_ cresv = cresCM{}
	_ cresv = cresLso{}
	_ cresv = cresSM{}
	_ cresv = cresND{}
	_ cresv = cresBA{}
	_ cresv = cresEI{}
	_ cresv = cresEL{}
	_ cresv = cresEM{}
	_ cresv = cresIC{}
	_ cresv = cresBM{}
	_ cresv = cresBsumm{}
)
   416  func (res *callResult) read(body io.Reader)  { res.bytes, res.err = io.ReadAll(body) }
   417  func (res *callResult) jread(body io.Reader) { res.err = jsoniter.NewDecoder(body).Decode(res.v) }
   419  func (res *callResult) mread(body io.Reader) {
   420  	vv, ok := res.v.(msgp.Decodable)
   421  	debug.Assert(ok)
   422  	buf, slab := memsys.PageMM().AllocSize(cmn.MsgpLsoBufSize)
   423  	res.err = vv.DecodeMsg(msgp.NewReaderBuf(body, buf))
   424  	slab.Free(buf)
   425  }
   427  func (cresCM) newV() any                              { return &cluMeta{} }
   428  func (c cresCM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   430  func (cresLso) newV() any                              { return &cmn.LsoRes{} }
   431  func (c cresLso) read(res *callResult, body io.Reader) { res.v = c.newV(); res.mread(body) }
   433  func (cresSM) newV() any                              { return &smapX{} }
   434  func (c cresSM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   436  func (cresND) newV() any                              { return &meta.Snode{} }
   437  func (c cresND) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   439  func (cresBA) newV() any                              { return &meta.RemAisVec{} }
   440  func (c cresBA) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   442  func (cresEI) newV() any                              { return &etl.InfoList{} }
   443  func (c cresEI) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   445  func (cresEL) newV() any                              { return &etl.Logs{} }
   446  func (c cresEL) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   448  func (cresEM) newV() any                              { return &etl.CPUMemUsed{} }
   449  func (c cresEM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   451  func (cresIC) newV() any                              { return &icBundle{} }
   452  func (c cresIC) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   454  func (cresBM) newV() any                              { return &bucketMD{} }
   455  func (c cresBM) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   457  func (cresBsumm) newV() any                              { return &cmn.AllBsummResults{} }
   458  func (c cresBsumm) read(res *callResult, body io.Reader) { res.v = c.newV(); res.jread(body) }
   460  ////////////////
   461  // nlogWriter //
   462  ////////////////
   464  const tlsHandshakeErrorPrefix = "http: TLS handshake error"
   466  func (*nlogWriter) Write(p []byte) (int, error) {
   467  	s := string(p)
   468  	// Ignore TLS handshake errors (see:
   469  	if strings.Contains(s, tlsHandshakeErrorPrefix) {
   470  		return len(p), nil
   471  	}
   473  	nlog.Errorln(s)
   475  	stacktrace := rdebug.Stack()
   476  	nlog.Errorln(string(stacktrace))
   477  	return len(p), nil
   478  }
   480  ///////////////
   481  // netServer //
   482  ///////////////
   484  // Override muxer ServeHTTP to support proxying HTTPS requests. Clients
   485  // initiate all HTTPS requests with CONNECT method instead of GET/PUT etc.
   486  func (server *netServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   487  	if r.Method != http.MethodConnect {
   488  		server.muxers.ServeHTTP(w, r)
   489  		return
   490  	}
   492  	// TODO: add support for caching HTTPS requests
   493  	destConn, err := net.DialTimeout("tcp", r.Host, 10*time.Second)
   494  	if err != nil {
   495  		cmn.WriteErr(w, r, err, http.StatusServiceUnavailable)
   496  		return
   497  	}
   499  	// Second, hijack the connection. A kind of man-in-the-middle attack
   500  	// From this point on, this function is responsible for HTTP connection
   501  	hijacker, ok := w.(http.Hijacker)
   502  	if !ok {
   503  		cmn.WriteErr(w, r, errors.New("response writer does not support hijacking"),
   504  			http.StatusInternalServerError)
   505  		return
   506  	}
   508  	// First, send that everything is OK. Trying to write a header after
   509  	// hijacking generates a warning and nothing works
   510  	w.WriteHeader(http.StatusOK)
   512  	clientConn, _, err := hijacker.Hijack()
   513  	if err != nil {
   514  		// NOTE: cannot send error because we have already written a header.
   515  		nlog.Errorln(err)
   516  		return
   517  	}
   519  	// Third, start transparently sending data between source and destination
   520  	// by creating a tunnel between them
   521  	transfer := func(destination io.WriteCloser, source io.ReadCloser) {
   522  		io.Copy(destination, source)
   523  		source.Close()
   524  		destination.Close()
   525  	}
   527  	// NOTE: it looks like double closing both connections.
   528  	// Need to check how the tunnel works
   529  	go transfer(destConn, clientConn)
   530  	go transfer(clientConn, destConn)
   531  }
   533  func (server *netServer) listen(addr string, logger *log.Logger, tlsConf *tls.Config, config *cmn.Config) (err error) {
   534  	var (
   535  		httpHandler = server.muxers
   536  		tag         = "HTTP"
   537  		retried     bool
   538  	)
   539  	server.Lock()
   540  	server.s = &http.Server{
   541  		Addr:              addr,
   542  		Handler:           httpHandler,
   543  		ErrorLog:          logger,
   544  		ReadHeaderTimeout: apc.ReadHeaderTimeout,
   545  	}
   546  	if timeout, isSet := cmn.ParseReadHeaderTimeout(); isSet { // optional env var
   547  		server.s.ReadHeaderTimeout = timeout
   548  	}
   549  	if server.sndRcvBufSize > 0 && !config.Net.HTTP.UseHTTPS {
   550  		server.s.ConnState = server.connStateListener // setsockopt; see also cmn.NewTransport
   551  	}
   552  	server.s.TLSConfig = tlsConf
   553  	server.Unlock()
   554  retry:
   555  	if config.Net.HTTP.UseHTTPS {
   556  		tag = "HTTPS"
   557  		err = server.s.ListenAndServeTLS(config.Net.HTTP.Certificate, config.Net.HTTP.CertKey)
   558  	} else {
   559  		err = server.s.ListenAndServe()
   560  	}
   561  	if err == http.ErrServerClosed {
   562  		return nil
   563  	}
   564  	if errors.Is(err, syscall.EADDRINUSE) && !retried {
   565  		nlog.Warningf("%q - shutting-down-and-restarting or else? will retry once...", err)
   566  		time.Sleep(max(5*time.Second, config.Timeout.MaxKeepalive.D()))
   567  		retried = true
   568  		goto retry
   569  	}
   570  	nlog.Errorf("%s terminated with error: %v", tag, err)
   571  	return
   572  }
   574  func newTLS(conf *cmn.HTTPConf) (tlsConf *tls.Config, err error) {
   575  	var (
   576  		pool       *x509.CertPool
   577  		caCert     []byte
   578  		clientAuth = tls.ClientAuthType(conf.ClientAuthTLS)
   579  	)
   580  	if clientAuth > tls.RequestClientCert {
   581  		if caCert, err = os.ReadFile(conf.ClientCA); err != nil {
   582  			return
   583  		}
   584  		pool = x509.NewCertPool()
   585  		if ok := pool.AppendCertsFromPEM(caCert); !ok {
   586  			return nil, fmt.Errorf("tls: failed to append CA certs from PEM: %q", conf.ClientCA)
   587  		}
   588  	}
   589  	tlsConf = &tls.Config{ClientAuth: clientAuth, ClientCAs: pool}
   590  	return
   591  }
   593  func (server *netServer) connStateListener(c net.Conn, cs http.ConnState) {
   594  	if cs != http.StateNew {
   595  		return
   596  	}
   597  	tcpconn, ok := c.(*net.TCPConn)
   598  	cos.Assert(ok)
   599  	rawconn, _ := tcpconn.SyscallConn()
   600  	args := cmn.TransportArgs{SndRcvBufSize: server.sndRcvBufSize}
   601  	rawconn.Control(args.ConnControl(rawconn))
   602  }
   604  func (server *netServer) shutdown(config *cmn.Config) {
   605  	server.Lock()
   606  	defer server.Unlock()
   607  	if server.s == nil {
   608  		return
   609  	}
   610  	ctx, cancel := context.WithTimeout(context.Background(), config.Timeout.MaxHostBusy.D())
   611  	if err := server.s.Shutdown(ctx); err != nil {
   612  		nlog.Infoln("http server shutdown err:", err)
   613  	}
   614  	cancel()
   615  }
   617  ////////////////
   618  // httpMuxers //
   619  ////////////////
   621  // interface guard
   622  var _ http.Handler = (*httpMuxers)(nil)
   624  func newMuxers() httpMuxers {
   625  	m := make(httpMuxers, len(allHTTPverbs))
   626  	for _, v := range allHTTPverbs {
   627  		m[v] = mux.NewServeMux()
   628  	}
   629  	return m
   630  }
   632  // ServeHTTP dispatches the request to the handler whose
   633  // pattern most closely matches the request URL.
   634  func (m httpMuxers) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   635  	if sm, ok := m[r.Method]; ok {
   636  		sm.ServeHTTP(w, r)
   637  		return
   638  	}
   639  	w.WriteHeader(http.StatusBadRequest)
   640  }
   642  /////////////////
   643  // clusterInfo //
   644  /////////////////
   646  func (p *proxy) ciiFill(cii *cifl.Info) {
   647  	p.htrun.fill(cii)
   648  	onl := true
   649  	flt := nlFilter{Kind: apc.ActRebalance, OnlyRunning: &onl}
   650  	if nl := p.notifs.find(flt); nl != nil {
   651  		cii.Flags = cii.Flags.Set(cifl.Rebalancing)
   652  	}
   653  }
   655  func (t *target) ciiFill(cii *cifl.Info) {
   656  	t.htrun.fill(cii)
   657  	marked := xreg.GetRebMarked()
   658  	if marked.Xact != nil {
   659  		cii.Flags = cii.Flags.Set(cifl.Rebalancing)
   660  	}
   661  	if marked.Interrupted {
   662  		cii.Flags = cii.Flags.Set(cifl.RebalanceInterrupted)
   663  	}
   664  	if marked.Restarted {
   665  		cii.Flags = cii.Flags.Set(cifl.Restarted)
   666  	}
   667  	marked = xreg.GetResilverMarked()
   668  	if marked.Xact != nil {
   669  		cii.Flags = cii.Flags.Set(cifl.Resilvering)
   670  	}
   671  	if marked.Interrupted {
   672  		cii.Flags = cii.Flags.Set(cifl.ResilverInterrupted)
   673  	}
   674  }
   676  func (h *htrun) fill(cii *cifl.Info) {
   677  	var (
   678  		smap = h.owner.smap.get()
   679  		bmd  = h.owner.bmd.get()
   680  		rmd  = h.owner.rmd.get()
   681  		etl  = h.owner.etl.get()
   682  	)
   683  	smap.fill(cii)
   684  	cii.BMD.Version = bmd.version()
   685  	cii.BMD.UUID = bmd.UUID
   686  	cii.RMD.Version = rmd.Version
   687  	cii.Config.Version = h.owner.config.version()
   688  	cii.EtlMD.Version = etl.version()
   689  	if h.ClusterStarted() {
   690  		cii.Flags = cii.Flags.Set(cifl.ClusterStarted)
   691  	}
   692  	if h.NodeStarted() {
   693  		cii.Flags = cii.Flags.Set(cifl.NodeStarted)
   694  	}
   695  }
   697  func (smap *smapX) fill(cii *cifl.Info) {
   698  	cii.Smap.Version = smap.version()
   699  	cii.Smap.UUID = smap.UUID
   700  	if smap.Primary != nil {
   701  		cii.Smap.Primary.CtrlURL = smap.Primary.URL(cmn.NetIntraControl)
   702  		cii.Smap.Primary.PubURL = smap.Primary.URL(cmn.NetPublic)
   703  		cii.Smap.Primary.ID = smap.Primary.ID()
   704  		if voteInProgress() != nil {
   705  			cii.Flags = cii.Flags.Set(cifl.VoteInProgress)
   706  		}
   707  	}
   708  }
   710  ///////////////
   711  // getMaxCii //
   712  ///////////////
   714  func (c *getMaxCii) do(si *meta.Snode, wg cos.WG, smap *smapX) {
   715  	var cii *cifl.Info
   716  	body, _, err := c.h.reqHealth(si, c.timeout, c.query, smap)
   717  	if err != nil {
   718  		goto ret
   719  	}
   720  	if cii = extractCii(body, smap,, si); cii == nil {
   721  		goto ret
   722  	}
   723  	if cii.Smap.UUID != smap.UUID {
   724  		if cii.Smap.UUID == "" {
   725  			goto ret
   726  		}
   727  		if smap.UUID != "" {
   728  			// FATAL: cluster integrity error (cie)
   729  			cos.ExitLogf("%s: split-brain uuid [%s %s] vs %+v", ciError(10), c.h, smap.StringEx(), cii.Smap)
   730  		}
   731  	}
   733  	if c.maxCii.Smap.Version < cii.Smap.Version {
   734  		// reset confirmation count if there's any sign of disagreement
   735  		if c.maxCii.Smap.Primary.ID != cii.Smap.Primary.ID || cii.Flags.IsSet(cifl.VoteInProgress) {
   736  			c.cnt = 1
   737  		} else {
   738  			c.cnt++
   739  		}
   740  		c.maxCii = cii
   741  	} else if c.maxCii.SmapEqual(cii) {
   742  		c.cnt++
   743  	}
   744  	if c.maxConfVer < cii.Config.Version {
   745  		c.maxConfVer = cii.Config.Version
   746  	}
   748  ret:
   749  	wg.Done()
   750  }
   752  // have enough confirmations?
   753  func (c *getMaxCii) haveEnough() (yes bool) {
   755  	yes = c.cnt >= maxVerConfirmations
   757  	return
   758  }
   760  func extractCii(body []byte, smap *smapX, self, si *meta.Snode) *cifl.Info {
   761  	var cii cifl.Info
   762  	if err := jsoniter.Unmarshal(body, &cii); err != nil {
   763  		nlog.Errorf("%s: failed to unmarshal clusterInfo, err: %v", self, err)
   764  		return nil
   765  	}
   766  	if smap.UUID != cii.Smap.UUID {
   767  		nlog.Errorf("%s: Smap have different UUIDs: %s and %s from %s", self, smap.UUID, cii.Smap.UUID, si)
   768  		return nil
   769  	}
   770  	return &cii
   771  }
   773  ////////////////
   774  // apiRequest //
   775  ////////////////
// apiRequest carries the inputs (`in:`) and the results (`out:`) of parsing
// an API request URL. Instances are recycled via apiReqAlloc/apiReqFree.
type apiRequest struct {
	bck *meta.Bck // out: initialized bucket
	// URL query: the conventional/slow and
	// the fast alternative tailored exclusively for the datapath (either/or)
	dpq   *dpq
	query url.Values
	prefix []string // in: URL must start with these items
	items  []string // out: URL items after the prefix
	after  int // in: the number of items after the prefix
	bckIdx int // in: ordinal number of bucket in URL (some paths starts with extra items: EC & ETL)
}
   792  var (
   793  	apiReqPool sync.Pool
   794  	apireq0    apiRequest
   795  )
   797  func apiReqAlloc(after int, prefix []string, useDpq bool) (a *apiRequest) {
   798  	if v := apiReqPool.Get(); v != nil {
   799  		a = v.(*apiRequest)
   800  	} else {
   801  		a = &apiRequest{}
   802  	}
   803  	a.after, a.prefix = after, prefix
   804  	if useDpq {
   805  		a.dpq = dpqAlloc()
   806  	}
   807  	return a
   808  }
   810  func apiReqFree(a *apiRequest) {
   811  	if a.dpq != nil {
   812  		dpqFree(a.dpq)
   813  	}
   814  	*a = apireq0
   815  	apiReqPool.Put(a)
   816  }
   818  //
   819  // misc helpers
   820  //
   822  func newBckFromQ(bckName string, query url.Values, dpq *dpq) (*meta.Bck, error) {
   823  	bck := _bckFromQ(bckName, query, dpq)
   824  	normp, err := cmn.NormalizeProvider(bck.Provider)
   825  	if err == nil {
   826  		bck.Provider = normp
   827  		err = bck.Validate()
   828  	}
   829  	return bck, err
   830  }
   832  func newQbckFromQ(bckName string, query url.Values, dpq *dpq) (*cmn.QueryBcks, error) {
   833  	qbck := (*cmn.QueryBcks)(_bckFromQ(bckName, query, dpq))
   834  	return qbck, qbck.Validate()
   835  }
   837  func _bckFromQ(bckName string, query url.Values, dpq *dpq) *meta.Bck {
   838  	var (
   839  		provider  string
   840  		namespace cmn.Ns
   841  	)
   842  	if query != nil {
   843  		debug.Assert(dpq == nil)
   844  		provider = query.Get(apc.QparamProvider)
   845  		namespace = cmn.ParseNsUname(query.Get(apc.QparamNamespace))
   846  	} else {
   847  		provider = dpq.bck.provider
   848  		namespace = cmn.ParseNsUname(dpq.bck.namespace)
   849  	}
   850  	return &meta.Bck{Name: bckName, Provider: provider, Ns: namespace}
   851  }
   853  func newBckFromQuname(query url.Values, required bool) (*meta.Bck, error) {
   854  	uname := query.Get(apc.QparamBckTo)
   855  	if uname == "" {
   856  		if required {
   857  			return nil, fmt.Errorf("missing %q query parameter", apc.QparamBckTo)
   858  		}
   859  		return nil, nil
   860  	}
   861  	bck, objName := cmn.ParseUname(uname)
   862  	if objName != "" {
   863  		return nil, fmt.Errorf("bucket %s: unexpected non-empty object name %q", bck, objName)
   864  	}
   865  	if err := bck.Validate(); err != nil {
   866  		return nil, err
   867  	}
   868  	return meta.CloneBck(&bck), nil
   869  }
   871  func _reMirror(bprops, nprops *cmn.Bprops) bool {
   872  	if !bprops.Mirror.Enabled && nprops.Mirror.Enabled {
   873  		return true
   874  	}
   875  	if bprops.Mirror.Enabled && nprops.Mirror.Enabled {
   876  		return bprops.Mirror.Copies != nprops.Mirror.Copies
   877  	}
   878  	return false
   879  }
   881  func _reEC(bprops, nprops *cmn.Bprops, bck *meta.Bck, smap *smapX) (targetCnt int, yes bool) {
   882  	if !nprops.EC.Enabled {
   883  		if bprops.EC.Enabled {
   884  			// abort running ec-encode xaction, if exists
   885  			flt := xreg.Flt{Kind: apc.ActECEncode, Bck: bck}
   886  			xreg.DoAbort(flt, errors.New("ec-disabled"))
   887  		}
   888  		return
   889  	}
   890  	if smap != nil {
   891  		targetCnt = smap.CountActiveTs()
   892  	}
   893  	if !bprops.EC.Enabled ||
   894  		(bprops.EC.DataSlices != nprops.EC.DataSlices || bprops.EC.ParitySlices != nprops.EC.ParitySlices) {
   895  		yes = true
   896  	}
   897  	return
   898  }