github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/proxy.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"io"
    11  	"net"
    12  	"net/http"
    13  	"net/url"
    14  	"os"
    15  	"path"
    16  	"path/filepath"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  	"syscall"
    21  	"time"
    22  
    23  	"github.com/NVIDIA/aistore/ais/s3"
    24  	"github.com/NVIDIA/aistore/api/apc"
    25  	"github.com/NVIDIA/aistore/cmn"
    26  	"github.com/NVIDIA/aistore/cmn/archive"
    27  	"github.com/NVIDIA/aistore/cmn/atomic"
    28  	"github.com/NVIDIA/aistore/cmn/cifl"
    29  	"github.com/NVIDIA/aistore/cmn/cos"
    30  	"github.com/NVIDIA/aistore/cmn/debug"
    31  	"github.com/NVIDIA/aistore/cmn/feat"
    32  	"github.com/NVIDIA/aistore/cmn/fname"
    33  	"github.com/NVIDIA/aistore/cmn/k8s"
    34  	"github.com/NVIDIA/aistore/cmn/mono"
    35  	"github.com/NVIDIA/aistore/cmn/nlog"
    36  	"github.com/NVIDIA/aistore/core"
    37  	"github.com/NVIDIA/aistore/core/meta"
    38  	"github.com/NVIDIA/aistore/ext/dsort"
    39  	"github.com/NVIDIA/aistore/memsys"
    40  	"github.com/NVIDIA/aistore/nl"
    41  	"github.com/NVIDIA/aistore/stats"
    42  	"github.com/NVIDIA/aistore/xact"
    43  	"github.com/NVIDIA/aistore/xact/xreg"
    44  	jsoniter "github.com/json-iterator/go"
    45  )
    46  
const (
	// lsotag is the "list-objects" tag; in this file it prefixes the string
	// passed to forwardCP when handling list-objects requests (see httpbckget).
	lsotag = "list-objects"
)
    50  
type (
	// ClusterMountpathsRaw carries raw JSON messages under the "targets" key.
	// NOTE(review): presumably one raw mountpath listing per target - confirm against users of this type.
	ClusterMountpathsRaw struct {
		Targets cos.JSONRawMsgs `json:"targets"`
	}

	// proxy runner
	// (embeds htrun; see init() and Run() for how the individual members get initialized)
	proxy struct {
		htrun
		authn      *authManager
		metasyncer *metasyncer
		ic         ic
		qm         lsobjMem
		rproxy     reverseProxy
		notifs     notifs
		lstca      lstca
		reg        struct {
			pool nodeRegPool
			mu   sync.RWMutex
		}
		remais struct {
			meta.RemAisVec
			old []*meta.RemAis // to facilitate a2u resolution (and, therefore, offline access)
			mu  sync.RWMutex
			in  atomic.Bool
		}
		settingNewPrimary atomic.Bool // primary executing "set new primary" request (state)
		readyToFastKalive atomic.Bool // primary can accept fast keepalives
	}
)
    80  
// interface guard: compile-time check that *proxy satisfies cos.Runner
var _ cos.Runner = (*proxy)(nil)

// Name returns the runner name of the proxy, apc.Proxy.
func (*proxy) Name() string { return apc.Proxy } // as cos.Runner
    85  
    86  func (p *proxy) initClusterCIDR() {
    87  	if nodeCIDR := os.Getenv("AIS_CLUSTER_CIDR"); nodeCIDR != "" {
    88  		_, network, err := net.ParseCIDR(nodeCIDR)
    89  		p.si.LocalNet = network
    90  		cos.AssertNoErr(err)
    91  		nlog.Infof("local network: %+v", *network)
    92  	}
    93  }
    94  
// init performs early proxy initialization: node info and ID, memsys, short-ID
// generator, optional cluster CIDR; it then creates the stats, keepalive, and
// metasync runners and adds them - along with the proxy itself - to daemon.rg.
func (p *proxy) init(config *cmn.Config) {
	p.initSnode(config)

	// (a) get node ID from command-line or env var (see envDaemonID())
	// (b) load existing ID from config file stored under local config `confdir` (compare w/ target)
	// (c) generate a new one (genDaemonID())
	// - in that sequence
	p.si.Init(initPID(config), apc.Proxy)

	memsys.Init(p.SID(), p.SID(), config)

	cos.InitShortID(p.si.Digest())

	p.initClusterCIDR()
	daemon.rg.add(p)

	// stats runner
	ps := &stats.Prunner{}
	startedUp := ps.Init(p)
	daemon.rg.add(ps)
	p.statsT = ps

	// keepalive runner (receives the stats runner and its `startedUp` value)
	k := newPalive(p, ps, startedUp)
	daemon.rg.add(k)
	p.keepalive = k

	// metasync runner
	m := newMetasyncer(p)
	daemon.rg.add(m)
	p.metasyncer = m
}
   124  
   125  func initPID(config *cmn.Config) (pid string) {
   126  	// 1. ID from env
   127  	if pid = envDaemonID(apc.Proxy); pid != "" {
   128  		if err := cos.ValidateDaemonID(pid); err != nil {
   129  			nlog.Errorf("Warning: %v", err)
   130  		}
   131  		return
   132  	}
   133  
   134  	// 2. proxy, K8s
   135  	if k8s.IsK8s() {
   136  		// NOTE: always generate i.e., compute
   137  		if net.ParseIP(k8s.NodeName) != nil { // does not parse as IP
   138  			nlog.Warningf("using K8s node name %q, an IP addr, to compute _persistent_ proxy ID", k8s.NodeName)
   139  		}
   140  		return cos.HashK8sProxyID(k8s.NodeName)
   141  	}
   142  
   143  	// 3. try to read ID
   144  	if pid = readProxyID(config); pid != "" {
   145  		nlog.Infof("p[%s] from %q", pid, fname.ProxyID)
   146  		return
   147  	}
   148  
   149  	// 4. initial deployment
   150  	pid = genDaemonID(apc.Proxy, config)
   151  	err := cos.ValidateDaemonID(pid)
   152  	debug.AssertNoErr(err)
   153  
   154  	// store ID on disk
   155  	err = os.WriteFile(filepath.Join(config.ConfigDir, fname.ProxyID), []byte(pid), cos.PermRWR)
   156  	debug.AssertNoErr(err)
   157  	nlog.Infof("p[%s] ID randomly generated", pid)
   158  	return
   159  }
   160  
   161  func readProxyID(config *cmn.Config) (id string) {
   162  	if b, err := os.ReadFile(filepath.Join(config.ConfigDir, fname.ProxyID)); err == nil {
   163  		id = string(b)
   164  	} else if !os.IsNotExist(err) {
   165  		nlog.Errorln(err)
   166  	}
   167  	return
   168  }
   169  
   170  func (p *proxy) pready(smap *smapX, withRR bool /* also check readiness to rebalance */) error {
   171  	const msg = "%s primary: not ready yet "
   172  	debug.Assert(smap == nil || smap.IsPrimary(p.si))
   173  
   174  	if !p.ClusterStarted() {
   175  		return fmt.Errorf(msg+"(cluster is starting up)", p)
   176  	}
   177  	if withRR && p.owner.rmd.starting.Load() {
   178  		return fmt.Errorf(msg+"(finalizing global rebalancing state)", p)
   179  	}
   180  	return nil
   181  }
   182  
// start proxy runner
//
// Run initializes the embedded htrun and the BMD/EtlMD owners, registers metrics,
// bootstraps the node, initializes the remaining proxy subsystems (authn, reverse
// proxy, notifications, IC, list-objects memory), registers all REST handlers,
// and finally calls p.htrun.run returning its result.
func (p *proxy) Run() error {
	config := cmn.GCO.Get()
	p.htrun.init(config)
	p.owner.bmd = newBMDOwnerPrx(config)
	p.owner.etl = newEtlMDOwnerPrx(config)

	p.owner.bmd.init() // initialize owner and load BMD
	p.owner.etl.init() // initialize owner and load EtlMD

	core.Pinit()

	p.statsT.RegMetrics(p.si) // reg target metrics to common; init Prometheus if used

	// startup sequence - see earlystart.go for the steps and commentary
	p.bootstrap()

	p.authn = newAuthManager()

	p.rproxy.init()

	p.notifs.init(p)
	p.ic.init(p)
	p.qm.init()

	//
	// REST API: register proxy handlers and start listening
	//
	networkHandlers := []networkHandler{
		{r: apc.Reverse, h: p.reverseHandler, net: accessNetPublic},

		// pubnet handlers: cluster must be started
		{r: apc.Buckets, h: p.bucketHandler, net: accessNetPublic},
		{r: apc.Objects, h: p.objectHandler, net: accessNetPublic},
		{r: apc.Download, h: p.downloadHandler, net: accessNetPublic},
		{r: apc.ETL, h: p.etlHandler, net: accessNetPublic},
		{r: apc.Sort, h: p.dsortHandler, net: accessNetPublic},

		{r: apc.IC, h: p.ic.handler, net: accessNetIntraControl},
		{r: apc.Daemon, h: p.daemonHandler, net: accessNetPublicControl},
		{r: apc.Cluster, h: p.clusterHandler, net: accessNetPublicControl},
		{r: apc.Tokens, h: p.tokenHandler, net: accessNetPublic},

		{r: apc.Metasync, h: p.metasyncHandler, net: accessNetIntraControl},
		{r: apc.Health, h: p.healthHandler, net: accessNetPublicControl},
		{r: apc.Vote, h: p.voteHandler, net: accessNetIntraControl},

		{r: apc.Notifs, h: p.notifs.handler, net: accessNetIntraControl},

		// S3 compatibility
		{r: "/" + apc.S3, h: p.s3Handler, net: accessNetPublic},

		// "easy URL"
		{r: "/" + apc.GSScheme, h: p.easyURLHandler, net: accessNetPublic},
		{r: "/" + apc.AZScheme, h: p.easyURLHandler, net: accessNetPublic},
		{r: "/" + apc.AISScheme, h: p.easyURLHandler, net: accessNetPublic},

		// ht:// _or_ S3 compatibility, depending on feature flag
		{r: "/", h: p.rootHandler, net: accessNetPublic},
	}
	p.regNetHandlers(networkHandlers)

	// log the public URL; log control and data URLs only when they differ from the public one
	nlog.Infoln(cmn.NetPublic+":", "\t\t", p.si.PubNet.URL)
	if p.si.PubNet.URL != p.si.ControlNet.URL {
		nlog.Infoln(cmn.NetIntraControl+":", "\t", p.si.ControlNet.URL)
	}
	if p.si.PubNet.URL != p.si.DataNet.URL {
		nlog.Infoln(cmn.NetIntraData+":", "\t", p.si.DataNet.URL)
	}

	dsort.Pinit(p, config)

	return p.htrun.run(config)
}
   257  
   258  func (p *proxy) joinCluster(action string, primaryURLs ...string) (status int, err error) {
   259  	var query url.Values
   260  	if smap := p.owner.smap.get(); smap.isPrimary(p.si) {
   261  		return 0, fmt.Errorf("%s should not be joining: is primary, %s", p, smap.StringEx())
   262  	}
   263  	if cmn.GCO.Get().Proxy.NonElectable {
   264  		query = url.Values{apc.QparamNonElectable: []string{"true"}}
   265  	}
   266  	res := p.join(query, nil /*htext*/, primaryURLs...)
   267  	defer freeCR(res)
   268  	if res.err != nil {
   269  		status, err = res.status, res.err
   270  		return
   271  	}
   272  	// not being sent at cluster startup and keepalive
   273  	if len(res.bytes) == 0 {
   274  		return
   275  	}
   276  	err = p.recvCluMetaBytes(action, res.bytes, "")
   277  	return
   278  }
   279  
// apart from minor, albeit subtle, differences between `t.joinCluster` vs `p.joinCluster`
// this method is otherwise identical to t.gojoin (TODO: unify)
//
// gojoin waits (pollClusterStarted) for the cluster to start, re-joins when
// polling returns cluster info (i.e., non-nil `cii`), and then - until the total
// `MaxHostBusy` budget is used up - keeps checking that this node is present in
// the current cluster map with matching network info, re-joining every
// `MaxKeepalive` otherwise. Returns early when stopping or when a (re)join fails.
func (p *proxy) gojoin(config *cmn.Config) {
	var (
		smap      = p.owner.smap.get()
		pub, ctrl string
	)
	// primary URLs from the current Smap, if the latter is usable
	if smap.Primary != nil && smap.Version > 0 {
		pub = smap.Primary.URL(cmn.NetPublic)
		ctrl = smap.Primary.URL(cmn.NetIntraControl)
	}
	cii := p.pollClusterStarted(config, smap.Primary)
	if nlog.Stopping() {
		return
	}

	if cii != nil {
		// (primary changed)
		pub, ctrl = cii.Smap.Primary.PubURL, cii.Smap.Primary.CtrlURL
		if status, err := p.joinCluster(apc.ActSelfJoinProxy, ctrl, pub); err != nil {
			nlog.Errorf(fmtFailedRejoin, p, err, status)
			return
		}
	}

	// normally, immediately return with "is ready";
	// otherwise, handle: (not present in cluster map | net-info changed)
	i, sleep, total := 2, config.Timeout.MaxKeepalive.D(), config.Timeout.MaxHostBusy.D()
	for total >= 0 {
		smap = p.owner.smap.get()
		si := smap.GetNode(p.SID())
		if si == nil {
			nlog.Errorf(fmtSelfNotPresent, p, smap.StringEx())
		} else {
			nerr := si.NetEq(p.si)
			if nerr == nil {
				p.markClusterStarted()
				nlog.Infoln(p.String(), "is ready")
				return // ok ---
			}
			nlog.Warningln(p.String(), "- trying to rejoin and, simultaneously, have the primary to update net-info:")
			nlog.Warningln("\t", nerr, smap.StringEx())
		}

		if nlog.Stopping() {
			return
		}
		time.Sleep(sleep)
		i++
		total -= sleep
		smap = p.owner.smap.get()
		// late-resolve primary URLs if they were not available before
		if ctrl == "" && smap.Primary != nil && smap.Version > 0 {
			pub = smap.Primary.URL(cmn.NetPublic)
			ctrl = smap.Primary.URL(cmn.NetIntraControl)
		}
		nlog.Warningln(p.String(), "- attempt number", i, "to join")
		if status, err := p.joinCluster(apc.ActSelfJoinProxy, ctrl, pub); err != nil {
			nlog.Errorf(fmtFailedRejoin, p, err, status)
			return
		}
	}

	// out of time budget: mark as started anyway (hence the question mark in the log)
	p.markClusterStarted()
	nlog.Infoln(p.String(), "is ready(?)")
}
   345  
   346  func (p *proxy) recvCluMetaBytes(action string, body []byte, caller string) error {
   347  	var cm cluMeta
   348  	if err := jsoniter.Unmarshal(body, &cm); err != nil {
   349  		return fmt.Errorf(cmn.FmtErrUnmarshal, p, "reg-meta", cos.BHead(body), err)
   350  	}
   351  	return p.recvCluMeta(&cm, action, caller)
   352  }
   353  
   354  // TODO: unify w/ t.recvCluMetaBytes
   355  func (p *proxy) recvCluMeta(cm *cluMeta, action, caller string) error {
   356  	var (
   357  		msg  = p.newAmsgStr(action, cm.BMD)
   358  		self = p.String() + ":"
   359  		errs []error
   360  	)
   361  	if cm.PrimeTime != 0 {
   362  		xreg.PrimeTime.Store(cm.PrimeTime)
   363  		xreg.MyTime.Store(time.Now().UnixNano())
   364  	}
   365  	// Config
   366  	if cm.Config == nil {
   367  		err := fmt.Errorf(self+" invalid %T (nil config): %+v", cm, cm)
   368  		nlog.Errorln(err)
   369  		return err
   370  	}
   371  	if err := p.receiveConfig(cm.Config, msg, nil, caller); err != nil {
   372  		if !isErrDowngrade(err) {
   373  			errs = append(errs, err)
   374  			nlog.Errorln(err)
   375  		}
   376  	} else {
   377  		nlog.Infoln(self, tagCM, action, cm.Config.String())
   378  	}
   379  	// Smap
   380  	if err := p.receiveSmap(cm.Smap, msg, nil /*ms payload*/, caller, p.smapOnUpdate); err != nil {
   381  		if !isErrDowngrade(err) {
   382  			errs = append(errs, err)
   383  			nlog.Errorln(err)
   384  		}
   385  	} else if cm.Smap != nil {
   386  		nlog.Infoln(self, tagCM, action, cm.Smap.String())
   387  	}
   388  	// BMD
   389  	if err := p.receiveBMD(cm.BMD, msg, nil, caller); err != nil {
   390  		if !isErrDowngrade(err) {
   391  			errs = append(errs, err)
   392  			nlog.Errorln(err)
   393  		}
   394  	} else {
   395  		nlog.Infoln(self, tagCM, action, cm.BMD.String())
   396  	}
   397  	// RMD
   398  	if err := p.receiveRMD(cm.RMD, msg, caller); err != nil {
   399  		if !isErrDowngrade(err) {
   400  			errs = append(errs, err)
   401  			nlog.Errorln(err)
   402  		}
   403  	} else {
   404  		nlog.Infoln(self, tagCM, action, cm.RMD.String())
   405  	}
   406  	// EtlMD
   407  	if err := p.receiveEtlMD(cm.EtlMD, msg, nil, caller, nil); err != nil {
   408  		if !isErrDowngrade(err) {
   409  			errs = append(errs, err)
   410  			nlog.Errorln(err)
   411  		}
   412  	} else if cm.EtlMD != nil {
   413  		nlog.Infoln(self, tagCM, action, cm.EtlMD.String())
   414  	}
   415  
   416  	switch {
   417  	case errs == nil:
   418  		return nil
   419  	case len(errs) == 1:
   420  		return errs[0]
   421  	default:
   422  		s := fmt.Sprintf("%v", errs)
   423  		return cmn.NewErrFailedTo(p, action, tagCM, errors.New(s))
   424  	}
   425  }
   426  
   427  // parse request + init/lookup bucket (combo)
   428  func (p *proxy) _parseReqTry(w http.ResponseWriter, r *http.Request, bckArgs *bctx) (bck *meta.Bck,
   429  	objName string, err error) {
   430  	apireq := apiReqAlloc(2, apc.URLPathObjects.L, false /*dpq*/)
   431  	if err = p.parseReq(w, r, apireq); err != nil {
   432  		apiReqFree(apireq)
   433  		return
   434  	}
   435  	bckArgs.bck, bckArgs.query = apireq.bck, apireq.query
   436  	bck, err = bckArgs.initAndTry()
   437  	objName = apireq.items[1]
   438  
   439  	apiReqFree(apireq)
   440  	freeBctx(bckArgs) // caller does alloc
   441  	return
   442  }
   443  
   444  // verb /v1/buckets/
   445  func (p *proxy) bucketHandler(w http.ResponseWriter, r *http.Request) {
   446  	if !p.cluStartedWithRetry() {
   447  		w.WriteHeader(http.StatusServiceUnavailable)
   448  		return
   449  	}
   450  	switch r.Method {
   451  	case http.MethodGet:
   452  		dpq := dpqAlloc()
   453  		p.httpbckget(w, r, dpq)
   454  		dpqFree(dpq)
   455  	case http.MethodDelete:
   456  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false /*dpq*/)
   457  		p.httpbckdelete(w, r, apireq)
   458  		apiReqFree(apireq)
   459  	case http.MethodPut:
   460  		p.httpbckput(w, r)
   461  	case http.MethodPost:
   462  		p.httpbckpost(w, r)
   463  	case http.MethodHead:
   464  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, true /*dpq*/)
   465  		p.httpbckhead(w, r, apireq)
   466  		apiReqFree(apireq)
   467  	case http.MethodPatch:
   468  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false /*dpq*/)
   469  		p.httpbckpatch(w, r, apireq)
   470  		apiReqFree(apireq)
   471  	default:
   472  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead,
   473  			http.MethodPatch, http.MethodPost)
   474  	}
   475  }
   476  
   477  // verb /v1/objects/
   478  func (p *proxy) objectHandler(w http.ResponseWriter, r *http.Request) {
   479  	switch r.Method {
   480  	case http.MethodGet:
   481  		p.httpobjget(w, r)
   482  	case http.MethodPut:
   483  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/)
   484  		p.httpobjput(w, r, apireq)
   485  		apiReqFree(apireq)
   486  	case http.MethodDelete:
   487  		p.httpobjdelete(w, r)
   488  	case http.MethodPost:
   489  		apireq := apiReqAlloc(1, apc.URLPathObjects.L, false /*dpq*/)
   490  		p.httpobjpost(w, r, apireq)
   491  		apiReqFree(apireq)
   492  	case http.MethodHead:
   493  		p.httpobjhead(w, r)
   494  	case http.MethodPatch:
   495  		p.httpobjpatch(w, r)
   496  	default:
   497  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead,
   498  			http.MethodPost, http.MethodPut)
   499  	}
   500  }
   501  
   502  // "Easy URL" (feature) is a simple alternative mapping of the AIS API to handle
   503  // URLs paths that look as follows:
   504  //
   505  //	/gs/mybucket/myobject   - to access Google Cloud buckets
   506  //	/az/mybucket/myobject   - Azure Blob Storage
   507  //	/ais/mybucket/myobject  - AIS
   508  //
   509  // In other words, easy URL is a convenience feature that allows reading, writing,
   510  // deleting, and listing objects as follows:
   511  //
   512  // # Example: GET
   513  // $ curl -L -X GET 'http://aistore/gs/my-google-bucket/abc-train-0001.tar'
   514  // # Example: PUT
   515  // $ curl -L -X PUT 'http://aistore/gs/my-google-bucket/abc-train-9999.tar -T /tmp/9999.tar'
   516  // # Example: LIST
   517  // $ curl -L -X GET 'http://aistore/gs/my-google-bucket'
   518  //
   519  // NOTE:
   520  //
   521  //	Amazon S3 is missing in the list that includes GCP and Azure. The reason
   522  //	for this is that AIS provides S3 compatibility layer via its "/s3" endpoint.
   523  //	S3 compatibility (see https://github.com/NVIDIA/aistore/blob/main/docs/s3compat.md)
   524  //	shall not be confused with a simple alternative URL Path mapping via easyURLHandler,
   525  //	whereby a path (e.g.) "gs/mybucket/myobject" gets replaced with
   526  //	"v1/objects/mybucket/myobject?provider=gcp" with _no_ other changes to the request
   527  //	and response parameters and components.
   528  func (p *proxy) easyURLHandler(w http.ResponseWriter, r *http.Request) {
   529  	apiItems, err := p.parseURL(w, r, nil, 1, true)
   530  	if err != nil {
   531  		return
   532  	}
   533  	provider := apiItems[0]
   534  	if provider, err = cmn.NormalizeProvider(provider); err != nil {
   535  		p.writeErr(w, r, err)
   536  		return
   537  	}
   538  	// num items: 1
   539  	if len(apiItems) == 1 {
   540  		// list buckets for a given provider
   541  		// NOTE two differences between this implementation and `p.bckNamesFromBMD` (s3 API):
   542  		// - `/s3` is an API endpoint rather than a namesake provider
   543  		//   (the API must "cover" all providers)
   544  		// - `/s3` and its subordinate URL paths can only "see" buckets that are already present
   545  		//   in the BMD, while native API, when given sufficient permissions, can immediately
   546  		//   access (read, write, list) any remote buckets, while adding them to the BMD "on the fly".
   547  		r.URL.Path = apc.URLPathBuckets.S
   548  		if r.URL.RawQuery == "" {
   549  			qbck := cmn.QueryBcks{Provider: provider}
   550  			query := qbck.NewQuery()
   551  			r.URL.RawQuery = query.Encode()
   552  		} else if !strings.Contains(r.URL.RawQuery, apc.QparamProvider) {
   553  			r.URL.RawQuery += "&" + apc.QparamProvider + "=" + provider
   554  		}
   555  		p.bucketHandler(w, r)
   556  		return
   557  	}
   558  	// num items: 2
   559  	bucket := apiItems[1]
   560  	bck := cmn.Bck{Name: bucket, Provider: provider}
   561  	if err := bck.ValidateName(); err != nil {
   562  		p.writeErr(w, r, err)
   563  		return
   564  	}
   565  
   566  	var objName string
   567  	if len(apiItems) > 2 {
   568  		// num items: 3
   569  		objName = apiItems[2]
   570  		r.URL.Path = apc.URLPathObjects.Join(bucket, objName)
   571  		r.URL.Path += path.Join(apiItems[3:]...)
   572  	} else {
   573  		if r.Method == http.MethodPut {
   574  			p.writeErrMsg(w, r, "missing destination object name in the \"easy URL\"")
   575  			return
   576  		}
   577  		r.URL.Path = apc.URLPathBuckets.Join(bucket)
   578  	}
   579  
   580  	if r.URL.RawQuery == "" {
   581  		query := bck.NewQuery()
   582  		r.URL.RawQuery = query.Encode()
   583  	} else if !strings.Contains(r.URL.RawQuery, apc.QparamProvider) {
   584  		r.URL.RawQuery += "&" + apc.QparamProvider + "=" + bck.Provider
   585  	}
   586  	// and finally
   587  	if objName != "" {
   588  		p.objectHandler(w, r)
   589  	} else {
   590  		p.bucketHandler(w, r)
   591  	}
   592  }
   593  
// GET /v1/buckets[/bucket-name]
//
// httpbckget handles, depending on the action message:
// (I) bucket summary, (III) list buckets, and (IV) list objects;
// any other action is rejected (II). A request with an empty body and a
// non-JSON content type is treated as list-objects with a default LsoMsg.
func (p *proxy) httpbckget(w http.ResponseWriter, r *http.Request, dpq *dpq) {
	var (
		msg     *apc.ActMsg
		bckName string
		qbck    *cmn.QueryBcks
	)
	apiItems, err := p.parseURL(w, r, apc.URLPathBuckets.L, 0, true)
	if err != nil {
		return
	}
	// bucket name in the URL path is optional
	if len(apiItems) > 0 {
		bckName = apiItems[0]
	}
	ctype := r.Header.Get(cos.HdrContentType)
	if r.ContentLength == 0 && !strings.HasPrefix(ctype, cos.ContentJSON) {
		// e.g. "easy URL" request: curl -L -X GET 'http://aistore/ais/abc'
		msg = &apc.ActMsg{Action: apc.ActList, Value: &apc.LsoMsg{}}
	} else if msg, err = p.readActionMsg(w, r); err != nil {
		return
	}
	if err := dpq.parse(r.URL.RawQuery); err != nil {
		p.writeErr(w, r, err)
		return
	}
	if qbck, err = newQbckFromQ(bckName, nil, dpq); err != nil {
		p.writeErr(w, r, err)
		return
	}

	// switch (I) through (IV) --------------------------

	// (I) summarize buckets
	if msg.Action == apc.ActSummaryBck {
		var summMsg apc.BsummCtrlMsg
		if err := cos.MorphMarshal(msg.Value, &summMsg); err != nil {
			p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
			return
		}
		// a fully specified bucket (as opposed to a bucket query) gets initialized first
		if qbck.IsBucket() {
			bck := (*meta.Bck)(qbck)
			bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AceBckHEAD, bck: bck, dpq: dpq}
			bckArgs.createAIS = false
			bckArgs.dontHeadRemote = summMsg.BckPresent
			if _, err := bckArgs.initAndTry(); err != nil {
				return
			}
		}
		p.bsummact(w, r, qbck, &summMsg)
		return
	}

	// (II) invalid action
	if msg.Action != apc.ActList {
		p.writeErrAct(w, r, msg.Action)
		return
	}

	// (III) list buckets
	if msg.Value == nil {
		// bucket name from the URL/query, if any, must agree with the one in the message
		if qbck.Name != "" && qbck.Name != msg.Name {
			p.writeErrf(w, r, "bad list-buckets request: %q vs %q (%+v, %+v)", qbck.Name, msg.Name, qbck, msg)
			return
		}
		qbck.Name = msg.Name
		if qbck.IsRemoteAIS() {
			qbck.Ns.UUID = p.a2u(qbck.Ns.UUID)
		}
		if err := p.checkAccess(w, r, nil, apc.AceListBuckets); err == nil {
			p.listBuckets(w, r, qbck, msg, dpq)
		}
		return
	}

	// (IV) list objects (NOTE -- TODO: currently, always forwarding)
	if !qbck.IsBucket() {
		p.writeErrf(w, r, "bad list-objects request: %q is not a bucket (is a bucket query?)", qbck)
		return
	}
	if p.forwardCP(w, r, msg, lsotag+" "+qbck.String()) {
		return
	}

	// lsmsg
	var (
		lsmsg apc.LsoMsg
		bck   = meta.CloneBck((*cmn.Bck)(qbck))
	)
	if err = cos.MorphMarshal(msg.Value, &lsmsg); err != nil {
		p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
		return
	}
	// reject prefixes containing "../"
	if lsmsg.Prefix != "" && strings.Contains(lsmsg.Prefix, "../") {
		p.writeErrf(w, r, "bad list-objects request: invalid prefix %q", lsmsg.Prefix)
		return
	}
	bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AceObjLIST, bck: bck, dpq: dpq}
	bckArgs.createAIS = false

	// translate list-objects flags into bucket-initialization options
	if lsmsg.IsFlagSet(apc.LsBckPresent) {
		bckArgs.dontHeadRemote = true
		bckArgs.dontAddRemote = true
	} else {
		bckArgs.tryHeadRemote = lsmsg.IsFlagSet(apc.LsDontHeadRemote)
		bckArgs.dontAddRemote = lsmsg.IsFlagSet(apc.LsDontAddRemote)
	}

	// do
	if bck, err = bckArgs.initAndTry(); err == nil {
		p.listObjects(w, r, bck, msg /*amsg*/, &lsmsg)
	}
}
   706  
// GET /v1/objects/bucket-name/object-name
//
// httpobjget parses the request, initializes the bucket (checking apc.AceGET
// permissions), selects the target via smap.HrwMultiHome, and redirects the
// client to it with 301 (Moved Permanently). The optional origURLBck, when
// given, is passed through to the bucket context.
func (p *proxy) httpobjget(w http.ResponseWriter, r *http.Request, origURLBck ...string) {
	// 1. request
	apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/)
	if err := p.parseReq(w, r, apireq); err != nil {
		apiReqFree(apireq)
		return
	}

	// 2. bucket
	bckArgs := allocBctx()
	{
		bckArgs.p = p
		bckArgs.w = w
		bckArgs.r = r
		bckArgs.bck = apireq.bck
		bckArgs.dpq = apireq.dpq
		bckArgs.perms = apc.AceGET
		bckArgs.createAIS = false
	}
	if len(origURLBck) > 0 {
		bckArgs.origURLBck = origURLBck[0]
	}
	bck, err := bckArgs.initAndTry()
	freeBctx(bckArgs)

	// read the object name prior to freeing the api request
	objName := apireq.items[1]
	apiReqFree(apireq)
	if err != nil {
		return
	}

	// 3. redirect
	smap := p.owner.smap.get()
	tsi, netPub, err := smap.HrwMultiHome(bck.MakeUname(objName))
	if err != nil {
		p.writeErr(w, r, err)
		return
	}
	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
		nlog.Infoln("GET " + bck.Cname(objName) + " => " + tsi.String())
	}
	redirectURL := p.redirectURL(r, tsi, time.Now() /*started*/, cmn.NetIntraData, netPub)
	http.Redirect(w, r, redirectURL, http.StatusMovedPermanently)

	// 4. stats
	p.statsT.Inc(stats.GetCount)
}
   755  
// PUT /v1/objects/bucket-name/object-name
//
// httpobjput handles both PUT and APPEND (the latter when the append-type query
// parameter is provided). It initializes the bucket with the corresponding
// permissions, selects the target - by the node ID encoded in the append handle,
// if present, or else via smap.HrwMultiHome - and redirects the client with
// 307 (Temporary Redirect). The api request is allocated and freed by the caller.
func (p *proxy) httpobjput(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
	var (
		nodeID string
		perms  apc.AccessAttrs
	)
	// 1. request
	if err := p.parseReq(w, r, apireq); err != nil {
		return
	}
	appendTyProvided := apireq.dpq.apnd.ty != "" // apc.QparamAppendType
	if !appendTyProvided {
		perms = apc.AcePUT
	} else {
		perms = apc.AceAPPEND
		if apireq.dpq.apnd.hdl != "" {
			items, err := preParse(apireq.dpq.apnd.hdl) // apc.QparamAppendHandle
			if err != nil {
				p.writeErr(w, r, err)
				return
			}
			nodeID = items[0] // nodeID; compare w/ apndOI.parse
		}
	}

	// 2. bucket
	bckArgs := allocBctx()
	{
		bckArgs.p = p
		bckArgs.w = w
		bckArgs.r = r
		bckArgs.perms = perms
		bckArgs.createAIS = false
	}
	bckArgs.bck, bckArgs.dpq = apireq.bck, apireq.dpq
	bck, err := bckArgs.initAndTry()
	freeBctx(bckArgs)
	if err != nil {
		return
	}

	// 3. redirect
	var (
		tsi     *meta.Snode
		smap    = p.owner.smap.get()
		started = time.Now()
		objName = apireq.items[1]
		netPub  = cmn.NetPublic
	)
	if nodeID == "" {
		tsi, netPub, err = smap.HrwMultiHome(bck.MakeUname(objName))
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
	} else {
		// append with a handle: must go to the target identified by the handle
		if tsi = smap.GetTarget(nodeID); tsi == nil {
			err = &errNodeNotFound{"PUT failure:", nodeID, p.si, smap}
			p.writeErr(w, r, err)
			return
		}
	}

	// verbose
	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
		verb, s := "PUT", ""
		if appendTyProvided {
			verb = "APPEND"
		}
		if bck.Props.Mirror.Enabled {
			s = " (put-mirror)"
		}
		nlog.Infof("%s %s => %s%s", verb, bck.Cname(objName), tsi.StringEx(), s)
	}

	redirectURL := p.redirectURL(r, tsi, started, cmn.NetIntraData, netPub)
	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)

	// 4. stats
	if !appendTyProvided {
		p.statsT.Inc(stats.PutCount)
	} else {
		p.statsT.Inc(stats.AppendCount)
	}
}
   841  
   842  // DELETE /v1/objects/bucket-name/object-name
   843  func (p *proxy) httpobjdelete(w http.ResponseWriter, r *http.Request) {
   844  	bckArgs := allocBctx()
   845  	{
   846  		bckArgs.p = p
   847  		bckArgs.w = w
   848  		bckArgs.r = r
   849  		bckArgs.perms = apc.AceObjDELETE
   850  		bckArgs.createAIS = false
   851  	}
   852  	bck, objName, err := p._parseReqTry(w, r, bckArgs)
   853  	if err != nil {
   854  		return
   855  	}
   856  	smap := p.owner.smap.get()
   857  	tsi, err := smap.HrwName2T(bck.MakeUname(objName))
   858  	if err != nil {
   859  		p.writeErr(w, r, err)
   860  		return
   861  	}
   862  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
   863  		nlog.Infoln("DELETE " + bck.Cname(objName) + " => " + tsi.StringEx())
   864  	}
   865  	redirectURL := p.redirectURL(r, tsi, time.Now() /*started*/, cmn.NetIntraControl)
   866  	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
   867  
   868  	p.statsT.Inc(stats.DeleteCount)
   869  }
   870  
// DELETE { action } /v1/buckets
//
// httpbckdelete parses the request and its action message, initializes the bucket,
// and then dispatches on msg.Action:
//   - apc.ActEvictRemoteBck: runs cmn.ValidateRemoteBck and evicts the bucket;
//     with apc.QparamKeepRemote set, only p.destroyBucketData is called;
//   - apc.ActDestroyBck: destroys the bucket (for a remote AIS bucket the request
//     is additionally passed on via p.reverseRemAis);
//   - apc.ActDeleteObjects, apc.ActEvictObjects: calls p.listrange and writes the
//     returned ID to the response body.
//
// Any other action is rejected via p.writeErrAct.
func (p *proxy) httpbckdelete(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
	// 1. request
	if err := p.parseReq(w, r, apireq); err != nil {
		return
	}
	msg, err := p.readActionMsg(w, r)
	if err != nil {
		return
	}
	// bucket-level actions require the destroy-bucket permission;
	// multi-object delete/evict requires the object-delete permission instead
	perms := apc.AceDestroyBucket
	if msg.Action == apc.ActDeleteObjects || msg.Action == apc.ActEvictObjects {
		perms = apc.AceObjDELETE
	}

	// 2. bucket
	bck := apireq.bck
	bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: perms, bck: bck, dpq: apireq.dpq, query: apireq.query}
	bckArgs.createAIS = false
	if msg.Action == apc.ActEvictRemoteBck {
		var ecode int
		bckArgs.dontHeadRemote = true // unconditionally
		ecode, err = bckArgs.init()
		if err != nil {
			// not-found (by status code or by error type) is not reported to the caller:
			// the handler returns without writing an error
			if ecode != http.StatusNotFound && !cmn.IsErrRemoteBckNotFound(err) {
				p.writeErr(w, r, err, ecode)
			}
			return
		}
	} else if bck, err = bckArgs.initAndTry(); err != nil {
		// no error is written here (presumably initAndTry has already responded - confirm)
		return
	}

	// 3. action
	switch msg.Action {
	case apc.ActEvictRemoteBck:
		if err := cmn.ValidateRemoteBck(apc.ActEvictRemoteBck, bck.Bucket()); err != nil {
			p.writeErr(w, r, err)
			return
		}
		keepMD := cos.IsParseBool(apireq.query.Get(apc.QparamKeepRemote))
		if keepMD {
			// data-only path: neither p.forwardCP nor p.destroyBucket is called
			if err := p.destroyBucketData(msg, bck); err != nil {
				p.writeErr(w, r, err)
			}
			return
		}
		if p.forwardCP(w, r, msg, bck.Name) {
			return
		}
		if err := p.destroyBucket(msg, bck); err != nil {
			p.writeErr(w, r, err)
		}
	case apc.ActDestroyBck:
		if p.forwardCP(w, r, msg, bck.Name) {
			return
		}
		if bck.IsRemoteAIS() {
			// "bucket not found" is tolerated here: the request still goes to the remote cluster
			if err := p.destroyBucket(msg, bck); err != nil {
				if !cmn.IsErrBckNotFound(err) {
					p.writeErr(w, r, err)
					return
				}
			}
			// having removed bucket from BMD ask remote to do the same
			p.reverseRemAis(w, r, msg, bck.Bucket(), apireq.query)
			return
		}
		if err := p.destroyBucket(msg, bck); err != nil {
			// "bucket not found" is logged and otherwise ignored (no error response)
			if cmn.IsErrBckNotFound(err) {
				nlog.Infof("%s: %s already %q-ed, nothing to do", p, bck, msg.Action)
			} else {
				p.writeErr(w, r, err)
			}
		}
	case apc.ActDeleteObjects, apc.ActEvictObjects:
		if msg.Action == apc.ActEvictObjects {
			// NOTE(review): validation is performed under the apc.ActEvictRemoteBck action name
			// (not apc.ActEvictObjects) - confirm this is intended
			if err := cmn.ValidateRemoteBck(apc.ActEvictRemoteBck, bck.Bucket()); err != nil {
				p.writeErr(w, r, err)
				return
			}
		}
		xid, err := p.listrange(r.Method, bck.Name, msg, apireq.query)
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
		// respond with the ID returned by p.listrange
		w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid)))
		w.Write([]byte(xid))
	default:
		p.writeErrAct(w, r, msg.Action)
	}
}
   964  
   965  // PUT /v1/metasync
   966  // (compare with p.recvCluMeta and t.metasyncHandlerPut)
   967  func (p *proxy) metasyncHandler(w http.ResponseWriter, r *http.Request) {
   968  	var (
   969  		err = &errMsync{}
   970  		cii = &err.Cii
   971  	)
   972  	if r.Method != http.MethodPut {
   973  		cmn.WriteErr405(w, r, http.MethodPut)
   974  		return
   975  	}
   976  	smap := p.owner.smap.get()
   977  
   978  	if smap.isPrimary(p.si) {
   979  		const txt = "cannot be on the receiving side of metasync"
   980  		xctn := voteInProgress()
   981  		maps := smap.StringEx()
   982  		p.ciiFill(cii)
   983  		switch {
   984  		case !p.ClusterStarted():
   985  			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is starting up as primary, "+txt, maps)
   986  		case xctn != nil:
   987  			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is still primary while voting is in progress, "+txt, maps)
   988  		default:
   989  			err.Message = fmt.Sprintf("%s(self) %s, %s", p, "is primary, "+txt, maps)
   990  		}
   991  		nlog.Errorln(err.Message)
   992  		// marshal along with cii
   993  		p.writeErr(w, r, errors.New(cos.MustMarshalToString(err)), http.StatusConflict, Silent)
   994  		return
   995  	}
   996  
   997  	payload := make(msPayload)
   998  	if errP := payload.unmarshal(r.Body, "metasync put"); errP != nil {
   999  		cmn.WriteErr(w, r, errP)
  1000  		return
  1001  	}
  1002  	// 1. extract
  1003  	var (
  1004  		caller                       = r.Header.Get(apc.HdrCallerName)
  1005  		newConf, msgConf, errConf    = p.extractConfig(payload, caller)
  1006  		newSmap, msgSmap, errSmap    = p.extractSmap(payload, caller, false /*skip validation*/)
  1007  		newBMD, msgBMD, errBMD       = p.extractBMD(payload, caller)
  1008  		newRMD, msgRMD, errRMD       = p.extractRMD(payload, caller)
  1009  		newEtlMD, msgEtlMD, errEtlMD = p.extractEtlMD(payload, caller)
  1010  		revokedTokens, errTokens     = p.extractRevokedTokenList(payload, caller)
  1011  	)
  1012  	// 2. apply
  1013  	if errConf == nil && newConf != nil {
  1014  		errConf = p.receiveConfig(newConf, msgConf, payload, caller)
  1015  	}
  1016  	if errSmap == nil && newSmap != nil {
  1017  		errSmap = p.receiveSmap(newSmap, msgSmap, payload, caller, p.smapOnUpdate)
  1018  	}
  1019  	if errBMD == nil && newBMD != nil {
  1020  		errBMD = p.receiveBMD(newBMD, msgBMD, payload, caller)
  1021  	}
  1022  	if errRMD == nil && newRMD != nil {
  1023  		errRMD = p.receiveRMD(newRMD, msgRMD, caller)
  1024  	}
  1025  	if errEtlMD == nil && newEtlMD != nil {
  1026  		errEtlMD = p.receiveEtlMD(newEtlMD, msgEtlMD, payload, caller, nil)
  1027  	}
  1028  	if errTokens == nil && revokedTokens != nil {
  1029  		_ = p.authn.updateRevokedList(revokedTokens)
  1030  	}
  1031  	// 3. respond
  1032  	if errConf == nil && errSmap == nil && errBMD == nil && errRMD == nil && errTokens == nil && errEtlMD == nil {
  1033  		return
  1034  	}
  1035  	p.ciiFill(cii)
  1036  	retErr := err.message(errConf, errSmap, errBMD, errRMD, errEtlMD, errTokens)
  1037  	p.writeErr(w, r, retErr, http.StatusConflict)
  1038  }
  1039  
  1040  func (p *proxy) syncNewICOwners(smap, newSmap *smapX) {
  1041  	if !smap.IsIC(p.si) || !newSmap.IsIC(p.si) {
  1042  		return
  1043  	}
  1044  	// async - not waiting
  1045  	for _, psi := range newSmap.Pmap {
  1046  		if p.SID() != psi.ID() && newSmap.IsIC(psi) && !smap.IsIC(psi) {
  1047  			go func(psi *meta.Snode) {
  1048  				if err := p.ic.sendOwnershipTbl(psi, newSmap); err != nil {
  1049  					nlog.Errorln(p.String()+": failed to send ownership table to", psi.String()+":", err)
  1050  				}
  1051  			}(psi)
  1052  		}
  1053  	}
  1054  }
  1055  
  1056  // GET /v1/health
  1057  func (p *proxy) healthHandler(w http.ResponseWriter, r *http.Request) {
  1058  	if !p.NodeStarted() {
  1059  		w.WriteHeader(http.StatusServiceUnavailable)
  1060  		return
  1061  	}
  1062  
  1063  	p.uptime2hdr(w.Header())
  1064  
  1065  	var (
  1066  		prr, getCii, askPrimary bool
  1067  	)
  1068  	if r.URL.RawQuery != "" {
  1069  		query := r.URL.Query()
  1070  		prr = cos.IsParseBool(query.Get(apc.QparamPrimaryReadyReb))
  1071  		getCii = cos.IsParseBool(query.Get(apc.QparamClusterInfo))
  1072  		askPrimary = cos.IsParseBool(query.Get(apc.QparamAskPrimary))
  1073  	}
  1074  
  1075  	if !prr {
  1076  		if responded := p.externalWD(w, r); responded {
  1077  			return
  1078  		}
  1079  	}
  1080  	// piggy-backing cluster info on health
  1081  	if getCii {
  1082  		debug.Assert(!prr)
  1083  		cii := &cifl.Info{}
  1084  		p.ciiFill(cii)
  1085  		p.writeJSON(w, r, cii, "cluster-info")
  1086  		return
  1087  	}
  1088  	smap := p.owner.smap.get()
  1089  	if err := smap.validate(); err != nil {
  1090  		if !p.ClusterStarted() {
  1091  			w.WriteHeader(http.StatusServiceUnavailable)
  1092  		} else {
  1093  			p.writeErr(w, r, err, http.StatusServiceUnavailable)
  1094  		}
  1095  		return
  1096  	}
  1097  
  1098  	callerID := r.Header.Get(apc.HdrCallerID)
  1099  	if smap.GetProxy(callerID) != nil {
  1100  		p.keepalive.heardFrom(callerID)
  1101  	}
  1102  
  1103  	// primary
  1104  	if smap.isPrimary(p.si) {
  1105  		if prr {
  1106  			if err := p.pready(smap, true); err != nil {
  1107  				if cmn.Rom.FastV(5, cos.SmoduleAIS) {
  1108  					p.writeErr(w, r, err, http.StatusServiceUnavailable)
  1109  				} else {
  1110  					p.writeErr(w, r, err, http.StatusServiceUnavailable, Silent)
  1111  				}
  1112  				return
  1113  			}
  1114  		}
  1115  		w.WriteHeader(http.StatusOK)
  1116  		return
  1117  	}
  1118  	// non-primary
  1119  	if !p.ClusterStarted() {
  1120  		// keep returning 503 until cluster starts up
  1121  		w.WriteHeader(http.StatusServiceUnavailable)
  1122  		return
  1123  	}
  1124  	if prr || askPrimary {
  1125  		caller := r.Header.Get(apc.HdrCallerName)
  1126  		p.writeErrf(w, r, "%s (non-primary): misdirected health-of-primary request from %s, %s",
  1127  			p, caller, smap.StringEx())
  1128  		return
  1129  	}
  1130  	w.WriteHeader(http.StatusOK)
  1131  }
  1132  
  1133  // PUT { action } /v1/buckets/bucket-name
  1134  func (p *proxy) httpbckput(w http.ResponseWriter, r *http.Request) {
  1135  	var (
  1136  		msg           *apc.ActMsg
  1137  		query         = r.URL.Query()
  1138  		apiItems, err = p.parseURL(w, r, apc.URLPathBuckets.L, 1, true)
  1139  	)
  1140  	if err != nil {
  1141  		return
  1142  	}
  1143  	bucket := apiItems[0]
  1144  	bck, err := newBckFromQ(bucket, query, nil)
  1145  	if err != nil {
  1146  		p.writeErr(w, r, err)
  1147  		return
  1148  	}
  1149  	if msg, err = p.readActionMsg(w, r); err != nil {
  1150  		return
  1151  	}
  1152  	bckArgs := bctx{p: p, w: w, r: r, bck: bck, msg: msg, query: query}
  1153  	bckArgs.createAIS = false
  1154  	if bck, err = bckArgs.initAndTry(); err != nil {
  1155  		return
  1156  	}
  1157  	switch msg.Action {
  1158  	case apc.ActArchive:
  1159  		var (
  1160  			bckFrom = bck
  1161  			archMsg = &cmn.ArchiveBckMsg{}
  1162  		)
  1163  		if err := cos.MorphMarshal(msg.Value, archMsg); err != nil {
  1164  			p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
  1165  			return
  1166  		}
  1167  		bckTo := meta.CloneBck(&archMsg.ToBck)
  1168  		if bckTo.IsEmpty() {
  1169  			bckTo = bckFrom
  1170  		} else {
  1171  			bckToArgs := bctx{p: p, w: w, r: r, bck: bckTo, msg: msg, perms: apc.AcePUT, query: query}
  1172  			bckToArgs.createAIS = false
  1173  			if bckTo, err = bckToArgs.initAndTry(); err != nil {
  1174  				return
  1175  			}
  1176  		}
  1177  		//
  1178  		// NOTE: strict enforcement of the standard & supported file extensions
  1179  		//
  1180  		if _, err := archive.Strict(archMsg.Mime, archMsg.ArchName); err != nil {
  1181  			p.writeErr(w, r, err)
  1182  			return
  1183  		}
  1184  		xid, err := p.createArchMultiObj(bckFrom, bckTo, msg)
  1185  		if err == nil {
  1186  			w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid)))
  1187  			w.Write([]byte(xid))
  1188  		} else {
  1189  			p.writeErr(w, r, err)
  1190  		}
  1191  	default:
  1192  		p.writeErrAct(w, r, msg.Action)
  1193  	}
  1194  }
  1195  
  1196  // POST { action } /v1/buckets[/bucket-name]
  1197  func (p *proxy) httpbckpost(w http.ResponseWriter, r *http.Request) {
  1198  	var msg *apc.ActMsg
  1199  	apiItems, err := p.parseURL(w, r, apc.URLPathBuckets.L, 1, true)
  1200  	if err != nil {
  1201  		return
  1202  	}
  1203  	if msg, err = p.readActionMsg(w, r); err != nil {
  1204  		return
  1205  	}
  1206  	bucket := apiItems[0]
  1207  	if len(apiItems) > 1 {
  1208  		err := cmn.InitErrHTTP(r, fmt.Errorf("invalid request URI %q", r.URL.Path), 0)
  1209  		p.writeErr(w, r, err)
  1210  		return
  1211  	}
  1212  	p._bckpost(w, r, msg, bucket)
  1213  }
  1214  
// _bckpost executes a POST {action} on the named bucket.
//
// apc.ActCreateBck is handled first and separately (via p.reverseRemAis for remote AIS
// buckets, p._bcr otherwise). For all other actions the bucket is initialized with
// list+get permissions and the action is dispatched:
//   - apc.ActMoveBck: rename bucket;
//   - apc.ActCopyBck, apc.ActETLBck: copy/transform bucket;
//   - apc.ActCopyObjects, apc.ActETLObjects: multi-object copy/transform;
//   - apc.ActAddRemoteBck: calls p.createBucket (no ID in the response);
//   - apc.ActPrefetchObjects: calls p.listrange;
//   - apc.ActInvalListCache: invalidates the list-objects cache entry (no ID in the response);
//   - apc.ActMakeNCopies, apc.ActECEncode.
//
// Actions that produce an ID (xid) write it to the response body; unknown actions are
// rejected via p.writeErrAct.
func (p *proxy) _bckpost(w http.ResponseWriter, r *http.Request, msg *apc.ActMsg, bucket string) {
	const (
		warnDstNotExist = "%s: destination %s doesn't exist and will be created with the %s (source bucket) props"
		errPrependSync  = "prepend option (%q) is incompatible with the request to synchronize buckets"
	)
	var (
		query    = r.URL.Query()
		bck, err = newBckFromQ(bucket, query, nil)
	)
	if err != nil {
		p.writeErr(w, r, err)
		return
	}

	if msg.Action == apc.ActCreateBck {
		if bck.IsRemoteAIS() {
			// create bucket (remais)
			p.reverseRemAis(w, r, msg, bck.Bucket(), query)
			return
		}
		// create bucket (this cluster)
		p._bcr(w, r, query, msg, bck)
		return
	}

	// only the primary can do metasync
	dtor := xact.Table[msg.Action]
	if dtor.Metasync {
		if p.forwardCP(w, r, msg, bucket) {
			return
		}
	}

	bckArgs := bctx{p: p, w: w, r: r, bck: bck, perms: apc.AceObjLIST | apc.AceGET, msg: msg, query: query}
	bckArgs.createAIS = false
	if bck, err = bckArgs.initAndTry(); err != nil {
		return
	}

	//
	// POST {action} on bucket
	//
	var xid string
	switch msg.Action {
	case apc.ActMoveBck:
		bckFrom := bck
		bckTo, err := newBckFromQuname(query, true /*required*/)
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
		if !bckFrom.IsAIS() && bckFrom.Backend() == nil {
			p.writeErrf(w, r, "can only rename AIS ('ais://') bucket (%q is not)", bckFrom)
			return
		}
		if bckTo.IsRemote() {
			p.writeErrf(w, r, "can only rename to AIS ('ais://') bucket (%q is remote)", bckTo)
			return
		}
		if bckFrom.Equal(bckTo, false, false) {
			p.writeErrf(w, r, "cannot rename bucket %q to itself (%q)", bckFrom, bckTo)
			return
		}
		// both ends of the rename are forced to the AIS provider
		bckFrom.Provider, bckTo.Provider = apc.AIS, apc.AIS
		// the destination must not be present in the BMD
		if _, present := p.owner.bmd.get().Get(bckTo); present {
			err := cmn.NewErrBckAlreadyExists(bckTo.Bucket())
			p.writeErr(w, r, err)
			return
		}
		if err := p.checkAccess(w, r, nil, apc.AceMoveBucket); err != nil {
			return
		}
		nlog.Infof("%s bucket %s => %s", msg.Action, bckFrom, bckTo)
		if xid, err = p.renameBucket(bckFrom, bckTo, msg); err != nil {
			p.writeErr(w, r, err)
			return
		}
	case apc.ActCopyBck, apc.ActETLBck:
		var (
			bckFrom     = bck
			bckTo       *meta.Bck
			tcbmsg      = &apc.TCBMsg{}
			ecode       int
			fltPresence = apc.FltPresent
		)
		// ETL: the value is the entire TCBMsg (validated below);
		// copy: the value is only its CopyBckMsg part
		switch msg.Action {
		case apc.ActETLBck:
			if err := cos.MorphMarshal(msg.Value, tcbmsg); err != nil {
				p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
				return
			}
			if err := tcbmsg.Validate(true); err != nil {
				p.writeErr(w, r, err)
				return
			}
		case apc.ActCopyBck:
			if err = cos.MorphMarshal(msg.Value, &tcbmsg.CopyBckMsg); err != nil {
				p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
				return
			}
		}
		if tcbmsg.Sync && tcbmsg.Prepend != "" {
			p.writeErrf(w, r, errPrependSync, tcbmsg.Prepend)
			return
		}
		bckTo, err = newBckFromQuname(query, true /*required*/)
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
		// source == destination is an error unless the bucket is remote
		if bckFrom.Equal(bckTo, true, true) {
			if !bckFrom.IsRemote() {
				p.writeErrf(w, r, "cannot %s bucket %q onto itself", msg.Action, bckFrom)
				return
			}
			nlog.Infoln("proceeding to copy remote", bckFrom.String())
		}

		bckTo, ecode, err = p.initBckTo(w, r, query, bckTo)
		if err != nil {
			return
		}
		// destination not found: forward to the primary (when applicable) and
		// require the create-bucket permission
		if ecode == http.StatusNotFound {
			if p.forwardCP(w, r, msg, bucket) { // to create
				return
			}
			if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil {
				return
			}
			nlog.Infof(warnDstNotExist, p, bckTo, bckFrom)
		}

		// start x-tcb or x-tco
		if v := query.Get(apc.QparamFltPresence); v != "" {
			// NOTE(review): the conversion error is discarded; for an invalid value
			// strconv.Atoi returns 0, which then replaces the apc.FltPresent default - confirm
			fltPresence, _ = strconv.Atoi(v)
		}
		debug.Assertf(fltPresence != apc.FltExistsOutside, "(flt %d=\"outside\") not implemented yet", fltPresence)
		if !apc.IsFltPresent(fltPresence) && (bckFrom.IsCloud() || bckFrom.IsRemoteAIS()) {
			lstcx := &lstcx{
				p:       p,
				bckFrom: bckFrom,
				bckTo:   bckTo,
				amsg:    msg,
				config:  cmn.GCO.Get(),
			}
			lstcx.tcomsg.TCBMsg = *tcbmsg
			xid, err = lstcx.do()
		} else {
			nlog.Infoln("x-tcb:", bckFrom.String(), "=>", bckTo.String())
			xid, err = p.tcb(bckFrom, bckTo, msg, tcbmsg.DryRun)
		}
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
	case apc.ActCopyObjects, apc.ActETLObjects:
		var (
			tcomsg = &cmn.TCObjsMsg{}
			bckTo  *meta.Bck
			ecode  int
			eq     bool
		)
		if err = cos.MorphMarshal(msg.Value, tcomsg); err != nil {
			p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
			return
		}
		if tcomsg.Sync && tcomsg.Prepend != "" {
			p.writeErrf(w, r, errPrependSync, tcomsg.Prepend)
			return
		}
		bckTo = meta.CloneBck(&tcomsg.ToBck)

		// same-bucket operation is allowed (with a warning);
		// in that case the destination is not (re)initialized below
		if bck.Equal(bckTo, true, true) {
			eq = true
			nlog.Warningf("multi-object operation %q within the same bucket %q", msg.Action, bck)
		}
		if bckTo.IsHTTP() {
			p.writeErrf(w, r, "cannot %s to HTTP bucket %q", msg.Action, bckTo)
			return
		}
		if !eq {
			bckTo, ecode, err = p.initBckTo(w, r, query, bckTo)
			if err != nil {
				return
			}
			if ecode == http.StatusNotFound {
				if p.forwardCP(w, r, msg, bucket) { // to create
					return
				}
				if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil {
					return
				}
				nlog.Infof(warnDstNotExist, p, bckTo, bck)
			}
		}

		xid, err = p.tcobjs(bck, bckTo, cmn.GCO.Get(), msg, tcomsg)
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
	case apc.ActAddRemoteBck:
		if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil {
			return
		}
		if err := p.createBucket(msg, bck, nil); err != nil {
			p.writeErr(w, r, err, crerrStatus(err))
		}
		// no ID to return
		return
	case apc.ActPrefetchObjects:
		// TODO: GET vs SYNC?
		if err := cmn.ValidateRemoteBck(apc.ActPrefetchObjects, bck.Bucket()); err != nil {
			p.writeErr(w, r, err)
			return
		}
		if xid, err = p.listrange(r.Method, bucket, msg, query); err != nil {
			p.writeErr(w, r, err)
			return
		}
	case apc.ActInvalListCache:
		p.qm.c.invalidate(bck.Bucket())
		// no ID to return
		return
	case apc.ActMakeNCopies:
		if xid, err = p.makeNCopies(msg, bck); err != nil {
			p.writeErr(w, r, err)
			return
		}
	case apc.ActECEncode:
		if xid, err = p.ecEncode(bck, msg); err != nil {
			p.writeErr(w, r, err)
			return
		}
	default:
		p.writeErrAct(w, r, msg.Action)
		return
	}

	// the response body is the ID: either a single valid UUID or
	// (as asserted below) a string containing a comma past its first character
	debug.Assertf(xact.IsValidUUID(xid) || strings.IndexByte(xid, ',') > 0, "%q: %q", msg.Action, xid)
	w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid)))
	w.Write([]byte(xid))
}
  1456  
  1457  // init existing or create remote
  1458  // not calling `initAndTry` - delegating ais:from// props cloning to the separate method
  1459  func (p *proxy) initBckTo(w http.ResponseWriter, r *http.Request, query url.Values, bckTo *meta.Bck) (*meta.Bck, int, error) {
  1460  	bckToArgs := bctx{p: p, w: w, r: r, bck: bckTo, perms: apc.AcePUT, query: query}
  1461  	bckToArgs.createAIS = true
  1462  
  1463  	ecode, err := bckToArgs.init()
  1464  	if err != nil && ecode != http.StatusNotFound {
  1465  		p.writeErr(w, r, err, ecode)
  1466  		return nil, 0, err
  1467  	}
  1468  
  1469  	// remote bucket: create it (BMD-wise) on the fly
  1470  	if ecode == http.StatusNotFound && bckTo.IsRemote() {
  1471  		if bckTo, err = bckToArgs.try(); err != nil {
  1472  			return nil, 0, err
  1473  		}
  1474  		ecode = 0
  1475  	}
  1476  	return bckTo, ecode, nil
  1477  }
  1478  
// POST { apc.ActCreateBck } /v1/buckets/bucket-name
//
// _bcr creates a bucket. It checks the create-bucket permission, validates the bucket,
// forwards to the primary when applicable, and defaults an empty provider to apc.AIS.
//
// For a remote bucket the message action is replaced with apc.ActAddRemoteBck and:
//   - with apc.QparamDontHeadRemote set (Cloud buckets only), the bucket is created
//     with default props without looking it up remotely;
//   - otherwise, the remote bucket is looked up via p.headRemoteBck and the returned
//     header is passed on to p.createBucket.
//
// When msg.Value is non-nil it is interpreted as cmn.BpropsToSet: props to apply at
// creation time, including an optional backend bucket that gets validated and initialized.
func (p *proxy) _bcr(w http.ResponseWriter, r *http.Request, query url.Values, msg *apc.ActMsg, bck *meta.Bck) {
	var (
		remoteHdr http.Header
		bucket    = bck.Name
	)
	if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil {
		return
	}
	if err := bck.Validate(); err != nil {
		p.writeErr(w, r, err)
		return
	}
	if p.forwardCP(w, r, msg, bucket) {
		return
	}
	if bck.Provider == "" {
		bck.Provider = apc.AIS
	}

	if bck.IsRemote() {
		// (feature) add Cloud bucket to BMD, to further set its `Props.Extra`
		// with alternative access profile and/or endpoint
		// TODO:
		// change bucket props - and the BMD meta-version - to have Flags int64 for
		// the bits that'll include "renamed" (instead of the current `Props.Renamed`)
		// and "added-with-no-head"; use the latter to synchronize Cloud props once
		// connected
		if cos.IsParseBool(query.Get(apc.QparamDontHeadRemote)) {
			if !bck.IsCloud() {
				p.writeErr(w, r, cmn.NewErrUnsupp("skip lookup for the", bck.Provider+":// bucket"))
				return
			}
			msg.Action = apc.ActAddRemoteBck // NOTE: substituting action in the message

			// NOTE: inherit cluster defaults
			config := cmn.GCO.Get()
			bprops := bck.Bucket().DefaultProps(&config.ClusterConfig)
			bprops.SetProvider(bck.Provider)

			// this path ignores msg.Value (props-to-update) altogether
			if err := p._createBucketWithProps(msg, bck, bprops); err != nil {
				p.writeErr(w, r, err, crerrStatus(err))
			}
			return
		}

		// remote: check existence and get (cloud) props
		rhdr, statusCode, err := p.headRemoteBck(bck.RemoteBck(), nil)
		if err != nil {
			// the lookup error is replaced with "not implemented" for Cloud buckets and
			// with "unsupported" for remote buckets that are neither Cloud nor remote AIS;
			// for remote AIS the original error is reported as is
			if bck.IsCloud() {
				statusCode = http.StatusNotImplemented
				err = cmn.NewErrNotImpl("create", bck.Provider+"(cloud) bucket")
			} else if !bck.IsRemoteAIS() {
				err = cmn.NewErrUnsupp("create", bck.Provider+":// bucket")
			}
			p.writeErr(w, r, err, statusCode)
			return
		}
		remoteHdr = rhdr
		msg.Action = apc.ActAddRemoteBck // ditto
	}
	// props-to-update at creation time
	if msg.Value != nil {
		propsToUpdate := cmn.BpropsToSet{}
		if err := cos.MorphMarshal(msg.Value, &propsToUpdate); err != nil {
			p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
			return
		}
		// Make and validate new bucket props.
		bck.Props = defaultBckProps(bckPropsArgs{bck: bck})
		nprops, err := p.makeNewBckProps(bck, &propsToUpdate, true /*creating*/)
		if err != nil {
			p.writeErr(w, r, err)
			return
		}
		bck.Props = nprops
		if backend := bck.Backend(); backend != nil {
			if err := backend.Validate(); err != nil {
				p.writeErrf(w, r, "cannot create %s: invalid backend %s, err: %v", bck, backend, err)
				return
			}
			// Initialize backend bucket.
			if err := backend.InitNoBackend(p.owner.bmd); err != nil {
				if !cmn.IsErrRemoteBckNotFound(err) {
					p.writeErrf(w, r, "cannot create %s: failing to initialize backend %s, err: %v",
						bck, backend, err)
					return
				}
				// backend bucket not found: fall back to `try`
				// (no error is written here on failure - presumably `try` responds itself; confirm)
				args := bctx{p: p, w: w, r: r, bck: backend, msg: msg, query: query}
				args.createAIS = false
				if _, err = args.try(); err != nil {
					return
				}
			}
		}
		// Send all props to the target
		msg.Value = bck.Props
	}
	if err := p.createBucket(msg, bck, remoteHdr); err != nil {
		p.writeErr(w, r, err, crerrStatus(err))
	}
}
  1581  
  1582  func crerrStatus(err error) (ecode int) {
  1583  	switch err.(type) {
  1584  	case *cmn.ErrBucketAlreadyExists:
  1585  		ecode = http.StatusConflict
  1586  	case *cmn.ErrNotImpl:
  1587  		ecode = http.StatusNotImplemented
  1588  	}
  1589  	return
  1590  }
  1591  
  1592  // one page => msgpack rsp
  1593  func (p *proxy) listObjects(w http.ResponseWriter, r *http.Request, bck *meta.Bck, amsg *apc.ActMsg, lsmsg *apc.LsoMsg) {
  1594  	// LsVerChanged a.k.a. '--check-versions' limitations
  1595  	if lsmsg.IsFlagSet(apc.LsVerChanged) {
  1596  		const a = "cannot perform remote versions check"
  1597  		if !bck.HasVersioningMD() {
  1598  			p.writeErrMsg(w, r, a+": bucket "+bck.Cname("")+" does not provide (remote) versioning info")
  1599  			return
  1600  		}
  1601  		if lsmsg.IsFlagSet(apc.LsNameOnly) || lsmsg.IsFlagSet(apc.LsNameSize) {
  1602  			p.writeErrMsg(w, r, a+": flag 'LsVerChanged' is incompatible with 'LsNameOnly', 'LsNameSize'")
  1603  			return
  1604  		}
  1605  		if !lsmsg.WantProp(apc.GetPropsCustom) {
  1606  			p.writeErrf(w, r, a+" without listing %q (object property)", apc.GetPropsCustom)
  1607  			return
  1608  		}
  1609  	}
  1610  
  1611  	// default props & flags => user-provided message
  1612  	switch {
  1613  	case lsmsg.Props == "":
  1614  		if lsmsg.IsFlagSet(apc.LsObjCached) {
  1615  			lsmsg.AddProps(apc.GetPropsDefaultAIS...)
  1616  		} else {
  1617  			lsmsg.AddProps(apc.GetPropsMinimal...)
  1618  			lsmsg.SetFlag(apc.LsNameSize)
  1619  		}
  1620  	case lsmsg.Props == apc.GetPropsName:
  1621  		lsmsg.SetFlag(apc.LsNameOnly)
  1622  	case lsmsg.Props == apc.GetPropsNameSize:
  1623  		lsmsg.SetFlag(apc.LsNameSize)
  1624  	}
  1625  	if bck.IsHTTP() || lsmsg.IsFlagSet(apc.LsArchDir) {
  1626  		lsmsg.SetFlag(apc.LsObjCached)
  1627  	}
  1628  
  1629  	// do page
  1630  	beg := mono.NanoTime()
  1631  	lst, err := p.lsPage(bck, amsg, lsmsg, r.Header, p.owner.smap.get())
  1632  	if err != nil {
  1633  		p.writeErr(w, r, err)
  1634  		return
  1635  	}
  1636  	p.statsT.AddMany(
  1637  		cos.NamedVal64{Name: stats.ListCount, Value: 1},
  1638  		cos.NamedVal64{Name: stats.ListLatency, Value: mono.SinceNano(beg)},
  1639  	)
  1640  
  1641  	var ok bool
  1642  	if strings.Contains(r.Header.Get(cos.HdrAccept), cos.ContentMsgPack) {
  1643  		ok = p.writeMsgPack(w, lst, lsotag)
  1644  	} else {
  1645  		ok = p.writeJS(w, r, lst, lsotag)
  1646  	}
  1647  	if !ok && cmn.Rom.FastV(4, cos.SmoduleAIS) {
  1648  		nlog.Errorln("failed to transmit list-objects page (TCP RST?)")
  1649  	}
  1650  
  1651  	// GC
  1652  	clear(lst.Entries)
  1653  	lst.Entries = lst.Entries[:0]
  1654  	lst.Entries = nil
  1655  	lst = nil
  1656  }
  1657  
  1658  // one page; common code (native, s3 api)
  1659  func (p *proxy) lsPage(bck *meta.Bck, amsg *apc.ActMsg, lsmsg *apc.LsoMsg, hdr http.Header, smap *smapX) (*cmn.LsoRes, error) {
  1660  	var (
  1661  		nl             nl.Listener
  1662  		err            error
  1663  		tsi            *meta.Snode
  1664  		lst            *cmn.LsoRes
  1665  		newls          bool
  1666  		listRemote     bool
  1667  		wantOnlyRemote bool
  1668  	)
  1669  	if lsmsg.UUID == "" {
  1670  		lsmsg.UUID = cos.GenUUID()
  1671  		newls = true
  1672  	}
  1673  	tsi, listRemote, wantOnlyRemote, err = p._lsofc(bck, lsmsg, smap)
  1674  	if err != nil {
  1675  		return nil, err
  1676  	}
  1677  	if newls {
  1678  		if wantOnlyRemote {
  1679  			nl = xact.NewXactNL(lsmsg.UUID, apc.ActList, &smap.Smap, meta.NodeMap{tsi.ID(): tsi}, bck.Bucket())
  1680  		} else {
  1681  			// bcast
  1682  			nl = xact.NewXactNL(lsmsg.UUID, apc.ActList, &smap.Smap, nil, bck.Bucket())
  1683  		}
  1684  		// NOTE #2: TODO: currently, always primary - hrw redirect vs scenarios***
  1685  		nl.SetOwner(smap.Primary.ID())
  1686  		p.ic.registerEqual(regIC{nl: nl, smap: smap, msg: amsg})
  1687  	}
  1688  
  1689  	if listRemote {
  1690  		if lsmsg.StartAfter != "" {
  1691  			// TODO: remote AIS first, then Cloud
  1692  			return nil, fmt.Errorf("%s option --start_after (%s) not yet supported for remote buckets (%s)",
  1693  				lsotag, lsmsg.StartAfter, bck)
  1694  		}
  1695  		// verbose log
  1696  		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
  1697  			var s string
  1698  			if lsmsg.ContinuationToken != "" {
  1699  				s = " cont=" + lsmsg.ContinuationToken
  1700  			}
  1701  			if lsmsg.SID != "" {
  1702  				s += " via " + tsi.StringEx()
  1703  			}
  1704  			nlog.Infof("%s[%s] %s%s", amsg.Action, lsmsg.UUID, bck.Cname(""), s)
  1705  		}
  1706  
  1707  		config := cmn.GCO.Get()
  1708  		lst, err = p.lsObjsR(bck, lsmsg, hdr, smap, tsi, config, wantOnlyRemote)
  1709  
  1710  		// TODO: `status == http.StatusGone`: at this point we know that this
  1711  		// remote bucket exists and is offline. We should somehow try to list
  1712  		// cached objects. This isn't easy as we basically need to start a new
  1713  		// xaction and return a new `UUID`.
  1714  	} else {
  1715  		lst, err = p.lsObjsA(bck, lsmsg)
  1716  	}
  1717  
  1718  	return lst, err
  1719  }
  1720  
// list-objects flow control helper
//
// _lsofc decides how a list-objects request is going to be executed. Returns:
//   - listRemote: true when the bucket is remote and apc.LsObjCached is not set;
//     when false, all other results are zero values;
//   - wantOnlyRemote: always true for a bucket with zero Props.BID;
//     otherwise, the value of lsmsg.WantOnlyRemoteProps();
//   - tsi: the target designated to carry out the remote listing - the one identified
//     by lsmsg.SID if set, or else selected via smap.HrwTargetTask(lsmsg.UUID);
//   - err: non-nil when the request cannot be served.
//
// Side effect: lsmsg.SID is (re)set to the ID of the selected target.
// Note that tsi may be non-nil when err is non-nil as well (see the lsmsg.SID branch).
func (p *proxy) _lsofc(bck *meta.Bck, lsmsg *apc.LsoMsg, smap *smapX) (tsi *meta.Snode, listRemote, wantOnlyRemote bool, err error) {
	listRemote = bck.IsRemote() && !lsmsg.IsFlagSet(apc.LsObjCached)
	if !listRemote {
		return
	}
	if bck.Props.BID == 0 {
		// remote bucket outside cluster (not in BMD) that hasn't been added ("on the fly") by the caller
		// (lsmsg flag below)
		debug.Assert(bck.IsRemote())
		debug.Assert(lsmsg.IsFlagSet(apc.LsDontAddRemote))
		wantOnlyRemote = true
		if !lsmsg.WantOnlyRemoteProps() {
			err = fmt.Errorf("cannot list remote not-in-cluster bucket %s for not-only-remote object properties: %q",
				bck.Cname(""), lsmsg.Props)
			return
		}
	} else {
		// default
		wantOnlyRemote = lsmsg.WantOnlyRemoteProps()
	}

	// designate one target to carry-out backend.list-objects
	if lsmsg.SID != "" {
		// the message already names the target: look it up in the cluster map
		tsi = smap.GetTarget(lsmsg.SID)
		if tsi == nil || tsi.InMaintOrDecomm() {
			err = &errNodeNotFound{lsotag + " failure:", lsmsg.SID, p.si, smap}
			nlog.Errorln(err)
			// with exactly one active target, try to select a target anew;
			// on success, the error above is overridden (err becomes nil)
			if smap.CountActiveTs() == 1 {
				// (walk an extra mile)
				orig := err
				tsi, err = smap.HrwTargetTask(lsmsg.UUID)
				if err == nil {
					nlog.Warningf("ignoring [%v] - utilizing the last (or the only) active target %s", orig, tsi)
					lsmsg.SID = tsi.ID()
				}
			}
		}
		return
	}
	// if listing using bucket inventory (`apc.HdrInventory`) is requested
	// target selection can change - see lsObjsR below
	if tsi, err = smap.HrwTargetTask(lsmsg.UUID); err == nil {
		lsmsg.SID = tsi.ID()
	}
	return
}
  1768  
  1769  // POST { action } /v1/objects/bucket-name[/object-name]
  1770  func (p *proxy) httpobjpost(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
  1771  	msg, err := p.readActionMsg(w, r)
  1772  	if err != nil {
  1773  		return
  1774  	}
  1775  	if msg.Action == apc.ActRenameObject {
  1776  		apireq.after = 2
  1777  	}
  1778  	if err := p.parseReq(w, r, apireq); err != nil {
  1779  		return
  1780  	}
  1781  
  1782  	bck := apireq.bck
  1783  	bckArgs := bctx{p: p, w: w, r: r, msg: msg, perms: apc.AcePUT, bck: bck}
  1784  	bckArgs.createAIS = false
  1785  	bckArgs.dontHeadRemote = true
  1786  	if _, err := bckArgs.initAndTry(); err != nil {
  1787  		return
  1788  	}
  1789  
  1790  	switch msg.Action {
  1791  	case apc.ActRenameObject:
  1792  		if err := p.checkAccess(w, r, bck, apc.AceObjMOVE); err != nil {
  1793  			return
  1794  		}
  1795  		if bck.IsRemote() {
  1796  			p.writeErrActf(w, r, msg.Action, "not supported for remote buckets (%s)", bck)
  1797  			return
  1798  		}
  1799  		if bck.Props.EC.Enabled {
  1800  			p.writeErrActf(w, r, msg.Action, "not supported for erasure-coded buckets (%s)", bck)
  1801  			return
  1802  		}
  1803  		objName, objNameTo := apireq.items[1], msg.Name
  1804  		if objName == objNameTo {
  1805  			p.writeErrMsg(w, r, "cannot rename "+bck.Cname(objName)+" to self, nothing to do")
  1806  			return
  1807  		}
  1808  		if !p.isValidObjname(w, r, objNameTo) {
  1809  			return
  1810  		}
  1811  		p.redirectObjAction(w, r, bck, apireq.items[1], msg)
  1812  	case apc.ActPromote:
  1813  		if err := p.checkAccess(w, r, bck, apc.AcePromote); err != nil {
  1814  			return
  1815  		}
  1816  		// ActionMsg.Name is the source
  1817  		if !filepath.IsAbs(msg.Name) {
  1818  			if msg.Name == "" {
  1819  				p.writeErrMsg(w, r, "promoted source pathname is empty")
  1820  			} else {
  1821  				p.writeErrf(w, r, "promoted source must be an absolute path (got %q)", msg.Name)
  1822  			}
  1823  			return
  1824  		}
  1825  		args := &apc.PromoteArgs{}
  1826  		if err := cos.MorphMarshal(msg.Value, args); err != nil {
  1827  			p.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, p.si, msg.Action, msg.Value, err)
  1828  			return
  1829  		}
  1830  		var tsi *meta.Snode
  1831  		if args.DaemonID != "" {
  1832  			smap := p.owner.smap.get()
  1833  			if tsi = smap.GetTarget(args.DaemonID); tsi == nil {
  1834  				err := &errNodeNotFound{apc.ActPromote + " failure:", args.DaemonID, p.si, smap}
  1835  				p.writeErr(w, r, err)
  1836  				return
  1837  			}
  1838  		}
  1839  		xid, err := p.promote(bck, msg, tsi)
  1840  		if err != nil {
  1841  			p.writeErr(w, r, err)
  1842  			return
  1843  		}
  1844  		w.Write([]byte(xid))
  1845  	case apc.ActBlobDl:
  1846  		if err := p.checkAccess(w, r, bck, apc.AccessRW); err != nil {
  1847  			return
  1848  		}
  1849  		if err := cmn.ValidateRemoteBck(apc.ActBlobDl, bck.Bucket()); err != nil {
  1850  			p.writeErr(w, r, err)
  1851  			return
  1852  		}
  1853  		objName := msg.Name
  1854  		p.redirectObjAction(w, r, bck, objName, msg)
  1855  	default:
  1856  		p.writeErrAct(w, r, msg.Action)
  1857  	}
  1858  }
  1859  
  1860  // HEAD /v1/buckets/bucket-name
  1861  func (p *proxy) httpbckhead(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
  1862  	err := p.parseReq(w, r, apireq)
  1863  	if err != nil {
  1864  		return
  1865  	}
  1866  	bckArgs := bctx{p: p, w: w, r: r, bck: apireq.bck, perms: apc.AceBckHEAD, dpq: apireq.dpq, query: apireq.query}
  1867  	bckArgs.dontAddRemote = apireq.dpq.dontAddRemote // QparamDontAddRemote
  1868  
  1869  	var (
  1870  		info        *cmn.BsummResult
  1871  		dpq         = apireq.dpq
  1872  		msg         apc.BsummCtrlMsg
  1873  		fltPresence int
  1874  		status      int
  1875  	)
  1876  	if dpq.fltPresence != "" {
  1877  		fltPresence, err = strconv.Atoi(dpq.fltPresence)
  1878  		if err != nil {
  1879  			p.writeErrf(w, r, "%s: parse 'flt-presence': %w", p, err)
  1880  			return
  1881  		}
  1882  		bckArgs.dontHeadRemote = bckArgs.dontHeadRemote || apc.IsFltPresent(fltPresence)
  1883  	}
  1884  	if dpq.binfo != "" { // QparamBinfoWithOrWithoutRemote
  1885  		msg = apc.BsummCtrlMsg{
  1886  			UUID:          dpq.uuid,
  1887  			ObjCached:     !cos.IsParseBool(dpq.binfo),
  1888  			BckPresent:    apc.IsFltPresent(fltPresence),
  1889  			DontAddRemote: dpq.dontAddRemote,
  1890  		}
  1891  		bckArgs.dontAddRemote = msg.DontAddRemote
  1892  	}
  1893  	bckArgs.createAIS = false
  1894  
  1895  	bck, err := bckArgs.initAndTry()
  1896  	if err != nil {
  1897  		return
  1898  	}
  1899  
  1900  	// 1. bucket is present (and was present prior to this call), and we are done with it here
  1901  	if bckArgs.isPresent {
  1902  		if fltPresence == apc.FltExistsOutside {
  1903  			nlog.Warningf("bucket %s is present, flt %d=\"outside\" not implemented yet", bck.Cname(""), fltPresence)
  1904  		}
  1905  		if dpq.binfo != "" {
  1906  			info, status, err = p.bsummhead(bck, &msg)
  1907  			if err != nil {
  1908  				p.writeErr(w, r, err)
  1909  				return
  1910  			}
  1911  			if info != nil {
  1912  				info.IsBckPresent = true
  1913  			}
  1914  		}
  1915  		toHdr(w, bck, info, status, msg.UUID)
  1916  		return
  1917  	}
  1918  
  1919  	// 2. bucket is remote and does exist
  1920  	debug.Assert(bck.IsRemote(), bck.String())
  1921  	debug.Assert(bckArgs.exists)
  1922  
  1923  	// [filtering] when the bucket that must be present is not
  1924  	if apc.IsFltPresent(fltPresence) {
  1925  		toHdr(w, bck, nil, 0, "")
  1926  		return
  1927  	}
  1928  
  1929  	var (
  1930  		bprops *cmn.Bprops
  1931  		bmd    = p.owner.bmd.get()
  1932  	)
  1933  	bprops, bckArgs.isPresent = bmd.Get(bck)
  1934  	if bprops != nil {
  1935  		// just added via bckArgs.initAndTry() above, with dontAdd == false
  1936  		bck.Props = bprops
  1937  	} // otherwise, keep bck.Props as per (#18995)
  1938  
  1939  	if dpq.binfo != "" {
  1940  		info, status, err = p.bsummhead(bck, &msg)
  1941  		if err != nil {
  1942  			p.writeErr(w, r, err)
  1943  			return
  1944  		}
  1945  		if info != nil {
  1946  			info.IsBckPresent = true
  1947  		}
  1948  	}
  1949  	toHdr(w, bck, info, status, msg.UUID)
  1950  }
  1951  
  1952  func toHdr(w http.ResponseWriter, bck *meta.Bck, info *cmn.BsummResult, status int, xid string) {
  1953  	hdr := w.Header()
  1954  	if bck.Props != nil {
  1955  		hdr.Set(apc.HdrBucketProps, cos.MustMarshalToString(bck.Props))
  1956  	}
  1957  	if info != nil {
  1958  		hdr.Set(apc.HdrBucketSumm, cos.MustMarshalToString(info))
  1959  	}
  1960  	if xid != "" {
  1961  		hdr.Set(apc.HdrXactionID, xid)
  1962  	}
  1963  	if status > 0 {
  1964  		w.WriteHeader(status)
  1965  	}
  1966  }
  1967  
  1968  // PATCH /v1/buckets/bucket-name
  1969  func (p *proxy) httpbckpatch(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
  1970  	var (
  1971  		err           error
  1972  		msg           *apc.ActMsg
  1973  		propsToUpdate cmn.BpropsToSet
  1974  		xid           string
  1975  		nprops        *cmn.Bprops // complete instance of bucket props with propsToUpdate changes
  1976  	)
  1977  	if err = p.parseReq(w, r, apireq); err != nil {
  1978  		return
  1979  	}
  1980  	if msg, err = p.readActionMsg(w, r); err != nil {
  1981  		return
  1982  	}
  1983  	if err := cos.MorphMarshal(msg.Value, &propsToUpdate); err != nil {
  1984  		p.writeErrMsg(w, r, "invalid props-to-update value in apireq: "+msg.String())
  1985  		return
  1986  	}
  1987  	bck := apireq.bck
  1988  	if p.forwardCP(w, r, msg, "patch "+bck.String()) {
  1989  		return
  1990  	}
  1991  	perms := apc.AcePATCH
  1992  	if propsToUpdate.Access != nil {
  1993  		perms |= apc.AceBckSetACL
  1994  	}
  1995  	bckArgs := bctx{p: p, w: w, r: r, bck: bck, msg: msg, skipBackend: true,
  1996  		perms: perms, dpq: apireq.dpq, query: apireq.query}
  1997  	bckArgs.createAIS = false
  1998  	if bck, err = bckArgs.initAndTry(); err != nil {
  1999  		return
  2000  	}
  2001  	if err = _checkAction(msg, apc.ActSetBprops, apc.ActResetBprops); err != nil {
  2002  		p.writeErr(w, r, err)
  2003  		return
  2004  	}
  2005  	// make and validate new props
  2006  	if nprops, err = p.makeNewBckProps(bck, &propsToUpdate); err != nil {
  2007  		p.writeErr(w, r, err)
  2008  		return
  2009  	}
  2010  	if !nprops.BackendBck.IsEmpty() {
  2011  		// backend must exist, must init itself
  2012  		backendBck := meta.CloneBck(&nprops.BackendBck)
  2013  		backendBck.Props = nil
  2014  
  2015  		args := bctx{p: p, w: w, r: r, bck: backendBck, msg: msg, dpq: apireq.dpq, query: apireq.query}
  2016  		args.createAIS = false
  2017  		if _, err = args.initAndTry(); err != nil {
  2018  			return
  2019  		}
  2020  		// init and validate
  2021  		if err = p.initBackendProp(nprops); err != nil {
  2022  			p.writeErr(w, r, err)
  2023  			return
  2024  		}
  2025  	}
  2026  	if xid, err = p.setBprops(msg, bck, nprops); err != nil {
  2027  		p.writeErr(w, r, err)
  2028  		return
  2029  	}
  2030  	w.Write([]byte(xid))
  2031  }
  2032  
  2033  // HEAD /v1/objects/bucket-name/object-name
  2034  func (p *proxy) httpobjhead(w http.ResponseWriter, r *http.Request, origURLBck ...string) {
  2035  	bckArgs := allocBctx()
  2036  	{
  2037  		bckArgs.p = p
  2038  		bckArgs.w = w
  2039  		bckArgs.r = r
  2040  		bckArgs.perms = apc.AceObjHEAD
  2041  		bckArgs.createAIS = false
  2042  	}
  2043  	if len(origURLBck) > 0 {
  2044  		bckArgs.origURLBck = origURLBck[0]
  2045  	}
  2046  	bck, objName, err := p._parseReqTry(w, r, bckArgs)
  2047  	if err != nil {
  2048  		return
  2049  	}
  2050  	smap := p.owner.smap.get()
  2051  	si, err := smap.HrwName2T(bck.MakeUname(objName))
  2052  	if err != nil {
  2053  		p.writeErr(w, r, err, http.StatusInternalServerError)
  2054  		return
  2055  	}
  2056  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
  2057  		nlog.Infof("%s %s => %s", r.Method, bck.Cname(objName), si.StringEx())
  2058  	}
  2059  	redirectURL := p.redirectURL(r, si, time.Now() /*started*/, cmn.NetIntraControl)
  2060  	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
  2061  }
  2062  
  2063  // PATCH /v1/objects/bucket-name/object-name
  2064  func (p *proxy) httpobjpatch(w http.ResponseWriter, r *http.Request) {
  2065  	started := time.Now()
  2066  	bckArgs := allocBctx()
  2067  	{
  2068  		bckArgs.p = p
  2069  		bckArgs.w = w
  2070  		bckArgs.r = r
  2071  		bckArgs.perms = apc.AceObjHEAD
  2072  		bckArgs.createAIS = false
  2073  	}
  2074  	bck, objName, err := p._parseReqTry(w, r, bckArgs)
  2075  	if err != nil {
  2076  		return
  2077  	}
  2078  	smap := p.owner.smap.get()
  2079  	si, err := smap.HrwName2T(bck.MakeUname(objName))
  2080  	if err != nil {
  2081  		p.writeErr(w, r, err, http.StatusInternalServerError)
  2082  		return
  2083  	}
  2084  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
  2085  		nlog.Infof("%s %s => %s", r.Method, bck.Cname(objName), si.StringEx())
  2086  	}
  2087  	redirectURL := p.redirectURL(r, si, started, cmn.NetIntraControl)
  2088  	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
  2089  }
  2090  
  2091  func (p *proxy) listBuckets(w http.ResponseWriter, r *http.Request, qbck *cmn.QueryBcks, msg *apc.ActMsg, dpq *dpq) {
  2092  	var (
  2093  		bmd     = p.owner.bmd.get()
  2094  		present bool
  2095  	)
  2096  	if qbck.IsAIS() || qbck.IsHTTP() {
  2097  		bcks := bmd.Select(qbck)
  2098  		p.writeJSON(w, r, bcks, "list-buckets")
  2099  		return
  2100  	}
  2101  
  2102  	// present-only filtering
  2103  	if dpq.fltPresence != "" {
  2104  		if v, err := strconv.Atoi(dpq.fltPresence); err == nil {
  2105  			present = apc.IsFltPresent(v)
  2106  		}
  2107  	}
  2108  	if present {
  2109  		bcks := bmd.Select(qbck)
  2110  		p.writeJSON(w, r, bcks, "list-buckets")
  2111  		return
  2112  	}
  2113  
  2114  	// via random target
  2115  	smap := p.owner.smap.get()
  2116  	si, err := smap.GetRandTarget()
  2117  	if err != nil {
  2118  		p.writeErr(w, r, err)
  2119  		return
  2120  	}
  2121  
  2122  	cargs := allocCargs()
  2123  	{
  2124  		cargs.si = si
  2125  		cargs.req = cmn.HreqArgs{
  2126  			Method:   r.Method,
  2127  			Path:     r.URL.Path,
  2128  			RawQuery: r.URL.RawQuery,
  2129  			Header:   r.Header,
  2130  			Body:     cos.MustMarshal(msg),
  2131  		}
  2132  		cargs.timeout = apc.DefaultTimeout
  2133  	}
  2134  	res := p.call(cargs, smap)
  2135  	freeCargs(cargs)
  2136  
  2137  	if res.err != nil {
  2138  		err = res.toErr()
  2139  		p.writeErr(w, r, err, res.status)
  2140  		return
  2141  	}
  2142  
  2143  	hdr := w.Header()
  2144  	hdr.Set(cos.HdrContentType, res.header.Get(cos.HdrContentType))
  2145  	hdr.Set(cos.HdrContentLength, strconv.Itoa(len(res.bytes)))
  2146  	_, err = w.Write(res.bytes)
  2147  	debug.AssertNoErr(err)
  2148  }
  2149  
  2150  func (p *proxy) redirectURL(r *http.Request, si *meta.Snode, ts time.Time, netIntra string, netPubs ...string) (redirect string) {
  2151  	var (
  2152  		nodeURL string
  2153  		netPub  = cmn.NetPublic
  2154  	)
  2155  	if len(netPubs) > 0 {
  2156  		netPub = netPubs[0]
  2157  	}
  2158  	if p.si.LocalNet == nil {
  2159  		nodeURL = si.URL(netPub)
  2160  	} else {
  2161  		var local bool
  2162  		remote := r.RemoteAddr
  2163  		if colon := strings.Index(remote, ":"); colon != -1 {
  2164  			remote = remote[:colon]
  2165  		}
  2166  		if ip := net.ParseIP(remote); ip != nil {
  2167  			local = p.si.LocalNet.Contains(ip)
  2168  		}
  2169  		if local {
  2170  			nodeURL = si.URL(netIntra)
  2171  		} else {
  2172  			nodeURL = si.URL(netPub)
  2173  		}
  2174  	}
  2175  	redirect = nodeURL + r.URL.Path + "?"
  2176  	if r.URL.RawQuery != "" {
  2177  		redirect += r.URL.RawQuery + "&"
  2178  	}
  2179  
  2180  	query := url.Values{
  2181  		apc.QparamProxyID:  []string{p.SID()},
  2182  		apc.QparamUnixTime: []string{cos.UnixNano2S(ts.UnixNano())},
  2183  	}
  2184  	redirect += query.Encode()
  2185  	return
  2186  }
  2187  
// lsObjsA reads object list from all targets, combines, sorts and returns
// the final list. Excess of object entries from each target is remembered in the
// buffer (see: `queryBuffers`) so we won't request the same objects again.
//
// Lookup order: list-objects cache (only with `apc.UseListObjsCache`), then the
// per-request buffer (`p.qm.b`), and only then a broadcast to targets.
//
// Side effects on `lsmsg`: a zero `PageSize` is replaced with `apc.MaxPageSizeAIS`,
// and `ContinuationToken` is overwritten before broadcasting.
//
// Returns the first target error, if any. The result's `Flags` are accumulated from
// target responses only (and remain zero when the page is served from cache or buffer).
func (p *proxy) lsObjsA(bck *meta.Bck, lsmsg *apc.LsoMsg) (allEntries *cmn.LsoRes, err error) {
	// NOTE: all locals are declared upfront - the `goto` statements below
	// must not jump over variable declarations
	var (
		aisMsg    *aisMsg
		args      *bcastArgs
		entries   cmn.LsoEntries
		results   sliceResults
		smap      = p.owner.smap.get()
		cacheID   = cacheReqID{bck: bck.Bucket(), prefix: lsmsg.Prefix}
		token     = lsmsg.ContinuationToken // the token as requested by the caller
		props     = lsmsg.PropsSet()
		hasEnough bool
		flags     uint32
	)
	if lsmsg.PageSize == 0 {
		lsmsg.PageSize = apc.MaxPageSizeAIS
	}
	pageSize := lsmsg.PageSize

	// TODO: Before checking cache and buffer we should check if there is another
	// request in-flight that asks for the same page - if true wait for the cache
	// to get populated.

	if lsmsg.IsFlagSet(apc.UseListObjsCache) {
		entries, hasEnough = p.qm.c.get(cacheID, token, pageSize)
		if hasEnough {
			// served from cache: skip both the broadcast and the cache update
			goto end
		}
	}
	entries, hasEnough = p.qm.b.get(lsmsg.UUID, token, pageSize)
	if hasEnough {
		// We have enough in the buffer to fulfill the request.
		goto endWithCache
	}

	// User requested some page but we don't have enough (but we may have part
	// of the full page). Therefore, we must ask targets for page starting from
	// what we have locally, so we don't re-request the objects.
	lsmsg.ContinuationToken = p.qm.b.last(lsmsg.UUID, token)

	aisMsg = p.newAmsgActVal(apc.ActList, &lsmsg)
	args = allocBcArgs()
	args.req = cmn.HreqArgs{
		Method: http.MethodGet,
		Path:   apc.URLPathBuckets.Join(bck.Name),
		Query:  bck.NewQuery(),
		Body:   cos.MustMarshal(aisMsg),
	}
	args.timeout = apc.LongTimeout
	args.smap = smap
	args.cresv = cresLso{} // -> cmn.LsoRes

	// Combine the results.
	results = p.bcastGroup(args)
	freeBcArgs(args)
	for _, res := range results {
		if res.err != nil {
			// fail the entire request on the first failed target
			err = res.toErr()
			freeBcastRes(results)
			return nil, err
		}
		objList := res.v.(*cmn.LsoRes)
		flags |= objList.Flags
		// remember this target's entries in the buffer (keyed by request UUID and target ID)
		p.qm.b.set(lsmsg.UUID, res.si.ID(), objList.Entries, pageSize)
	}
	freeBcastRes(results)
	// re-read the buffer using the original (caller's) token
	entries, hasEnough = p.qm.b.get(lsmsg.UUID, token, pageSize)
	debug.Assert(hasEnough)

endWithCache:
	if lsmsg.IsFlagSet(apc.UseListObjsCache) {
		p.qm.c.set(cacheID, token, entries, pageSize)
	}
end:
	if lsmsg.IsFlagSet(apc.UseListObjsCache) && !props.All(apc.GetPropsAll...) {
		// Since cache keeps entries with whole subset props we must create copy
		// of the entries with smaller subset of props (if we would change the
		// props of the `entries` it would also affect entries inside cache).
		propsEntries := make(cmn.LsoEntries, len(entries))
		for idx := range entries {
			propsEntries[idx] = entries[idx].CopyWithProps(props)
		}
		entries = propsEntries
	}

	allEntries = &cmn.LsoRes{
		UUID:    lsmsg.UUID,
		Entries: entries,
		Flags:   flags,
	}
	// full page: the name of the last entry becomes the next continuation token
	if len(entries) >= int(pageSize) {
		allEntries.ContinuationToken = entries[len(entries)-1].Name
	}

	// when recursion is disabled (i.e., lsmsg.IsFlagSet(apc.LsNoRecursion))
	// the (`cmn.LsoRes`) result _may_ include duplicated names of the virtual subdirectories
	// - that's why:
	if lsmsg.IsFlagSet(apc.LsNoRecursion) {
		allEntries.Entries = cmn.DedupLso(allEntries.Entries, len(entries))
	}

	return allEntries, nil
}
  2293  
  2294  func (p *proxy) lsObjsR(bck *meta.Bck, lsmsg *apc.LsoMsg, hdr http.Header, smap *smapX, tsi *meta.Snode, config *cmn.Config,
  2295  	wantOnlyRemote bool) (*cmn.LsoRes, error) {
  2296  	var (
  2297  		results sliceResults
  2298  		aisMsg  = p.newAmsgActVal(apc.ActList, &lsmsg)
  2299  		args    = allocBcArgs()
  2300  		timeout = config.Client.ListObjTimeout.D()
  2301  	)
  2302  	if cos.IsParseBool(hdr.Get(apc.HdrInventory)) {
  2303  		// TODO: extend to other Clouds or, more precisely, other list-objects supporting backends
  2304  		if !bck.IsRemoteS3() {
  2305  			return nil, cmn.NewErrUnsupp("list (via bucket inventory) non-S3 bucket", bck.Cname(""))
  2306  		}
  2307  		if lsmsg.ContinuationToken == "" /*first page*/ {
  2308  			timeout = config.Client.TimeoutLong.D()
  2309  
  2310  			// override _lsofc selection (see above)
  2311  			_, objName := s3.InvPrefObjname(bck.Bucket(), hdr.Get(apc.HdrInvName), hdr.Get(apc.HdrInvID))
  2312  			tsi, err := smap.HrwName2T(bck.MakeUname(objName))
  2313  			if err != nil {
  2314  				return nil, err
  2315  			}
  2316  			lsmsg.SID = tsi.ID()
  2317  		}
  2318  	}
  2319  	args.req = cmn.HreqArgs{
  2320  		Method: http.MethodGet,
  2321  		Path:   apc.URLPathBuckets.Join(bck.Name),
  2322  		Header: hdr,
  2323  		Query:  bck.NewQuery(),
  2324  		Body:   cos.MustMarshal(aisMsg),
  2325  	}
  2326  	if wantOnlyRemote {
  2327  		cargs := allocCargs()
  2328  		{
  2329  			cargs.si = tsi
  2330  			cargs.req = args.req
  2331  			cargs.timeout = timeout
  2332  			cargs.cresv = cresLso{} // -> cmn.LsoRes
  2333  		}
  2334  		// duplicate via query to have target ignoring an (early) failure to initialize bucket
  2335  		if lsmsg.IsFlagSet(apc.LsDontHeadRemote) {
  2336  			cargs.req.Query.Set(apc.QparamDontHeadRemote, "true")
  2337  		}
  2338  		if lsmsg.IsFlagSet(apc.LsDontAddRemote) {
  2339  			cargs.req.Query.Set(apc.QparamDontAddRemote, "true")
  2340  		}
  2341  		res := p.call(cargs, smap)
  2342  		freeCargs(cargs)
  2343  		results = make(sliceResults, 1)
  2344  		results[0] = res
  2345  	} else {
  2346  		args.timeout = timeout
  2347  		args.smap = smap
  2348  		args.cresv = cresLso{} // -> cmn.LsoRes
  2349  		results = p.bcastGroup(args)
  2350  	}
  2351  
  2352  	freeBcArgs(args)
  2353  
  2354  	// Combine the results.
  2355  	resLists := make([]*cmn.LsoRes, 0, len(results))
  2356  	for _, res := range results {
  2357  		if res.err != nil {
  2358  			err := res.toErr()
  2359  			freeBcastRes(results)
  2360  			return nil, err
  2361  		}
  2362  		resLists = append(resLists, res.v.(*cmn.LsoRes))
  2363  	}
  2364  	freeBcastRes(results)
  2365  
  2366  	return cmn.MergeLso(resLists, 0), nil
  2367  }
  2368  
  2369  func (p *proxy) redirectObjAction(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string, msg *apc.ActMsg) {
  2370  	started := time.Now()
  2371  	smap := p.owner.smap.get()
  2372  	si, err := smap.HrwName2T(bck.MakeUname(objName))
  2373  	if err != nil {
  2374  		p.writeErr(w, r, err)
  2375  		return
  2376  	}
  2377  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
  2378  		nlog.Infof("%q %s => %s", msg.Action, bck.Cname(objName), si.StringEx())
  2379  	}
  2380  
  2381  	// NOTE: Code 307 is the only way to http-redirect with the original JSON payload.
  2382  	redirectURL := p.redirectURL(r, si, started, cmn.NetIntraControl)
  2383  	http.Redirect(w, r, redirectURL, http.StatusTemporaryRedirect)
  2384  
  2385  	p.statsT.Inc(stats.RenameCount)
  2386  }
  2387  
  2388  func (p *proxy) listrange(method, bucket string, msg *apc.ActMsg, query url.Values) (xid string, err error) {
  2389  	var (
  2390  		smap   = p.owner.smap.get()
  2391  		aisMsg = p.newAmsg(msg, nil, cos.GenUUID())
  2392  		body   = cos.MustMarshal(aisMsg)
  2393  		path   = apc.URLPathBuckets.Join(bucket)
  2394  	)
  2395  	nlb := xact.NewXactNL(aisMsg.UUID, aisMsg.Action, &smap.Smap, nil)
  2396  	nlb.SetOwner(equalIC)
  2397  	p.ic.registerEqual(regIC{smap: smap, query: query, nl: nlb})
  2398  	args := allocBcArgs()
  2399  	args.req = cmn.HreqArgs{Method: method, Path: path, Query: query, Body: body}
  2400  	args.smap = smap
  2401  	args.timeout = apc.DefaultTimeout
  2402  	results := p.bcastGroup(args)
  2403  	freeBcArgs(args)
  2404  	for _, res := range results {
  2405  		if res.err == nil {
  2406  			continue
  2407  		}
  2408  		err = res.errorf("%s failed to %q List/Range", res.si, msg.Action)
  2409  		break
  2410  	}
  2411  	freeBcastRes(results)
  2412  	xid = aisMsg.UUID
  2413  	return
  2414  }
  2415  
  2416  func (p *proxy) reverseHandler(w http.ResponseWriter, r *http.Request) {
  2417  	apiItems, err := p.parseURL(w, r, apc.URLPathReverse.L, 1, false)
  2418  	if err != nil {
  2419  		return
  2420  	}
  2421  
  2422  	// rewrite URL path (removing `apc.Reverse`)
  2423  	r.URL.Path = cos.JoinWords(apc.Version, apiItems[0])
  2424  
  2425  	nodeID := r.Header.Get(apc.HdrNodeID)
  2426  	if nodeID == "" {
  2427  		p.writeErrMsg(w, r, "missing node ID")
  2428  		return
  2429  	}
  2430  	smap := p.owner.smap.get()
  2431  	si := smap.GetNode(nodeID)
  2432  	if si != nil && si.InMaintOrDecomm() {
  2433  		daeStatus := "inactive"
  2434  		switch {
  2435  		case si.Flags.IsSet(meta.SnodeMaint):
  2436  			daeStatus = apc.NodeMaintenance
  2437  		case si.Flags.IsSet(meta.SnodeDecomm):
  2438  			daeStatus = apc.NodeDecommission
  2439  		}
  2440  		if r.Method == http.MethodGet {
  2441  			what := r.URL.Query().Get(apc.QparamWhat)
  2442  			if what == apc.WhatNodeStatsAndStatus {
  2443  				// skip reversing, return status as per Smap
  2444  				msg := &stats.NodeStatus{
  2445  					Node:   stats.Node{Snode: si},
  2446  					Status: daeStatus,
  2447  				}
  2448  				p.writeJSON(w, r, msg, what)
  2449  				return
  2450  			}
  2451  		}
  2452  		// otherwise, warn and go ahead
  2453  		// (e.g. scenario: shutdown when transitioning through states)
  2454  		nlog.Warningf("%s: %s status is: %s", p, si.StringEx(), daeStatus)
  2455  	}
  2456  
  2457  	// access control
  2458  	switch r.Method {
  2459  	case http.MethodGet:
  2460  		// must be consistent with httpdaeget, httpcluget
  2461  		err = p.checkAccess(w, r, nil, apc.AceShowCluster)
  2462  	case http.MethodPost:
  2463  		// (ditto) httpdaepost, httpclupost
  2464  		err = p.checkAccess(w, r, nil, apc.AceAdmin)
  2465  	case http.MethodPut, http.MethodDelete:
  2466  		// (ditto) httpdaeput/delete and httpcluput/delete
  2467  		err = p.checkAccess(w, r, nil, apc.AceAdmin)
  2468  	default:
  2469  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost, http.MethodPut)
  2470  		return
  2471  	}
  2472  	if err != nil {
  2473  		return
  2474  	}
  2475  
  2476  	// do
  2477  	if si != nil {
  2478  		p.reverseNodeRequest(w, r, si)
  2479  		return
  2480  	}
  2481  	// special case when the target self-removed itself from cluster map
  2482  	// after having lost all mountpaths.
  2483  	nodeURL := r.Header.Get(apc.HdrNodeURL)
  2484  	if nodeURL == "" {
  2485  		err = &errNodeNotFound{"cannot rproxy to", nodeID, p.si, smap}
  2486  		p.writeErr(w, r, err, http.StatusNotFound)
  2487  		return
  2488  	}
  2489  	parsedURL, err := url.Parse(nodeURL)
  2490  	if err != nil {
  2491  		p.writeErrf(w, r, "%s: invalid URL %q for node %s", p.si, nodeURL, nodeID)
  2492  		return
  2493  	}
  2494  
  2495  	p.reverseRequest(w, r, nodeID, parsedURL)
  2496  }
  2497  
  2498  //
  2499  // /daemon handlers
  2500  //
  2501  
  2502  // [METHOD] /v1/daemon
  2503  func (p *proxy) daemonHandler(w http.ResponseWriter, r *http.Request) {
  2504  	switch r.Method {
  2505  	case http.MethodGet:
  2506  		p.httpdaeget(w, r)
  2507  	case http.MethodPut:
  2508  		p.httpdaeput(w, r)
  2509  	case http.MethodPost:
  2510  		p.httpdaepost(w, r)
  2511  	default:
  2512  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost, http.MethodPut)
  2513  	}
  2514  }
  2515  
  2516  func (p *proxy) handlePendingRenamedLB(renamedBucket string) {
  2517  	ctx := &bmdModifier{
  2518  		pre:   p.bmodPostMv,
  2519  		final: p.bmodSync,
  2520  		msg:   &apc.ActMsg{Value: apc.ActMoveBck},
  2521  		bcks:  []*meta.Bck{meta.NewBck(renamedBucket, apc.AIS, cmn.NsGlobal)},
  2522  	}
  2523  	_, err := p.owner.bmd.modify(ctx)
  2524  	debug.AssertNoErr(err)
  2525  }
  2526  
  2527  func (p *proxy) bmodPostMv(ctx *bmdModifier, clone *bucketMD) error {
  2528  	var (
  2529  		bck            = ctx.bcks[0]
  2530  		props, present = clone.Get(bck)
  2531  	)
  2532  	if !present {
  2533  		ctx.terminate = true
  2534  		// Already removed via the the very first target calling here.
  2535  		return nil
  2536  	}
  2537  	if props.Renamed == "" {
  2538  		nlog.Errorf("%s: renamed bucket %s: unexpected props %+v", p, bck.Name, *bck.Props)
  2539  		ctx.terminate = true
  2540  		return nil
  2541  	}
  2542  	clone.del(bck)
  2543  	return nil
  2544  }
  2545  
// GET /v1/daemon
//
// httpdaeget checks access (apc.AceShowCluster) and then responds according to
// the `apc.QparamWhat` query parameter. Most of the "what" values (and any unrecognized one)
// are delegated to the embedded p.htrun.httpdaeget; the rest are handled here:
//   - apc.WhatNodeStatsAndStatus: node status, stats tracker, and cluster info
//   - apc.WhatSysInfo:            result of apc.GetMemCPU()
//   - apc.WhatSmap:               cluster map, or 503 when it does not validate (see below)
func (p *proxy) httpdaeget(w http.ResponseWriter, r *http.Request) {
	var (
		query = r.URL.Query()
		what  = query.Get(apc.QparamWhat)
	)
	if err := p.checkAccess(w, r, nil, apc.AceShowCluster); err != nil {
		return
	}
	switch what {
	case apc.WhatBMD:
		// the request may carry the name of a renamed bucket;
		// if so, handle it prior to returning BMD
		if renamedBucket := query.Get(whatRenamedLB); renamedBucket != "" {
			p.handlePendingRenamedLB(renamedBucket)
		}
		fallthrough // fallthrough
	case apc.WhatNodeConfig, apc.WhatSmapVote, apc.WhatSnode, apc.WhatLog,
		apc.WhatNodeStats, apc.WhatNodeStatsV322, apc.WhatMetricNames,
		apc.WhatNodeStatsAndStatusV322:
		p.htrun.httpdaeget(w, r, query, nil /*htext*/)

	case apc.WhatNodeStatsAndStatus:
		ds := p.statsAndStatus()
		daeStats := p.statsT.GetStats()
		ds.Tracker = daeStats.Tracker
		p.ciiFill(&ds.Cluster)
		p.writeJSON(w, r, ds, what)

	case apc.WhatSysInfo:
		p.writeJSON(w, r, apc.GetMemCPU(), what)
	case apc.WhatSmap:
		const retries = 16
		var (
			smap  = p.owner.smap.get()
			sleep = cmn.Rom.CplaneOperation() / 2
		)
		// wait for a valid Smap: up to `retries` iterations, sleeping in between
		for i := 0; smap.validate() != nil && i < retries; i++ {
			if !p.NodeStarted() {
				// this node has not started yet: sleep once, re-check, and stop waiting
				time.Sleep(sleep)
				smap = p.owner.smap.get()
				if err := smap.validate(); err != nil {
					nlog.Errorf("%s is starting up, cannot return %s yet: %v", p, smap, err)
				}
				break
			}
			smap = p.owner.smap.get()
			time.Sleep(sleep)
		}
		// still invalid: respond with 503 (and no body)
		if err := smap.validate(); err != nil {
			nlog.Errorf("%s: startup is taking unusually long time: %s (%v)", p, smap, err)
			w.WriteHeader(http.StatusServiceUnavailable)
			return
		}
		p.writeJSON(w, r, smap, what)
	default:
		p.htrun.httpdaeget(w, r, query, nil /*htext*/)
	}
}
  2602  
  2603  func (p *proxy) httpdaeput(w http.ResponseWriter, r *http.Request) {
  2604  	apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 0, true)
  2605  	if err != nil {
  2606  		return
  2607  	}
  2608  	if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil {
  2609  		return
  2610  	}
  2611  	// urlpath-based actions
  2612  	if len(apiItems) > 0 {
  2613  		action := apiItems[0]
  2614  		p.daePathAction(w, r, action)
  2615  		return
  2616  	}
  2617  	// message-based actions
  2618  	query := r.URL.Query()
  2619  	msg, err := p.readActionMsg(w, r)
  2620  	if err != nil {
  2621  		return
  2622  	}
  2623  	switch msg.Action {
  2624  	case apc.ActSetConfig: // set-config #2 - via action message
  2625  		p.setDaemonConfigMsg(w, r, msg, query)
  2626  	case apc.ActResetConfig:
  2627  		if err := p.owner.config.resetDaemonConfig(); err != nil {
  2628  			p.writeErr(w, r, err)
  2629  		}
  2630  	case apc.ActRotateLogs:
  2631  		nlog.Flush(nlog.ActRotate)
  2632  	case apc.ActResetStats:
  2633  		errorsOnly := msg.Value.(bool)
  2634  		p.statsT.ResetStats(errorsOnly)
  2635  
  2636  	case apc.ActStartMaintenance:
  2637  		if !p.ensureIntraControl(w, r, true /* from primary */) {
  2638  			return
  2639  		}
  2640  		p.termKalive(msg.Action)
  2641  	case apc.ActDecommissionCluster, apc.ActDecommissionNode:
  2642  		if !p.ensureIntraControl(w, r, true /* from primary */) {
  2643  			return
  2644  		}
  2645  		var opts apc.ActValRmNode
  2646  		if err := cos.MorphMarshal(msg.Value, &opts); err != nil {
  2647  			p.writeErr(w, r, err)
  2648  			return
  2649  		}
  2650  		p.termKalive(msg.Action)
  2651  		p.decommission(msg.Action, &opts)
  2652  	case apc.ActShutdownNode:
  2653  		if !p.ensureIntraControl(w, r, true /* from primary */) {
  2654  			return
  2655  		}
  2656  		p.termKalive(msg.Action)
  2657  		p.shutdown(msg.Action)
  2658  	case apc.ActShutdownCluster:
  2659  		smap := p.owner.smap.get()
  2660  		isPrimary := smap.isPrimary(p.si)
  2661  		if !isPrimary {
  2662  			if !p.ensureIntraControl(w, r, true /* from primary */) {
  2663  				return
  2664  			}
  2665  			p.Stop(&errNoUnregister{msg.Action})
  2666  			return
  2667  		}
  2668  		force := cos.IsParseBool(query.Get(apc.QparamForce))
  2669  		if !force {
  2670  			p.writeErrf(w, r, "cannot shutdown primary %s (consider %s=true option)",
  2671  				p.si, apc.QparamForce)
  2672  			return
  2673  		}
  2674  		_ = syscall.Kill(syscall.Getpid(), syscall.SIGINT)
  2675  	default:
  2676  		p.writeErrAct(w, r, msg.Action)
  2677  	}
  2678  }
  2679  
  2680  func (p *proxy) daePathAction(w http.ResponseWriter, r *http.Request, action string) {
  2681  	switch action {
  2682  	case apc.Proxy:
  2683  		p.daeSetPrimary(w, r)
  2684  	case apc.SyncSmap:
  2685  		newsmap := &smapX{}
  2686  		if cmn.ReadJSON(w, r, newsmap) != nil {
  2687  			return
  2688  		}
  2689  		if err := newsmap.validate(); err != nil {
  2690  			p.writeErrf(w, r, "%s: invalid %s: %v", p.si, newsmap, err)
  2691  			return
  2692  		}
  2693  		if err := p.owner.smap.synchronize(p.si, newsmap, nil /*ms payload*/, p.htrun.smapUpdatedCB); err != nil {
  2694  			p.writeErr(w, r, cmn.NewErrFailedTo(p, "synchronize", newsmap, err))
  2695  			return
  2696  		}
  2697  		nlog.Infof("%s: %s %s done", p, apc.SyncSmap, newsmap)
  2698  	case apc.ActSetConfig: // set-config #1 - via query parameters and "?n1=v1&n2=v2..."
  2699  		p.setDaemonConfigQuery(w, r)
  2700  	default:
  2701  		p.writeErrAct(w, r, action)
  2702  	}
  2703  }
  2704  
  2705  func (p *proxy) httpdaepost(w http.ResponseWriter, r *http.Request) {
  2706  	apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 0, true)
  2707  	if err != nil {
  2708  		return
  2709  	}
  2710  	if len(apiItems) == 0 || apiItems[0] != apc.AdminJoin {
  2711  		p.writeErrURL(w, r)
  2712  		return
  2713  	}
  2714  	if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil {
  2715  		return
  2716  	}
  2717  	if !p.keepalive.paused() {
  2718  		nlog.Warningf("%s: keepalive is already active - proceeding to resume (and reset) anyway", p)
  2719  	}
  2720  	p.keepalive.ctrl(kaResumeMsg)
  2721  	body, err := cmn.ReadBytes(r)
  2722  	if err != nil {
  2723  		p.writeErr(w, r, err)
  2724  		return
  2725  	}
  2726  	caller := r.Header.Get(apc.HdrCallerName)
  2727  	if err := p.recvCluMetaBytes(apc.ActAdminJoinProxy, body, caller); err != nil {
  2728  		p.writeErr(w, r, err)
  2729  	}
  2730  }
  2731  
  2732  func (p *proxy) smapFromURL(baseURL string) (smap *smapX, err error) {
  2733  	cargs := allocCargs()
  2734  	{
  2735  		cargs.req = cmn.HreqArgs{
  2736  			Method: http.MethodGet,
  2737  			Base:   baseURL,
  2738  			Path:   apc.URLPathDae.S,
  2739  			Query:  url.Values{apc.QparamWhat: []string{apc.WhatSmap}},
  2740  		}
  2741  		cargs.timeout = apc.DefaultTimeout
  2742  		cargs.cresv = cresSM{} // -> smapX
  2743  	}
  2744  	res := p.call(cargs, p.owner.smap.get())
  2745  	if res.err != nil {
  2746  		err = res.errorf("failed to get Smap from %s", baseURL)
  2747  	} else {
  2748  		smap = res.v.(*smapX)
  2749  		if err = smap.validate(); err != nil {
  2750  			err = fmt.Errorf("%s: invalid %s from %s: %v", p, smap, baseURL, err)
  2751  			smap = nil
  2752  		}
  2753  	}
  2754  	freeCargs(cargs)
  2755  	freeCR(res)
  2756  	return
  2757  }
  2758  
  2759  // forceful primary change - is used when the original primary network is down
  2760  // for a while and the remained nodes selected a new primary. After the
  2761  // original primary is back it does not attach automatically to the new primary
  2762  // and the cluster gets into split-brain mode. This request makes original
  2763  // primary connect to the new primary
  2764  func (p *proxy) forcefulJoin(w http.ResponseWriter, r *http.Request, proxyID string) {
  2765  	newPrimaryURL := r.URL.Query().Get(apc.QparamPrimaryCandidate)
  2766  	nlog.Infof("%s: force new primary %s (URL: %s)", p, proxyID, newPrimaryURL)
  2767  
  2768  	if p.SID() == proxyID {
  2769  		nlog.Warningf("%s is already primary", p)
  2770  		return
  2771  	}
  2772  	smap := p.owner.smap.get()
  2773  	psi := smap.GetProxy(proxyID)
  2774  	if psi == nil && newPrimaryURL == "" {
  2775  		err := &errNodeNotFound{"failed to find new primary", proxyID, p.si, smap}
  2776  		p.writeErr(w, r, err, http.StatusNotFound)
  2777  		return
  2778  	}
  2779  	if newPrimaryURL == "" {
  2780  		newPrimaryURL = psi.ControlNet.URL
  2781  	}
  2782  	if newPrimaryURL == "" {
  2783  		err := &errNodeNotFound{"failed to get new primary's direct URL", proxyID, p.si, smap}
  2784  		p.writeErr(w, r, err)
  2785  		return
  2786  	}
  2787  	newSmap, err := p.smapFromURL(newPrimaryURL)
  2788  	if err != nil {
  2789  		p.writeErr(w, r, err)
  2790  		return
  2791  	}
  2792  	primary := newSmap.Primary
  2793  	if proxyID != primary.ID() {
  2794  		p.writeErrf(w, r, "%s: proxy %s is not the primary, current %s", p.si, proxyID, newSmap.pp())
  2795  		return
  2796  	}
  2797  
  2798  	p.metasyncer.becomeNonPrimary() // metasync to stop syncing and cancel all pending requests
  2799  	p.owner.smap.put(newSmap)
  2800  	res := p.regTo(primary.ControlNet.URL, primary, apc.DefaultTimeout, nil, nil, false /*keepalive*/)
  2801  	if res.err != nil {
  2802  		p.writeErr(w, r, res.toErr())
  2803  	}
  2804  }
  2805  
  2806  func (p *proxy) daeSetPrimary(w http.ResponseWriter, r *http.Request) {
  2807  	apiItems, err := p.parseURL(w, r, apc.URLPathDae.L, 2, false)
  2808  	if err != nil {
  2809  		return
  2810  	}
  2811  	proxyID := apiItems[1]
  2812  	query := r.URL.Query()
  2813  	force := cos.IsParseBool(query.Get(apc.QparamForce))
  2814  
  2815  	// force primary change
  2816  	if force && apiItems[0] == apc.Proxy {
  2817  		if smap := p.owner.smap.get(); !smap.isPrimary(p.si) {
  2818  			p.writeErr(w, r, newErrNotPrimary(p.si, smap))
  2819  		}
  2820  		p.forcefulJoin(w, r, proxyID)
  2821  		return
  2822  	}
  2823  	prepare, err := cos.ParseBool(query.Get(apc.QparamPrepare))
  2824  	if err != nil {
  2825  		p.writeErrf(w, r, "failed to parse URL query %q: %v", apc.QparamPrepare, err)
  2826  		return
  2827  	}
  2828  	if p.owner.smap.get().isPrimary(p.si) {
  2829  		p.writeErrf(w, r, "%s: am PRIMARY, expecting '/v1/cluster/...' when designating a new one", p)
  2830  		return
  2831  	}
  2832  	if prepare {
  2833  		var cluMeta cluMeta
  2834  		if err := cmn.ReadJSON(w, r, &cluMeta); err != nil {
  2835  			return
  2836  		}
  2837  		if err := p.recvCluMeta(&cluMeta, "set-primary", cluMeta.SI.String()); err != nil {
  2838  			p.writeErrf(w, r, "%s: failed to receive clu-meta: %v", p, err)
  2839  			return
  2840  		}
  2841  	}
  2842  
  2843  	// self
  2844  	if p.SID() == proxyID {
  2845  		smap := p.owner.smap.get()
  2846  		if smap.GetActiveNode(proxyID) == nil {
  2847  			p.writeErrf(w, r, "%s: in maintenance or decommissioned", p)
  2848  			return
  2849  		}
  2850  		if !prepare {
  2851  			p.becomeNewPrimary("")
  2852  		}
  2853  		return
  2854  	}
  2855  
  2856  	// other
  2857  	smap := p.owner.smap.get()
  2858  	psi := smap.GetProxy(proxyID)
  2859  	if psi == nil {
  2860  		err := &errNodeNotFound{"cannot set new primary", proxyID, p.si, smap}
  2861  		p.writeErr(w, r, err)
  2862  		return
  2863  	}
  2864  	if prepare {
  2865  		if cmn.Rom.FastV(4, cos.SmoduleAIS) {
  2866  			nlog.Infoln("Preparation step: do nothing")
  2867  		}
  2868  		return
  2869  	}
  2870  	ctx := &smapModifier{pre: func(_ *smapModifier, clone *smapX) error { clone.Primary = psi; return nil }}
  2871  	err = p.owner.smap.modify(ctx)
  2872  	debug.AssertNoErr(err)
  2873  }
  2874  
  2875  func (p *proxy) becomeNewPrimary(proxyIDToRemove string) {
  2876  	ctx := &smapModifier{
  2877  		pre:   p._becomePre,
  2878  		final: p._becomeFinal,
  2879  		sid:   proxyIDToRemove,
  2880  	}
  2881  	err := p.owner.smap.modify(ctx)
  2882  	cos.AssertNoErr(err)
  2883  }
  2884  
  2885  func (p *proxy) _becomePre(ctx *smapModifier, clone *smapX) error {
  2886  	if !clone.isPresent(p.si) {
  2887  		cos.Assertf(false, "%s must always be present in the %s", p.si, clone.pp())
  2888  	}
  2889  	if ctx.sid != "" && clone.GetNode(ctx.sid) != nil {
  2890  		// decision is made: going ahead to remove
  2891  		nlog.Infof("%s: removing failed primary %s", p, ctx.sid)
  2892  		clone.delProxy(ctx.sid)
  2893  
  2894  		// Remove reverse proxy entry for the node.
  2895  		p.rproxy.nodes.Delete(ctx.sid)
  2896  	}
  2897  
  2898  	clone.Primary = clone.GetProxy(p.SID())
  2899  	clone.Version += 100
  2900  	clone.staffIC()
  2901  	return nil
  2902  }
  2903  
  2904  func (p *proxy) _becomeFinal(ctx *smapModifier, clone *smapX) {
  2905  	var (
  2906  		bmd   = p.owner.bmd.get()
  2907  		rmd   = p.owner.rmd.get()
  2908  		msg   = p.newAmsgStr(apc.ActNewPrimary, bmd)
  2909  		pairs = []revsPair{{clone, msg}, {bmd, msg}, {rmd, msg}}
  2910  	)
  2911  	nlog.Infof("%s: distributing (%s, %s, %s) with newly elected primary (self)", p, clone, bmd, rmd)
  2912  	config, err := p.ensureConfigURLs()
  2913  	if err != nil {
  2914  		nlog.Errorln(err)
  2915  	}
  2916  	if config != nil {
  2917  		pairs = append(pairs, revsPair{config, msg})
  2918  		nlog.Infof("%s: plus %s", p, config)
  2919  	}
  2920  	etl := p.owner.etl.get()
  2921  	if etl != nil && etl.version() > 0 {
  2922  		pairs = append(pairs, revsPair{etl, msg})
  2923  		nlog.Infof("%s: plus %s", p, etl)
  2924  	}
  2925  	// metasync
  2926  	debug.Assert(clone._sgl != nil)
  2927  	_ = p.metasyncer.sync(pairs...)
  2928  
  2929  	// synchronize IC tables
  2930  	p.syncNewICOwners(ctx.smap, clone)
  2931  }
  2932  
  2933  func (p *proxy) ensureConfigURLs() (config *globalConfig, err error) {
  2934  	config, err = p.owner.config.modify(&configModifier{pre: p._configURLs})
  2935  	if err != nil {
  2936  		err = cmn.NewErrFailedTo(p, "update config (primary, original, discovery) URLs", config, err)
  2937  	}
  2938  	return config, err
  2939  }
  2940  
  2941  // using cmn.NetIntraControl network for all three: PrimaryURL, OriginalURL, and DiscoveryURL
  2942  func (p *proxy) _configURLs(_ *configModifier, clone *globalConfig) (updated bool, _ error) {
  2943  	smap := p.owner.smap.get()
  2944  	debug.Assert(smap.isPrimary(p.si))
  2945  
  2946  	if prim := smap.Primary.URL(cmn.NetIntraControl); clone.Proxy.PrimaryURL != prim {
  2947  		clone.Proxy.PrimaryURL = prim
  2948  		updated = true
  2949  	}
  2950  	orig, disc := smap.configURLsIC(clone.Proxy.OriginalURL, clone.Proxy.DiscoveryURL)
  2951  	if orig != "" && orig != clone.Proxy.OriginalURL {
  2952  		clone.Proxy.OriginalURL = orig
  2953  		updated = true
  2954  	}
  2955  	if disc != "" && disc != clone.Proxy.DiscoveryURL {
  2956  		clone.Proxy.DiscoveryURL = disc
  2957  		updated = true
  2958  	}
  2959  	return updated, nil
  2960  }
  2961  
  2962  // [METHOD] /v1/sort
  2963  func (p *proxy) dsortHandler(w http.ResponseWriter, r *http.Request) {
  2964  	if !p.cluStartedWithRetry() {
  2965  		w.WriteHeader(http.StatusServiceUnavailable)
  2966  		return
  2967  	}
  2968  	if err := p.checkAccess(w, r, nil, apc.AceAdmin); err != nil {
  2969  		return
  2970  	}
  2971  	apiItems, err := cmn.ParseURL(r.URL.Path, apc.URLPathdSort.L, 0, true)
  2972  	if err != nil {
  2973  		p.writeErrURL(w, r)
  2974  		return
  2975  	}
  2976  
  2977  	switch r.Method {
  2978  	case http.MethodPost:
  2979  		// - validate request, check input_bck and output_bck
  2980  		// - start dsort
  2981  		body, err := io.ReadAll(r.Body)
  2982  		if err != nil {
  2983  			p.writeErrStatusf(w, r, http.StatusInternalServerError, "failed to receive dsort request: %v", err)
  2984  			return
  2985  		}
  2986  		rs := &dsort.RequestSpec{}
  2987  		if err := jsoniter.Unmarshal(body, rs); err != nil {
  2988  			err = fmt.Errorf(cmn.FmtErrUnmarshal, p, "dsort request", cos.BHead(body), err)
  2989  			p.writeErr(w, r, err)
  2990  			return
  2991  		}
  2992  		parsc, err := rs.ParseCtx()
  2993  		if err != nil {
  2994  			p.writeErr(w, r, err)
  2995  			return
  2996  		}
  2997  		bck := meta.CloneBck(&parsc.InputBck)
  2998  		args := bctx{p: p, w: w, r: r, bck: bck, perms: apc.AceObjLIST | apc.AceGET}
  2999  		if _, err = args.initAndTry(); err != nil {
  3000  			return
  3001  		}
  3002  		if !parsc.OutputBck.Equal(&parsc.InputBck) {
  3003  			bckTo := meta.CloneBck(&parsc.OutputBck)
  3004  			bckTo, ecode, err := p.initBckTo(w, r, nil /*query*/, bckTo)
  3005  			if err != nil {
  3006  				return
  3007  			}
  3008  			if ecode == http.StatusNotFound {
  3009  				if err := p.checkAccess(w, r, nil, apc.AceCreateBucket); err != nil {
  3010  					return
  3011  				}
  3012  				naction := "dsort-create-output-bck"
  3013  				warnfmt := "%s: %screate 'output_bck' %s with the 'input_bck' (%s) props"
  3014  				if p.forwardCP(w, r, nil /*msg*/, naction, body /*orig body*/) { // to create
  3015  					return
  3016  				}
  3017  				ctx := &bmdModifier{
  3018  					pre:   bmodCpProps,
  3019  					final: p.bmodSync,
  3020  					msg:   &apc.ActMsg{Action: naction},
  3021  					txnID: "",
  3022  					bcks:  []*meta.Bck{bck, bckTo},
  3023  					wait:  true,
  3024  				}
  3025  				if _, err = p.owner.bmd.modify(ctx); err != nil {
  3026  					debug.AssertNoErr(err)
  3027  					err = fmt.Errorf(warnfmt+": %w", p, "failed to ", bckTo, bck, err)
  3028  					p.writeErr(w, r, err)
  3029  					return
  3030  				}
  3031  				nlog.Warningf(warnfmt, p, "", bckTo, bck)
  3032  			}
  3033  		}
  3034  		dsort.PstartHandler(w, r, parsc)
  3035  	case http.MethodGet:
  3036  		dsort.PgetHandler(w, r)
  3037  	case http.MethodDelete:
  3038  		if len(apiItems) == 1 && apiItems[0] == apc.Abort {
  3039  			dsort.PabortHandler(w, r)
  3040  		} else if len(apiItems) == 0 {
  3041  			dsort.PremoveHandler(w, r)
  3042  		} else {
  3043  			p.writeErrURL(w, r)
  3044  		}
  3045  	default:
  3046  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodPost)
  3047  	}
  3048  }
  3049  
  3050  func (p *proxy) rootHandler(w http.ResponseWriter, r *http.Request) {
  3051  	const fs3 = "/" + apc.S3
  3052  	if !p.cluStartedWithRetry() {
  3053  		w.WriteHeader(http.StatusServiceUnavailable)
  3054  		return
  3055  	}
  3056  
  3057  	// by default, s3 is serviced at `/s3`
  3058  	// with `/` root reserved for vanilla http locations via ht:// mechanism
  3059  	if !cmn.Rom.Features().IsSet(feat.S3APIviaRoot) {
  3060  		p.htHandler(w, r)
  3061  		return
  3062  	}
  3063  
  3064  	// prepend /s3 and handle
  3065  	switch {
  3066  	case r.URL.Path == "" || r.URL.Path == "/":
  3067  		r.URL.Path = fs3
  3068  	case r.URL.Path[0] == '/':
  3069  		r.URL.Path = fs3 + r.URL.Path
  3070  	default:
  3071  		r.URL.Path = fs3 + "/" + r.URL.Path
  3072  	}
  3073  	p.s3Handler(w, r)
  3074  }
  3075  
  3076  // GET | HEAD vanilla http(s) location via `ht://` bucket with the corresponding `OrigURLBck`
  3077  func (p *proxy) htHandler(w http.ResponseWriter, r *http.Request) {
  3078  	if r.URL.Scheme == "" {
  3079  		p.writeErrURL(w, r)
  3080  		return
  3081  	}
  3082  	baseURL := r.URL.Scheme + "://" + r.URL.Host
  3083  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
  3084  		nlog.Infof("[HTTP CLOUD] RevProxy handler for: %s -> %s", baseURL, r.URL.Path)
  3085  	}
  3086  	if r.Method == http.MethodGet || r.Method == http.MethodHead {
  3087  		// bck.IsHTTP()
  3088  		hbo := cmn.NewHTTPObj(r.URL)
  3089  		q := r.URL.Query()
  3090  		q.Set(apc.QparamOrigURL, r.URL.String())
  3091  		q.Set(apc.QparamProvider, apc.HTTP)
  3092  		r.URL.Path = apc.URLPathObjects.Join(hbo.Bck.Name, hbo.ObjName)
  3093  		r.URL.RawQuery = q.Encode()
  3094  		if r.Method == http.MethodGet {
  3095  			p.httpobjget(w, r, hbo.OrigURLBck)
  3096  		} else {
  3097  			p.httpobjhead(w, r, hbo.OrigURLBck)
  3098  		}
  3099  		return
  3100  	}
  3101  	p.writeErrf(w, r, "%q provider doesn't support %q", apc.HTTP, r.Method)
  3102  }
  3103  
  3104  //
  3105  // metasync Rx
  3106  //
  3107  
  3108  // compare w/ t.receiveConfig
  3109  func (p *proxy) receiveConfig(newConfig *globalConfig, msg *aisMsg, payload msPayload, caller string) (err error) {
  3110  	oldConfig := cmn.GCO.Get()
  3111  	logmsync(oldConfig.Version, newConfig, msg, caller)
  3112  
  3113  	p.owner.config.Lock()
  3114  	err = p._recvCfg(newConfig, payload)
  3115  	p.owner.config.Unlock()
  3116  	if err != nil {
  3117  		return
  3118  	}
  3119  
  3120  	if !p.NodeStarted() {
  3121  		if msg.Action == apc.ActAttachRemAis || msg.Action == apc.ActDetachRemAis {
  3122  			nlog.Warningf("%s: cannot handle %s (%s => %s) - starting up...", p, msg, oldConfig, newConfig)
  3123  		}
  3124  		return
  3125  	}
  3126  
  3127  	if msg.Action != apc.ActAttachRemAis && msg.Action != apc.ActDetachRemAis &&
  3128  		newConfig.Backend.EqualRemAIS(&oldConfig.Backend, p.String()) {
  3129  		return // nothing to do
  3130  	}
  3131  
  3132  	go p._remais(&newConfig.ClusterConfig, false)
  3133  	return
  3134  }
  3135  
  3136  // refresh local p.remais cache via intra-cluster call to a random target
  3137  func (p *proxy) _remais(newConfig *cmn.ClusterConfig, blocking bool) {
  3138  	const maxretries = 5
  3139  	if !p.remais.in.CAS(false, true) {
  3140  		return
  3141  	}
  3142  	var (
  3143  		sleep      = newConfig.Timeout.CplaneOperation.D()
  3144  		retries    = maxretries
  3145  		over, nver int64
  3146  	)
  3147  	if blocking {
  3148  		retries = 1
  3149  	} else {
  3150  		maxsleep := newConfig.Timeout.MaxKeepalive.D()
  3151  		if uptime := p.keepalive.cluUptime(mono.NanoTime()); uptime < maxsleep {
  3152  			sleep = 2 * maxsleep
  3153  		}
  3154  	}
  3155  	for ; retries > 0; retries-- {
  3156  		time.Sleep(sleep)
  3157  		all, err := p.getRemAisVec(false /*refresh*/)
  3158  		if err != nil {
  3159  			if retries < maxretries {
  3160  				nlog.Errorf("%s: failed to get remais (%d attempts)", p, retries-1)
  3161  			}
  3162  			continue
  3163  		}
  3164  		p.remais.mu.Lock()
  3165  		if over <= 0 {
  3166  			over = p.remais.Ver
  3167  		}
  3168  		if p.remais.Ver < all.Ver {
  3169  			// keep old/detached clusters to support access to existing ("cached") buckets
  3170  			// i.e., the ability to resolve remote alias to Ns.UUID (see p.a2u)
  3171  			for _, a := range p.remais.RemAisVec.A {
  3172  				var found bool
  3173  				for _, b := range p.remais.old {
  3174  					if b.UUID == a.UUID {
  3175  						*b = *a
  3176  						found = true
  3177  						break
  3178  					}
  3179  					if b.Alias == a.Alias {
  3180  						nlog.Errorf("duplicated remais alias: (%q, %q) vs (%q, %q)", a.UUID, a.Alias, b.UUID, b.Alias)
  3181  					}
  3182  				}
  3183  				if !found {
  3184  					p.remais.old = append(p.remais.old, a)
  3185  				}
  3186  			}
  3187  
  3188  			p.remais.RemAisVec = *all
  3189  			nver = p.remais.Ver
  3190  			p.remais.mu.Unlock()
  3191  			break
  3192  		}
  3193  		p.remais.mu.Unlock()
  3194  		nlog.Errorf("%s: retrying remais ver=%d (%d attempts)", p, all.Ver, retries-1)
  3195  		sleep = newConfig.Timeout.CplaneOperation.D()
  3196  	}
  3197  
  3198  	p.remais.in.Store(false)
  3199  	nlog.Infof("%s: remais v%d => v%d", p, over, nver)
  3200  }
  3201  
  3202  func (p *proxy) receiveRMD(newRMD *rebMD, msg *aisMsg, caller string) (err error) {
  3203  	rmd := p.owner.rmd.get()
  3204  	logmsync(rmd.Version, newRMD, msg, caller)
  3205  
  3206  	p.owner.rmd.Lock()
  3207  	rmd = p.owner.rmd.get()
  3208  	if newRMD.version() <= rmd.version() {
  3209  		p.owner.rmd.Unlock()
  3210  		if newRMD.version() < rmd.version() {
  3211  			err = newErrDowngrade(p.si, rmd.String(), newRMD.String())
  3212  		}
  3213  		return
  3214  	}
  3215  	p.owner.rmd.put(newRMD)
  3216  	err = p.owner.rmd.persist(newRMD)
  3217  	debug.AssertNoErr(err)
  3218  	p.owner.rmd.Unlock()
  3219  
  3220  	// Register `nl` for rebalance/resilver
  3221  	smap := p.owner.smap.get()
  3222  	if smap.IsIC(p.si) && smap.CountActiveTs() > 0 && (smap.IsPrimary(p.si) || p.ClusterStarted()) {
  3223  		nl := xact.NewXactNL(xact.RebID2S(newRMD.Version), apc.ActRebalance, &smap.Smap, nil)
  3224  		nl.SetOwner(equalIC)
  3225  		err := p.notifs.add(nl)
  3226  		debug.AssertNoErr(err)
  3227  
  3228  		if newRMD.Resilver != "" {
  3229  			nl = xact.NewXactNL(newRMD.Resilver, apc.ActResilver, &smap.Smap, nil)
  3230  			nl.SetOwner(equalIC)
  3231  			err := p.notifs.add(nl)
  3232  			debug.AssertNoErr(err)
  3233  		}
  3234  	}
  3235  	return
  3236  }
  3237  
  3238  func (p *proxy) smapOnUpdate(newSmap, oldSmap *smapX, nfl, ofl cos.BitFlags) {
  3239  	// When some node was removed from the cluster we need to clean up the
  3240  	// reverse proxy structure.
  3241  	p.rproxy.nodes.Range(func(key, _ any) bool {
  3242  		nodeID := key.(string)
  3243  		if oldSmap.GetNode(nodeID) != nil && newSmap.GetNode(nodeID) == nil {
  3244  			p.rproxy.nodes.Delete(nodeID)
  3245  		}
  3246  		return true
  3247  	})
  3248  	p.syncNewICOwners(oldSmap, newSmap)
  3249  
  3250  	p.htrun.smapUpdatedCB(newSmap, oldSmap, nfl, ofl)
  3251  }
  3252  
  3253  func (p *proxy) receiveBMD(newBMD *bucketMD, msg *aisMsg, payload msPayload, caller string) (err error) {
  3254  	bmd := p.owner.bmd.get()
  3255  	logmsync(bmd.Version, newBMD, msg, caller)
  3256  
  3257  	p.owner.bmd.Lock()
  3258  	bmd = p.owner.bmd.get()
  3259  	if err = bmd.validateUUID(newBMD, p.si, nil, caller); err != nil {
  3260  		cos.Assert(!p.owner.smap.get().isPrimary(p.si))
  3261  		// cluster integrity error: making exception for non-primary proxies
  3262  		nlog.Errorf("%s (non-primary): %v - proceeding to override BMD", p, err)
  3263  	} else if newBMD.version() <= bmd.version() {
  3264  		p.owner.bmd.Unlock()
  3265  		return newErrDowngrade(p.si, bmd.String(), newBMD.String())
  3266  	}
  3267  	err = p.owner.bmd.putPersist(newBMD, payload)
  3268  	debug.AssertNoErr(err)
  3269  	p.owner.bmd.Unlock()
  3270  	return
  3271  }
  3272  
  3273  // getDaemonInfo queries osi for its daemon info and returns it.
  3274  func (p *proxy) _getSI(osi *meta.Snode) (si *meta.Snode, err error) {
  3275  	cargs := allocCargs()
  3276  	{
  3277  		cargs.si = osi
  3278  		cargs.req = cmn.HreqArgs{
  3279  			Method: http.MethodGet,
  3280  			Path:   apc.URLPathDae.S,
  3281  			Query:  url.Values{apc.QparamWhat: []string{apc.WhatSnode}},
  3282  		}
  3283  		cargs.timeout = cmn.Rom.CplaneOperation()
  3284  		cargs.cresv = cresND{} // -> meta.Snode
  3285  	}
  3286  	res := p.call(cargs, p.owner.smap.get())
  3287  	if res.err != nil {
  3288  		err = res.err
  3289  	} else {
  3290  		si = res.v.(*meta.Snode)
  3291  	}
  3292  	freeCargs(cargs)
  3293  	freeCR(res)
  3294  	return
  3295  }
  3296  
  3297  func (p *proxy) headRemoteBck(bck *cmn.Bck, q url.Values) (header http.Header, statusCode int, err error) {
  3298  	var (
  3299  		tsi  *meta.Snode
  3300  		path = apc.URLPathBuckets.Join(bck.Name)
  3301  		smap = p.owner.smap.get()
  3302  	)
  3303  	if tsi, err = smap.GetRandTarget(); err != nil {
  3304  		return
  3305  	}
  3306  	if bck.IsCloud() {
  3307  		config := cmn.GCO.Get()
  3308  		if config.Backend.Get(bck.Provider) == nil {
  3309  			err = &cmn.ErrMissingBackend{Provider: bck.Provider}
  3310  			statusCode = http.StatusNotFound
  3311  			err = cmn.NewErrFailedTo(p, "lookup Cloud bucket", bck, err, statusCode)
  3312  			return
  3313  		}
  3314  	}
  3315  	q = bck.AddToQuery(q)
  3316  	cargs := allocCargs()
  3317  	{
  3318  		cargs.si = tsi
  3319  		cargs.req = cmn.HreqArgs{Method: http.MethodHead, Path: path, Query: q}
  3320  		cargs.timeout = apc.DefaultTimeout
  3321  	}
  3322  	res := p.call(cargs, smap)
  3323  	if res.status == http.StatusNotFound {
  3324  		err = cmn.NewErrRemoteBckNotFound(bck)
  3325  	} else if res.status == http.StatusGone {
  3326  		err = cmn.NewErrRemoteBckOffline(bck)
  3327  	} else {
  3328  		err = res.err
  3329  		header = res.header
  3330  	}
  3331  	statusCode = res.status
  3332  	freeCargs(cargs)
  3333  	freeCR(res)
  3334  	return
  3335  }
  3336  
  3337  ////////////////
  3338  // misc utils //
  3339  ////////////////
  3340  
  3341  func resolveUUIDBMD(bmds bmds) (*bucketMD, error) {
  3342  	var (
  3343  		mlist = make(map[string][]cluMeta) // uuid => list(targetRegMeta)
  3344  		maxor = make(map[string]*bucketMD) // uuid => max-ver BMD
  3345  	)
  3346  	// results => (mlist, maxor)
  3347  	for si, bmd := range bmds {
  3348  		if bmd.Version == 0 {
  3349  			continue
  3350  		}
  3351  		mlist[bmd.UUID] = append(mlist[bmd.UUID], cluMeta{BMD: bmd, SI: si})
  3352  
  3353  		if rbmd, ok := maxor[bmd.UUID]; !ok {
  3354  			maxor[bmd.UUID] = bmd
  3355  		} else if rbmd.Version < bmd.Version {
  3356  			maxor[bmd.UUID] = bmd
  3357  		}
  3358  	}
  3359  	if len(maxor) == 0 {
  3360  		return nil, errNoBMD
  3361  	}
  3362  	// by simple majority
  3363  	uuid, l := "", 0
  3364  	for u, lst := range mlist {
  3365  		if l < len(lst) {
  3366  			uuid, l = u, len(lst)
  3367  		}
  3368  	}
  3369  	for u, lst := range mlist {
  3370  		if l == len(lst) && u != uuid {
  3371  			s := fmt.Sprintf("%s: BMDs have different UUIDs with no simple majority:\n%v",
  3372  				ciError(60), mlist)
  3373  			return nil, &errBmdUUIDSplit{s}
  3374  		}
  3375  	}
  3376  	var err error
  3377  	if len(mlist) > 1 {
  3378  		s := fmt.Sprintf("%s: BMDs have different UUIDs with simple majority: %s:\n%v",
  3379  			ciError(70), uuid, mlist)
  3380  		err = &errTgtBmdUUIDDiffer{s}
  3381  	}
  3382  	bmd := maxor[uuid]
  3383  	cos.Assert(cos.IsValidUUID(bmd.UUID))
  3384  	return bmd, err
  3385  }
  3386  
  3387  func ciError(num int) string {
  3388  	return fmt.Sprintf(cmn.FmtErrIntegrity, ciePrefix, num, cmn.GitHubHome)
  3389  }
  3390  
  3391  //
  3392  // termination(s)
  3393  //
  3394  
  3395  func (p *proxy) termKalive(action string) {
  3396  	p.keepalive.ctrl(kaSuspendMsg)
  3397  
  3398  	err := fmt.Errorf("%s: term-kalive by %q", p, action)
  3399  	xreg.AbortAll(err)
  3400  }
  3401  
  3402  func (p *proxy) shutdown(action string) {
  3403  	p.Stop(&errNoUnregister{action})
  3404  }
  3405  
  3406  func (p *proxy) decommission(action string, opts *apc.ActValRmNode) {
  3407  	cleanupConfigDir(p.Name(), opts.KeepInitialConfig)
  3408  	if !opts.NoShutdown {
  3409  		p.Stop(&errNoUnregister{action})
  3410  	}
  3411  }
  3412  
  3413  // and return from rungroup.run
  3414  func (p *proxy) Stop(err error) {
  3415  	var (
  3416  		s         = "Stopping " + p.String()
  3417  		smap      = p.owner.smap.get()
  3418  		isPrimary = smap.isPrimary(p.si)
  3419  		e, isEnu  = err.(*errNoUnregister)
  3420  	)
  3421  	if isPrimary {
  3422  		s += "(primary)"
  3423  		if !isEnu || e.action != apc.ActShutdownCluster {
  3424  			if npsi, err := smap.HrwProxy(p.SID()); err == nil {
  3425  				p.notifyCandidate(npsi, smap)
  3426  			}
  3427  		}
  3428  	}
  3429  	if err == nil {
  3430  		nlog.Infoln(s)
  3431  	} else {
  3432  		nlog.Warningf("%s: %v", s, err)
  3433  	}
  3434  	xreg.AbortAll(errors.New("p-stop"))
  3435  
  3436  	p.htrun.stop(&sync.WaitGroup{}, !isPrimary && smap.isValid() && !isEnu /*rmFromSmap*/)
  3437  }
  3438  
  3439  // on a best-effort basis, ignoring errors and bodyclose
  3440  func (p *proxy) notifyCandidate(npsi *meta.Snode, smap *smapX) {
  3441  	cargs := allocCargs()
  3442  	cargs.si = npsi
  3443  	cargs.req = cmn.HreqArgs{Method: http.MethodPut, Base: npsi.URL(cmn.NetIntraControl), Path: apc.URLPathVotePriStop.S}
  3444  	req, err := cargs.req.Req()
  3445  	if err != nil {
  3446  		return
  3447  	}
  3448  	req.Header.Set(apc.HdrCallerID, p.SID())
  3449  	req.Header.Set(apc.HdrCallerSmapVer, smap.vstr)
  3450  	g.client.control.Do(req) //nolint:bodyclose // exiting
  3451  }