github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/target.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  	"net"
    12  	"net/http"
    13  	"net/url"
    14  	"os"
    15  	"path/filepath"
    16  	"strconv"
    17  	"strings"
    18  	"sync"
    19  	"time"
    20  
    21  	"github.com/NVIDIA/aistore/ais/backend"
    22  	"github.com/NVIDIA/aistore/ais/s3"
    23  	"github.com/NVIDIA/aistore/api/apc"
    24  	"github.com/NVIDIA/aistore/cmn"
    25  	"github.com/NVIDIA/aistore/cmn/archive"
    26  	"github.com/NVIDIA/aistore/cmn/atomic"
    27  	"github.com/NVIDIA/aistore/cmn/cos"
    28  	"github.com/NVIDIA/aistore/cmn/debug"
    29  	"github.com/NVIDIA/aistore/cmn/feat"
    30  	"github.com/NVIDIA/aistore/cmn/fname"
    31  	"github.com/NVIDIA/aistore/cmn/kvdb"
    32  	"github.com/NVIDIA/aistore/cmn/mono"
    33  	"github.com/NVIDIA/aistore/cmn/nlog"
    34  	"github.com/NVIDIA/aistore/core"
    35  	"github.com/NVIDIA/aistore/core/meta"
    36  	"github.com/NVIDIA/aistore/ec"
    37  	"github.com/NVIDIA/aistore/ext/dload"
    38  	"github.com/NVIDIA/aistore/ext/dsort"
    39  	"github.com/NVIDIA/aistore/ext/etl"
    40  	"github.com/NVIDIA/aistore/fs"
    41  	"github.com/NVIDIA/aistore/fs/health"
    42  	"github.com/NVIDIA/aistore/memsys"
    43  	"github.com/NVIDIA/aistore/mirror"
    44  	"github.com/NVIDIA/aistore/reb"
    45  	"github.com/NVIDIA/aistore/res"
    46  	"github.com/NVIDIA/aistore/stats"
    47  	"github.com/NVIDIA/aistore/transport"
    48  	"github.com/NVIDIA/aistore/volume"
    49  	"github.com/NVIDIA/aistore/xact/xreg"
    50  	"github.com/NVIDIA/aistore/xact/xs"
    51  )
    52  
// dbName is the file name of the local key-value database; Run joins it with
// config.ConfigDir and opens the result via kvdb.NewBuntDB.
const dbName = "ais.db"

// NOTE(review): the original trailing note ("is expected to be bounded by") was left
// unfinished; presumably this is the upper bound on clock drift between cluster nodes - confirm.
const clusterClockDrift = 5 * time.Millisecond
    56  
type (
	// regstate groups the flags (and the lock) that relate to this target
	// joining the cluster vs. standing by.
	regstate struct {
		mu       sync.Mutex  // serialize metasync Rx, stopping, and transitioning to standby
		disabled atomic.Bool // true: standing by
		// set by Run when t.owner.bmd.init() returns true
		prevbmd atomic.Bool // special
	}
	// backends is keyed by backend provider (e.g., apc.AIS, apc.HTTP, apc.AWS)
	backends map[string]core.Backend
	// main
	target struct {
		htrun
		// populated by initBackends and _initBuiltin
		backend backends
		// filesystem health checker; created in init via health.NewFSHC
		fshc *health.FSHC
		// initialized in init ("subgroup of the daemon.rg rungroup")
		fsprg fsprungroup
		// created in Run via reb.New
		reb *reb.Reb
		// created in Run via res.New; used by runResilver
		res *res.Res
		// initialized in Run
		transactions transactions
		regstate     regstate
	}
)
    76  
// redial holds the parameters and the outcome of probing this target's own
// TCP endpoints (see acked, which is called by checkRestarted).
type redial struct {
	t *target
	// timeout of a single dial; acked uses max(2s, dialTout)
	dialTout time.Duration
	// bounds the per-address probing loop in acked
	totalTout time.Duration
	// set by acked: the address that kept accepting connections
	inUse string
}
    83  
// interface guard: compile-time checks that *target implements
// both cos.Runner and htext
var (
	_ cos.Runner = (*target)(nil)
	_ htext      = (*target)(nil)
)
    89  
    90  func (*target) Name() string { return apc.Target } // as cos.Runner
    91  
    92  // as htext
    93  func (*target) interruptedRestarted() (interrupted, restarted bool) {
    94  	interrupted = fs.MarkerExists(fname.RebalanceMarker)
    95  	restarted = fs.MarkerExists(fname.NodeRestartedPrev)
    96  	return
    97  }
    98  
    99  //
   100  // target
   101  //
   102  
   103  func (t *target) initBackends() {
   104  	config := cmn.GCO.Get()
   105  	aisbp := backend.NewAIS(t)
   106  	t.backend[apc.AIS] = aisbp                       // always present
   107  	t.backend[apc.HTTP] = backend.NewHTTP(t, config) // ditto
   108  
   109  	if aisConf := config.Backend.Get(apc.AIS); aisConf != nil {
   110  		if err := aisbp.Apply(aisConf, "init", &config.ClusterConfig); err != nil {
   111  			nlog.Errorln(t.String()+":", err, "- proceeding to start anyway")
   112  		} else {
   113  			nlog.Infoln(t.String()+": remote-ais", aisConf)
   114  		}
   115  	}
   116  
   117  	if err := t._initBuiltin(); err != nil {
   118  		cos.ExitLog(err)
   119  	}
   120  }
   121  
   122  // init built-in (via build tags) backends
   123  // - remote (e.g. cloud) backends  w/ empty stubs unless populated via build tags
   124  // - enabled/disabled via config.Backend
   125  func (t *target) _initBuiltin() error {
   126  	var (
   127  		enabled, disabled, notlinked []string
   128  		config                       = cmn.GCO.Get()
   129  	)
   130  	for provider := range apc.Providers {
   131  		var (
   132  			add core.Backend
   133  			err error
   134  		)
   135  		switch provider {
   136  		case apc.AWS:
   137  			add, err = backend.NewAWS(t)
   138  		case apc.GCP:
   139  			add, err = backend.NewGCP(t)
   140  		case apc.Azure:
   141  			add, err = backend.NewAzure(t)
   142  		case apc.AIS, apc.HTTP:
   143  			continue
   144  		default:
   145  			return fmt.Errorf(cmn.FmtErrUnknown, t, "backend provider", provider)
   146  		}
   147  		t.backend[provider] = add
   148  
   149  		configured := config.Backend.Get(provider) != nil
   150  		switch {
   151  		case err == nil && configured:
   152  			enabled = append(enabled, provider)
   153  		case err == nil && !configured:
   154  			disabled = append(disabled, provider)
   155  		case err != nil && configured:
   156  			notlinked = append(notlinked, provider)
   157  		}
   158  	}
   159  	switch {
   160  	case len(notlinked) > 0:
   161  		return fmt.Errorf("%s backends: enabled %v, disabled %v, missing in the build %v", t, enabled, disabled, notlinked)
   162  	case len(disabled) > 0:
   163  		nlog.Warningf("%s backends: enabled %v, disabled %v", t, enabled, disabled)
   164  	default:
   165  		nlog.Infoln(t.String(), "backends:", enabled)
   166  	}
   167  	return nil
   168  }
   169  
   170  func (t *target) aisbp() *backend.AISbp {
   171  	bendp := t.backend[apc.AIS]
   172  	return bendp.(*backend.AISbp)
   173  }
   174  
// init performs target-specific initialization, in the following order:
// node ID => short-ID and memsys => volume (mountpaths) => host IP => runners.
// The runners created here (stats, keepalive, stream collector, FSHC) and the target
// itself are added to the daemon.rg rungroup; a failure of ts.InitCDF is passed to cos.ExitLog.
func (t *target) init(config *cmn.Config) {
	t.initSnode(config)

	// (a) get node ID from command-line or env var (see envDaemonID())
	// (b) load existing node ID (replicated xattr at roots of respective mountpaths)
	// (c) generate a new one (genDaemonID())
	// - in that exact sequence
	tid, generated := initTID(config)
	if generated && len(config.FSP.Paths) > 0 {
		var recovered bool
		// in an unlikely event when losing all mountpath-stored IDs but still having a volume
		tid, recovered = volume.RecoverTID(tid, config.FSP.Paths)
		generated = !recovered

		// TODO: generated == true will not sit well with loading a local copy of Smap
		// later on during startup sequence - and not finding _this_ target in it
	}
	t.si.Init(tid, apc.Target)

	cos.InitShortID(t.si.Digest())

	memsys.Init(t.SID(), t.SID(), config)

	// new fs, check and add mountpaths
	vini := volume.IniCtx{
		UseLoopbacks:  daemon.cli.target.useLoopbackDevs,
		IgnoreMissing: daemon.cli.target.startWithLostMountpath,
		RandomTID:     generated,
	}
	newVol := volume.Init(t, config, vini)
	fs.ComputeDiskSize()

	// optionally override public (and maybe intra-cluster) network via environment
	t.initHostIP(config)
	daemon.rg.add(t)

	ts := stats.NewTrunner(t) // iostat below
	startedUp := ts.Init(t)   // reg common metrics (and target-only - via RegMetrics/regDiskMetrics below)
	daemon.rg.add(ts)
	t.statsT = ts // stats tracker

	// keepalive runner; gets the stats runner and its `startedUp`
	k := newTalive(t, ts, startedUp)
	daemon.rg.add(k)
	t.keepalive = k

	t.fsprg.init(t, newVol) // subgroup of the daemon.rg rungroup

	sc := transport.Init(ts, config) // init transport sub-system; new stream collector
	daemon.rg.add(sc)

	// filesystem health checker
	fshc := health.NewFSHC(t)
	daemon.rg.add(fshc)
	t.fshc = fshc

	if err := ts.InitCDF(); err != nil {
		cos.ExitLog(err)
	}
	fs.Clblk()
}
   233  
   234  func (t *target) initHostIP(config *cmn.Config) {
   235  	hostIP := os.Getenv("AIS_HOST_IP")
   236  	if hostIP == "" {
   237  		return
   238  	}
   239  	extAddr := net.ParseIP(hostIP)
   240  	cos.AssertMsg(extAddr != nil, "invalid public IP addr via 'AIS_HOST_IP' env: "+hostIP)
   241  
   242  	extPort := config.HostNet.Port
   243  	if portStr := os.Getenv("AIS_HOST_PORT"); portStr != "" {
   244  		portNum, err := cmn.ParsePort(portStr)
   245  		cos.AssertNoErr(err)
   246  		extPort = portNum
   247  	}
   248  	t.si.PubNet.Hostname = extAddr.String()
   249  	t.si.PubNet.Port = strconv.Itoa(extPort)
   250  	t.si.PubNet.URL = fmt.Sprintf("%s://%s:%d", config.Net.HTTP.Proto, extAddr.String(), extPort)
   251  
   252  	nlog.Infoln("AIS_HOST_IP:", hostIP, "pub:", t.si.URL(cmn.NetPublic))
   253  
   254  	// applies to intra-cluster networks unless separately defined
   255  	if !config.HostNet.UseIntraControl {
   256  		t.si.ControlNet = t.si.PubNet
   257  	}
   258  	if !config.HostNet.UseIntraData {
   259  		t.si.DataNet = t.si.PubNet
   260  	}
   261  }
   262  
   263  func initTID(config *cmn.Config) (tid string, generated bool) {
   264  	if tid = envDaemonID(apc.Target); tid != "" {
   265  		if err := cos.ValidateDaemonID(tid); err != nil {
   266  			nlog.Errorln("Warning:", err)
   267  		}
   268  		return tid, false
   269  	}
   270  
   271  	var err error
   272  	if tid, err = fs.LoadNodeID(config.FSP.Paths); err != nil {
   273  		cos.ExitLog(err) // FATAL
   274  	}
   275  	if tid != "" {
   276  		return tid, false
   277  	}
   278  
   279  	// this target: generate random ID
   280  	tid = genDaemonID(apc.Target, config)
   281  	err = cos.ValidateDaemonID(tid)
   282  	debug.AssertNoErr(err)
   283  	nlog.Infoln(meta.Tname(tid) + ": ID randomly generated")
   284  	return tid, true
   285  }
   286  
   287  func regDiskMetrics(node *meta.Snode, tstats *stats.Trunner, mpi fs.MPI) {
   288  	for _, mi := range mpi {
   289  		for _, disk := range mi.Disks {
   290  			tstats.RegDiskMetrics(node, disk)
   291  		}
   292  	}
   293  }
   294  
// Run (as cos.Runner) executes the target's startup sequence and then runs its HTTP server:
//   - validate own Snode, init htrun and core, register disk and other metrics;
//   - check for restart and for a duplicate (overlapping) run - see checkRestarted;
//   - register content types, init BMD and ETL metadata owners, load (or create) Smap;
//   - either stand by or join the cluster (and keep polling in the background - see gojoin);
//   - init backends, kv database, transactions, rebalance and resilver, HTTP handlers,
//     EC, mirroring, xaction registry, dsort, and downloader;
//   - call t.htrun.run(config); once it returns: stop ETLs, close the kv database,
//     and remove the node-restarted marker.
//
// Returns the error from joining the cluster, from opening the kv database, or from
// t.htrun.run; a number of other failures are handed to cos.ExitLog.
func (t *target) Run() error {
	if err := t.si.Validate(); err != nil {
		cos.ExitLog(err)
	}
	config := cmn.GCO.Get()
	t.htrun.init(config)

	tstats := t.statsT.(*stats.Trunner)

	core.Tinit(t, tstats, true /*run hk*/)

	// metrics, disks first
	availablePaths, disabledPaths := fs.Get()
	if len(availablePaths) == 0 {
		cos.ExitLog(cmn.ErrNoMountpaths)
	}
	regDiskMetrics(t.si, tstats, availablePaths)
	regDiskMetrics(t.si, tstats, disabledPaths)
	t.statsT.RegMetrics(t.si) // + Prometheus, if configured

	fatalErr, writeErr := t.checkRestarted(config)
	if fatalErr != nil {
		cos.ExitLog(fatalErr)
	}
	if writeErr != nil {
		// non-fatal: log it surrounded by empty log lines
		nlog.Errorln("")
		nlog.Errorln(writeErr)
		nlog.Errorln("")
	}

	// register object type and workfile type
	fs.CSM.Reg(fs.ObjectType, &fs.ObjectContentResolver{})
	fs.CSM.Reg(fs.WorkfileType, &fs.WorkfileContentResolver{})

	// Init meta-owners and load local instances
	if prev := t.owner.bmd.init(); prev {
		t.regstate.prevbmd.Store(true)
	}
	t.owner.etl.init()

	// when the loaded Smap is not reliable start with a new one that contains only this target
	smap, reliable := t.loadSmap()
	if !reliable {
		smap = newSmap()
		smap.Tmap[t.SID()] = t.si // add self to initial temp smap
	} else {
		nlog.Infoln(t.String()+": loaded", smap.StringEx())
	}
	t.owner.smap.put(smap)

	if daemon.cli.target.standby {
		tstats.Standby(true)
		t.regstate.disabled.Store(true)
		nlog.Warningln(t.String(), "not joining - standing by")

		// see endStartupStandby()
	} else {
		// discover primary and join cluster (compare with manual `apc.AdminJoin`)
		if status, err := t.joinCluster(apc.ActSelfJoinTarget); err != nil {
			nlog.Errorf("%s failed to join cluster: %v(%d)", t, err, status)
			nlog.Errorln(t.String(), "terminating")
			return err
		}
		t.markNodeStarted()
		// the rest of the joining sequence runs in the background
		go t.gojoin(config)
	}

	t.initBackends()

	// local kv database; handed to dsort and dload below, closed upon exit
	db, err := kvdb.NewBuntDB(filepath.Join(config.ConfigDir, dbName))
	if err != nil {
		nlog.Errorln(t.String(), "failed to initialize kvdb:", err)
		return err
	}

	t.transactions.init(t)

	t.reb = reb.New(config)
	t.res = res.New()

	// register storage target's handler(s) and start listening
	t.initRecvHandlers()

	ec.Init()
	mirror.Init()

	xreg.RegWithHK()

	// resilver in the background if interrupted previously or if required
	marked := xreg.GetResilverMarked()
	if marked.Interrupted || daemon.resilver.required {
		go t.goresilver(marked.Interrupted)
	}

	dsort.Tinit(t.statsT, db, config)
	dload.Init(t.statsT, db, &config.Client)

	err = t.htrun.run(config)

	etl.StopAll()                              // stop all running ETLs if any
	cos.Close(db)                              // close kv db
	fs.RemoveMarker(fname.NodeRestartedMarker) // exit gracefully
	return err
}
   397  
// apart from minor (albeit subtle) differences between `t.joinCluster` vs `p.joinCluster`
// this method is otherwise identical to its proxy counterpart, p.gojoin (TODO: unify)
   400  func (t *target) gojoin(config *cmn.Config) {
   401  	smap := t.owner.smap.get()
   402  	cii := t.pollClusterStarted(config, smap.Primary)
   403  	if nlog.Stopping() {
   404  		return
   405  	}
   406  
   407  	if cii != nil {
   408  		// (primary changed)
   409  		primary := cii.Smap.Primary
   410  		if status, err := t.joinCluster(apc.ActSelfJoinTarget, primary.CtrlURL, primary.PubURL); err != nil {
   411  			nlog.Errorf(fmtFailedRejoin, t, err, status)
   412  			return
   413  		}
   414  	}
   415  	t.markClusterStarted()
   416  
   417  	if t.fsprg.newVol && !config.TestingEnv() {
   418  		config := cmn.GCO.BeginUpdate()
   419  		fspathsSave(config)
   420  	}
   421  	nlog.Infoln(t.String(), "is ready")
   422  }
   423  
   424  func (t *target) goresilver(interrupted bool) {
   425  	if interrupted {
   426  		nlog.Infoln("Resuming resilver...")
   427  	} else if daemon.resilver.required {
   428  		nlog.Infof("Starting resilver, reason: %q", daemon.resilver.reason)
   429  	}
   430  	t.runResilver(res.Args{}, nil /*wg*/)
   431  }
   432  
   433  func (t *target) runResilver(args res.Args, wg *sync.WaitGroup) {
   434  	// with no cluster-wide UUID it's a local run
   435  	if args.UUID == "" {
   436  		args.UUID = cos.GenUUID()
   437  		regMsg := xactRegMsg{UUID: args.UUID, Kind: apc.ActResilver, Srcs: []string{t.SID()}}
   438  		msg := t.newAmsgActVal(apc.ActRegGlobalXaction, regMsg)
   439  		t.bcastAsyncIC(msg)
   440  	}
   441  	if wg != nil {
   442  		wg.Done() // compare w/ xact.GoRunW(()
   443  	}
   444  	t.res.RunResilver(args)
   445  }
   446  
   447  func (t *target) endStartupStandby() (err error) {
   448  	smap := t.owner.smap.get()
   449  	if err = smap.validate(); err != nil {
   450  		return
   451  	}
   452  	daemon.cli.target.standby = false
   453  	t.markNodeStarted()
   454  	t.markClusterStarted()
   455  	t.regstate.disabled.Store(false)
   456  	tstats := t.statsT.(*stats.Trunner)
   457  	tstats.Standby(false)
   458  	nlog.Infof("%s enabled and joined (%s)", t, smap.StringEx())
   459  
   460  	config := cmn.GCO.Get()
   461  	if t.fsprg.newVol && !config.TestingEnv() {
   462  		config = cmn.GCO.BeginUpdate()
   463  		fspathsSave(config)
   464  	}
   465  	return
   466  }
   467  
   468  func (t *target) initRecvHandlers() {
   469  	networkHandlers := []networkHandler{
   470  		{r: apc.Buckets, h: t.bucketHandler, net: accessNetAll},
   471  		{r: apc.Objects, h: t.objectHandler, net: accessNetAll},
   472  		{r: apc.Daemon, h: t.daemonHandler, net: accessNetPublicControl},
   473  		{r: apc.Metasync, h: t.metasyncHandler, net: accessNetIntraControl},
   474  		{r: apc.Health, h: t.healthHandler, net: accessNetPublicControl},
   475  		{r: apc.Xactions, h: t.xactHandler, net: accessNetIntraControl},
   476  		{r: apc.EC, h: t.ecHandler, net: accessNetIntraData},
   477  		{r: apc.Vote, h: t.voteHandler, net: accessNetIntraControl},
   478  		{r: apc.Txn, h: t.txnHandler, net: accessNetIntraControl},
   479  		{r: apc.ObjStream, h: transport.RxAnyStream, net: accessControlData},
   480  
   481  		{r: apc.Download, h: t.downloadHandler, net: accessNetIntraControl},
   482  		{r: apc.Sort, h: dsort.TargetHandler, net: accessControlData},
   483  		{r: apc.ETL, h: t.etlHandler, net: accessNetAll},
   484  
   485  		{r: "/" + apc.S3, h: t.s3Handler, net: accessNetPublicData},
   486  		{r: "/", h: t.errURL, net: accessNetAll},
   487  	}
   488  	t.regNetHandlers(networkHandlers)
   489  }
   490  
   491  func (t *target) checkRestarted(config *cmn.Config) (fatalErr, writeErr error) {
   492  	if fs.MarkerExists(fname.NodeRestartedMarker) {
   493  		red := redial{t: t, dialTout: config.Timeout.CplaneOperation.D(), totalTout: config.Timeout.MaxKeepalive.D()}
   494  		if red.acked() {
   495  			fatalErr = fmt.Errorf("%s: %q is in use (duplicate or overlapping run?)", t, red.inUse)
   496  			return
   497  		}
   498  		t.statsT.Inc(stats.RestartCount)
   499  		fs.PersistMarker(fname.NodeRestartedPrev)
   500  	}
   501  	fatalErr, writeErr = fs.PersistMarker(fname.NodeRestartedMarker)
   502  	return
   503  }
   504  
   505  // NOTE in re 'node-restarted' scenario: the risk of "overlapping" aisnode run -
   506  // which'll fail shortly with "bind: address already in use" but not before
   507  // triggering (`NodeRestartedPrev` => GFN) sequence and stealing nlog symlinks
   508  // - this risk exists, and that's why we go extra length
   509  func (red *redial) acked() bool {
   510  	var (
   511  		err   error
   512  		tsi   = red.t.si
   513  		sleep = cos.ProbingFrequency(red.totalTout)
   514  		addrs = []string{tsi.PubNet.TCPEndpoint()}
   515  		once  bool
   516  	)
   517  	if ep := red.t.si.DataNet.TCPEndpoint(); ep != addrs[0] {
   518  		addrs = append(addrs, ep)
   519  	} else if ep := red.t.si.ControlNet.TCPEndpoint(); ep != addrs[0] {
   520  		addrs = append(addrs, ep)
   521  	}
   522  	for _, addr := range addrs {
   523  		for elapsed := time.Duration(0); elapsed < red.totalTout; elapsed += sleep {
   524  			_, err = net.DialTimeout("tcp4", addr, max(2*time.Second, red.dialTout))
   525  			if err != nil {
   526  				break
   527  			}
   528  			once = true
   529  			time.Sleep(sleep)
   530  			// could be shutting down
   531  		}
   532  		if !once {
   533  			return false
   534  		}
   535  		if err == nil {
   536  			if red.inUse == "" {
   537  				red.inUse = addr
   538  			}
   539  			return true
   540  		}
   541  		time.Sleep(sleep)
   542  	}
   543  	return false // got tcp synack at least once but not (getting it) any longer
   544  }
   545  
   546  //
   547  // http handlers
   548  //
   549  
   550  func (t *target) errURL(w http.ResponseWriter, r *http.Request) {
   551  	if r.URL.Scheme != "" {
   552  		t.writeErrURL(w, r)
   553  		return
   554  	}
   555  	path := r.URL.Path
   556  	if path != "" && path[0] == '/' {
   557  		path = path[1:]
   558  	}
   559  	split := strings.Split(path, "/")
   560  	// "easy URL"
   561  	if len(split) > 0 &&
   562  		(split[0] == apc.GSScheme || split[0] == apc.AZScheme || split[0] == apc.AISScheme) {
   563  		t.writeErrMsg(w, r, "trying to execute \"easy URL\" via AIS target? (hint: use proxy)")
   564  	} else {
   565  		t.writeErrURL(w, r)
   566  	}
   567  }
   568  
   569  // verb /v1/buckets
   570  func (t *target) bucketHandler(w http.ResponseWriter, r *http.Request) {
   571  	switch r.Method {
   572  	case http.MethodGet:
   573  		dpq := dpqAlloc()
   574  		t.httpbckget(w, r, dpq)
   575  		dpqFree(dpq)
   576  	case http.MethodDelete:
   577  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false)
   578  		t.httpbckdelete(w, r, apireq)
   579  		apiReqFree(apireq)
   580  	case http.MethodPost:
   581  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false)
   582  		t.httpbckpost(w, r, apireq)
   583  		apiReqFree(apireq)
   584  	case http.MethodHead:
   585  		apireq := apiReqAlloc(1, apc.URLPathBuckets.L, false)
   586  		t.httpbckhead(w, r, apireq)
   587  		apiReqFree(apireq)
   588  	default:
   589  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead, http.MethodPost)
   590  	}
   591  }
   592  
   593  // verb /v1/objects
   594  func (t *target) objectHandler(w http.ResponseWriter, r *http.Request) {
   595  	switch r.Method {
   596  	case http.MethodGet:
   597  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/)
   598  		t.httpobjget(w, r, apireq)
   599  		apiReqFree(apireq)
   600  	case http.MethodHead:
   601  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, false)
   602  		t.httpobjhead(w, r, apireq)
   603  		apiReqFree(apireq)
   604  	case http.MethodPut:
   605  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, true /*dpq*/)
   606  		if err := t.parseReq(w, r, apireq); err == nil {
   607  			lom := core.AllocLOM(apireq.items[1])
   608  			t.httpobjput(w, r, apireq, lom)
   609  			core.FreeLOM(lom)
   610  		}
   611  		apiReqFree(apireq)
   612  	case http.MethodDelete:
   613  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, false)
   614  		t.httpobjdelete(w, r, apireq)
   615  		apiReqFree(apireq)
   616  	case http.MethodPost:
   617  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, false /*useDpq*/)
   618  		t.httpobjpost(w, r, apireq)
   619  		apiReqFree(apireq)
   620  	case http.MethodPatch:
   621  		apireq := apiReqAlloc(2, apc.URLPathObjects.L, false)
   622  		t.httpobjpatch(w, r, apireq)
   623  		apiReqFree(apireq)
   624  	default:
   625  		cmn.WriteErr405(w, r, http.MethodDelete, http.MethodGet, http.MethodHead,
   626  			http.MethodPost, http.MethodPut)
   627  	}
   628  }
   629  
   630  // verb /v1/slices
// Non-public interface
   632  func (t *target) ecHandler(w http.ResponseWriter, r *http.Request) {
   633  	switch r.Method {
   634  	case http.MethodGet:
   635  		t.httpecget(w, r)
   636  	default:
   637  		cmn.WriteErr405(w, r, http.MethodGet)
   638  	}
   639  }
   640  
   641  //
   642  // httpobj* handlers
   643  //
   644  
   645  // GET /v1/objects/<bucket-name>/<object-name>
   646  //
   647  // Initially validates if the request is internal request (either from proxy
   648  // or target) and calls getObject.
   649  //
   650  // Checks if the object exists locally (if not, downloads it) and sends it back
   651  // If the bucket is in the Cloud one and ValidateWarmGet is enabled there is an extra
   652  // check whether the object exists locally. Version is checked as well if configured.
   653  func (t *target) httpobjget(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
   654  	err := t.parseReq(w, r, apireq)
   655  	if err != nil {
   656  		return
   657  	}
   658  	err = apireq.dpq.parse(r.URL.RawQuery)
   659  	if err != nil {
   660  		debug.AssertNoErr(err)
   661  		t.writeErr(w, r, err)
   662  		return
   663  	}
   664  	if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) {
   665  		if apireq.dpq.ptime == "" /*isRedirect*/ && t.isIntraCall(r.Header, false /*from primary*/) != nil {
   666  			t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)",
   667  				t.si, r.Method, r.RemoteAddr)
   668  			return
   669  		}
   670  	}
   671  
   672  	lom := core.AllocLOM(apireq.items[1])
   673  	lom, err = t.getObject(w, r, apireq.dpq, apireq.bck, lom)
   674  	if err != nil {
   675  		t._erris(w, r, apireq.dpq.silent, err, 0)
   676  	}
   677  	core.FreeLOM(lom)
   678  }
   679  
   680  func (t *target) getObject(w http.ResponseWriter, r *http.Request, dpq *dpq, bck *meta.Bck, lom *core.LOM) (*core.LOM, error) {
   681  	if err := lom.InitBck(bck.Bucket()); err != nil {
   682  		if cmn.IsErrRemoteBckNotFound(err) {
   683  			t.BMDVersionFixup(r)
   684  			err = lom.InitBck(bck.Bucket())
   685  		}
   686  		if err != nil {
   687  			return lom, err
   688  		}
   689  	}
   690  
   691  	// two special flows
   692  	if dpq.etlName != "" {
   693  		t.getETL(w, r, dpq.etlName, bck, lom.ObjName)
   694  		return lom, nil
   695  	}
   696  	if cos.IsParseBool(r.Header.Get(apc.HdrBlobDownload)) {
   697  		var msg apc.BlobMsg
   698  		if err := msg.FromHeader(r.Header); err != nil {
   699  			return lom, err
   700  		}
   701  
   702  		// NOTE: make a blocking call w/ simultaneous Tx
   703  		args := &core.BlobParams{
   704  			RspW: w,
   705  			Lom:  lom,
   706  			Msg:  &msg,
   707  		}
   708  		_, _, err := t.blobdl(args, nil /*oa*/)
   709  		return lom, err
   710  	}
   711  
   712  	// GET: regular | archive | range
   713  	goi := allocGOI()
   714  	{
   715  		goi.atime = time.Now().UnixNano()
   716  		goi.ltime = mono.NanoTime()
   717  		if dpq.ptime != "" {
   718  			if d := ptLatency(goi.atime, dpq.ptime, r.Header.Get(apc.HdrCallerIsPrimary)); d > 0 {
   719  				t.statsT.Add(stats.GetRedirLatency, d)
   720  			}
   721  		}
   722  		goi.t = t
   723  		goi.lom = lom
   724  		goi.dpq = dpq
   725  		goi.req = r
   726  		goi.w = w
   727  		goi.ctx = context.Background()
   728  		goi.ranges = byteRanges{Range: r.Header.Get(cos.HdrRange), Size: 0}
   729  		goi.latestVer = _validateWarmGet(goi.lom, dpq.latestVer) // apc.QparamLatestVer || versioning.*_warm_get
   730  	}
   731  	if dpq.isArch() {
   732  		if goi.ranges.Range != "" {
   733  			details := fmt.Sprintf("range: %s, arch query: %s", goi.ranges.Range, goi.dpq._archstr())
   734  			return lom, cmn.NewErrUnsupp("range-read archived content", details)
   735  		}
   736  		if dpq.arch.path != "" {
   737  			if strings.HasPrefix(dpq.arch.path, lom.ObjName) {
   738  				if rel, err := filepath.Rel(lom.ObjName, dpq.arch.path); err == nil {
   739  					dpq.arch.path = rel
   740  				}
   741  			}
   742  		}
   743  	}
   744  
   745  	// apc.QparamOrigURL
   746  	if bck.IsHTTP() {
   747  		originalURL := dpq.origURL
   748  		goi.ctx = context.WithValue(goi.ctx, cos.CtxOriginalURL, originalURL)
   749  	}
   750  
   751  	// do
   752  	if ecode, err := goi.getObject(); err != nil {
   753  		t.statsT.IncErr(stats.GetCount)
   754  
   755  		// handle right here, return nil
   756  		if err != errSendingResp {
   757  			if dpq.isS3 {
   758  				s3.WriteErr(w, r, err, ecode)
   759  			} else {
   760  				if ecode == http.StatusNotFound {
   761  					dpq.silent = true
   762  				}
   763  				t._erris(w, r, dpq.silent, err, ecode)
   764  			}
   765  		}
   766  	}
   767  	lom = goi.lom
   768  	freeGOI(goi)
   769  	return lom, nil
   770  }
   771  
   772  func _validateWarmGet(lom *core.LOM, latestVer bool /*apc.QparamLatestVer*/) bool {
   773  	switch {
   774  	case !lom.Bck().IsCloud() && !lom.Bck().IsRemoteAIS():
   775  		return false
   776  	case !latestVer:
   777  		return lom.VersionConf().ValidateWarmGet || lom.VersionConf().Sync // bucket prop
   778  	default:
   779  		return true
   780  	}
   781  }
   782  
   783  // err in silence
   784  func (t *target) _erris(w http.ResponseWriter, r *http.Request, silent bool /*apc.QparamSilent*/, err error, code int) {
   785  	if silent {
   786  		t.writeErr(w, r, err, code, Silent)
   787  	} else {
   788  		t.writeErr(w, r, err, code)
   789  	}
   790  }
   791  
   792  // PUT /v1/objects/bucket-name/object-name; does:
   793  // 1) append object 2) append to archive 3) PUT
   794  func (t *target) httpobjput(w http.ResponseWriter, r *http.Request, apireq *apiRequest, lom *core.LOM) {
   795  	var (
   796  		config  = cmn.GCO.Get()
   797  		started = time.Now().UnixNano()
   798  		t2tput  = isT2TPut(r.Header)
   799  	)
   800  	if !t.isValidObjname(w, r, lom.ObjName) {
   801  		return
   802  	}
   803  	if apireq.dpq.ptime == "" && !t2tput {
   804  		t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected or replicated", t.si, r.Method)
   805  		return
   806  	}
   807  	cs := fs.Cap()
   808  	if errCap := cs.Err(); errCap != nil || cs.PctMax > int32(config.Space.CleanupWM) {
   809  		cs = t.OOS(nil)
   810  		if cs.IsOOS() {
   811  			// fail this write
   812  			t.writeErr(w, r, errCap, http.StatusInsufficientStorage)
   813  			return
   814  		}
   815  	}
   816  
   817  	// init
   818  	if err := lom.InitBck(apireq.bck.Bucket()); err != nil {
   819  		if cmn.IsErrRemoteBckNotFound(err) {
   820  			t.BMDVersionFixup(r)
   821  			err = lom.InitBck(apireq.bck.Bucket())
   822  		}
   823  		if err != nil {
   824  			t.writeErr(w, r, err)
   825  			return
   826  		}
   827  	}
   828  
   829  	// load (maybe)
   830  	skipVC := lom.IsFeatureSet(feat.SkipVC) || apireq.dpq.skipVC
   831  	if !skipVC {
   832  		_ = lom.Load(true, false)
   833  	}
   834  
   835  	// do
   836  	var (
   837  		handle string
   838  		err    error
   839  		ecode  int
   840  	)
   841  	switch {
   842  	case apireq.dpq.arch.path != "": // apc.QparamArchpath
   843  		apireq.dpq.arch.mime, err = archive.MimeFQN(t.smm, apireq.dpq.arch.mime, lom.FQN)
   844  		if err != nil {
   845  			break
   846  		}
   847  		// do
   848  		lom.Lock(true)
   849  		ecode, err = t.putApndArch(r, lom, started, apireq.dpq)
   850  		lom.Unlock(true)
   851  	case apireq.dpq.apnd.ty != "": // apc.QparamAppendType
   852  		a := &apndOI{
   853  			started: started,
   854  			t:       t,
   855  			config:  config,
   856  			lom:     lom,
   857  			r:       r.Body,
   858  			op:      apireq.dpq.apnd.ty, // apc.QparamAppendType
   859  		}
   860  		if err := a.parse(apireq.dpq.apnd.hdl /*apc.QparamAppendHandle*/); err != nil {
   861  			t.writeErr(w, r, err)
   862  			return
   863  		}
   864  		handle, ecode, err = a.do(r)
   865  		if err == nil && handle != "" {
   866  			w.Header().Set(apc.HdrAppendHandle, handle)
   867  			return
   868  		}
   869  		t.statsT.IncErr(stats.AppendCount)
   870  	default:
   871  		poi := allocPOI()
   872  		{
   873  			poi.atime = started
   874  			if apireq.dpq.ptime != "" {
   875  				if d := ptLatency(poi.atime, apireq.dpq.ptime, r.Header.Get(apc.HdrCallerIsPrimary)); d > 0 {
   876  					t.statsT.Add(stats.PutRedirLatency, d)
   877  				}
   878  			}
   879  			poi.t = t
   880  			poi.lom = lom
   881  			poi.config = config
   882  			poi.skipVC = skipVC // feat.SkipVC || apc.QparamSkipVC
   883  			poi.restful = true
   884  			poi.t2t = t2tput
   885  		}
   886  		ecode, err = poi.do(w.Header(), r, apireq.dpq)
   887  		freePOI(poi)
   888  	}
   889  	if err != nil {
   890  		t.fsErr(err, lom.FQN)
   891  		t.writeErr(w, r, err, ecode)
   892  	}
   893  }
   894  
   895  // DELETE [ { action } ] /v1/objects/bucket-name/object-name
   896  func (t *target) httpobjdelete(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
   897  	var msg aisMsg
   898  	if err := readJSON(w, r, &msg); err != nil {
   899  		return
   900  	}
   901  	if err := t.parseReq(w, r, apireq); err != nil {
   902  		return
   903  	}
   904  	objName := apireq.items[1]
   905  	if !t.isValidObjname(w, r, objName) {
   906  		return
   907  	}
   908  	if isRedirect(apireq.query) == "" {
   909  		t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected", t.si, r.Method)
   910  		return
   911  	}
   912  
   913  	evict := msg.Action == apc.ActEvictObjects
   914  	lom := core.AllocLOM(objName)
   915  	if err := lom.InitBck(apireq.bck.Bucket()); err != nil {
   916  		t.writeErr(w, r, err)
   917  		core.FreeLOM(lom)
   918  		return
   919  	}
   920  
   921  	ecode, err := t.DeleteObject(lom, evict)
   922  	if err == nil && ecode == 0 {
   923  		// EC cleanup if EC is enabled
   924  		ec.ECM.CleanupObject(lom)
   925  	} else {
   926  		if ecode == http.StatusNotFound {
   927  			t.writeErrSilentf(w, r, http.StatusNotFound, "%s doesn't exist", lom.Cname())
   928  		} else {
   929  			t.writeErr(w, r, err, ecode)
   930  		}
   931  	}
   932  	core.FreeLOM(lom)
   933  }
   934  
   935  // POST /v1/objects/bucket-name/object-name
   936  func (t *target) httpobjpost(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
   937  	msg, err := t.readActionMsg(w, r)
   938  	if err != nil {
   939  		return
   940  	}
   941  	if msg.Action == apc.ActBlobDl {
   942  		apireq.after = 1
   943  	}
   944  	if t.parseReq(w, r, apireq) != nil {
   945  		return
   946  	}
   947  	if isRedirect(apireq.query) == "" {
   948  		t.writeErrf(w, r, "%s: %s-%s(obj) is expected to be redirected", t.si, r.Method, msg.Action)
   949  		return
   950  	}
   951  	var lom *core.LOM
   952  	switch msg.Action {
   953  	case apc.ActRenameObject:
   954  		lom = core.AllocLOM(apireq.items[1])
   955  		if err = lom.InitBck(apireq.bck.Bucket()); err != nil {
   956  			break
   957  		}
   958  		if err = t.objMv(lom, msg); err == nil {
   959  			t.statsT.Inc(stats.RenameCount)
   960  			core.FreeLOM(lom)
   961  			lom = nil
   962  		} else {
   963  			t.statsT.IncErr(stats.RenameCount)
   964  		}
   965  	case apc.ActBlobDl:
   966  		var (
   967  			xid     string
   968  			objName = msg.Name
   969  			blobMsg apc.BlobMsg
   970  		)
   971  		lom = core.AllocLOM(objName)
   972  		if err = lom.InitBck(apireq.bck.Bucket()); err != nil {
   973  			break
   974  		}
   975  		if err = cos.MorphMarshal(msg.Value, &blobMsg); err != nil {
   976  			err = fmt.Errorf(cmn.FmtErrMorphUnmarshal, t, "set-custom", msg.Value, err)
   977  			break
   978  		}
   979  		args := &core.BlobParams{
   980  			Lom: lom,
   981  			Msg: &blobMsg,
   982  		}
   983  		if xid, _, err = t.blobdl(args, nil /*oa*/); xid != "" {
   984  			debug.AssertNoErr(err)
   985  			w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(xid)))
   986  			w.Write([]byte(xid))
   987  			// lom is eventually freed by x-blob
   988  		}
   989  	default:
   990  		t.writeErrAct(w, r, msg.Action)
   991  		return
   992  	}
   993  	if err != nil {
   994  		t.writeErr(w, r, err)
   995  		core.FreeLOM(lom)
   996  	}
   997  }
   998  
   999  // HEAD /v1/objects/<bucket-name>/<object-name>
  1000  func (t *target) httpobjhead(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
  1001  	if err := t.parseReq(w, r, apireq); err != nil {
  1002  		return
  1003  	}
  1004  	query, bck, objName := apireq.query, apireq.bck, apireq.items[1]
  1005  	if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) {
  1006  		// validates that the request is internal (by a node in the same cluster)
  1007  		if isRedirect(query) == "" && t.isIntraCall(r.Header, false) != nil {
  1008  			t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)",
  1009  				t.si, r.Method, r.RemoteAddr)
  1010  			return
  1011  		}
  1012  	}
  1013  	lom := core.AllocLOM(objName)
  1014  	ecode, err := t.objHead(w.Header(), query, bck, lom)
  1015  	core.FreeLOM(lom)
  1016  	if err != nil {
  1017  		t._erris(w, r, cos.IsParseBool(query.Get(apc.QparamSilent)), err, ecode)
  1018  	}
  1019  }
  1020  
  1021  func (t *target) objHead(hdr http.Header, query url.Values, bck *meta.Bck, lom *core.LOM) (ecode int, err error) {
  1022  	var (
  1023  		fltPresence int
  1024  		exists      = true
  1025  		hasEC       bool
  1026  	)
  1027  	if tmp := query.Get(apc.QparamFltPresence); tmp != "" {
  1028  		var erp error
  1029  		fltPresence, erp = strconv.Atoi(tmp)
  1030  		debug.AssertNoErr(erp)
  1031  	}
  1032  	if err = lom.InitBck(bck.Bucket()); err != nil {
  1033  		if cmn.IsErrBucketNought(err) {
  1034  			ecode = http.StatusNotFound
  1035  		}
  1036  		return
  1037  	}
  1038  	err = lom.Load(true /*cache it*/, false /*locked*/)
  1039  	if err == nil {
  1040  		if apc.IsFltNoProps(fltPresence) {
  1041  			return
  1042  		}
  1043  		if fltPresence == apc.FltExistsOutside {
  1044  			err = fmt.Errorf(fmtOutside, lom.Cname(), fltPresence)
  1045  			return
  1046  		}
  1047  	} else {
  1048  		if !cmn.IsErrObjNought(err) {
  1049  			return
  1050  		}
  1051  		exists = false
  1052  		if fltPresence == apc.FltPresentCluster {
  1053  			exists = lom.RestoreToLocation()
  1054  		}
  1055  	}
  1056  
  1057  	if !exists {
  1058  		if bck.IsAIS() || apc.IsFltPresent(fltPresence) {
  1059  			err = cos.NewErrNotFound(t, lom.Cname())
  1060  			return http.StatusNotFound, err
  1061  		}
  1062  	}
  1063  
  1064  	// props
  1065  	op := cmn.ObjectProps{Name: lom.ObjName, Bck: *lom.Bucket(), Present: exists}
  1066  	if exists {
  1067  		op.ObjAttrs = *lom.ObjAttrs()
  1068  		op.Location = lom.Location()
  1069  		op.Mirror.Copies = lom.NumCopies()
  1070  		if lom.HasCopies() {
  1071  			lom.Lock(false)
  1072  			for fs := range lom.GetCopies() {
  1073  				if idx := strings.Index(fs, "/@"); idx >= 0 {
  1074  					fs = fs[:idx]
  1075  				}
  1076  				op.Mirror.Paths = append(op.Mirror.Paths, fs)
  1077  			}
  1078  			lom.Unlock(false)
  1079  		} else {
  1080  			fs := lom.FQN
  1081  			if idx := strings.Index(fs, "/@"); idx >= 0 {
  1082  				fs = fs[:idx]
  1083  			}
  1084  			op.Mirror.Paths = append(op.Mirror.Paths, fs)
  1085  		}
  1086  		if lom.ECEnabled() {
  1087  			if md, err := ec.ObjectMetadata(lom.Bck(), lom.ObjName); err == nil {
  1088  				hasEC = true
  1089  				op.EC.DataSlices = md.Data
  1090  				op.EC.ParitySlices = md.Parity
  1091  				op.EC.IsECCopy = md.IsCopy
  1092  				op.EC.Generation = md.Generation
  1093  			}
  1094  		}
  1095  	} else {
  1096  		// cold HEAD
  1097  		var oa *cmn.ObjAttrs
  1098  		oa, ecode, err = t.Backend(lom.Bck()).HeadObj(context.Background(), lom, nil /*origReq*/)
  1099  		if err != nil {
  1100  			if ecode != http.StatusNotFound {
  1101  				err = cmn.NewErrFailedTo(t, "HEAD", lom.Cname(), err)
  1102  			}
  1103  			return
  1104  		}
  1105  		if apc.IsFltNoProps(fltPresence) {
  1106  			return
  1107  		}
  1108  		op.ObjAttrs = *oa
  1109  		op.ObjAttrs.Atime = 0
  1110  	}
  1111  
  1112  	// to header
  1113  	cmn.ToHeader(&op.ObjAttrs, hdr, op.ObjAttrs.Size)
  1114  	if op.ObjAttrs.Cksum == nil {
  1115  		// cos.Cksum does not have default nil/zero value (reflection)
  1116  		op.ObjAttrs.Cksum = cos.NewCksum("", "")
  1117  	}
  1118  	errIter := cmn.IterFields(op, func(tag string, field cmn.IterField) (err error, b bool) {
  1119  		if !hasEC && strings.HasPrefix(tag, "ec.") {
  1120  			return nil, false
  1121  		}
  1122  		// NOTE: op.ObjAttrs were already added via cmn.ToHeader
  1123  		if tag[0] == '.' {
  1124  			return nil, false
  1125  		}
  1126  		v := field.String()
  1127  		if v == "" {
  1128  			return nil, false
  1129  		}
  1130  		name := apc.PropToHeader(tag)
  1131  		debug.Func(func() {
  1132  			vv := hdr.Get(name)
  1133  			debug.Assertf(vv == "", "not expecting duplications: %s=(%q, %q)", name, v, vv)
  1134  		})
  1135  		hdr.Set(name, v)
  1136  		return nil, false
  1137  	})
  1138  	debug.AssertNoErr(errIter)
  1139  	return
  1140  }
  1141  
  1142  // PATCH /v1/objects/<bucket-name>/<object-name>
  1143  // By default, adds or updates existing custom keys. Will remove all existing keys and
  1144  // replace them with the specified ones _iff_ `apc.QparamNewCustom` is set.
  1145  func (t *target) httpobjpatch(w http.ResponseWriter, r *http.Request, apireq *apiRequest) {
  1146  	if err := t.parseReq(w, r, apireq); err != nil {
  1147  		return
  1148  	}
  1149  	if cmn.Rom.Features().IsSet(feat.EnforceIntraClusterAccess) {
  1150  		if isRedirect(apireq.query) == "" && t.isIntraCall(r.Header, false) != nil {
  1151  			t.writeErrf(w, r, "%s: %s(obj) is expected to be redirected (remaddr=%s)",
  1152  				t.si, r.Method, r.RemoteAddr)
  1153  			return
  1154  		}
  1155  	}
  1156  	msg, err := t.readActionMsg(w, r)
  1157  	if err != nil {
  1158  		return
  1159  	}
  1160  	custom := cos.StrKVs{}
  1161  	if err := cos.MorphMarshal(msg.Value, &custom); err != nil {
  1162  		t.writeErrf(w, r, cmn.FmtErrMorphUnmarshal, t.si, "set-custom", msg.Value, err)
  1163  		return
  1164  	}
  1165  	lom := core.AllocLOM(apireq.items[1] /*objName*/)
  1166  	defer core.FreeLOM(lom)
  1167  	if !t.isValidObjname(w, r, lom.ObjName) {
  1168  		return
  1169  	}
  1170  	if err := lom.InitBck(apireq.bck.Bucket()); err != nil {
  1171  		t.writeErr(w, r, err)
  1172  		return
  1173  	}
  1174  	if err := lom.Load(true /*cache it*/, false /*locked*/); err != nil {
  1175  		if cos.IsNotExist(err, 0) {
  1176  			t.writeErr(w, r, err, http.StatusNotFound)
  1177  		} else {
  1178  			t.writeErr(w, r, err)
  1179  		}
  1180  		return
  1181  	}
  1182  	delOldSetNew := cos.IsParseBool(apireq.query.Get(apc.QparamNewCustom))
  1183  	if delOldSetNew {
  1184  		lom.SetCustomMD(custom)
  1185  	} else {
  1186  		for key, val := range custom {
  1187  			lom.SetCustomKey(key, val)
  1188  		}
  1189  	}
  1190  	lom.Persist()
  1191  }
  1192  
  1193  //
  1194  // httpec* handlers
  1195  //
  1196  
  1197  // Returns a slice. Does not use GFN.
  1198  func (t *target) httpecget(w http.ResponseWriter, r *http.Request) {
  1199  	apireq := apiReqAlloc(3, apc.URLPathEC.L, false)
  1200  	apireq.bckIdx = 1
  1201  	if err := t.parseReq(w, r, apireq); err != nil {
  1202  		apiReqFree(apireq)
  1203  		return
  1204  	}
  1205  	switch apireq.items[0] {
  1206  	case ec.URLMeta:
  1207  		t.sendECMetafile(w, r, apireq.bck, apireq.items[2])
  1208  	case ec.URLCT:
  1209  		lom := core.AllocLOM(apireq.items[2])
  1210  		t.sendECCT(w, r, apireq.bck, lom)
  1211  		core.FreeLOM(lom)
  1212  	default:
  1213  		t.writeErrURL(w, r)
  1214  	}
  1215  	apiReqFree(apireq)
  1216  }
  1217  
  1218  // Returns a CT's metadata.
  1219  func (t *target) sendECMetafile(w http.ResponseWriter, r *http.Request, bck *meta.Bck, objName string) {
  1220  	if err := bck.Init(t.owner.bmd); err != nil {
  1221  		if !cmn.IsErrRemoteBckNotFound(err) { // is ais
  1222  			t.writeErr(w, r, err, Silent)
  1223  			return
  1224  		}
  1225  	}
  1226  	md, err := ec.ObjectMetadata(bck, objName)
  1227  	if err != nil {
  1228  		if os.IsNotExist(err) {
  1229  			t.writeErr(w, r, err, http.StatusNotFound, Silent)
  1230  		} else {
  1231  			t.writeErr(w, r, err, http.StatusInternalServerError, Silent)
  1232  		}
  1233  		return
  1234  	}
  1235  	b := md.NewPack()
  1236  	w.Header().Set(cos.HdrContentLength, strconv.Itoa(len(b)))
  1237  	w.Write(b)
  1238  }
  1239  
  1240  func (t *target) sendECCT(w http.ResponseWriter, r *http.Request, bck *meta.Bck, lom *core.LOM) {
  1241  	if err := lom.InitBck(bck.Bucket()); err != nil {
  1242  		if cmn.IsErrRemoteBckNotFound(err) {
  1243  			t.BMDVersionFixup(r)
  1244  			err = lom.InitBck(bck.Bucket())
  1245  		}
  1246  		if err != nil {
  1247  			t.writeErr(w, r, err)
  1248  			return
  1249  		}
  1250  	}
  1251  	sliceFQN := lom.Mountpath().MakePathFQN(bck.Bucket(), fs.ECSliceType, lom.ObjName)
  1252  	finfo, err := os.Stat(sliceFQN)
  1253  	if err != nil {
  1254  		t.writeErr(w, r, err, http.StatusNotFound, Silent)
  1255  		return
  1256  	}
  1257  	file, err := os.Open(sliceFQN)
  1258  	if err != nil {
  1259  		t.fsErr(err, sliceFQN)
  1260  		t.writeErr(w, r, err, http.StatusInternalServerError)
  1261  		return
  1262  	}
  1263  
  1264  	w.Header().Set(cos.HdrContentLength, strconv.FormatInt(finfo.Size(), 10))
  1265  	_, err = io.Copy(w, file) // No need for `io.CopyBuffer` as `sendfile` syscall will be used.
  1266  	cos.Close(file)
  1267  	if err != nil {
  1268  		nlog.Errorf("Failed to send slice %s: %v", lom.Cname(), err)
  1269  	}
  1270  }
  1271  
  1272  // called under lock
  1273  func (t *target) putApndArch(r *http.Request, lom *core.LOM, started int64, dpq *dpq) (int, error) {
  1274  	var (
  1275  		mime     = dpq.arch.mime // apc.QparamArchmime
  1276  		filename = dpq.arch.path // apc.QparamArchpath
  1277  		flags    int64
  1278  	)
  1279  	if strings.HasPrefix(filename, lom.ObjName) {
  1280  		if rel, err := filepath.Rel(lom.ObjName, filename); err == nil {
  1281  			filename = rel
  1282  		}
  1283  	}
  1284  	if s := r.Header.Get(apc.HdrPutApndArchFlags); s != "" {
  1285  		var errV error
  1286  		if flags, errV = strconv.ParseInt(s, 10, 64); errV != nil {
  1287  			return http.StatusBadRequest,
  1288  				fmt.Errorf("failed to archive %s: invalid flags %q in the request", lom.Cname(), s)
  1289  		}
  1290  	}
  1291  	a := &putA2I{
  1292  		started:  started,
  1293  		t:        t,
  1294  		lom:      lom,
  1295  		r:        r.Body,
  1296  		filename: filename,
  1297  		mime:     mime,
  1298  		put:      false, // below
  1299  	}
  1300  	if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil {
  1301  		if !os.IsNotExist(err) {
  1302  			return http.StatusInternalServerError, err
  1303  		}
  1304  		if flags == apc.ArchAppend {
  1305  			return http.StatusNotFound, err
  1306  		}
  1307  		a.put = true
  1308  	} else {
  1309  		a.put = (flags == 0)
  1310  	}
  1311  	if s := r.Header.Get(cos.HdrContentLength); s != "" {
  1312  		if size, err := strconv.ParseInt(s, 10, 64); err == nil {
  1313  			a.size = size
  1314  		}
  1315  	}
  1316  	if a.size == 0 {
  1317  		return http.StatusBadRequest, fmt.Errorf("failed to archive %s: missing %q in the request",
  1318  			lom.Cname(), cos.HdrContentLength)
  1319  	}
  1320  	return a.do()
  1321  }
  1322  
  1323  func (t *target) DeleteObject(lom *core.LOM, evict bool) (code int, err error) {
  1324  	var isback bool
  1325  	lom.Lock(true)
  1326  	code, err, isback = t.delobj(lom, evict)
  1327  	lom.Unlock(true)
  1328  
  1329  	// special corner-case retry (quote):
  1330  	// - googleapi: "Error 503: We encountered an internal error. Please try again."
  1331  	// - aws-error[InternalError: We encountered an internal error. Please try again.]
  1332  	if err != nil && isback {
  1333  		if code == http.StatusServiceUnavailable || strings.Contains(err.Error(), "try again") {
  1334  			nlog.Errorf("failed to delete %s: %v(%d) - retrying...", lom, err, code)
  1335  			time.Sleep(time.Second)
  1336  			code, err = t.Backend(lom.Bck()).DeleteObj(lom)
  1337  		}
  1338  	}
  1339  	if err == nil {
  1340  		t.statsT.Inc(stats.DeleteCount)
  1341  	} else {
  1342  		t.statsT.IncErr(stats.DeleteCount) // TODO: count GET/PUT/DELETE remote errors separately..
  1343  	}
  1344  	return
  1345  }
  1346  
  1347  func (t *target) delobj(lom *core.LOM, evict bool) (int, error, bool) {
  1348  	var (
  1349  		aisErr, backendErr         error
  1350  		aisErrCode, backendErrCode int
  1351  		delFromAIS, delFromBackend bool
  1352  	)
  1353  	delFromBackend = lom.Bck().IsRemote() && !evict
  1354  	err := lom.Load(false /*cache it*/, true /*locked*/)
  1355  	if err != nil {
  1356  		if !cos.IsNotExist(err, 0) {
  1357  			return 0, err, false
  1358  		}
  1359  		if !delFromBackend {
  1360  			return http.StatusNotFound, err, false
  1361  		}
  1362  	} else {
  1363  		delFromAIS = true
  1364  	}
  1365  
  1366  	// do
  1367  	if delFromBackend {
  1368  		backendErrCode, backendErr = t.Backend(lom.Bck()).DeleteObj(lom)
  1369  	}
  1370  	if delFromAIS {
  1371  		size := lom.SizeBytes()
  1372  		aisErr = lom.Remove()
  1373  		if aisErr != nil {
  1374  			if !os.IsNotExist(aisErr) {
  1375  				if backendErr != nil {
  1376  					// unlikely
  1377  					nlog.Errorf("double-failure to delete %s: ais err %v, backend err %v(%d)",
  1378  						lom, aisErr, backendErr, backendErrCode)
  1379  				}
  1380  				return 0, aisErr, false
  1381  			}
  1382  		} else if evict {
  1383  			debug.Assert(lom.Bck().IsRemote())
  1384  			t.statsT.AddMany(
  1385  				cos.NamedVal64{Name: stats.LruEvictCount, Value: 1},
  1386  				cos.NamedVal64{Name: stats.LruEvictSize, Value: size},
  1387  			)
  1388  		}
  1389  	}
  1390  	if backendErr != nil {
  1391  		return backendErrCode, backendErr, true
  1392  	}
  1393  	return aisErrCode, aisErr, false
  1394  }
  1395  
  1396  // rename obj
  1397  func (t *target) objMv(lom *core.LOM, msg *apc.ActMsg) (err error) {
  1398  	if lom.Bck().IsRemote() {
  1399  		return fmt.Errorf("%s: cannot rename object %s from remote bucket", t.si, lom)
  1400  	}
  1401  	if lom.ECEnabled() {
  1402  		return fmt.Errorf("%s: cannot rename erasure-coded object %s", t.si, lom)
  1403  	}
  1404  	if msg.Name == lom.ObjName {
  1405  		return fmt.Errorf("%s: cannot rename/move object %s onto itself", t.si, lom)
  1406  	}
  1407  
  1408  	buf, slab := t.gmm.Alloc()
  1409  	coiParams := core.AllocCOI()
  1410  	{
  1411  		coiParams.BckTo = lom.Bck()
  1412  		coiParams.ObjnameTo = msg.Name /* new object name */
  1413  		coiParams.Buf = buf
  1414  		coiParams.Config = cmn.GCO.Get()
  1415  		coiParams.OWT = cmn.OwtCopy
  1416  		coiParams.Finalize = true
  1417  	}
  1418  	coi := (*copyOI)(coiParams)
  1419  	_, err = coi.do(t, nil /*DM*/, lom)
  1420  	core.FreeCOI(coiParams)
  1421  	slab.Free(buf)
  1422  	if err != nil {
  1423  		return err
  1424  	}
  1425  
  1426  	// TODO: combine copy+delete under a single write lock
  1427  	lom.Lock(true)
  1428  	if err := lom.Remove(); err != nil {
  1429  		nlog.Warningf("%s: failed to delete renamed object %s (new name %s): %v", t, lom, msg.Name, err)
  1430  	}
  1431  	lom.Unlock(true)
  1432  	return nil
  1433  }
  1434  
  1435  // compare running the same via (generic) t.xstart
  1436  func (t *target) blobdl(params *core.BlobParams, oa *cmn.ObjAttrs) (string, *xs.XactBlobDl, error) {
  1437  	// cap
  1438  	cs := fs.Cap()
  1439  	if errCap := cs.Err(); errCap != nil {
  1440  		cs = t.OOS(nil)
  1441  		if err := cs.Err(); err != nil {
  1442  			return "", nil, err
  1443  		}
  1444  	}
  1445  
  1446  	if oa != nil {
  1447  		return _blobdl(params, oa)
  1448  	}
  1449  
  1450  	// - try-lock (above) to load, check availability
  1451  	// - unlock right away
  1452  	// - subsequently, use cmn.OwtGetPrefetchLock to finalize
  1453  	// - there's a single x-blob-download per object (see WhenPrevIsRunning)
  1454  	lom, latestVer := params.Lom, params.Msg.LatestVer
  1455  	if !lom.TryLock(false) {
  1456  		return "", nil, cmn.NewErrBusy("blob", lom.Cname())
  1457  	}
  1458  
  1459  	oa, deleted, err := lom.LoadLatest(latestVer)
  1460  	lom.Unlock(false)
  1461  
  1462  	// w/ assorted returns
  1463  	switch {
  1464  	case deleted: // remotely
  1465  		debug.Assert(latestVer && err != nil)
  1466  		return "", nil, err
  1467  	case oa != nil:
  1468  		debug.Assert(latestVer && err == nil)
  1469  		// not latest
  1470  	case err == nil:
  1471  		return "", nil, nil // nothing to do
  1472  	case !cmn.IsErrObjNought(err):
  1473  		return "", nil, err
  1474  	}
  1475  
  1476  	// handle: (not-present || latest-not-eq)
  1477  	return _blobdl(params, oa)
  1478  }
  1479  
  1480  // returns an empty xid ("") if nothing to do
  1481  func _blobdl(params *core.BlobParams, oa *cmn.ObjAttrs) (string, *xs.XactBlobDl, error) {
  1482  	if params.WriteSGL == nil {
  1483  		// regular lom save (custom writer not present)
  1484  		wfqn := fs.CSM.Gen(params.Lom, fs.WorkfileType, "blob-dl")
  1485  		lmfh, err := params.Lom.CreateFile(wfqn)
  1486  		if err != nil {
  1487  			return "", nil, err
  1488  		}
  1489  		params.Lmfh = lmfh
  1490  		params.Wfqn = wfqn
  1491  	}
  1492  	// new
  1493  	xid := cos.GenUUID()
  1494  	rns := xs.RenewBlobDl(xid, params, oa)
  1495  	if rns.Err != nil || rns.IsRunning() { // cmn.IsErrXactUsePrev(rns.Err): single blob-downloader per blob
  1496  		if params.Lmfh != nil {
  1497  			cos.Close(params.Lmfh)
  1498  		}
  1499  		if params.Wfqn != "" {
  1500  			if errRemove := cos.RemoveFile(params.Wfqn); errRemove != nil {
  1501  				nlog.Errorln("nested err", errRemove)
  1502  			}
  1503  		}
  1504  		return "", nil, rns.Err
  1505  	}
  1506  
  1507  	// a) via x-start, x-blob-download
  1508  	xblob := rns.Entry.Get().(*xs.XactBlobDl)
  1509  	if params.RspW == nil {
  1510  		go xblob.Run(nil)
  1511  		return xblob.ID(), xblob, nil
  1512  	}
  1513  	// b) via GET (blocking w/ simultaneous transmission)
  1514  	xblob.Run(nil)
  1515  	return "", nil, xblob.AbortErr()
  1516  }
  1517  
  1518  func (t *target) fsErr(err error, filepath string) {
  1519  	if !cmn.GCO.Get().FSHC.Enabled || !cos.IsIOError(err) {
  1520  		return
  1521  	}
  1522  	mi, _ := fs.Path2Mpath(filepath)
  1523  	if mi == nil {
  1524  		return
  1525  	}
  1526  	if cos.IsErrOOS(err) {
  1527  		cs := t.OOS(nil)
  1528  		nlog.Errorf("%s: fsErr %s", t, cs.String())
  1529  		return
  1530  	}
  1531  	nlog.Errorf("%s: waking up FSHC to check %q for err %v", t, filepath, err)
  1532  	keyName := mi.Path
  1533  	// keyName is the mountpath is the fspath - counting IO errors on a per basis..
  1534  	t.statsT.AddMany(cos.NamedVal64{Name: stats.ErrIOCount, NameSuffix: keyName, Value: 1})
  1535  	t.fshc.OnErr(filepath)
  1536  }