github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/tgtobj.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"archive/tar"
     9  	"context"
    10  	"encoding"
    11  	"encoding/base64"
    12  	"errors"
    13  	"fmt"
    14  	"io"
    15  	"net/http"
    16  	"os"
    17  	"strconv"
    18  	"strings"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/NVIDIA/aistore/ais/s3"
    23  	"github.com/NVIDIA/aistore/api/apc"
    24  	"github.com/NVIDIA/aistore/cmn"
    25  	"github.com/NVIDIA/aistore/cmn/archive"
    26  	"github.com/NVIDIA/aistore/cmn/cos"
    27  	"github.com/NVIDIA/aistore/cmn/debug"
    28  	"github.com/NVIDIA/aistore/cmn/feat"
    29  	"github.com/NVIDIA/aistore/cmn/mono"
    30  	"github.com/NVIDIA/aistore/cmn/nlog"
    31  	"github.com/NVIDIA/aistore/core"
    32  	"github.com/NVIDIA/aistore/core/meta"
    33  	"github.com/NVIDIA/aistore/ec"
    34  	"github.com/NVIDIA/aistore/fs"
    35  	"github.com/NVIDIA/aistore/memsys"
    36  	"github.com/NVIDIA/aistore/mirror"
    37  	"github.com/NVIDIA/aistore/reb"
    38  	"github.com/NVIDIA/aistore/stats"
    39  	"github.com/NVIDIA/aistore/transport"
    40  	"github.com/NVIDIA/aistore/transport/bundle"
    41  	"github.com/NVIDIA/aistore/xact/xreg"
    42  )
    43  
    44  //
    45  // PUT, GET, APPEND (to file | to archive), and COPY object
    46  //
    47  
type (
	// putOI is the control structure for writing a single object:
	// content is read from `r`, written to the temporary `workFQN`,
	// and then finalized (renamed) as `lom`.
	putOI struct {
		oreq       *http.Request
		r          io.ReadCloser // content reader
		xctn       core.Xact     // xaction that puts
		t          *target       // this
		lom        *core.LOM     // obj
		cksumToUse *cos.Cksum    // if available (not `none`), can be validated and will be stored
		config     *cmn.Config   // (during this request)
		resphdr    http.Header   // as implied
		workFQN    string        // temp fqn to be renamed
		atime      int64         // access time.Now()
		ltime      int64         // mono.NanoTime, to measure latency
		size       int64         // aka Content-Length
		owt        cmn.OWT       // object write transaction enum { OwtPut, ..., OwtGet* }
		restful    bool          // being invoked via RESTful API
		t2t        bool          // by another target
		skipEC     bool          // do not erasure-encode when finalizing
		skipVC     bool          // skip loading existing Version and skip comparing Checksums (skip VC)
		coldGET    bool          // (one implication: proceed to write)
	}

	// getOI is the control structure for reading (GET) a single object,
	// including the cold-GET and recovery paths.
	getOI struct {
		req        *http.Request
		w          http.ResponseWriter
		ctx        context.Context // context used when getting object from remote backend (access creds)
		t          *target         // this
		lom        *core.LOM       // obj
		dpq        *dpq
		ranges     byteRanges // range read (see https://www.rfc-editor.org/rfc/rfc7233#section-2.1)
		atime      int64      // access time.Now()
		ltime      int64      // mono.NanoTime, to measure latency
		chunked    bool       // chunked transfer (en)coding: https://tools.ietf.org/html/rfc7230#page-36
		unlocked   bool       // internal
		verchanged bool       // version changed
		retry      bool       // once
		cold       bool       // true if executed backend.Get
		latestVer  bool       // QparamLatestVer || 'versioning.*_warm_get'
	}

	// textbook append: (packed) handle and control structure (see also `putA2I` arch below)
	aoHdl struct {
		partialCksum *cos.CksumHash
		nodeID       string
		workFQN      string
	}
	// apndOI is the control structure for the append and flush operations (see `op`).
	apndOI struct {
		started int64         // start time (nanoseconds)
		r       io.ReadCloser // content reader
		t       *target       // this
		config  *cmn.Config   // (during this request)
		lom     *core.LOM     // append to or _as_
		cksum   *cos.Cksum    // checksum expected once Flush-ed
		hdl     aoHdl         // (packed)
		op      string        // enum {apc.AppendOp, apc.FlushOp}
		size    int64         // Content-Length
	}

	// copyOI is a distinct local type with the same underlying structure as core.CopyParams.
	copyOI core.CopyParams

	// sendArgs groups a content reader, a data mover, object attributes and a
	// destination (node, bucket, object name).
	// NOTE(review): presumably the arguments for sending an object to another
	// target; its users are not visible in this part of the file - confirm.
	sendArgs struct {
		reader    cos.ReadOpenCloser
		dm        *bundle.DataMover
		objAttrs  cos.OAH
		tsi       *meta.Snode
		bckTo     *meta.Bck
		objNameTo string
		owt       cmn.OWT
	}

	// put/append-to arch
	putA2I struct {
		r        io.ReadCloser // read bytes to append
		t        *target       // this
		lom      *core.LOM     // resulting shard
		filename string        // fqn inside
		mime     string        // format
		started  int64         // time of receiving
		size     int64         // aka Content-Length
		put      bool          // overwrite
	}
)
   130  
   131  //
   132  // PUT(object)
   133  //
   134  
   135  // poi.restful entry point
   136  func (poi *putOI) do(resphdr http.Header, r *http.Request, dpq *dpq) (int, error) {
   137  	{
   138  		poi.oreq = r
   139  		poi.r = r.Body
   140  		poi.resphdr = resphdr
   141  		poi.workFQN = fs.CSM.Gen(poi.lom, fs.WorkfileType, fs.WorkfilePut)
   142  		poi.cksumToUse = poi.lom.ObjAttrs().FromHeader(r.Header)
   143  		poi.owt = cmn.OwtPut // default
   144  	}
   145  	if dpq.owt != "" {
   146  		poi.owt.FromS(dpq.owt)
   147  	}
   148  	if dpq.uuid != "" {
   149  		// resolve cluster-wide xact "behind" this PUT (promote via a single target won't show up)
   150  		xctn, err := xreg.GetXact(dpq.uuid)
   151  		if err != nil {
   152  			nlog.Errorln(err)
   153  			return 0, err
   154  		}
   155  		if xctn != nil {
   156  			poi.xctn = xctn
   157  		}
   158  	}
   159  	if sizeStr := r.Header.Get(cos.HdrContentLength); sizeStr != "" {
   160  		if size, ers := strconv.ParseInt(sizeStr, 10, 64); ers == nil {
   161  			poi.size = size
   162  		}
   163  	}
   164  	return poi.putObject()
   165  }
   166  
   167  func (poi *putOI) putObject() (ecode int, err error) {
   168  	poi.ltime = mono.NanoTime()
   169  	// PUT is a no-op if the checksums do match
   170  	if !poi.skipVC && !poi.coldGET && !poi.cksumToUse.IsEmpty() {
   171  		if poi.lom.EqCksum(poi.cksumToUse) {
   172  			if cmn.Rom.FastV(4, cos.SmoduleAIS) {
   173  				nlog.Infof("destination %s has identical %s: PUT is a no-op", poi.lom, poi.cksumToUse)
   174  			}
   175  			cos.DrainReader(poi.r)
   176  			return 0, nil
   177  		}
   178  	}
   179  
   180  	buf, slab, lmfh, erw := poi.write()
   181  	poi._cleanup(buf, slab, lmfh, erw)
   182  	if erw != nil {
   183  		err, ecode = erw, http.StatusInternalServerError
   184  		goto rerr
   185  	}
   186  
   187  	if ecode, err = poi.finalize(); err != nil {
   188  		goto rerr
   189  	}
   190  
   191  	// resp. header & stats
   192  	if !poi.t2t {
   193  		// NOTE: counting only user PUTs; ignoring EC and copies, on the one hand, and
   194  		// same-checksum-skip-writing, on the other
   195  		if poi.owt == cmn.OwtPut && poi.restful {
   196  			debug.Assert(cos.IsValidAtime(poi.atime), poi.atime)
   197  			size := poi.lom.SizeBytes()
   198  			poi.t.statsT.AddMany(
   199  				cos.NamedVal64{Name: stats.PutCount, Value: 1},
   200  				cos.NamedVal64{Name: stats.PutSize, Value: size},
   201  				cos.NamedVal64{Name: stats.PutThroughput, Value: size},
   202  				cos.NamedVal64{Name: stats.PutLatency, Value: mono.SinceNano(poi.ltime)},
   203  			)
   204  			// RESTful PUT response header
   205  			if poi.resphdr != nil {
   206  				cmn.ToHeader(poi.lom.ObjAttrs(), poi.resphdr, 0 /*skip setting content-length*/)
   207  			}
   208  		}
   209  	} else if poi.xctn != nil && poi.owt == cmn.OwtPromote {
   210  		// xaction in-objs counters, promote first
   211  		poi.xctn.InObjsAdd(1, poi.lom.SizeBytes())
   212  	}
   213  	if cmn.Rom.FastV(5, cos.SmoduleAIS) {
   214  		nlog.Infoln(poi.loghdr())
   215  	}
   216  	return
   217  rerr:
   218  	if poi.owt == cmn.OwtPut && poi.restful && !poi.t2t {
   219  		poi.t.statsT.IncErr(stats.PutCount)
   220  	}
   221  	return
   222  }
   223  
   224  // verbose only
   225  func (poi *putOI) loghdr() string {
   226  	sb := strings.Builder{}
   227  	sb.WriteString(poi.owt.String())
   228  	sb.WriteString(", ")
   229  	sb.WriteString(poi.lom.String())
   230  	if poi.xctn != nil {
   231  		sb.WriteString(", ")
   232  		sb.WriteString(poi.xctn.String())
   233  	}
   234  	if poi.skipVC {
   235  		sb.WriteString(", skip-vc")
   236  	}
   237  	if poi.coldGET {
   238  		sb.WriteString(", cold-get")
   239  	}
   240  	if poi.t2t {
   241  		sb.WriteString(", t2t")
   242  	}
   243  	return sb.String()
   244  }
   245  
   246  func (poi *putOI) finalize() (ecode int, err error) {
   247  	if ecode, err = poi.fini(); err != nil {
   248  		if err1 := cos.Stat(poi.workFQN); err1 == nil || !os.IsNotExist(err1) {
   249  			if err1 == nil {
   250  				err1 = err
   251  			}
   252  			poi.t.fsErr(err1, poi.workFQN)
   253  			if err2 := cos.RemoveFile(poi.workFQN); err2 != nil && !os.IsNotExist(err2) {
   254  				nlog.Errorf(fmtNested, poi.t, err1, "remove", poi.workFQN, err2)
   255  			}
   256  		}
   257  		poi.lom.Uncache()
   258  		if ecode != http.StatusInsufficientStorage && cmn.IsErrCapExceeded(err) {
   259  			ecode = http.StatusInsufficientStorage
   260  		}
   261  		return ecode, err
   262  	}
   263  	if !poi.skipEC {
   264  		if ecErr := ec.ECM.EncodeObject(poi.lom, nil); ecErr != nil && ecErr != ec.ErrorECDisabled {
   265  			err = ecErr
   266  			if ecode != http.StatusInsufficientStorage && cmn.IsErrCapExceeded(err) {
   267  				ecode = http.StatusInsufficientStorage
   268  			}
   269  			return ecode, err
   270  		}
   271  	}
   272  	poi.t.putMirror(poi.lom)
   273  	return 0, nil
   274  }
   275  
// poi.workFQN => LOM
//
// fini turns the written work file into the object:
//   - for remote buckets (and OWT below OwtRebalance) the content is first
//     written to the backend, with a single retry after 1s on 503;
//   - the object is then locked according to `poi.owt`;
//   - for ais buckets with versioning enabled the version may be incremented;
//   - finally the work file is renamed to the LOM location, old copies (if any)
//     are deleted, and the metadata is persisted.
//
// Returns cmn.ErrSkip when OwtGetPrefetchLock cannot try-lock the object.
func (poi *putOI) fini() (ecode int, err error) {
	var (
		lom = poi.lom
		bck = lom.Bck()
	)
	// put remote
	if bck.IsRemote() && poi.owt < cmn.OwtRebalance {
		ecode, err = poi.putRemote()
		if err != nil {
			loghdr := poi.loghdr()
			nlog.Errorf("PUT (%s): %v(%d)", loghdr, err, ecode)
			// only "service unavailable" is retried; everything else fails right away
			if ecode != http.StatusServiceUnavailable {
				return
			}
			// (googleapi: "Error 503: We encountered an internal error. Please try again.")
			time.Sleep(time.Second)
			ecode, err = poi.putRemote()
			if err != nil {
				return
			}
			nlog.Infof("PUT (%s): retried OK", loghdr)
		}
	}

	// locking strategies: optimistic and otherwise
	// (see GetCold() implementation and cmn.OWT enum)
	switch poi.owt {
	case cmn.OwtGetTryLock, cmn.OwtGetLock, cmn.OwtGet:
		// the caller is expected to already hold the exclusive lock (debug-only check)
		debug.AssertFunc(func() bool { _, exclusive := lom.IsLocked(); return exclusive })
	case cmn.OwtGetPrefetchLock:
		if !lom.TryLock(true) {
			if cmn.Rom.FastV(4, cos.SmoduleAIS) {
				nlog.Warningln(poi.loghdr(), "is busy")
			}
			return 0, cmn.ErrSkip // e.g. prefetch can skip it and keep on going
		}
		defer lom.Unlock(true)
	default:
		// expecting valid atime passed with `poi`
		debug.Assert(cos.IsValidAtime(poi.atime), poi.atime)
		lom.Lock(true)
		defer lom.Unlock(true)
		lom.SetAtimeUnix(poi.atime)
	}

	// ais versioning
	if bck.IsAIS() && lom.VersionConf().Enabled {
		if poi.owt < cmn.OwtRebalance {
			if poi.skipVC {
				err = lom.IncVersion()
				debug.AssertNoErr(err)
			} else if remSrc, ok := lom.GetCustomKey(cmn.SourceObjMD); !ok || remSrc == "" {
				// no remote-source metadata: increment the version; a failure is logged only
				if err = lom.IncVersion(); err != nil {
					nlog.Errorln(err)
				}
			}
		}
	}

	// done
	// (note: `err` is overwritten here regardless of the IncVersion result above)
	if err = lom.RenameFrom(poi.workFQN); err != nil {
		return
	}
	if lom.HasCopies() {
		if errdc := lom.DelAllCopies(); errdc != nil {
			nlog.Errorf("PUT (%s): failed to delete old copies [%v], proceeding anyway...", poi.loghdr(), errdc)
		}
	}
	if lom.AtimeUnix() == 0 { // (is set when migrating within cluster; prefetch special case)
		lom.SetAtimeUnix(poi.atime)
	}
	err = lom.PersistMain()
	return
}
   351  
   352  // via backend.PutObj()
   353  func (poi *putOI) putRemote() (ecode int, err error) {
   354  	var (
   355  		lom     = poi.lom
   356  		backend = poi.t.Backend(lom.Bck())
   357  	)
   358  	lmfh, err := cos.NewFileHandle(poi.workFQN)
   359  	if err != nil {
   360  		err = cmn.NewErrFailedTo(poi.t, "open", poi.workFQN, err)
   361  		return
   362  	}
   363  	if poi.owt == cmn.OwtPut && !lom.Bck().IsRemoteAIS() {
   364  		// some/all of those are set by the backend.PutObj()
   365  		lom.ObjAttrs().DelCustomKeys(cmn.SourceObjMD, cmn.CRC32CObjMD, cmn.ETag, cmn.MD5ObjMD, cmn.VersionObjMD)
   366  	}
   367  
   368  	ecode, err = backend.PutObj(lmfh, lom, poi.oreq)
   369  	if err == nil && !lom.Bck().IsRemoteAIS() {
   370  		lom.SetCustomKey(cmn.SourceObjMD, backend.Provider())
   371  	}
   372  	return
   373  }
   374  
// write copies `poi.r` into a newly created work file (`poi.workFQN`), computing
// and/or validating checksums on the way as per the bucket's checksum config.
// LOM is updated at the end of this call with size and checksum.
//
// NOTE: `poi.r` (reader) is not closed here, and on error the returned `lmfh`
// may still be open; the caller passes all four results to `_cleanup`, which
// frees the buffer and closes the reader and the file.
//
// Returns:
//   - buf, slab: the allocated copy buffer and its slab (nil if file creation failed)
//   - lmfh: the work file handle; nil once closed here
//   - err: create, copy, checksum-mismatch, or fsync error
func (poi *putOI) write() (buf []byte, slab *memsys.Slab, lmfh *os.File, err error) {
	var (
		written int64
		cksums  = struct {
			store     *cos.CksumHash // store with LOM
			expct     *cos.Cksum     // caller-provided (aka "end-to-end protection")
			compt     *cos.CksumHash // compute to validate `expct` - iff provided
			finalized bool           // to avoid computing the same checksum type twice
		}{}
		ckconf = poi.lom.CksumConf()
	)
	if lmfh, err = poi.lom.CreateFile(poi.workFQN); err != nil {
		return
	}
	// size the buffer by content length when known
	if poi.size <= 0 {
		buf, slab = poi.t.gmm.Alloc()
	} else {
		buf, slab = poi.t.gmm.AllocSize(poi.size)
	}

	switch {
	case ckconf.Type == cos.ChecksumNone:
		poi.lom.SetCksum(cos.NoneCksum)
		// not using `ReadFrom` of the `*os.File` -
		// ultimately, https://github.com/golang/go/blob/master/src/internal/poll/copy_file_range_linux.go#L100
		written, err = cos.CopyBuffer(lmfh, poi.r, buf)
	case !poi.cksumToUse.IsEmpty() && !poi.validateCksum(ckconf):
		// if the corresponding validation is not configured/enabled we just go ahead
		// and use the checksum that has arrived with the object
		poi.lom.SetCksum(poi.cksumToUse)
		// (ditto)
		written, err = cos.CopyBuffer(lmfh, poi.r, buf)
	default:
		// compute the checksum to store and, when required, a second one to validate
		writers := make([]io.Writer, 0, 3)
		cksums.store = cos.NewCksumHash(ckconf.Type) // always according to the bucket
		writers = append(writers, cksums.store.H)
		if !poi.skipVC && !poi.cksumToUse.IsEmpty() && poi.validateCksum(ckconf) {
			cksums.expct = poi.cksumToUse
			if poi.cksumToUse.Type() == cksums.store.Type() {
				// same type: one hash serves both purposes
				cksums.compt = cksums.store
			} else {
				// otherwise, compute separately
				cksums.compt = cos.NewCksumHash(poi.cksumToUse.Type())
				writers = append(writers, cksums.compt.H)
			}
		}
		writers = append(writers, lmfh)
		written, err = cos.CopyBuffer(cos.NewWriterMulti(writers...), poi.r, buf) // (ditto)
	}
	if err != nil {
		return
	}

	// validate
	if cksums.compt != nil {
		// remember when `store` is the same hash so that it is not finalized again below
		cksums.finalized = cksums.compt == cksums.store
		cksums.compt.Finalize()
		if !cksums.compt.Equal(cksums.expct) {
			err = cos.NewErrDataCksum(cksums.expct, &cksums.compt.Cksum, poi.lom.String())
			poi.t.statsT.AddMany(
				cos.NamedVal64{Name: stats.ErrCksumCount, Value: 1},
				cos.NamedVal64{Name: stats.ErrCksumSize, Value: written},
			)
			return
		}
	}

	// ok
	if poi.lom.IsFeatureSet(feat.FsyncPUT) {
		// a Sync error, if any, stays in `err` and is returned at the end
		err = lmfh.Sync() // compare w/ cos.FlushClose
		debug.AssertNoErr(err)
	}

	cos.Close(lmfh)
	lmfh = nil

	poi.lom.SetSize(written) // TODO: compare with non-zero lom.SizeBytes() that may have been set via oa.FromHeader()
	if cksums.store != nil {
		if !cksums.finalized {
			cksums.store.Finalize()
		}
		poi.lom.SetCksum(&cksums.store.Cksum)
	}
	return
}
   462  
   463  // post-write close & cleanup
   464  func (poi *putOI) _cleanup(buf []byte, slab *memsys.Slab, lmfh *os.File, err error) {
   465  	if buf != nil {
   466  		slab.Free(buf)
   467  	}
   468  	if err == nil {
   469  		cos.Close(poi.r)
   470  		return // ok
   471  	}
   472  
   473  	// not ok
   474  	poi.r.Close()
   475  	if nerr := lmfh.Close(); nerr != nil {
   476  		nlog.Errorf(fmtNested, poi.t, err, "close", poi.workFQN, nerr)
   477  	}
   478  	if nerr := cos.RemoveFile(poi.workFQN); nerr != nil && !os.IsNotExist(nerr) {
   479  		nlog.Errorf(fmtNested, poi.t, err, "remove", poi.workFQN, nerr)
   480  	}
   481  }
   482  
   483  func (poi *putOI) validateCksum(c *cmn.CksumConf) (v bool) {
   484  	switch poi.owt {
   485  	case cmn.OwtRebalance, cmn.OwtCopy:
   486  		v = c.ValidateObjMove
   487  	case cmn.OwtPut:
   488  		v = true
   489  	case cmn.OwtGetTryLock, cmn.OwtGetLock, cmn.OwtGet:
   490  		v = c.ValidateColdGet
   491  	case cmn.OwtGetPrefetchLock:
   492  	default:
   493  		debug.Assert(false, poi.owt)
   494  	}
   495  	return
   496  }
   497  
   498  //
   499  // GET(object)
   500  //
   501  
   502  func (goi *getOI) getObject() (ecode int, err error) {
   503  	debug.Assert(!goi.unlocked)
   504  	goi.lom.Lock(false)
   505  	ecode, err = goi.get()
   506  	if !goi.unlocked {
   507  		goi.lom.Unlock(false)
   508  	}
   509  	return ecode, err
   510  }
   511  
// is under rlock
//
// get loads the object and transmits it (via txfini), going through one of
// the following when the object cannot be served as is:
//   - ais bucket, object not found locally: try to restore it (restoreFromAny);
//   - remote bucket, object not found locally: cold GET from the backend;
//   - `latestVer`: compare with remote metadata and cold-GET if not equal;
//   - warm-GET checksum validation with recovery (validateRecover).
//
// Paths that set `goi.unlocked` tell the caller (getObject) not to unlock again.
// A failed transmission may be retried once when `goi.retry` is set.
func (goi *getOI) get() (ecode int, err error) {
	var (
		cs          fs.CapStatus
		doubleCheck bool
		retried     bool
		cold        bool
	)
do:
	err = goi.lom.Load(true /*cache it*/, true /*locked*/)
	if err != nil {
		// anything other than "does not exist" is a failure
		cold = cos.IsNotExist(err, 0)
		if !cold {
			return http.StatusInternalServerError, err
		}
		if goi.lom.IsFeatureSet(feat.DisableColdGET) && goi.lom.Bck().IsRemote() {
			return http.StatusNotFound, fmt.Errorf("%w (cold GET disabled)", err)
		}
		// about to write locally: check capacity and the object name first
		cs = fs.Cap()
		if cs.IsOOS() {
			return http.StatusInsufficientStorage, cs.Err()
		}
		if errN := cmn.ValidateObjName(goi.lom.ObjName); errN != nil {
			return 0, errN
		}
	}

	switch {
	case cold && goi.lom.Bck().IsAIS():
		// ais bucket with no backend - try recover
		goi.lom.Unlock(false)
		doubleCheck, ecode, err = goi.restoreFromAny(false /*skipLomRestore*/)
		if doubleCheck && err != nil {
			// try loading once more using a freshly allocated LOM;
			// on success it replaces goi.lom
			lom2 := core.AllocLOM(goi.lom.ObjName)
			er2 := lom2.InitBck(goi.lom.Bucket())
			if er2 == nil {
				er2 = lom2.Load(true /*cache it*/, false /*locked*/)
			}
			if er2 == nil {
				core.FreeLOM(goi.lom)
				goi.lom = lom2
				err = nil
			} else {
				core.FreeLOM(lom2)
			}
		}
		if err != nil {
			// the read lock was released above
			goi.unlocked = true
			return ecode, err
		}
		goi.lom.Lock(false)
		if err = goi.lom.Load(true /*cache it*/, true /*locked*/); err != nil {
			return 0, err
		}
		goto fin // ok, done
	case cold:
		// have remote backend - use it
	case goi.latestVer:
		// apc.QparamLatestVer or 'versioning.validate_warm_get'
		res := goi.lom.CheckRemoteMD(true /* rlocked */, false /*synchronize*/, goi.req)
		if res.Err != nil {
			return res.ErrCode, res.Err
		}
		if !res.Eq {
			cold, goi.verchanged = true, true
		}
		// TODO: utilize res.ObjAttrs
	}

	// validate checksums and recover (a.k.a. self-heal) if corrupted
	if !cold && goi.lom.CksumConf().ValidateWarmGet {
		cold, ecode, err = goi.validateRecover()
		if err != nil {
			if !cold {
				nlog.Errorln(err)
				return ecode, err
			}
			nlog.Errorf("%v - proceeding to cold-GET from %s", err, goi.lom.Bck())
		}
	}

	// cold-GET: upgrade rlock => wlock, call t.Backend.GetObjReader
	if cold {
		var (
			res    core.GetReaderResult
			ckconf = goi.lom.CksumConf()
			loaded bool
		)
		// capacity may not have been checked yet (e.g. version changed, bad checksum)
		if cs.IsNil() {
			cs = fs.Cap()
		}
		if cs.IsOOS() {
			return http.StatusInsufficientStorage, cs.Err()
		}
		goi.lom.SetAtimeUnix(goi.atime)

		if loaded, err = goi._coldLock(); err != nil {
			return 0, err
		}
		if loaded {
			goto fin
		}

		// zero-out prev. version custom metadata, if any
		goi.lom.SetCustomMD(nil)

		// get remote reader (compare w/ t.GetCold)
		res = goi.t.Backend(goi.lom.Bck()).GetObjReader(goi.ctx, goi.lom, 0, 0)
		if res.Err != nil {
			goi.lom.Unlock(true)
			goi.unlocked = true
			if !cos.IsNotExist(res.Err, res.ErrCode) {
				nlog.Infoln(ftcg+"(read)", goi.lom.Cname(), res.Err, res.ErrCode)
			}
			return res.ErrCode, res.Err
		}
		goi.cold = true

		// 3 alternative ways to perform cold GET
		// (the first two apply only to non-archive reads with no checksum, or with
		// neither cold-GET validation nor read-range checksumming enabled)
		if goi.dpq.arch.path == "" && goi.dpq.arch.regx == "" &&
			(ckconf.Type == cos.ChecksumNone || (!ckconf.ValidateColdGet && !ckconf.EnableReadRange)) {
			if goi.ranges.Range == "" && goi.lom.IsFeatureSet(feat.StreamingColdGET) {
				err = goi.coldStream(&res)
			} else {
				err = goi.coldReopen(&res)
			}
			goi.unlocked = true // always
			return 0, err
		}
		// otherwise, regular path
		ecode, err = goi._coldPut(&res)
		if err != nil {
			// _coldPut has released the lock on its error paths
			goi.unlocked = true
			return ecode, err
		}
		// with remaining stats via goi.stats()
		goi.t.statsT.AddMany(
			cos.NamedVal64{Name: stats.GetColdCount, Value: 1},
			cos.NamedVal64{Name: stats.GetColdSize, Value: res.Size},
			cos.NamedVal64{Name: stats.GetColdRwLatency, Value: mono.SinceNano(goi.ltime)},
		)
	}

	// read locally and stream back
fin:
	ecode, err = goi.txfini()
	if err == nil {
		debug.Assert(ecode == 0, ecode)
		return 0, nil
	}
	goi.lom.Uncache()
	if goi.retry {
		goi.retry = false
		if !retried {
			nlog.Warningf("GET %s: retrying...", goi.lom)
			retried = true // only once
			goto do
		}
		nlog.Warningf("GET %s: failed retrying %v(%d)", goi.lom, err, ecode)
	}
	return ecode, err
}
   674  
   675  // upgrade rlock => wlock
   676  // done early to prevent multiple cold-readers duplicating network/disk operation and overwriting each other
   677  func (goi *getOI) _coldLock() (loaded bool, err error) {
   678  	var (
   679  		t, lom = goi.t, goi.lom
   680  		now    int64
   681  	)
   682  outer:
   683  	for lom.UpgradeLock() {
   684  		if erl := lom.Load(true /*cache it*/, true /*locked*/); erl == nil {
   685  			// nothing to do
   686  			// (lock was upgraded by another goroutine that had also performed PUT on our behalf)
   687  			return true, nil
   688  		}
   689  		switch {
   690  		case now == 0:
   691  			now = mono.NanoTime()
   692  			fallthrough
   693  		case mono.Since(now) < max(cmn.Rom.CplaneOperation(), 2*time.Second):
   694  			nlog.Errorln(t.String()+": failed to load", lom.String(), err, "- retrying...")
   695  		default:
   696  			err = cmn.NewErrBusy("object", lom.Cname())
   697  			break outer
   698  		}
   699  	}
   700  	return
   701  }
   702  
   703  func (goi *getOI) _coldPut(res *core.GetReaderResult) (int, error) {
   704  	var (
   705  		t, lom = goi.t, goi.lom
   706  		poi    = allocPOI()
   707  	)
   708  	{
   709  		poi.t = t
   710  		poi.lom = lom
   711  		poi.config = cmn.GCO.Get()
   712  		poi.r = res.R
   713  		poi.size = res.Size
   714  		poi.workFQN = fs.CSM.Gen(lom, fs.WorkfileType, fs.WorkfileColdget)
   715  		poi.atime = goi.atime
   716  		poi.owt = cmn.OwtGet
   717  		poi.cksumToUse = res.ExpCksum // expected checksum (to validate if the bucket's `validate_cold_get == true`)
   718  		poi.coldGET = true
   719  	}
   720  	code, err := poi.putObject()
   721  	freePOI(poi)
   722  
   723  	if err != nil {
   724  		lom.Unlock(true)
   725  		nlog.Infoln(ftcg+"(put)", lom.Cname(), err)
   726  		return code, err
   727  	}
   728  
   729  	// load, downgrade lock, inc stats
   730  	if err = lom.Load(true /*cache it*/, true /*locked*/); err != nil {
   731  		lom.Unlock(true)
   732  		err = fmt.Errorf("unexpected failure to load %s: %w", lom, err) // (unlikely)
   733  		nlog.Errorln(err)
   734  		return http.StatusInternalServerError, err
   735  	}
   736  
   737  	lom.DowngradeLock()
   738  	return 0, nil
   739  }
   740  
// - validate checksums
// - if corrupted and IsAIS, try to recover from redundant replicas or EC slices
// - otherwise, rely on the remote backend for recovery (tradeoff; TODO: make it configurable)
//
// Returns:
//   - coldGet: true when a bad checksum was found in a non-ais bucket, i.e. the
//     caller should proceed to cold GET
//   - code, err: zero/nil when the (possibly recovered) object validates;
//     otherwise http.StatusInternalServerError, or the code returned by
//     restoreFromAny, together with the error
//
// The read lock is temporarily released while restoring.
func (goi *getOI) validateRecover() (coldGet bool, code int, err error) {
	var (
		lom     = goi.lom
		retried bool
	)
validate:
	err = lom.ValidateMetaChecksum()
	if err == nil {
		err = lom.ValidateContentChecksum()
	}
	if err == nil {
		return
	}
	code = http.StatusInternalServerError
	// only bad-checksum errors are recoverable
	if _, ok := err.(*cos.ErrBadCksum); !ok {
		return
	}
	if !lom.Bck().IsAIS() {
		coldGet = true
		return
	}

	nlog.Warningln(err)
	redundant := lom.HasCopies() || lom.ECEnabled()
	//
	// return err if there's no redundancy OR already recovered once (and failed)
	//
	if retried || !redundant {
		//
		// TODO: mark `deleted` and postpone actual deletion
		//
		if erl := lom.Remove(true /*force through rlock*/); erl != nil {
			nlog.Warningf("%s: failed to remove corrupted %s, err: %v", goi.t, lom, erl)
		}
		return
	}
	//
	// try to recover from BAD CHECKSUM
	//
	cos.RemoveFile(lom.FQN) // TODO: ditto

	if lom.HasCopies() {
		retried = true
		goi.lom.Unlock(false)
		// lookup and restore the object from local replicas
		restored := lom.RestoreToLocation()
		goi.lom.Lock(false)
		if restored {
			nlog.Warningf("%s: recovered corrupted %s from local replica", goi.t, lom)
			code = 0
			// re-validate what has been restored
			goto validate
		}
	}
	if lom.ECEnabled() {
		retried = true
		goi.lom.Unlock(false)
		cos.RemoveFile(lom.FQN)
		_, code, err = goi.restoreFromAny(true /*skipLomRestore*/)
		goi.lom.Lock(false)
		if err == nil {
			nlog.Warningf("%s: recovered corrupted %s from EC slices", goi.t, lom)
			code = 0
			// re-validate what has been restored
			goto validate
		}
	}

	// TODO: ditto
	if erl := lom.Remove(true /*force through rlock*/); erl != nil {
		nlog.Warningf("%s: failed to remove corrupted %s, err: %v", goi.t, lom, erl)
	}
	return
}
   816  
// attempt to restore an object from any/all of the below:
// 1) local copies (other FSes on this target)
// 2) other targets (when resilvering or rebalancing is running (aka GFN))
// 3) other targets if the bucket erasure coded
// 4) Cloud
//
// NOTE(review): no remote-backend ("Cloud") call is visible in this function;
// item 4 above is presumably handled elsewhere - confirm.
//
// Params:
//   - skipLomRestore: when true, skip restoring from local copies (step 1)
//
// Returns:
//   - doubleCheck: true when resilver or rebalance xaction is running and the
//     object was not restored from local copies
//   - ecode, err: zero/nil when restored; otherwise http.StatusNotFound, or
//     http.StatusInsufficientStorage when EC restore fails for lack of capacity
//     (for other EC-restore failures, and for an HRW lookup failure, `ecode` stays zero)
func (goi *getOI) restoreFromAny(skipLomRestore bool) (doubleCheck bool, ecode int, err error) {
	var (
		tsi  *meta.Snode
		smap = goi.t.owner.smap.get()
	)
	// NOTE: including targets 'in maintenance mode'
	tsi, err = smap.HrwHash2Tall(goi.lom.Digest())
	if err != nil {
		return
	}
	if !skipLomRestore {
		// when resilvering:
		// (whether or not resilvering is active depends on the context: mountpath events vs GET)
		var (
			resMarked = xreg.GetResilverMarked()
			running   = resMarked.Xact != nil
			gfnActive = goi.t.res.IsActive(3 /*interval-of-inactivity multiplier*/)
		)
		if resMarked.Interrupted || running || gfnActive {
			if goi.lom.RestoreToLocation() { // from copies
				nlog.Infof("%s restored to location", goi.lom)
				return
			}
			doubleCheck = running
		}
	}

	// when rebalancing: cluster-wide lookup (aka "get from neighbor" or GFN)
	var (
		gfnNode   *meta.Snode
		marked    = xreg.GetRebMarked()
		running   = marked.Xact != nil
		gfnActive = reb.IsGFN() // GFN(global rebalance)
		ecEnabled = goi.lom.ECEnabled()
		// TODO: when not enough EC targets to restore a sliced object,
		// we might still be able to restore from the object's full replica
		enoughECRestoreTargets = goi.lom.Bprops().EC.RequiredRestoreTargets() <= smap.CountActiveTs()
	)
	if running {
		doubleCheck = true
	}
	// first, ask the HRW target (if it is not this one) whether it has the object
	if running && tsi.ID() != goi.t.SID() {
		if goi.t.headt2t(goi.lom, tsi, smap) {
			gfnNode = tsi
			goto gfn
		}
	}
	// otherwise, broadcast to find a target that has it
	if running || !enoughECRestoreTargets ||
		((marked.Interrupted || marked.Restarted || gfnActive) && !ecEnabled) {
		gfnNode = goi.t.headObjBcast(goi.lom, smap)
	}
gfn:
	if gfnNode != nil {
		if goi.getFromNeighbor(goi.lom, gfnNode) {
			return
		}
	}

	// restore from existing EC slices, if possible
	ecErr := ec.ECM.RestoreObject(goi.lom)
	if ecErr == nil {
		ecErr = goi.lom.Load(true /*cache it*/, false /*locked*/) // TODO: optimize locking
		debug.AssertNoErr(ecErr)
		if ecErr == nil {
			nlog.Infoln(goi.t.String(), "EC-recovered", goi.lom.String())
			return
		}
		err = cmn.NewErrFailedTo(goi.t, "load EC-recovered", goi.lom.Cname(), ecErr)
	} else if ecErr != ec.ErrorECDisabled {
		err = cmn.NewErrFailedTo(goi.t, "EC-recover", goi.lom.Cname(), ecErr)
		if cmn.IsErrCapExceeded(ecErr) {
			ecode = http.StatusInsufficientStorage
		}
		return
	}

	// nothing worked: not found (wrapping the load failure, if any)
	if err != nil {
		err = cmn.NewErrFailedTo(goi.t, "goi-restore-any", goi.lom.Cname(), err)
	} else {
		err = cos.NewErrNotFound(goi.t, goi.lom.Cname())
	}
	ecode = http.StatusNotFound
	return
}
   906  
   907  func (goi *getOI) getFromNeighbor(lom *core.LOM, tsi *meta.Snode) bool {
   908  	query := lom.Bck().NewQuery()
   909  	query.Set(apc.QparamIsGFNRequest, "true")
   910  	reqArgs := cmn.AllocHra()
   911  	{
   912  		reqArgs.Method = http.MethodGet
   913  		reqArgs.Base = tsi.URL(cmn.NetIntraData)
   914  		reqArgs.Header = http.Header{
   915  			apc.HdrCallerID:   []string{goi.t.SID()},
   916  			apc.HdrCallerName: []string{goi.t.callerName()},
   917  		}
   918  		reqArgs.Path = apc.URLPathObjects.Join(lom.Bck().Name, lom.ObjName)
   919  		reqArgs.Query = query
   920  	}
   921  	config := cmn.GCO.Get()
   922  	req, _, cancel, err := reqArgs.ReqWithTimeout(config.Timeout.SendFile.D())
   923  	if err != nil {
   924  		debug.AssertNoErr(err)
   925  		return false
   926  	}
   927  	defer cancel()
   928  
   929  	resp, err := g.client.data.Do(req) //nolint:bodyclose // closed by `poi.putObject`
   930  	cmn.FreeHra(reqArgs)
   931  	if err != nil {
   932  		nlog.Errorf("%s: gfn failure, %s %q, err: %v", goi.t, tsi, lom, err)
   933  		return false
   934  	}
   935  
   936  	cksumToUse := lom.ObjAttrs().FromHeader(resp.Header)
   937  	workFQN := fs.CSM.Gen(lom, fs.WorkfileType, fs.WorkfileRemote)
   938  	poi := allocPOI()
   939  	{
   940  		poi.t = goi.t
   941  		poi.lom = lom
   942  		poi.config = config
   943  		poi.r = resp.Body
   944  		poi.owt = cmn.OwtRebalance
   945  		poi.workFQN = workFQN
   946  		poi.atime = lom.ObjAttrs().Atime
   947  		poi.cksumToUse = cksumToUse
   948  	}
   949  	ecode, erp := poi.putObject()
   950  	freePOI(poi)
   951  	if erp == nil {
   952  		if cmn.Rom.FastV(5, cos.SmoduleAIS) {
   953  			nlog.Infof("%s: gfn %s <= %s", goi.t, goi.lom, tsi)
   954  		}
   955  		return true
   956  	}
   957  	nlog.Errorf("%s: gfn-GET failed to PUT locally: %v(%d)", goi.t, erp, ecode)
   958  	return false
   959  }
   960  
   961  func (goi *getOI) txfini() (ecode int, err error) {
   962  	var (
   963  		lmfh *os.File
   964  		hrng *htrange
   965  		fqn  = goi.lom.FQN
   966  		dpq  = goi.dpq
   967  	)
   968  	if !goi.cold && !dpq.isGFN {
   969  		fqn = goi.lom.LBGet() // best-effort GET load balancing (see also mirror.findLeastUtilized())
   970  	}
   971  	// open
   972  	lmfh, err = os.Open(fqn)
   973  	if err != nil {
   974  		if os.IsNotExist(err) {
   975  			ecode = http.StatusNotFound
   976  			goi.retry = true // (!lom.IsAIS() || lom.ECEnabled() || GFN...)
   977  		} else {
   978  			goi.t.fsErr(err, fqn)
   979  			ecode = http.StatusInternalServerError
   980  			err = cmn.NewErrFailedTo(goi.t, "goi-finalize", goi.lom.Cname(), err, ecode)
   981  		}
   982  		return ecode, err
   983  	}
   984  
   985  	whdr := goi.w.Header()
   986  
   987  	// transmit (range, arch, regular)
   988  	switch {
   989  	case goi.ranges.Range != "":
   990  		debug.Assert(!dpq.isArch())
   991  		rsize := goi.lom.SizeBytes()
   992  		if goi.ranges.Size > 0 {
   993  			rsize = goi.ranges.Size
   994  		}
   995  		if hrng, ecode, err = goi.rngToHeader(whdr, rsize); err != nil {
   996  			break
   997  		}
   998  		err = goi._txrng(fqn, lmfh, whdr, hrng)
   999  	case dpq.isArch():
  1000  		err = goi._txarch(fqn, lmfh, whdr)
  1001  	default:
  1002  		err = goi._txreg(fqn, lmfh, whdr)
  1003  	}
  1004  
  1005  	cos.Close(lmfh)
  1006  	return ecode, err
  1007  }
  1008  
  1009  func (goi *getOI) _txrng(fqn string, lmfh *os.File, whdr http.Header, hrng *htrange) (err error) {
  1010  	var (
  1011  		r     io.Reader
  1012  		lom   = goi.lom
  1013  		sgl   *memsys.SGL
  1014  		cksum = lom.Checksum()
  1015  		size  int64
  1016  	)
  1017  	ckconf := lom.CksumConf()
  1018  	cksumRange := ckconf.Type != cos.ChecksumNone && ckconf.EnableReadRange
  1019  	size = hrng.Length
  1020  	r = io.NewSectionReader(lmfh, hrng.Start, hrng.Length)
  1021  	if cksumRange {
  1022  		sgl = goi.t.gmm.NewSGL(size)
  1023  		_, cksumH, err := cos.CopyAndChecksum(sgl /*as ReaderFrom*/, r, nil, ckconf.Type)
  1024  		if err != nil {
  1025  			sgl.Free()
  1026  			return err
  1027  		}
  1028  		r = sgl
  1029  		if cksumH != nil {
  1030  			cksum = &cksumH.Cksum
  1031  		}
  1032  	}
  1033  
  1034  	// set response header
  1035  	whdr.Set(cos.HdrContentType, cos.ContentBinary)
  1036  	cmn.ToHeader(lom.ObjAttrs(), whdr, size, cksum)
  1037  
  1038  	buf, slab := goi.t.gmm.AllocSize(min(size, memsys.DefaultBuf2Size))
  1039  	err = goi.transmit(r, buf, fqn)
  1040  	slab.Free(buf)
  1041  	if sgl != nil {
  1042  		sgl.Free()
  1043  	}
  1044  	return err
  1045  }
  1046  
  1047  // in particular, setup reader and writer and set headers
  1048  func (goi *getOI) _txreg(fqn string, lmfh *os.File, whdr http.Header) (err error) {
  1049  	var (
  1050  		dpq   = goi.dpq
  1051  		lom   = goi.lom
  1052  		cksum = lom.Checksum()
  1053  		size  = lom.SizeBytes()
  1054  	)
  1055  	// set response header
  1056  	whdr.Set(cos.HdrContentType, cos.ContentBinary)
  1057  	cmn.ToHeader(lom.ObjAttrs(), whdr, size, cksum)
  1058  	if dpq.isS3 {
  1059  		// (expecting user to set bucket checksum = md5)
  1060  		s3.SetEtag(whdr, lom)
  1061  	}
  1062  
  1063  	buf, slab := goi.t.gmm.AllocSize(min(size, memsys.DefaultBuf2Size))
  1064  	err = goi.transmit(lmfh, buf, fqn)
  1065  	slab.Free(buf)
  1066  	return err
  1067  }
  1068  
  1069  // TODO: checksum
  1070  func (goi *getOI) _txarch(fqn string, lmfh *os.File, whdr http.Header) error {
  1071  	var (
  1072  		ar  archive.Reader
  1073  		dpq = goi.dpq
  1074  		lom = goi.lom
  1075  	)
  1076  	mime, err := archive.MimeFile(lmfh, goi.t.smm, dpq.arch.mime, lom.ObjName)
  1077  	if err != nil {
  1078  		return err
  1079  	}
  1080  	ar, err = archive.NewReader(mime, lmfh, lom.SizeBytes())
  1081  	if err != nil {
  1082  		return fmt.Errorf("failed to open %s: %w", lom.Cname(), err)
  1083  	}
  1084  
  1085  	// single
  1086  	if dpq.arch.path != "" {
  1087  		debug.Assert(dpq.arch.mmode == "", dpq.arch.mmode)
  1088  		var csl cos.ReadCloseSizer
  1089  		csl, err = ar.ReadOne(dpq.arch.path)
  1090  		if err != nil {
  1091  			return cmn.NewErrFailedTo(goi.t, "extract "+dpq._archstr()+" from", lom.Cname(), err)
  1092  		}
  1093  		if csl == nil {
  1094  			return cos.NewErrNotFound(goi.t, dpq._archstr()+" in "+lom.Cname())
  1095  		}
  1096  		// found
  1097  		whdr.Set(cos.HdrContentType, cos.ContentBinary)
  1098  		buf, slab := goi.t.gmm.AllocSize(min(csl.Size(), memsys.DefaultBuf2Size))
  1099  		err = goi.transmit(csl, buf, fqn)
  1100  		slab.Free(buf)
  1101  		csl.Close()
  1102  		return err
  1103  	}
  1104  
  1105  	// multi match; writing & streaming tar =>(directly)=> response writer
  1106  	debug.Assert(dpq.arch.mmode != "")
  1107  	rcb := _newRcb(goi.w)
  1108  	whdr.Set(cos.HdrContentType, cos.ContentTar)
  1109  	err = ar.ReadUntil(rcb, dpq.arch.regx, dpq.arch.mmode)
  1110  	if err != nil {
  1111  		err = cmn.NewErrFailedTo(goi.t, "extract files that match "+dpq._archstr()+" from", lom.Cname(), err)
  1112  	}
  1113  	if err == nil && rcb.num == 0 {
  1114  		// none found
  1115  		return cos.NewErrNotFound(goi.t, dpq._archstr()+" in "+lom.Cname())
  1116  	}
  1117  	rcb.fini()
  1118  	return err
  1119  }
  1120  
  1121  func (goi *getOI) transmit(r io.Reader, buf []byte, fqn string) error {
  1122  	written, err := cos.CopyBuffer(goi.w, r, buf)
  1123  	if err != nil {
  1124  		if !cos.IsRetriableConnErr(err) {
  1125  			goi.t.fsErr(err, fqn)
  1126  		}
  1127  		nlog.Errorln(cmn.NewErrFailedTo(goi.t, "GET", fqn, err))
  1128  		// at this point, error is already written into the response -
  1129  		// return special code to indicate just that
  1130  		return errSendingResp
  1131  	}
  1132  	// Update objects sent during GFN. Thanks to this we will not
  1133  	// have to resend them in rebalance. In case of a race between rebalance
  1134  	// and GFN the former wins, resulting in duplicated transmission.
  1135  	if goi.dpq.isGFN {
  1136  		goi.t.reb.FilterAdd(cos.UnsafeB(goi.lom.Uname()))
  1137  	} else if !goi.cold { // GFN & cold-GET: must be already loaded w/ atime set
  1138  		if err := goi.lom.Load(false /*cache it*/, true /*locked*/); err != nil {
  1139  			nlog.Errorf("%s: GET post-transmission failure: %v", goi.t, err)
  1140  			return errSendingResp
  1141  		}
  1142  		goi.lom.SetAtimeUnix(goi.atime)
  1143  		goi.lom.Recache()
  1144  	}
  1145  	//
  1146  	// stats
  1147  	//
  1148  	goi.stats(written)
  1149  	return nil
  1150  }
  1151  
  1152  func (goi *getOI) stats(written int64) {
  1153  	goi.t.statsT.AddMany(
  1154  		cos.NamedVal64{Name: stats.GetCount, Value: 1},
  1155  		cos.NamedVal64{Name: stats.GetSize, Value: written},
  1156  		cos.NamedVal64{Name: stats.GetThroughput, Value: written},                // vis-à-vis user (as written m.b. range)
  1157  		cos.NamedVal64{Name: stats.GetLatency, Value: mono.SinceNano(goi.ltime)}, // see also: stats.GetColdRwLatency
  1158  	)
  1159  	if goi.verchanged {
  1160  		goi.t.statsT.AddMany(
  1161  			cos.NamedVal64{Name: stats.VerChangeCount, Value: 1},
  1162  			cos.NamedVal64{Name: stats.VerChangeSize, Value: goi.lom.SizeBytes()},
  1163  		)
  1164  	}
  1165  }
  1166  
  1167  // - parse and validate user specified read range (goi.ranges)
  1168  // - set response header accordingly
  1169  func (goi *getOI) rngToHeader(resphdr http.Header, size int64) (hrng *htrange, ecode int, err error) {
  1170  	var ranges []htrange
  1171  	ranges, err = parseMultiRange(goi.ranges.Range, size)
  1172  	if err != nil {
  1173  		if cmn.IsErrRangeNotSatisfiable(err) {
  1174  			// https://datatracker.ietf.org/doc/html/rfc7233#section-4.2
  1175  			resphdr.Set(cos.HdrContentRange, fmt.Sprintf("%s*/%d", cos.HdrContentRangeValPrefix, size))
  1176  		}
  1177  		ecode = http.StatusRequestedRangeNotSatisfiable
  1178  		return
  1179  	}
  1180  	if len(ranges) == 0 {
  1181  		return
  1182  	}
  1183  	if len(ranges) > 1 {
  1184  		err = cmn.NewErrUnsupp("multi-range read", goi.lom.Cname())
  1185  		ecode = http.StatusRequestedRangeNotSatisfiable
  1186  		return
  1187  	}
  1188  	if goi.dpq.arch.path != "" {
  1189  		err = cmn.NewErrUnsupp("range-read archived file", goi.dpq.arch.path)
  1190  		ecode = http.StatusRequestedRangeNotSatisfiable
  1191  		return
  1192  	}
  1193  
  1194  	// set response header
  1195  	hrng = &ranges[0]
  1196  	resphdr.Set(cos.HdrAcceptRanges, "bytes")
  1197  	resphdr.Set(cos.HdrContentRange, hrng.contentRange(size))
  1198  	return
  1199  }
  1200  
  1201  //
  1202  // APPEND a file or multiple files:
  1203  // - as a new object, if doesn't exist
  1204  // - to an existing object, if exists
  1205  //
  1206  
  1207  func (a *apndOI) do(r *http.Request) (packedHdl string, ecode int, err error) {
  1208  	var (
  1209  		cksumValue    = r.Header.Get(apc.HdrObjCksumVal)
  1210  		cksumType     = r.Header.Get(apc.HdrObjCksumType)
  1211  		contentLength = r.Header.Get(cos.HdrContentLength)
  1212  	)
  1213  	if contentLength != "" {
  1214  		if size, ers := strconv.ParseInt(contentLength, 10, 64); ers == nil {
  1215  			a.size = size
  1216  		}
  1217  	}
  1218  	if cksumValue != "" {
  1219  		a.cksum = cos.NewCksum(cksumType, cksumValue)
  1220  	}
  1221  
  1222  	switch a.op {
  1223  	case apc.AppendOp:
  1224  		buf, slab := a.t.gmm.Alloc()
  1225  		packedHdl, ecode, err = a.apnd(buf)
  1226  		slab.Free(buf)
  1227  	case apc.FlushOp:
  1228  		ecode, err = a.flush()
  1229  	default:
  1230  		err = fmt.Errorf("invalid operation %q (expecting either %q or %q) - check %q query",
  1231  			a.op, apc.AppendOp, apc.FlushOp, apc.QparamAppendType)
  1232  	}
  1233  
  1234  	return packedHdl, ecode, err
  1235  }
  1236  
  1237  func (a *apndOI) apnd(buf []byte) (packedHdl string, ecode int, err error) {
  1238  	var (
  1239  		fh      *os.File
  1240  		workFQN = a.hdl.workFQN
  1241  	)
  1242  	if workFQN == "" {
  1243  		workFQN = fs.CSM.Gen(a.lom, fs.WorkfileType, fs.WorkfileAppend)
  1244  		a.lom.Lock(false)
  1245  		if a.lom.Load(false /*cache it*/, false /*locked*/) == nil {
  1246  			_, a.hdl.partialCksum, err = cos.CopyFile(a.lom.FQN, workFQN, buf, a.lom.CksumType())
  1247  			a.lom.Unlock(false)
  1248  			if err != nil {
  1249  				ecode = http.StatusInternalServerError
  1250  				return
  1251  			}
  1252  			fh, err = os.OpenFile(workFQN, os.O_APPEND|os.O_WRONLY, cos.PermRWR)
  1253  		} else {
  1254  			a.lom.Unlock(false)
  1255  			a.hdl.partialCksum = cos.NewCksumHash(a.lom.CksumType())
  1256  			fh, err = a.lom.CreateFile(workFQN)
  1257  		}
  1258  	} else {
  1259  		fh, err = os.OpenFile(workFQN, os.O_APPEND|os.O_WRONLY, cos.PermRWR)
  1260  		debug.Assert(a.hdl.partialCksum != nil)
  1261  	}
  1262  	if err != nil { // failed to open or create
  1263  		ecode = http.StatusInternalServerError
  1264  		return
  1265  	}
  1266  
  1267  	w := cos.NewWriterMulti(fh, a.hdl.partialCksum.H)
  1268  	_, err = cos.CopyBuffer(w, a.r, buf)
  1269  	cos.Close(fh)
  1270  	if err != nil {
  1271  		ecode = http.StatusInternalServerError
  1272  		return
  1273  	}
  1274  
  1275  	packedHdl = a.pack(workFQN)
  1276  
  1277  	// stats (TODO: add `stats.FlushCount` for symmetry)
  1278  	lat := time.Now().UnixNano() - a.started
  1279  	a.t.statsT.AddMany(
  1280  		cos.NamedVal64{Name: stats.AppendCount, Value: 1},
  1281  		cos.NamedVal64{Name: stats.AppendLatency, Value: lat},
  1282  	)
  1283  	if cmn.Rom.FastV(4, cos.SmoduleAIS) {
  1284  		nlog.Infof("APPEND %s: %s", a.lom, lat)
  1285  	}
  1286  	return
  1287  }
  1288  
  1289  func (a *apndOI) flush() (int, error) {
  1290  	if a.hdl.workFQN == "" {
  1291  		return 0, fmt.Errorf("failed to finalize append-file operation: empty source in the %+v handle", a.hdl)
  1292  	}
  1293  
  1294  	// finalize checksum
  1295  	debug.Assert(a.hdl.partialCksum != nil)
  1296  	a.hdl.partialCksum.Finalize()
  1297  	partialCksum := a.hdl.partialCksum.Clone()
  1298  	if !a.cksum.IsEmpty() && !partialCksum.Equal(a.cksum) {
  1299  		return http.StatusInternalServerError, cos.NewErrDataCksum(partialCksum, a.cksum)
  1300  	}
  1301  
  1302  	params := core.PromoteParams{
  1303  		Bck:    a.lom.Bck(),
  1304  		Cksum:  partialCksum,
  1305  		Config: a.config,
  1306  		PromoteArgs: apc.PromoteArgs{
  1307  			SrcFQN:       a.hdl.workFQN,
  1308  			ObjName:      a.lom.ObjName,
  1309  			OverwriteDst: true,
  1310  			DeleteSrc:    true, // NOTE: always overwrite and remove
  1311  		},
  1312  	}
  1313  	return a.t.Promote(&params)
  1314  }
  1315  
  1316  func (a *apndOI) parse(packedHdl string) error {
  1317  	if packedHdl == "" {
  1318  		return nil
  1319  	}
  1320  	items, err := preParse(packedHdl)
  1321  	if err != nil {
  1322  		return err
  1323  	}
  1324  	a.hdl.partialCksum = cos.NewCksumHash(items[2])
  1325  	buf, err := base64.StdEncoding.DecodeString(items[3])
  1326  	if err != nil {
  1327  		return err
  1328  	}
  1329  	if err := a.hdl.partialCksum.H.(encoding.BinaryUnmarshaler).UnmarshalBinary(buf); err != nil {
  1330  		return err
  1331  	}
  1332  
  1333  	a.hdl.nodeID = items[0]
  1334  	a.hdl.workFQN = items[1]
  1335  	return nil
  1336  }
  1337  
  1338  func (a *apndOI) pack(workFQN string) string {
  1339  	buf, err := a.hdl.partialCksum.H.(encoding.BinaryMarshaler).MarshalBinary()
  1340  	debug.AssertNoErr(err)
  1341  	cksumTy := a.hdl.partialCksum.Type()
  1342  	cksumBinary := base64.StdEncoding.EncodeToString(buf)
  1343  	return a.t.SID() + appendHandleSepa + workFQN + appendHandleSepa + cksumTy + appendHandleSepa + cksumBinary
  1344  }
  1345  
  1346  //
  1347  // COPY (object | reader)
  1348  //
  1349  
  1350  // main method
  1351  func (coi *copyOI) do(t *target, dm *bundle.DataMover, lom *core.LOM) (size int64, err error) {
  1352  	if coi.DryRun {
  1353  		return coi._dryRun(lom, coi.ObjnameTo)
  1354  	}
  1355  
  1356  	// DP == nil: use default (no-op transform) if source bucket is remote
  1357  	if coi.DP == nil && lom.Bck().IsRemote() {
  1358  		coi.DP = &core.LDP{}
  1359  	}
  1360  
  1361  	// 1: dst location
  1362  	smap := t.owner.smap.Get()
  1363  	tsi, errN := smap.HrwName2T(coi.BckTo.MakeUname(coi.ObjnameTo))
  1364  	if errN != nil {
  1365  		return 0, errN
  1366  	}
  1367  	if tsi.ID() != t.SID() {
  1368  		return coi.send(t, dm, lom, coi.ObjnameTo, tsi)
  1369  	}
  1370  
  1371  	// dst is this target
  1372  	// 2, 3: with transformation and without
  1373  	dst := core.AllocLOM(coi.ObjnameTo)
  1374  	if err := dst.InitBck(coi.BckTo.Bucket()); err != nil {
  1375  		core.FreeLOM(dst)
  1376  		return 0, err
  1377  	}
  1378  	if coi.DP != nil {
  1379  		var ecode int
  1380  		size, ecode, err = coi._reader(t, dm, lom, dst)
  1381  		debug.Assert(ecode != http.StatusNotFound || cos.IsNotExist(err, 0), err, ecode)
  1382  	} else {
  1383  		size, err = coi._regular(t, lom, dst)
  1384  	}
  1385  	core.FreeLOM(dst)
  1386  
  1387  	return size, err
  1388  }
  1389  
  1390  func (coi *copyOI) _dryRun(lom *core.LOM, objnameTo string) (size int64, err error) {
  1391  	if coi.DP == nil {
  1392  		if lom.Uname() != coi.BckTo.MakeUname(objnameTo) {
  1393  			size = lom.SizeBytes()
  1394  		}
  1395  		return size, nil
  1396  	}
  1397  
  1398  	// discard the reader and be done
  1399  	var reader io.ReadCloser
  1400  	if reader, _, err = coi.DP.Reader(lom, false, false); err != nil {
  1401  		return 0, err
  1402  	}
  1403  	size, err = io.Copy(io.Discard, reader)
  1404  	reader.Close()
  1405  	return size, err
  1406  }
  1407  
  1408  // PUT DP(lom) => dst
  1409  // The DP reader is responsible for any read-locking of the source lom.
  1410  //
  1411  // NOTE: no assumpions are being made on whether the source lom is present in cluster.
  1412  // (can be a "pure" metadata of a (non-existing) Cloud object; accordingly, DP's reader must
  1413  // be able to hande cold get, warm get, etc.)
  1414  //
  1415  // If destination bucket is remote:
  1416  // - create a local replica of the object on one of the targets, and
  1417  // - PUT to the relevant backend
  1418  // An option for _not_ storing the object _in_ the cluster would be a _feature_ that can be
  1419  // further debated.
  1420  func (coi *copyOI) _reader(t *target, dm *bundle.DataMover, lom, dst *core.LOM) (size int64, _ int, _ error) {
  1421  	reader, oah, errN := coi.DP.Reader(lom, coi.LatestVer, coi.Sync)
  1422  	if errN != nil {
  1423  		return 0, 0, errN
  1424  	}
  1425  	if lom.Bck().Equal(coi.BckTo, true, true) {
  1426  		dst.SetVersion(oah.Version())
  1427  	}
  1428  
  1429  	poi := allocPOI()
  1430  	{
  1431  		poi.t = t
  1432  		poi.lom = dst
  1433  		poi.config = coi.Config
  1434  		poi.r = reader
  1435  		poi.owt = coi.OWT
  1436  		poi.xctn = coi.Xact // on behalf of
  1437  		poi.workFQN = fs.CSM.Gen(dst, fs.WorkfileType, "copy-dp")
  1438  		poi.atime = oah.AtimeUnix()
  1439  		poi.cksumToUse = oah.Checksum()
  1440  	}
  1441  	if dm != nil {
  1442  		poi.owt = dm.OWT() // (compare with _send)
  1443  	}
  1444  	ecode, err := poi.putObject()
  1445  	freePOI(poi)
  1446  	if err == nil {
  1447  		// xaction stats: inc locally processed (and see data mover for in and out objs)
  1448  		size = oah.SizeBytes()
  1449  	}
  1450  	return size, ecode, err
  1451  }
  1452  
  1453  func (coi *copyOI) _regular(t *target, lom, dst *core.LOM) (size int64, _ error) {
  1454  	if lom.FQN == dst.FQN { // resilvering with a single mountpath?
  1455  		return
  1456  	}
  1457  	lcopy := lom.Uname() == dst.Uname() // n-way copy
  1458  	lom.Lock(lcopy)
  1459  	defer lom.Unlock(lcopy)
  1460  
  1461  	if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil {
  1462  		if !cos.IsNotExist(err, 0) {
  1463  			err = cmn.NewErrFailedTo(t, "coi-load", lom.Cname(), err)
  1464  		}
  1465  		return 0, err
  1466  	}
  1467  
  1468  	// w-lock the destination unless already locked (above)
  1469  	if !lcopy {
  1470  		dst.Lock(true)
  1471  		defer dst.Unlock(true)
  1472  		if err := dst.Load(false /*cache it*/, true /*locked*/); err == nil {
  1473  			if lom.EqCksum(dst.Checksum()) {
  1474  				return 0, nil
  1475  			}
  1476  		} else if cmn.IsErrBucketNought(err) {
  1477  			return 0, err
  1478  		}
  1479  	}
  1480  	dst2, err := lom.Copy2FQN(dst.FQN, coi.Buf)
  1481  	if err == nil {
  1482  		size = lom.SizeBytes()
  1483  		if coi.Finalize {
  1484  			t.putMirror(dst2)
  1485  		}
  1486  	}
  1487  	if dst2 != nil {
  1488  		core.FreeLOM(dst2)
  1489  	}
  1490  	return size, err
  1491  }
  1492  
  1493  // send object => designated target
  1494  // * source is a LOM or a reader (that may be reading from remote)
  1495  // * one of the two equivalent transmission mechanisms: PUT or transport Send
  1496  func (coi *copyOI) send(t *target, dm *bundle.DataMover, lom *core.LOM, objNameTo string, tsi *meta.Snode) (size int64, err error) {
  1497  	debug.Assert(coi.OWT > 0)
  1498  	sargs := allocSnda()
  1499  	{
  1500  		sargs.objNameTo = objNameTo
  1501  		sargs.tsi = tsi
  1502  		sargs.dm = dm
  1503  		sargs.owt = coi.OWT
  1504  	}
  1505  	if dm != nil {
  1506  		sargs.owt = dm.OWT() // takes precedence
  1507  	}
  1508  	size, err = coi._send(t, lom, sargs)
  1509  	freeSnda(sargs)
  1510  	return
  1511  }
  1512  
  1513  func (coi *copyOI) _send(t *target, lom *core.LOM, sargs *sendArgs) (size int64, _ error) {
  1514  	debug.Assert(!coi.DryRun)
  1515  	if sargs.dm != nil {
  1516  		// clone the `lom` to use it in the async operation (free it via `_sendObjDM` callback)
  1517  		lom = lom.CloneMD(lom.FQN)
  1518  	}
  1519  
  1520  	switch {
  1521  	case coi.OWT == cmn.OwtPromote:
  1522  		// 1. promote
  1523  		debug.Assert(coi.DP == nil)
  1524  		debug.Assert(sargs.owt == cmn.OwtPromote)
  1525  
  1526  		fh, err := cos.NewFileHandle(lom.FQN)
  1527  		if err != nil {
  1528  			if os.IsNotExist(err) {
  1529  				return 0, nil
  1530  			}
  1531  			return 0, cmn.NewErrFailedTo(t, "open", lom.Cname(), err)
  1532  		}
  1533  		fi, err := fh.Stat()
  1534  		if err != nil {
  1535  			fh.Close()
  1536  			return 0, cmn.NewErrFailedTo(t, "fstat", lom.Cname(), err)
  1537  		}
  1538  		size = fi.Size()
  1539  		sargs.reader, sargs.objAttrs = fh, lom
  1540  	case coi.DP == nil:
  1541  		// 2. migrate/replicate lom
  1542  
  1543  		lom.Lock(false)
  1544  		if err := lom.Load(false /*cache it*/, true /*locked*/); err != nil {
  1545  			lom.Unlock(false)
  1546  			return 0, nil
  1547  		}
  1548  		reader, err := lom.NewDeferROC()
  1549  		if err != nil {
  1550  			return 0, err
  1551  		}
  1552  		size = lom.SizeBytes()
  1553  		sargs.reader, sargs.objAttrs = reader, lom
  1554  	default:
  1555  		// 3. DP transform (possibly, no-op)
  1556  		// If the object is not present call t.Backend.GetObjReader
  1557  		reader, oah, err := coi.DP.Reader(lom, coi.LatestVer, coi.Sync)
  1558  		if err != nil {
  1559  			return
  1560  		}
  1561  		// returns cos.ContentLengthUnknown (-1) if post-transform size is unknown
  1562  		size = oah.SizeBytes()
  1563  		sargs.reader, sargs.objAttrs = reader, oah
  1564  	}
  1565  
  1566  	// do
  1567  	var err error
  1568  	sargs.bckTo = coi.BckTo
  1569  	if sargs.dm != nil {
  1570  		err = coi._dm(lom /*for attrs*/, sargs)
  1571  	} else {
  1572  		err = coi.put(t, sargs)
  1573  	}
  1574  	return size, err
  1575  }
  1576  
  1577  // use data mover to transmit objects to other targets
  1578  // (compare with coi.put())
  1579  func (coi *copyOI) _dm(lom *core.LOM, sargs *sendArgs) error {
  1580  	debug.Assert(sargs.dm.OWT() == sargs.owt)
  1581  	debug.Assert(sargs.dm.GetXact() == coi.Xact || sargs.dm.GetXact().ID() == coi.Xact.ID())
  1582  	o := transport.AllocSend()
  1583  	hdr, oa := &o.Hdr, sargs.objAttrs
  1584  	{
  1585  		hdr.Bck.Copy(sargs.bckTo.Bucket())
  1586  		hdr.ObjName = sargs.objNameTo
  1587  		hdr.ObjAttrs.CopyFrom(oa, false /*skip cksum*/)
  1588  	}
  1589  	o.Callback = func(_ *transport.ObjHdr, _ io.ReadCloser, _ any, _ error) {
  1590  		core.FreeLOM(lom)
  1591  	}
  1592  	return sargs.dm.Send(o, sargs.reader, sargs.tsi)
  1593  }
  1594  
  1595  // PUT(lom) => destination target (compare with coi.dm())
  1596  // always closes params.Reader, either explicitly or via Do()
  1597  func (coi *copyOI) put(t *target, sargs *sendArgs) error {
  1598  	var (
  1599  		hdr   = make(http.Header, 8)
  1600  		query = sargs.bckTo.NewQuery()
  1601  	)
  1602  	cmn.ToHeader(sargs.objAttrs, hdr, sargs.objAttrs.SizeBytes(true))
  1603  	hdr.Set(apc.HdrT2TPutterID, t.SID())
  1604  	query.Set(apc.QparamOWT, sargs.owt.ToS())
  1605  	if coi.Xact != nil {
  1606  		query.Set(apc.QparamUUID, coi.Xact.ID())
  1607  	}
  1608  	reqArgs := cmn.HreqArgs{
  1609  		Method: http.MethodPut,
  1610  		Base:   sargs.tsi.URL(cmn.NetIntraData),
  1611  		Path:   apc.URLPathObjects.Join(sargs.bckTo.Name, sargs.objNameTo),
  1612  		Query:  query,
  1613  		Header: hdr,
  1614  		BodyR:  sargs.reader,
  1615  	}
  1616  	req, _, cancel, err := reqArgs.ReqWithTimeout(coi.Config.Timeout.SendFile.D())
  1617  	if err != nil {
  1618  		cos.Close(sargs.reader)
  1619  		return fmt.Errorf("unexpected failure to create request, err: %w", err)
  1620  	}
  1621  	defer cancel()
  1622  	resp, err := g.client.data.Do(req)
  1623  	if err != nil {
  1624  		return cmn.NewErrFailedTo(t, "coi.put "+sargs.bckTo.Name+"/"+sargs.objNameTo, sargs.tsi, err)
  1625  	}
  1626  	cos.DrainReader(resp.Body)
  1627  	resp.Body.Close()
  1628  	return nil
  1629  }
  1630  
  1631  func (coi *copyOI) stats(size int64, err error) {
  1632  	if err == nil && coi.Xact != nil {
  1633  		coi.Xact.ObjsAdd(1, size)
  1634  	}
  1635  }
  1636  
  1637  //
  1638  // PUT a new shard _or_ APPEND to an existing one (w/ read/write/list via cmn/archive)
  1639  //
  1640  
  1641  func (a *putA2I) do() (int, error) {
  1642  	if a.filename == "" {
  1643  		return 0, errors.New("archive path is not defined")
  1644  	}
  1645  	// standard library does not support appending to tgz, zip, and such;
  1646  	// for TAR there is an optimizing workaround not requiring a full copy
  1647  	if a.mime == archive.ExtTar && !a.put {
  1648  		var (
  1649  			err       error
  1650  			fh        *os.File
  1651  			size      int64
  1652  			tarFormat tar.Format
  1653  			workFQN   = fs.CSM.Gen(a.lom, fs.WorkfileType, fs.WorkfileAppendToArch)
  1654  		)
  1655  		if err = os.Rename(a.lom.FQN, workFQN); err != nil {
  1656  			return http.StatusInternalServerError, err
  1657  		}
  1658  		fh, tarFormat, err = archive.OpenTarSeekEnd(a.lom.Cname(), workFQN)
  1659  		if err != nil {
  1660  			if errV := a.lom.RenameFrom(workFQN); errV != nil {
  1661  				return http.StatusInternalServerError, errV
  1662  			}
  1663  			if err == archive.ErrTarIsEmpty {
  1664  				a.put = true
  1665  				goto cpap
  1666  			}
  1667  			return http.StatusInternalServerError, err
  1668  		}
  1669  		// do - fast
  1670  		if size, err = a.fast(fh, tarFormat); err == nil {
  1671  			// NOTE: checksum traded off
  1672  			if err = a.finalize(size, cos.NoneCksum, workFQN); err == nil {
  1673  				return http.StatusInternalServerError, nil // ok
  1674  			}
  1675  		}
  1676  		if errV := a.lom.RenameFrom(workFQN); errV != nil {
  1677  			nlog.Errorf(fmtNested, a.t, err, "append and rename back", workFQN, errV)
  1678  		}
  1679  		return http.StatusInternalServerError, err
  1680  	}
  1681  
  1682  cpap: // copy + append
  1683  	var (
  1684  		err       error
  1685  		lmfh, wfh *os.File
  1686  		workFQN   string
  1687  		cksum     cos.CksumHashSize
  1688  		aw        archive.Writer
  1689  	)
  1690  	workFQN = fs.CSM.Gen(a.lom, fs.WorkfileType, fs.WorkfileAppendToArch)
  1691  	wfh, err = os.OpenFile(workFQN, os.O_CREATE|os.O_WRONLY, cos.PermRWR)
  1692  	if err != nil {
  1693  		return http.StatusInternalServerError, err
  1694  	}
  1695  	// currently, arch writers only use size and time but it may change
  1696  	oah := cos.SimpleOAH{Size: a.size, Atime: a.started}
  1697  	if a.put {
  1698  		// when append becomes PUT (TODO: checksum type)
  1699  		cksum.Init(cos.ChecksumXXHash)
  1700  		aw = archive.NewWriter(a.mime, wfh, &cksum, nil /*opts*/)
  1701  		err = aw.Write(a.filename, oah, a.r)
  1702  		aw.Fini()
  1703  	} else {
  1704  		// copy + append
  1705  		lmfh, err = a.lom.OpenFile()
  1706  		if err != nil {
  1707  			cos.Close(wfh)
  1708  			return http.StatusNotFound, err
  1709  		}
  1710  		cksum.Init(a.lom.CksumType())
  1711  		aw = archive.NewWriter(a.mime, wfh, &cksum, nil)
  1712  		err = aw.Copy(lmfh, a.lom.SizeBytes())
  1713  		if err == nil {
  1714  			err = aw.Write(a.filename, oah, a.r)
  1715  		}
  1716  		aw.Fini() // in that order
  1717  		cos.Close(lmfh)
  1718  	}
  1719  
  1720  	// finalize
  1721  	cos.Close(wfh)
  1722  	if err == nil {
  1723  		cksum.Finalize()
  1724  		err = a.finalize(cksum.Size, cksum.Clone(), workFQN)
  1725  	} else {
  1726  		cos.RemoveFile(workFQN)
  1727  	}
  1728  	return a.reterr(err)
  1729  }
  1730  
  1731  // TAR only - fast & direct
  1732  func (a *putA2I) fast(rwfh *os.File, tarFormat tar.Format) (size int64, err error) {
  1733  	var (
  1734  		buf, slab = a.t.gmm.AllocSize(a.size)
  1735  		tw        = tar.NewWriter(rwfh)
  1736  		hdr       = tar.Header{
  1737  			Typeflag: tar.TypeReg,
  1738  			Name:     a.filename,
  1739  			Size:     a.size,
  1740  			ModTime:  time.Unix(0, a.started),
  1741  			Mode:     int64(cos.PermRWRR),
  1742  			Format:   tarFormat,
  1743  		}
  1744  	)
  1745  	tw.WriteHeader(&hdr)
  1746  	_, err = io.CopyBuffer(tw, a.r, buf) // append
  1747  	cos.Close(tw)
  1748  	if err == nil {
  1749  		size, err = rwfh.Seek(0, io.SeekCurrent)
  1750  	}
  1751  	slab.Free(buf)
  1752  	cos.Close(rwfh)
  1753  	return
  1754  }
  1755  
  1756  func (*putA2I) reterr(err error) (int, error) {
  1757  	ecode := http.StatusInternalServerError
  1758  	if cmn.IsErrCapExceeded(err) {
  1759  		ecode = http.StatusInsufficientStorage
  1760  	}
  1761  	return ecode, err
  1762  }
  1763  
  1764  func (a *putA2I) finalize(size int64, cksum *cos.Cksum, fqn string) error {
  1765  	debug.Func(func() {
  1766  		finfo, err := os.Stat(fqn)
  1767  		debug.AssertNoErr(err)
  1768  		debug.Assertf(finfo.Size() == size, "%d != %d", finfo.Size(), size)
  1769  	})
  1770  	// done
  1771  	if err := a.lom.RenameFrom(fqn); err != nil {
  1772  		return err
  1773  	}
  1774  	a.lom.SetSize(size)
  1775  	a.lom.SetCksum(cksum)
  1776  	a.lom.SetAtimeUnix(a.started)
  1777  	if err := a.lom.Persist(); err != nil {
  1778  		return err
  1779  	}
  1780  	if a.lom.ECEnabled() {
  1781  		if err := ec.ECM.EncodeObject(a.lom, nil); err != nil && err != ec.ErrorECDisabled {
  1782  			return err
  1783  		}
  1784  	}
  1785  	a.t.putMirror(a.lom)
  1786  	return nil
  1787  }
  1788  
  1789  //
// put mirror (main)
  1791  //
  1792  
  1793  func (t *target) putMirror(lom *core.LOM) {
  1794  	mconfig := lom.MirrorConf()
  1795  	if !mconfig.Enabled {
  1796  		return
  1797  	}
  1798  	if mpathCnt := fs.NumAvail(); mpathCnt < int(mconfig.Copies) {
  1799  		t.statsT.IncErr(stats.ErrPutMirrorCount)
  1800  		nanotim := mono.NanoTime()
  1801  		if nanotim&0x7 == 7 {
  1802  			if mpathCnt == 0 {
  1803  				nlog.Errorf("%s: %v", t, cmn.ErrNoMountpaths)
  1804  			} else {
  1805  				nlog.Errorf(fmtErrInsuffMpaths2, t, mpathCnt, lom, mconfig.Copies)
  1806  			}
  1807  		}
  1808  		return
  1809  	}
  1810  	rns := xreg.RenewPutMirror(lom)
  1811  	if rns.Err != nil {
  1812  		nlog.Errorf("%s: %s %v", t, lom, rns.Err)
  1813  		debug.AssertNoErr(rns.Err)
  1814  		return
  1815  	}
  1816  	xctn := rns.Entry.Get()
  1817  	xputlrep := xctn.(*mirror.XactPut)
  1818  	xputlrep.Repl(lom)
  1819  }
  1820  
  1821  // TODO:
  1822  // - CopyBuffer
  1823  // - currently, only tar - add message pack (what else?)
  1824  // - Call(..., *tar.Header) to avoid typecast
  1825  
  1826  type rcbCtx struct {
  1827  	w   io.Writer
  1828  	tw  *tar.Writer
  1829  	num int
  1830  }
  1831  
  1832  var _ archive.ArchRCB = (*rcbCtx)(nil)
  1833  
  1834  func _newRcb(w io.Writer) (c *rcbCtx) {
  1835  	c = &rcbCtx{w: w}
  1836  	return c
  1837  }
  1838  
  1839  func (c *rcbCtx) Call(_ string, reader cos.ReadCloseSizer, hdr any) (_ bool /*stop*/, err error) {
  1840  	if c.tw == nil {
  1841  		debug.Assert(c.num == 0)
  1842  		c.tw = tar.NewWriter(c.w)
  1843  	}
  1844  	c.num++
  1845  	tarHdr, ok := hdr.(*tar.Header)
  1846  	debug.Assert(ok)
  1847  	if err = c.tw.WriteHeader(tarHdr); err == nil {
  1848  		_, err = io.Copy(c.tw, reader)
  1849  	}
  1850  	return false, err
  1851  }
  1852  
  1853  func (c *rcbCtx) fini() {
  1854  	if c.tw != nil {
  1855  		debug.Assert(c.num > 0)
  1856  		c.tw.Close()
  1857  	}
  1858  }
  1859  
  1860  //
  1861  // mem pools
  1862  //
  1863  
  1864  var (
  1865  	goiPool, poiPool, sndPool sync.Pool
  1866  
  1867  	goi0 getOI
  1868  	poi0 putOI
  1869  	snd0 sendArgs
  1870  )
  1871  
  1872  func allocGOI() (a *getOI) {
  1873  	if v := goiPool.Get(); v != nil {
  1874  		a = v.(*getOI)
  1875  		return
  1876  	}
  1877  	return &getOI{}
  1878  }
  1879  
  1880  func freeGOI(a *getOI) {
  1881  	*a = goi0
  1882  	goiPool.Put(a)
  1883  }
  1884  
  1885  func allocPOI() (a *putOI) {
  1886  	if v := poiPool.Get(); v != nil {
  1887  		a = v.(*putOI)
  1888  		return
  1889  	}
  1890  	return &putOI{}
  1891  }
  1892  
  1893  func freePOI(a *putOI) {
  1894  	*a = poi0
  1895  	poiPool.Put(a)
  1896  }
  1897  
  1898  func allocSnda() (a *sendArgs) {
  1899  	if v := sndPool.Get(); v != nil {
  1900  		a = v.(*sendArgs)
  1901  		return
  1902  	}
  1903  	return &sendArgs{}
  1904  }
  1905  
  1906  func freeSnda(a *sendArgs) {
  1907  	*a = snd0
  1908  	sndPool.Put(a)
  1909  }