github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/handler.go (about)

     1  // Package dsort provides distributed massively parallel resharding for very large datasets.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package dsort
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"net/url"
    12  	"regexp"
    13  	"strconv"
    14  
    15  	"github.com/NVIDIA/aistore/api/apc"
    16  	"github.com/NVIDIA/aistore/cmn"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  	"github.com/NVIDIA/aistore/cmn/nlog"
    20  	"github.com/NVIDIA/aistore/core"
    21  	"github.com/NVIDIA/aistore/core/meta"
    22  	"github.com/NVIDIA/aistore/ext/dsort/shard"
    23  	"github.com/NVIDIA/aistore/fs"
    24  	"github.com/NVIDIA/aistore/stats"
    25  	"github.com/NVIDIA/aistore/sys"
    26  	"github.com/NVIDIA/aistore/xact/xreg"
    27  	jsoniter "github.com/json-iterator/go"
    28  	"github.com/tinylib/msgp/msgp"
    29  )
    30  
// response is the per-node result of a broadcast: the handlers in this file
// receive a slice of these from bcast and inspect them one by one.
type response struct {
	si         *meta.Snode // node this response is associated with (handlers read si.ID())
	res        []byte      // raw response body; handlers below decode it as JSON
	err        error       // treated by the handlers as a failed request when non-nil
	statusCode int         // HTTP status code; handlers compare it against http.Status* constants
}
    37  
    38  //////////////////
    39  ///// PROXY //////
    40  //////////////////
    41  
// psi is the node handle the proxy-side handlers below use to obtain the
// current cluster map (psi.Sowner().Get()).
// NOTE(review): it is not assigned anywhere in this file - presumably it is
// set during package initialization elsewhere; confirm.
var psi core.Node
    43  
    44  // POST /v1/sort
    45  func PstartHandler(w http.ResponseWriter, r *http.Request, parsc *ParsedReq) {
    46  	var (
    47  		err  error
    48  		pars = parsc.pars
    49  	)
    50  	pars.TargetOrderSalt = []byte(cos.FormatNowStamp())
    51  
    52  	// TODO: handle case when bucket was removed during dsort job - this should
    53  	// stop whole operation. Maybe some listeners as we have on smap change?
    54  	// This would also be helpful for Downloader (in the middle of downloading
    55  	// large file the bucket can be easily deleted).
    56  
    57  	pars.DsorterType, err = dsorterType(pars)
    58  	if err != nil {
    59  		cmn.WriteErr(w, r, err)
    60  		return
    61  	}
    62  
    63  	b, err := js.Marshal(pars)
    64  	if err != nil {
    65  		s := fmt.Sprintf("unable to marshal RequestSpec: %+v, err: %v", pars, err)
    66  		cmn.WriteErrMsg(w, r, s, http.StatusInternalServerError)
    67  		return
    68  	}
    69  
    70  	var (
    71  		managerUUID = PrefixJobID + cos.GenUUID() // compare w/ p.httpdlpost
    72  		smap        = psi.Sowner().Get()
    73  	)
    74  
    75  	// Starting dsort has two phases:
    76  	// 1. Initialization, ensures that all targets successfully initialized all
    77  	//    structures and are ready to receive requests: start, metrics, abort
    78  	// 2. Start, where we request targets to start the dsort.
    79  	//
    80  	// This prevents bugs where one targets would just start dsort (other did
    81  	// not have yet initialized) and starts to communicate with other targets
    82  	// but because they are not ready with their initialization will not recognize
    83  	// given dsort job. Also bug where we could send abort (which triggers cleanup)
    84  	// to not yet initialized target.
    85  
    86  	// phase 1
    87  	if cmn.Rom.FastV(4, cos.SmoduleDsort) {
    88  		nlog.Infof("[dsort] %s broadcasting init request to all targets", managerUUID)
    89  	}
    90  	path := apc.URLPathdSortInit.Join(managerUUID)
    91  	responses := bcast(http.MethodPost, path, nil, b, smap)
    92  	if err := _handleResp(w, r, smap, managerUUID, responses); err != nil {
    93  		return
    94  	}
    95  
    96  	// phase 2
    97  	if cmn.Rom.FastV(4, cos.SmoduleDsort) {
    98  		nlog.Infof("[dsort] %s broadcasting start request to all targets", managerUUID)
    99  	}
   100  	path = apc.URLPathdSortStart.Join(managerUUID)
   101  	responses = bcast(http.MethodPost, path, nil, nil, smap)
   102  	if err := _handleResp(w, r, smap, managerUUID, responses); err != nil {
   103  		return
   104  	}
   105  
   106  	w.Write([]byte(managerUUID))
   107  }
   108  
   109  func _handleResp(w http.ResponseWriter, r *http.Request, smap *meta.Smap, managerUUID string, responses []response) error {
   110  	for _, resp := range responses {
   111  		if resp.err == nil {
   112  			continue
   113  		}
   114  		// cleanup
   115  		path := apc.URLPathdSortAbort.Join(managerUUID)
   116  		_ = bcast(http.MethodDelete, path, nil, nil, smap)
   117  
   118  		msg := fmt.Sprintf("failed to start [dsort] %s: %v(%d)", managerUUID, resp.err, resp.statusCode)
   119  		cmn.WriteErrMsg(w, r, msg, http.StatusInternalServerError)
   120  		return resp.err
   121  	}
   122  	return nil
   123  }
   124  
   125  // GET /v1/sort
   126  func PgetHandler(w http.ResponseWriter, r *http.Request) {
   127  	if !checkHTTPMethod(w, r, http.MethodGet) {
   128  		return
   129  	}
   130  	query := r.URL.Query()
   131  	managerUUID := query.Get(apc.QparamUUID)
   132  	if managerUUID == "" {
   133  		plistHandler(w, r, query)
   134  		return
   135  	}
   136  
   137  	pmetricsHandler(w, r, query)
   138  }
   139  
   140  // GET /v1/sort?regex=...
   141  func plistHandler(w http.ResponseWriter, r *http.Request, query url.Values) {
   142  	var (
   143  		path     = apc.URLPathdSortList.S
   144  		regexStr = query.Get(apc.QparamRegex)
   145  	)
   146  	if regexStr != "" {
   147  		if _, err := regexp.CompilePOSIX(regexStr); err != nil {
   148  			cmn.WriteErr(w, r, err)
   149  			return
   150  		}
   151  	}
   152  	responses := bcast(http.MethodGet, path, query, nil, psi.Sowner().Get())
   153  
   154  	resultList := make([]*JobInfo, 0, 4)
   155  	for _, r := range responses {
   156  		if r.err != nil {
   157  			nlog.Errorln(r.err)
   158  			continue
   159  		}
   160  
   161  		var targetMetrics []*JobInfo
   162  		err := jsoniter.Unmarshal(r.res, &targetMetrics)
   163  		debug.AssertNoErr(err)
   164  
   165  		for _, job := range targetMetrics {
   166  			var found bool
   167  			for _, oldMetric := range resultList {
   168  				if oldMetric.ID == job.ID {
   169  					oldMetric.Aggregate(job)
   170  					found = true
   171  					break
   172  				}
   173  			}
   174  			if !found {
   175  				resultList = append(resultList, job)
   176  			}
   177  		}
   178  	}
   179  
   180  	w.Write(cos.MustMarshal(resultList))
   181  }
   182  
   183  // GET /v1/sort?id=...
   184  func pmetricsHandler(w http.ResponseWriter, r *http.Request, query url.Values) {
   185  	var (
   186  		smap        = psi.Sowner().Get()
   187  		all         = make(map[string]*JobInfo, smap.CountActiveTs())
   188  		managerUUID = query.Get(apc.QparamUUID)
   189  		path        = apc.URLPathdSortMetrics.Join(managerUUID)
   190  		responses   = bcast(http.MethodGet, path, nil, nil, smap)
   191  		notFound    int
   192  	)
   193  	for _, resp := range responses {
   194  		if resp.statusCode == http.StatusNotFound {
   195  			// Probably new target which does not know anything about this dsort op.
   196  			notFound++
   197  			continue
   198  		}
   199  		if resp.err != nil {
   200  			cmn.WriteErr(w, r, resp.err, resp.statusCode)
   201  			return
   202  		}
   203  		j := &JobInfo{}
   204  		if err := js.Unmarshal(resp.res, j); err != nil {
   205  			cmn.WriteErr(w, r, err, http.StatusInternalServerError)
   206  			return
   207  		}
   208  		all[resp.si.ID()] = j
   209  	}
   210  
   211  	if notFound == len(responses) && notFound > 0 {
   212  		msg := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID)
   213  		cmn.WriteErrMsg(w, r, msg, http.StatusNotFound)
   214  		return
   215  	}
   216  	w.Write(cos.MustMarshal(all))
   217  }
   218  
   219  // DELETE /v1/sort/abort
   220  func PabortHandler(w http.ResponseWriter, r *http.Request) {
   221  	if !checkHTTPMethod(w, r, http.MethodDelete) {
   222  		return
   223  	}
   224  	_, err := parseURL(w, r, 0, apc.URLPathdSortAbort.L)
   225  	if err != nil {
   226  		return
   227  	}
   228  
   229  	var (
   230  		query       = r.URL.Query()
   231  		managerUUID = query.Get(apc.QparamUUID)
   232  		path        = apc.URLPathdSortAbort.Join(managerUUID)
   233  		responses   = bcast(http.MethodDelete, path, nil, nil, psi.Sowner().Get())
   234  	)
   235  	allNotFound := true
   236  	for _, resp := range responses {
   237  		if resp.statusCode == http.StatusNotFound {
   238  			continue
   239  		}
   240  		allNotFound = false
   241  
   242  		if resp.err != nil {
   243  			cmn.WriteErr(w, r, resp.err, resp.statusCode)
   244  			return
   245  		}
   246  	}
   247  	if allNotFound {
   248  		err := cos.NewErrNotFound(core.T, "dsort job "+managerUUID)
   249  		cmn.WriteErr(w, r, err, http.StatusNotFound)
   250  		return
   251  	}
   252  }
   253  
   254  // DELETE /v1/sort
   255  func PremoveHandler(w http.ResponseWriter, r *http.Request) {
   256  	if !checkHTTPMethod(w, r, http.MethodDelete) {
   257  		return
   258  	}
   259  	_, err := parseURL(w, r, 0, apc.URLPathdSort.L)
   260  	if err != nil {
   261  		return
   262  	}
   263  
   264  	var (
   265  		smap        = psi.Sowner().Get()
   266  		query       = r.URL.Query()
   267  		managerUUID = query.Get(apc.QparamUUID)
   268  		path        = apc.URLPathdSortMetrics.Join(managerUUID)
   269  		responses   = bcast(http.MethodGet, path, nil, nil, smap)
   270  	)
   271  
   272  	// First, broadcast to see if process is cleaned up first
   273  	seenOne := false
   274  	for _, resp := range responses {
   275  		if resp.statusCode == http.StatusNotFound {
   276  			// Probably new target which does not know anything about this dsort op.
   277  			continue
   278  		}
   279  		if resp.err != nil {
   280  			cmn.WriteErr(w, r, resp.err, resp.statusCode)
   281  			return
   282  		}
   283  		metrics := &Metrics{}
   284  		if err := js.Unmarshal(resp.res, &metrics); err != nil {
   285  			cmn.WriteErr(w, r, err, http.StatusInternalServerError)
   286  			return
   287  		}
   288  		if !metrics.Archived.Load() {
   289  			cmn.WriteErrMsg(w, r, fmt.Sprintf("%s process %s still in progress and cannot be removed",
   290  				apc.ActDsort, managerUUID))
   291  			return
   292  		}
   293  		seenOne = true
   294  	}
   295  	if !seenOne {
   296  		s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID)
   297  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   298  		return
   299  	}
   300  
   301  	// Next, broadcast the remove once we've checked that all targets have run cleanup
   302  	path = apc.URLPathdSortRemove.Join(managerUUID)
   303  	responses = bcast(http.MethodDelete, path, nil, nil, smap)
   304  	var failed []string //nolint:prealloc // will remain not allocated when no errors
   305  	for _, r := range responses {
   306  		if r.statusCode == http.StatusOK {
   307  			continue
   308  		}
   309  		failed = append(failed, fmt.Sprintf("%v: (%v) %v", r.si.ID(), r.statusCode, string(r.res)))
   310  	}
   311  	if len(failed) != 0 {
   312  		err := fmt.Errorf("got errors while broadcasting remove: %v", failed)
   313  		cmn.WriteErr(w, r, err)
   314  	}
   315  }
   316  
   317  // Determine dsorter type. We need to make this decision based on (e.g.) size targets' memory.
   318  func dsorterType(pars *parsedReqSpec) (string, error) {
   319  	if pars.DsorterType != "" {
   320  		return pars.DsorterType, nil // in case the dsorter type is already set, we need to respect it
   321  	}
   322  
   323  	// Get memory stats from targets
   324  	var (
   325  		totalAvailMemory  uint64
   326  		err               error
   327  		path              = apc.URLPathDae.S
   328  		moreThanThreshold = true
   329  	)
   330  
   331  	dsorterMemThreshold, err := cos.ParseSize(pars.DsorterMemThreshold, cos.UnitsIEC)
   332  	debug.AssertNoErr(err)
   333  
   334  	query := make(url.Values)
   335  	query.Add(apc.QparamWhat, apc.WhatNodeStatsAndStatus)
   336  	responses := bcast(http.MethodGet, path, query, nil, psi.Sowner().Get())
   337  	for _, response := range responses {
   338  		if response.err != nil {
   339  			return "", response.err
   340  		}
   341  
   342  		daemonStatus := stats.NodeStatus{}
   343  		if err := jsoniter.Unmarshal(response.res, &daemonStatus); err != nil {
   344  			return "", err
   345  		}
   346  
   347  		memStat := sys.MemStat{Total: daemonStatus.MemCPUInfo.MemAvail + daemonStatus.MemCPUInfo.MemUsed}
   348  		dsortAvailMemory := calcMaxMemoryUsage(pars.MaxMemUsage, &memStat)
   349  		totalAvailMemory += dsortAvailMemory
   350  		moreThanThreshold = moreThanThreshold && dsortAvailMemory > uint64(dsorterMemThreshold)
   351  	}
   352  
   353  	// TODO: currently, we have import cycle: dsort -> api -> dsort. Need to
   354  	// think of a way to get the total size of bucket without copy-and-paste
   355  	// the API code.
   356  	//
   357  	// baseParams := &api.BaseParams{
   358  	// 	Client: http.DefaultClient,
   359  	// 	URL:    g.smap.Get().Primary.URL(cmn.NetIntraControl),
   360  	// }
   361  	// msg := &apc.LsoMsg{Props: "size,status"}
   362  	// objList, err := api.ListObjects(baseParams, pars.Bucket, msg, 0)
   363  	// if err != nil {
   364  	// 	return "", err
   365  	// }
   366  	//
   367  	// totalBucketSize := uint64(0)
   368  	// for _, obj := range objList.Entries {
   369  	// 	if obj.IsStatusOK() {
   370  	// 		totalBucketSize += uint64(obj.Size)
   371  	// 	}
   372  	// }
   373  	//
   374  	// if totalBucketSize < totalAvailMemory {
   375  	// 	// "general type" is capable of extracting whole dataset into memory
   376  	// 	// In this case the creation phase is super fast.
   377  	// 	return GeneralType, nil
   378  	// }
   379  
   380  	if moreThanThreshold {
   381  		// If there is enough memory to use "memory type", we should do that.
   382  		// It behaves better for cases when we have a lot of memory available.
   383  		return MemType, nil
   384  	}
   385  
   386  	// For all other cases we should use "general type", as we don't know
   387  	// exactly what to expect, so we should prepare for the worst.
   388  	return GeneralType, nil
   389  }
   390  
   391  ///////////////////
   392  ///// TARGET //////
   393  ///////////////////
   394  
   395  // [METHOD] /v1/sort
   396  func TargetHandler(w http.ResponseWriter, r *http.Request) {
   397  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSort.L)
   398  	if err != nil {
   399  		return
   400  	}
   401  
   402  	switch apiItems[0] {
   403  	case apc.Init:
   404  		tinitHandler(w, r)
   405  	case apc.Start:
   406  		tstartHandler(w, r)
   407  	case apc.Records:
   408  		Managers.recordsHandler(w, r)
   409  	case apc.Shards:
   410  		Managers.shardsHandler(w, r)
   411  	case apc.Abort:
   412  		tabortHandler(w, r)
   413  	case apc.Remove:
   414  		tremoveHandler(w, r)
   415  	case apc.UList:
   416  		tlistHandler(w, r)
   417  	case apc.Metrics:
   418  		tmetricsHandler(w, r)
   419  	case apc.FinishedAck:
   420  		tfiniHandler(w, r)
   421  	default:
   422  		cmn.WriteErrMsg(w, r, "invalid path")
   423  	}
   424  }
   425  
   426  // /v1/sort/init.
   427  // receive parsedReqSpec and initialize dsort manager
   428  func tinitHandler(w http.ResponseWriter, r *http.Request) {
   429  	if !checkHTTPMethod(w, r, http.MethodPost) {
   430  		return
   431  	}
   432  	// disallow to run when above high wm (let alone OOS)
   433  	cs := fs.Cap()
   434  	if errCap := cs.Err(); errCap != nil {
   435  		cmn.WriteErr(w, r, errCap, http.StatusInsufficientStorage)
   436  		return
   437  	}
   438  
   439  	apiItems, errV := parseURL(w, r, 1, apc.URLPathdSortInit.L)
   440  	if errV != nil {
   441  		return
   442  	}
   443  	var (
   444  		pars   *parsedReqSpec
   445  		b, err = io.ReadAll(r.Body)
   446  	)
   447  	if err != nil {
   448  		cmn.WriteErr(w, r, fmt.Errorf("[dsort]: failed to receive request: %w", err))
   449  		return
   450  	}
   451  	if err = js.Unmarshal(b, &pars); err != nil {
   452  		err := fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "parsedReqSpec", cos.BHead(b), err)
   453  		cmn.WriteErr(w, r, err)
   454  		return
   455  	}
   456  
   457  	managerUUID := apiItems[0]
   458  	m, err := Managers.Add(managerUUID) // NOTE: returns manager locked iff err == nil
   459  	if err != nil {
   460  		cmn.WriteErr(w, r, err)
   461  		return
   462  	}
   463  	if err = m.init(pars); err != nil {
   464  		cmn.WriteErr(w, r, err)
   465  	} else {
   466  		// setup xaction
   467  		debug.Assert(!pars.OutputBck.IsEmpty())
   468  		custom := &xreg.DsortArgs{BckFrom: meta.CloneBck(&pars.InputBck), BckTo: meta.CloneBck(&pars.OutputBck)}
   469  		rns := xreg.RenewDsort(managerUUID, custom)
   470  		debug.AssertNoErr(rns.Err)
   471  		xctn := rns.Entry.Get()
   472  		debug.Assert(xctn.ID() == managerUUID, xctn.ID()+" vs "+managerUUID)
   473  
   474  		m.xctn = xctn.(*xaction)
   475  	}
   476  	m.unlock()
   477  }
   478  
   479  // /v1/sort/start.
   480  // There are three major phases to this function:
   481  //  1. extractLocalShards
   482  //  2. participateInRecordDistribution
   483  //  3. distributeShardRecords
   484  func tstartHandler(w http.ResponseWriter, r *http.Request) {
   485  	if !checkHTTPMethod(w, r, http.MethodPost) {
   486  		return
   487  	}
   488  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortStart.L)
   489  	if err != nil {
   490  		return
   491  	}
   492  
   493  	managerUUID := apiItems[0]
   494  	m, exists := Managers.Get(managerUUID, false /*incl. archived*/)
   495  	if !exists {
   496  		s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID)
   497  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   498  		return
   499  	}
   500  
   501  	go m.startDsort()
   502  }
   503  
   504  func (m *Manager) startDsort() {
   505  	if err := m.start(); err != nil {
   506  		m.errHandler(err)
   507  		return
   508  	}
   509  
   510  	nlog.Infof("[dsort] %s broadcasting finished ack to other targets", m.ManagerUUID)
   511  	path := apc.URLPathdSortAck.Join(m.ManagerUUID, core.T.SID())
   512  	bcast(http.MethodPut, path, nil, nil, core.T.Sowner().Get(), core.T.Snode())
   513  }
   514  
   515  func (m *Manager) errHandler(err error) {
   516  	nlog.InfoDepth(1, "Error:", err)
   517  
   518  	// If we were aborted by some other process this means that we do not
   519  	// broadcast abort (we assume that daemon aborted us, aborted also others).
   520  	if !m.aborted() {
   521  		// Self-abort: better do it before sending broadcast to avoid
   522  		// inconsistent state: other have aborted but we didn't due to some
   523  		// problem.
   524  		if isReportableError(err) {
   525  			m.abort(err)
   526  		} else {
   527  			m.abort(nil)
   528  		}
   529  
   530  		nlog.Warningln("broadcasting abort to other targets")
   531  		path := apc.URLPathdSortAbort.Join(m.ManagerUUID)
   532  		bcast(http.MethodDelete, path, nil, nil, core.T.Sowner().Get(), core.T.Snode())
   533  	}
   534  }
   535  
   536  // shardsHandler is the handler for the HTTP endpoint /v1/sort/shards.
   537  // A valid POST to this endpoint results in a new shard being created locally based on the contents
   538  // of the incoming request body. The shard is then sent to the correct target in the cluster as per HRW.
   539  func (managers *ManagerGroup) shardsHandler(w http.ResponseWriter, r *http.Request) {
   540  	if !checkHTTPMethod(w, r, http.MethodPost) {
   541  		return
   542  	}
   543  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortShards.L)
   544  	if err != nil {
   545  		return
   546  	}
   547  	managerUUID := apiItems[0]
   548  	m, exists := managers.Get(managerUUID, false /*incl. archived*/)
   549  	if !exists {
   550  		s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID)
   551  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   552  		return
   553  	}
   554  
   555  	if !m.inProgress() {
   556  		cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process in progress", apc.ActDsort))
   557  		return
   558  	}
   559  	if m.aborted() {
   560  		cmn.WriteErrMsg(w, r, apc.ActDsort+" process was aborted")
   561  		return
   562  	}
   563  
   564  	var (
   565  		buf, slab   = g.mm.AllocSize(serializationBufSize)
   566  		tmpMetadata = &CreationPhaseMetadata{}
   567  	)
   568  	defer slab.Free(buf)
   569  
   570  	if err := tmpMetadata.DecodeMsg(msgp.NewReaderBuf(r.Body, buf)); err != nil {
   571  		err = fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "creation phase metadata", "-", err)
   572  		cmn.WriteErr(w, r, err, http.StatusInternalServerError)
   573  		return
   574  	}
   575  
   576  	if !m.inProgress() || m.aborted() {
   577  		cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process", apc.ActDsort))
   578  		return
   579  	}
   580  
   581  	m.creationPhase.metadata = *tmpMetadata
   582  	m.startShardCreation <- struct{}{}
   583  }
   584  
   585  // recordsHandler is the handler /v1/sort/records.
   586  // A valid POST to this endpoint updates this target's dsortManager.Records with the
   587  // []Records from the request body, along with some related state variables.
   588  func (managers *ManagerGroup) recordsHandler(w http.ResponseWriter, r *http.Request) {
   589  	if !checkHTTPMethod(w, r, http.MethodPost) {
   590  		return
   591  	}
   592  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortRecords.L)
   593  	if err != nil {
   594  		return
   595  	}
   596  	managerUUID := apiItems[0]
   597  	m, exists := managers.Get(managerUUID, false /*incl. archived*/)
   598  	if !exists {
   599  		s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID)
   600  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   601  		return
   602  	}
   603  	if !m.inProgress() {
   604  		cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process in progress", apc.ActDsort))
   605  		return
   606  	}
   607  	if m.aborted() {
   608  		cmn.WriteErrMsg(w, r, apc.ActDsort+" process was aborted")
   609  		return
   610  	}
   611  
   612  	query := r.URL.Query()
   613  	totalShardSize, err := strconv.ParseInt(query.Get(apc.QparamTotalCompressedSize), 10, 64)
   614  	if err != nil {
   615  		s := fmt.Sprintf("invalid %s in request to %s, err: %v",
   616  			apc.QparamTotalCompressedSize, r.URL.String(), err)
   617  		cmn.WriteErrMsg(w, r, s)
   618  		return
   619  	}
   620  	totalExtractedSize, err := strconv.ParseInt(query.Get(apc.QparamTotalUncompressedSize), 10, 64)
   621  	if err != nil {
   622  		s := fmt.Sprintf("invalid %s in request to %s, err: %v",
   623  			apc.QparamTotalUncompressedSize, r.URL.String(), err)
   624  		cmn.WriteErrMsg(w, r, s)
   625  		return
   626  	}
   627  	d, err := strconv.ParseUint(query.Get(apc.QparamTotalInputShardsExtracted), 10, 64)
   628  	if err != nil {
   629  		s := fmt.Sprintf("invalid %s in request to %s, err: %v",
   630  			apc.QparamTotalInputShardsExtracted, r.URL.String(), err)
   631  		cmn.WriteErrMsg(w, r, s)
   632  		return
   633  	}
   634  
   635  	var (
   636  		buf, slab = g.mm.AllocSize(serializationBufSize)
   637  		records   = shard.NewRecords(int(d))
   638  	)
   639  	defer slab.Free(buf)
   640  
   641  	if err := records.DecodeMsg(msgp.NewReaderBuf(r.Body, buf)); err != nil {
   642  		err = fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "records", "-", err)
   643  		cmn.WriteErr(w, r, err, http.StatusInternalServerError)
   644  		return
   645  	}
   646  
   647  	m.addSizes(totalShardSize, totalExtractedSize)
   648  	m.recm.EnqueueRecords(records)
   649  	m.incrementReceived()
   650  
   651  	if cmn.Rom.FastV(4, cos.SmoduleDsort) {
   652  		nlog.Infof(
   653  			"[dsort] %s total times received records from another target: %d",
   654  			m.ManagerUUID, m.received.count.Load(),
   655  		)
   656  	}
   657  }
   658  
   659  // /v1/sort/abort.
   660  // A valid DELETE to this endpoint aborts currently running sort job and cleans
   661  // up the state.
   662  func tabortHandler(w http.ResponseWriter, r *http.Request) {
   663  	if !checkHTTPMethod(w, r, http.MethodDelete) {
   664  		return
   665  	}
   666  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortAbort.L)
   667  	if err != nil {
   668  		return
   669  	}
   670  
   671  	managerUUID := apiItems[0]
   672  	m, exists := Managers.Get(managerUUID, true /*incl. archived*/)
   673  	if !exists {
   674  		s := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID)
   675  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   676  		return
   677  	}
   678  	if m.Metrics.Archived.Load() {
   679  		s := fmt.Sprintf("%s: [dsort] %s is already archived", core.T, managerUUID)
   680  		cmn.WriteErrMsg(w, r, s, http.StatusGone)
   681  		return
   682  	}
   683  
   684  	err = fmt.Errorf("%s: [dsort] %s aborted", core.T, managerUUID)
   685  	m.abort(err)
   686  }
   687  
   688  func tremoveHandler(w http.ResponseWriter, r *http.Request) {
   689  	if !checkHTTPMethod(w, r, http.MethodDelete) {
   690  		return
   691  	}
   692  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortRemove.L)
   693  	if err != nil {
   694  		return
   695  	}
   696  
   697  	managerUUID := apiItems[0]
   698  	if err := Managers.Remove(managerUUID); err != nil {
   699  		cmn.WriteErr(w, r, err)
   700  		return
   701  	}
   702  }
   703  
   704  func tlistHandler(w http.ResponseWriter, r *http.Request) {
   705  	var (
   706  		query      = r.URL.Query()
   707  		regexStr   = query.Get(apc.QparamRegex)
   708  		onlyActive = cos.IsParseBool(query.Get(apc.QparamOnlyActive))
   709  		regex      *regexp.Regexp
   710  	)
   711  	if !checkHTTPMethod(w, r, http.MethodGet) {
   712  		return
   713  	}
   714  	if regexStr != "" {
   715  		var err error
   716  		if regex, err = regexp.CompilePOSIX(regexStr); err != nil {
   717  			cmn.WriteErr(w, r, err)
   718  			return
   719  		}
   720  	}
   721  
   722  	w.Write(cos.MustMarshal(Managers.List(regex, onlyActive)))
   723  }
   724  
   725  // /v1/sort/metrics.
   726  // A valid GET to this endpoint sends response with sort metrics.
   727  func tmetricsHandler(w http.ResponseWriter, r *http.Request) {
   728  	if !checkHTTPMethod(w, r, http.MethodGet) {
   729  		return
   730  	}
   731  	apiItems, err := parseURL(w, r, 1, apc.URLPathdSortMetrics.L)
   732  	if err != nil {
   733  		return
   734  	}
   735  
   736  	managerUUID := apiItems[0]
   737  	m, exists := Managers.Get(managerUUID, true /*incl. archived*/)
   738  	if !exists {
   739  		s := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID)
   740  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   741  		return
   742  	}
   743  
   744  	m.Metrics.lock()
   745  	m.Metrics.update()
   746  	j := m.Metrics.ToJobInfo(m.ManagerUUID, m.Pars)
   747  	j.Metrics = m.Metrics
   748  	body := cos.MustMarshal(j)
   749  	m.Metrics.unlock()
   750  
   751  	w.Write(body)
   752  }
   753  
   754  // /v1/sort/finished-ack.
   755  // A valid PUT to this endpoint acknowledges that tid has finished dsort operation.
   756  func tfiniHandler(w http.ResponseWriter, r *http.Request) {
   757  	if !checkHTTPMethod(w, r, http.MethodPut) {
   758  		return
   759  	}
   760  	apiItems, err := parseURL(w, r, 2, apc.URLPathdSortAck.L)
   761  	if err != nil {
   762  		return
   763  	}
   764  
   765  	managerUUID, tid := apiItems[0], apiItems[1]
   766  	m, exists := Managers.Get(managerUUID, false /*incl. archived*/)
   767  	if !exists {
   768  		s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID)
   769  		cmn.WriteErrMsg(w, r, s, http.StatusNotFound)
   770  		return
   771  	}
   772  
   773  	m.updateFinishedAck(tid)
   774  }
   775  
   776  //
   777  // http helpers
   778  //
   779  
   780  func checkHTTPMethod(w http.ResponseWriter, r *http.Request, expected string) bool {
   781  	if r.Method != expected {
   782  		s := fmt.Sprintf("invalid method '%s %s', expecting '%s'", r.Method, r.URL.String(), expected)
   783  		cmn.WriteErrMsg(w, r, s)
   784  		return false
   785  	}
   786  	return true
   787  }
   788  
   789  func parseURL(w http.ResponseWriter, r *http.Request, itemsAfter int, items []string) ([]string, error) {
   790  	items, err := cmn.ParseURL(r.URL.Path, items, itemsAfter, true)
   791  	if err != nil {
   792  		cmn.WriteErr(w, r, err)
   793  		return nil, err
   794  	}
   795  
   796  	return items, err
   797  }