github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dsort/handler.go (about) 1 // Package dsort provides distributed massively parallel resharding for very large datasets. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package dsort 6 7 import ( 8 "fmt" 9 "io" 10 "net/http" 11 "net/url" 12 "regexp" 13 "strconv" 14 15 "github.com/NVIDIA/aistore/api/apc" 16 "github.com/NVIDIA/aistore/cmn" 17 "github.com/NVIDIA/aistore/cmn/cos" 18 "github.com/NVIDIA/aistore/cmn/debug" 19 "github.com/NVIDIA/aistore/cmn/nlog" 20 "github.com/NVIDIA/aistore/core" 21 "github.com/NVIDIA/aistore/core/meta" 22 "github.com/NVIDIA/aistore/ext/dsort/shard" 23 "github.com/NVIDIA/aistore/fs" 24 "github.com/NVIDIA/aistore/stats" 25 "github.com/NVIDIA/aistore/sys" 26 "github.com/NVIDIA/aistore/xact/xreg" 27 jsoniter "github.com/json-iterator/go" 28 "github.com/tinylib/msgp/msgp" 29 ) 30 31 type response struct { 32 si *meta.Snode 33 res []byte 34 err error 35 statusCode int 36 } 37 38 ////////////////// 39 ///// PROXY ////// 40 ////////////////// 41 42 var psi core.Node 43 44 // POST /v1/sort 45 func PstartHandler(w http.ResponseWriter, r *http.Request, parsc *ParsedReq) { 46 var ( 47 err error 48 pars = parsc.pars 49 ) 50 pars.TargetOrderSalt = []byte(cos.FormatNowStamp()) 51 52 // TODO: handle case when bucket was removed during dsort job - this should 53 // stop whole operation. Maybe some listeners as we have on smap change? 54 // This would also be helpful for Downloader (in the middle of downloading 55 // large file the bucket can be easily deleted). 56 57 pars.DsorterType, err = dsorterType(pars) 58 if err != nil { 59 cmn.WriteErr(w, r, err) 60 return 61 } 62 63 b, err := js.Marshal(pars) 64 if err != nil { 65 s := fmt.Sprintf("unable to marshal RequestSpec: %+v, err: %v", pars, err) 66 cmn.WriteErrMsg(w, r, s, http.StatusInternalServerError) 67 return 68 } 69 70 var ( 71 managerUUID = PrefixJobID + cos.GenUUID() // compare w/ p.httpdlpost 72 smap = psi.Sowner().Get() 73 ) 74 75 // Starting dsort has two phases: 76 // 1. Initialization, ensures that all targets successfully initialized all 77 // structures and are ready to receive requests: start, metrics, abort 78 // 2. Start, where we request targets to start the dsort. 79 // 80 // This prevents bugs where one targets would just start dsort (other did 81 // not have yet initialized) and starts to communicate with other targets 82 // but because they are not ready with their initialization will not recognize 83 // given dsort job. Also bug where we could send abort (which triggers cleanup) 84 // to not yet initialized target. 85 86 // phase 1 87 if cmn.Rom.FastV(4, cos.SmoduleDsort) { 88 nlog.Infof("[dsort] %s broadcasting init request to all targets", managerUUID) 89 } 90 path := apc.URLPathdSortInit.Join(managerUUID) 91 responses := bcast(http.MethodPost, path, nil, b, smap) 92 if err := _handleResp(w, r, smap, managerUUID, responses); err != nil { 93 return 94 } 95 96 // phase 2 97 if cmn.Rom.FastV(4, cos.SmoduleDsort) { 98 nlog.Infof("[dsort] %s broadcasting start request to all targets", managerUUID) 99 } 100 path = apc.URLPathdSortStart.Join(managerUUID) 101 responses = bcast(http.MethodPost, path, nil, nil, smap) 102 if err := _handleResp(w, r, smap, managerUUID, responses); err != nil { 103 return 104 } 105 106 w.Write([]byte(managerUUID)) 107 } 108 109 func _handleResp(w http.ResponseWriter, r *http.Request, smap *meta.Smap, managerUUID string, responses []response) error { 110 for _, resp := range responses { 111 if resp.err == nil { 112 continue 113 } 114 // cleanup 115 path := apc.URLPathdSortAbort.Join(managerUUID) 116 _ = bcast(http.MethodDelete, path, nil, nil, smap) 117 118 msg := fmt.Sprintf("failed to start [dsort] %s: %v(%d)", managerUUID, resp.err, resp.statusCode) 119 cmn.WriteErrMsg(w, r, msg, http.StatusInternalServerError) 120 return resp.err 121 } 122 return nil 123 } 124 125 // GET /v1/sort 126 func PgetHandler(w http.ResponseWriter, r *http.Request) { 127 if !checkHTTPMethod(w, r, http.MethodGet) { 128 return 129 } 130 query := r.URL.Query() 131 managerUUID := query.Get(apc.QparamUUID) 132 if managerUUID == "" { 133 plistHandler(w, r, query) 134 return 135 } 136 137 pmetricsHandler(w, r, query) 138 } 139 140 // GET /v1/sort?regex=... 141 func plistHandler(w http.ResponseWriter, r *http.Request, query url.Values) { 142 var ( 143 path = apc.URLPathdSortList.S 144 regexStr = query.Get(apc.QparamRegex) 145 ) 146 if regexStr != "" { 147 if _, err := regexp.CompilePOSIX(regexStr); err != nil { 148 cmn.WriteErr(w, r, err) 149 return 150 } 151 } 152 responses := bcast(http.MethodGet, path, query, nil, psi.Sowner().Get()) 153 154 resultList := make([]*JobInfo, 0, 4) 155 for _, r := range responses { 156 if r.err != nil { 157 nlog.Errorln(r.err) 158 continue 159 } 160 161 var targetMetrics []*JobInfo 162 err := jsoniter.Unmarshal(r.res, &targetMetrics) 163 debug.AssertNoErr(err) 164 165 for _, job := range targetMetrics { 166 var found bool 167 for _, oldMetric := range resultList { 168 if oldMetric.ID == job.ID { 169 oldMetric.Aggregate(job) 170 found = true 171 break 172 } 173 } 174 if !found { 175 resultList = append(resultList, job) 176 } 177 } 178 } 179 180 w.Write(cos.MustMarshal(resultList)) 181 } 182 183 // GET /v1/sort?id=... 184 func pmetricsHandler(w http.ResponseWriter, r *http.Request, query url.Values) { 185 var ( 186 smap = psi.Sowner().Get() 187 all = make(map[string]*JobInfo, smap.CountActiveTs()) 188 managerUUID = query.Get(apc.QparamUUID) 189 path = apc.URLPathdSortMetrics.Join(managerUUID) 190 responses = bcast(http.MethodGet, path, nil, nil, smap) 191 notFound int 192 ) 193 for _, resp := range responses { 194 if resp.statusCode == http.StatusNotFound { 195 // Probably new target which does not know anything about this dsort op. 196 notFound++ 197 continue 198 } 199 if resp.err != nil { 200 cmn.WriteErr(w, r, resp.err, resp.statusCode) 201 return 202 } 203 j := &JobInfo{} 204 if err := js.Unmarshal(resp.res, j); err != nil { 205 cmn.WriteErr(w, r, err, http.StatusInternalServerError) 206 return 207 } 208 all[resp.si.ID()] = j 209 } 210 211 if notFound == len(responses) && notFound > 0 { 212 msg := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID) 213 cmn.WriteErrMsg(w, r, msg, http.StatusNotFound) 214 return 215 } 216 w.Write(cos.MustMarshal(all)) 217 } 218 219 // DELETE /v1/sort/abort 220 func PabortHandler(w http.ResponseWriter, r *http.Request) { 221 if !checkHTTPMethod(w, r, http.MethodDelete) { 222 return 223 } 224 _, err := parseURL(w, r, 0, apc.URLPathdSortAbort.L) 225 if err != nil { 226 return 227 } 228 229 var ( 230 query = r.URL.Query() 231 managerUUID = query.Get(apc.QparamUUID) 232 path = apc.URLPathdSortAbort.Join(managerUUID) 233 responses = bcast(http.MethodDelete, path, nil, nil, psi.Sowner().Get()) 234 ) 235 allNotFound := true 236 for _, resp := range responses { 237 if resp.statusCode == http.StatusNotFound { 238 continue 239 } 240 allNotFound = false 241 242 if resp.err != nil { 243 cmn.WriteErr(w, r, resp.err, resp.statusCode) 244 return 245 } 246 } 247 if allNotFound { 248 err := cos.NewErrNotFound(core.T, "dsort job "+managerUUID) 249 cmn.WriteErr(w, r, err, http.StatusNotFound) 250 return 251 } 252 } 253 254 // DELETE /v1/sort 255 func PremoveHandler(w http.ResponseWriter, r *http.Request) { 256 if !checkHTTPMethod(w, r, http.MethodDelete) { 257 return 258 } 259 _, err := parseURL(w, r, 0, apc.URLPathdSort.L) 260 if err != nil { 261 return 262 } 263 264 var ( 265 smap = psi.Sowner().Get() 266 query = r.URL.Query() 267 managerUUID = query.Get(apc.QparamUUID) 268 path = apc.URLPathdSortMetrics.Join(managerUUID) 269 responses = bcast(http.MethodGet, path, nil, nil, smap) 270 ) 271 272 // First, broadcast to see if process is cleaned up first 273 seenOne := false 274 for _, resp := range responses { 275 if resp.statusCode == http.StatusNotFound { 276 // Probably new target which does not know anything about this dsort op. 277 continue 278 } 279 if resp.err != nil { 280 cmn.WriteErr(w, r, resp.err, resp.statusCode) 281 return 282 } 283 metrics := &Metrics{} 284 if err := js.Unmarshal(resp.res, &metrics); err != nil { 285 cmn.WriteErr(w, r, err, http.StatusInternalServerError) 286 return 287 } 288 if !metrics.Archived.Load() { 289 cmn.WriteErrMsg(w, r, fmt.Sprintf("%s process %s still in progress and cannot be removed", 290 apc.ActDsort, managerUUID)) 291 return 292 } 293 seenOne = true 294 } 295 if !seenOne { 296 s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID) 297 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 298 return 299 } 300 301 // Next, broadcast the remove once we've checked that all targets have run cleanup 302 path = apc.URLPathdSortRemove.Join(managerUUID) 303 responses = bcast(http.MethodDelete, path, nil, nil, smap) 304 var failed []string //nolint:prealloc // will remain not allocated when no errors 305 for _, r := range responses { 306 if r.statusCode == http.StatusOK { 307 continue 308 } 309 failed = append(failed, fmt.Sprintf("%v: (%v) %v", r.si.ID(), r.statusCode, string(r.res))) 310 } 311 if len(failed) != 0 { 312 err := fmt.Errorf("got errors while broadcasting remove: %v", failed) 313 cmn.WriteErr(w, r, err) 314 } 315 } 316 317 // Determine dsorter type. We need to make this decision based on (e.g.) size targets' memory. 318 func dsorterType(pars *parsedReqSpec) (string, error) { 319 if pars.DsorterType != "" { 320 return pars.DsorterType, nil // in case the dsorter type is already set, we need to respect it 321 } 322 323 // Get memory stats from targets 324 var ( 325 totalAvailMemory uint64 326 err error 327 path = apc.URLPathDae.S 328 moreThanThreshold = true 329 ) 330 331 dsorterMemThreshold, err := cos.ParseSize(pars.DsorterMemThreshold, cos.UnitsIEC) 332 debug.AssertNoErr(err) 333 334 query := make(url.Values) 335 query.Add(apc.QparamWhat, apc.WhatNodeStatsAndStatus) 336 responses := bcast(http.MethodGet, path, query, nil, psi.Sowner().Get()) 337 for _, response := range responses { 338 if response.err != nil { 339 return "", response.err 340 } 341 342 daemonStatus := stats.NodeStatus{} 343 if err := jsoniter.Unmarshal(response.res, &daemonStatus); err != nil { 344 return "", err 345 } 346 347 memStat := sys.MemStat{Total: daemonStatus.MemCPUInfo.MemAvail + daemonStatus.MemCPUInfo.MemUsed} 348 dsortAvailMemory := calcMaxMemoryUsage(pars.MaxMemUsage, &memStat) 349 totalAvailMemory += dsortAvailMemory 350 moreThanThreshold = moreThanThreshold && dsortAvailMemory > uint64(dsorterMemThreshold) 351 } 352 353 // TODO: currently, we have import cycle: dsort -> api -> dsort. Need to 354 // think of a way to get the total size of bucket without copy-and-paste 355 // the API code. 356 // 357 // baseParams := &api.BaseParams{ 358 // Client: http.DefaultClient, 359 // URL: g.smap.Get().Primary.URL(cmn.NetIntraControl), 360 // } 361 // msg := &apc.LsoMsg{Props: "size,status"} 362 // objList, err := api.ListObjects(baseParams, pars.Bucket, msg, 0) 363 // if err != nil { 364 // return "", err 365 // } 366 // 367 // totalBucketSize := uint64(0) 368 // for _, obj := range objList.Entries { 369 // if obj.IsStatusOK() { 370 // totalBucketSize += uint64(obj.Size) 371 // } 372 // } 373 // 374 // if totalBucketSize < totalAvailMemory { 375 // // "general type" is capable of extracting whole dataset into memory 376 // // In this case the creation phase is super fast. 377 // return GeneralType, nil 378 // } 379 380 if moreThanThreshold { 381 // If there is enough memory to use "memory type", we should do that. 382 // It behaves better for cases when we have a lot of memory available. 383 return MemType, nil 384 } 385 386 // For all other cases we should use "general type", as we don't know 387 // exactly what to expect, so we should prepare for the worst. 388 return GeneralType, nil 389 } 390 391 /////////////////// 392 ///// TARGET ////// 393 /////////////////// 394 395 // [METHOD] /v1/sort 396 func TargetHandler(w http.ResponseWriter, r *http.Request) { 397 apiItems, err := parseURL(w, r, 1, apc.URLPathdSort.L) 398 if err != nil { 399 return 400 } 401 402 switch apiItems[0] { 403 case apc.Init: 404 tinitHandler(w, r) 405 case apc.Start: 406 tstartHandler(w, r) 407 case apc.Records: 408 Managers.recordsHandler(w, r) 409 case apc.Shards: 410 Managers.shardsHandler(w, r) 411 case apc.Abort: 412 tabortHandler(w, r) 413 case apc.Remove: 414 tremoveHandler(w, r) 415 case apc.UList: 416 tlistHandler(w, r) 417 case apc.Metrics: 418 tmetricsHandler(w, r) 419 case apc.FinishedAck: 420 tfiniHandler(w, r) 421 default: 422 cmn.WriteErrMsg(w, r, "invalid path") 423 } 424 } 425 426 // /v1/sort/init. 427 // receive parsedReqSpec and initialize dsort manager 428 func tinitHandler(w http.ResponseWriter, r *http.Request) { 429 if !checkHTTPMethod(w, r, http.MethodPost) { 430 return 431 } 432 // disallow to run when above high wm (let alone OOS) 433 cs := fs.Cap() 434 if errCap := cs.Err(); errCap != nil { 435 cmn.WriteErr(w, r, errCap, http.StatusInsufficientStorage) 436 return 437 } 438 439 apiItems, errV := parseURL(w, r, 1, apc.URLPathdSortInit.L) 440 if errV != nil { 441 return 442 } 443 var ( 444 pars *parsedReqSpec 445 b, err = io.ReadAll(r.Body) 446 ) 447 if err != nil { 448 cmn.WriteErr(w, r, fmt.Errorf("[dsort]: failed to receive request: %w", err)) 449 return 450 } 451 if err = js.Unmarshal(b, &pars); err != nil { 452 err := fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "parsedReqSpec", cos.BHead(b), err) 453 cmn.WriteErr(w, r, err) 454 return 455 } 456 457 managerUUID := apiItems[0] 458 m, err := Managers.Add(managerUUID) // NOTE: returns manager locked iff err == nil 459 if err != nil { 460 cmn.WriteErr(w, r, err) 461 return 462 } 463 if err = m.init(pars); err != nil { 464 cmn.WriteErr(w, r, err) 465 } else { 466 // setup xaction 467 debug.Assert(!pars.OutputBck.IsEmpty()) 468 custom := &xreg.DsortArgs{BckFrom: meta.CloneBck(&pars.InputBck), BckTo: meta.CloneBck(&pars.OutputBck)} 469 rns := xreg.RenewDsort(managerUUID, custom) 470 debug.AssertNoErr(rns.Err) 471 xctn := rns.Entry.Get() 472 debug.Assert(xctn.ID() == managerUUID, xctn.ID()+" vs "+managerUUID) 473 474 m.xctn = xctn.(*xaction) 475 } 476 m.unlock() 477 } 478 479 // /v1/sort/start. 480 // There are three major phases to this function: 481 // 1. extractLocalShards 482 // 2. participateInRecordDistribution 483 // 3. distributeShardRecords 484 func tstartHandler(w http.ResponseWriter, r *http.Request) { 485 if !checkHTTPMethod(w, r, http.MethodPost) { 486 return 487 } 488 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortStart.L) 489 if err != nil { 490 return 491 } 492 493 managerUUID := apiItems[0] 494 m, exists := Managers.Get(managerUUID, false /*incl. archived*/) 495 if !exists { 496 s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID) 497 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 498 return 499 } 500 501 go m.startDsort() 502 } 503 504 func (m *Manager) startDsort() { 505 if err := m.start(); err != nil { 506 m.errHandler(err) 507 return 508 } 509 510 nlog.Infof("[dsort] %s broadcasting finished ack to other targets", m.ManagerUUID) 511 path := apc.URLPathdSortAck.Join(m.ManagerUUID, core.T.SID()) 512 bcast(http.MethodPut, path, nil, nil, core.T.Sowner().Get(), core.T.Snode()) 513 } 514 515 func (m *Manager) errHandler(err error) { 516 nlog.InfoDepth(1, "Error:", err) 517 518 // If we were aborted by some other process this means that we do not 519 // broadcast abort (we assume that daemon aborted us, aborted also others). 520 if !m.aborted() { 521 // Self-abort: better do it before sending broadcast to avoid 522 // inconsistent state: other have aborted but we didn't due to some 523 // problem. 524 if isReportableError(err) { 525 m.abort(err) 526 } else { 527 m.abort(nil) 528 } 529 530 nlog.Warningln("broadcasting abort to other targets") 531 path := apc.URLPathdSortAbort.Join(m.ManagerUUID) 532 bcast(http.MethodDelete, path, nil, nil, core.T.Sowner().Get(), core.T.Snode()) 533 } 534 } 535 536 // shardsHandler is the handler for the HTTP endpoint /v1/sort/shards. 537 // A valid POST to this endpoint results in a new shard being created locally based on the contents 538 // of the incoming request body. The shard is then sent to the correct target in the cluster as per HRW. 539 func (managers *ManagerGroup) shardsHandler(w http.ResponseWriter, r *http.Request) { 540 if !checkHTTPMethod(w, r, http.MethodPost) { 541 return 542 } 543 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortShards.L) 544 if err != nil { 545 return 546 } 547 managerUUID := apiItems[0] 548 m, exists := managers.Get(managerUUID, false /*incl. archived*/) 549 if !exists { 550 s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID) 551 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 552 return 553 } 554 555 if !m.inProgress() { 556 cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process in progress", apc.ActDsort)) 557 return 558 } 559 if m.aborted() { 560 cmn.WriteErrMsg(w, r, apc.ActDsort+" process was aborted") 561 return 562 } 563 564 var ( 565 buf, slab = g.mm.AllocSize(serializationBufSize) 566 tmpMetadata = &CreationPhaseMetadata{} 567 ) 568 defer slab.Free(buf) 569 570 if err := tmpMetadata.DecodeMsg(msgp.NewReaderBuf(r.Body, buf)); err != nil { 571 err = fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "creation phase metadata", "-", err) 572 cmn.WriteErr(w, r, err, http.StatusInternalServerError) 573 return 574 } 575 576 if !m.inProgress() || m.aborted() { 577 cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process", apc.ActDsort)) 578 return 579 } 580 581 m.creationPhase.metadata = *tmpMetadata 582 m.startShardCreation <- struct{}{} 583 } 584 585 // recordsHandler is the handler /v1/sort/records. 586 // A valid POST to this endpoint updates this target's dsortManager.Records with the 587 // []Records from the request body, along with some related state variables. 588 func (managers *ManagerGroup) recordsHandler(w http.ResponseWriter, r *http.Request) { 589 if !checkHTTPMethod(w, r, http.MethodPost) { 590 return 591 } 592 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortRecords.L) 593 if err != nil { 594 return 595 } 596 managerUUID := apiItems[0] 597 m, exists := managers.Get(managerUUID, false /*incl. archived*/) 598 if !exists { 599 s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID) 600 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 601 return 602 } 603 if !m.inProgress() { 604 cmn.WriteErrMsg(w, r, fmt.Sprintf("no %s process in progress", apc.ActDsort)) 605 return 606 } 607 if m.aborted() { 608 cmn.WriteErrMsg(w, r, apc.ActDsort+" process was aborted") 609 return 610 } 611 612 query := r.URL.Query() 613 totalShardSize, err := strconv.ParseInt(query.Get(apc.QparamTotalCompressedSize), 10, 64) 614 if err != nil { 615 s := fmt.Sprintf("invalid %s in request to %s, err: %v", 616 apc.QparamTotalCompressedSize, r.URL.String(), err) 617 cmn.WriteErrMsg(w, r, s) 618 return 619 } 620 totalExtractedSize, err := strconv.ParseInt(query.Get(apc.QparamTotalUncompressedSize), 10, 64) 621 if err != nil { 622 s := fmt.Sprintf("invalid %s in request to %s, err: %v", 623 apc.QparamTotalUncompressedSize, r.URL.String(), err) 624 cmn.WriteErrMsg(w, r, s) 625 return 626 } 627 d, err := strconv.ParseUint(query.Get(apc.QparamTotalInputShardsExtracted), 10, 64) 628 if err != nil { 629 s := fmt.Sprintf("invalid %s in request to %s, err: %v", 630 apc.QparamTotalInputShardsExtracted, r.URL.String(), err) 631 cmn.WriteErrMsg(w, r, s) 632 return 633 } 634 635 var ( 636 buf, slab = g.mm.AllocSize(serializationBufSize) 637 records = shard.NewRecords(int(d)) 638 ) 639 defer slab.Free(buf) 640 641 if err := records.DecodeMsg(msgp.NewReaderBuf(r.Body, buf)); err != nil { 642 err = fmt.Errorf(cmn.FmtErrUnmarshal, apc.ActDsort, "records", "-", err) 643 cmn.WriteErr(w, r, err, http.StatusInternalServerError) 644 return 645 } 646 647 m.addSizes(totalShardSize, totalExtractedSize) 648 m.recm.EnqueueRecords(records) 649 m.incrementReceived() 650 651 if cmn.Rom.FastV(4, cos.SmoduleDsort) { 652 nlog.Infof( 653 "[dsort] %s total times received records from another target: %d", 654 m.ManagerUUID, m.received.count.Load(), 655 ) 656 } 657 } 658 659 // /v1/sort/abort. 660 // A valid DELETE to this endpoint aborts currently running sort job and cleans 661 // up the state. 662 func tabortHandler(w http.ResponseWriter, r *http.Request) { 663 if !checkHTTPMethod(w, r, http.MethodDelete) { 664 return 665 } 666 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortAbort.L) 667 if err != nil { 668 return 669 } 670 671 managerUUID := apiItems[0] 672 m, exists := Managers.Get(managerUUID, true /*incl. archived*/) 673 if !exists { 674 s := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID) 675 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 676 return 677 } 678 if m.Metrics.Archived.Load() { 679 s := fmt.Sprintf("%s: [dsort] %s is already archived", core.T, managerUUID) 680 cmn.WriteErrMsg(w, r, s, http.StatusGone) 681 return 682 } 683 684 err = fmt.Errorf("%s: [dsort] %s aborted", core.T, managerUUID) 685 m.abort(err) 686 } 687 688 func tremoveHandler(w http.ResponseWriter, r *http.Request) { 689 if !checkHTTPMethod(w, r, http.MethodDelete) { 690 return 691 } 692 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortRemove.L) 693 if err != nil { 694 return 695 } 696 697 managerUUID := apiItems[0] 698 if err := Managers.Remove(managerUUID); err != nil { 699 cmn.WriteErr(w, r, err) 700 return 701 } 702 } 703 704 func tlistHandler(w http.ResponseWriter, r *http.Request) { 705 var ( 706 query = r.URL.Query() 707 regexStr = query.Get(apc.QparamRegex) 708 onlyActive = cos.IsParseBool(query.Get(apc.QparamOnlyActive)) 709 regex *regexp.Regexp 710 ) 711 if !checkHTTPMethod(w, r, http.MethodGet) { 712 return 713 } 714 if regexStr != "" { 715 var err error 716 if regex, err = regexp.CompilePOSIX(regexStr); err != nil { 717 cmn.WriteErr(w, r, err) 718 return 719 } 720 } 721 722 w.Write(cos.MustMarshal(Managers.List(regex, onlyActive))) 723 } 724 725 // /v1/sort/metrics. 726 // A valid GET to this endpoint sends response with sort metrics. 727 func tmetricsHandler(w http.ResponseWriter, r *http.Request) { 728 if !checkHTTPMethod(w, r, http.MethodGet) { 729 return 730 } 731 apiItems, err := parseURL(w, r, 1, apc.URLPathdSortMetrics.L) 732 if err != nil { 733 return 734 } 735 736 managerUUID := apiItems[0] 737 m, exists := Managers.Get(managerUUID, true /*incl. archived*/) 738 if !exists { 739 s := fmt.Sprintf("%s: [dsort] %s does not exist", core.T, managerUUID) 740 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 741 return 742 } 743 744 m.Metrics.lock() 745 m.Metrics.update() 746 j := m.Metrics.ToJobInfo(m.ManagerUUID, m.Pars) 747 j.Metrics = m.Metrics 748 body := cos.MustMarshal(j) 749 m.Metrics.unlock() 750 751 w.Write(body) 752 } 753 754 // /v1/sort/finished-ack. 755 // A valid PUT to this endpoint acknowledges that tid has finished dsort operation. 756 func tfiniHandler(w http.ResponseWriter, r *http.Request) { 757 if !checkHTTPMethod(w, r, http.MethodPut) { 758 return 759 } 760 apiItems, err := parseURL(w, r, 2, apc.URLPathdSortAck.L) 761 if err != nil { 762 return 763 } 764 765 managerUUID, tid := apiItems[0], apiItems[1] 766 m, exists := Managers.Get(managerUUID, false /*incl. archived*/) 767 if !exists { 768 s := fmt.Sprintf("invalid request: job %q does not exist", managerUUID) 769 cmn.WriteErrMsg(w, r, s, http.StatusNotFound) 770 return 771 } 772 773 m.updateFinishedAck(tid) 774 } 775 776 // 777 // http helpers 778 // 779 780 func checkHTTPMethod(w http.ResponseWriter, r *http.Request, expected string) bool { 781 if r.Method != expected { 782 s := fmt.Sprintf("invalid method '%s %s', expecting '%s'", r.Method, r.URL.String(), expected) 783 cmn.WriteErrMsg(w, r, s) 784 return false 785 } 786 return true 787 } 788 789 func parseURL(w http.ResponseWriter, r *http.Request, itemsAfter int, items []string) ([]string, error) { 790 items, err := cmn.ParseURL(r.URL.Path, items, itemsAfter, true) 791 if err != nil { 792 cmn.WriteErr(w, r, err) 793 return nil, err 794 } 795 796 return items, err 797 }