github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/dload/xact.go (about)

     1  // Package dload implements functionality to download resources into AIS cluster from external source.
     2  /*
     3   * Copyright (c) 2018-2022, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package dload
     6  
     7  import (
     8  	"io"
     9  	"net/http"
    10  	"regexp"
    11  	"sync"
    12  	"time"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/nlog"
    17  	"github.com/NVIDIA/aistore/core"
    18  	"github.com/NVIDIA/aistore/core/meta"
    19  	"github.com/NVIDIA/aistore/xact"
    20  	"github.com/NVIDIA/aistore/xact/xreg"
    21  )
    22  
    23  // =============================== Summary ====================================
    24  //
// Downloader is a long-running task that provides AIS with a means to download
// objects from the internet by providing a URL (referred to as a link) to the
// server where the object exists. Downloader does not make the HTTP GET
// requests to download the objects itself - it purely manages the lifecycle of
    29  // joggers. It translates requests to internal representation and forwards them
    30  // to the Dispatcher. Dispatcher then dispatches the request to correct jogger instance.
    31  //
    32  // ====== API ======
    33  //
    34  // API exposed to the rest of the code includes the following operations:
    35  //   * Run      - to run
    36  //   * Stop     - to stop
    37  //   * Download    - to download a new object from a URL
    38  //   * Abort       - to abort a previously requested download (currently queued or currently downloading)
    39  //   * Status      - to request the status of a previously requested download
    40  // The Download, Abort and Status requests are encapsulated into an internal
    41  // request object, added to a dispatcher's request queue and then are dispatched by dispatcher
    42  // to the correct jogger. The remaining operations are private to the Downloader and
    43  // are used only internally. Dispatcher is implemented as goroutine listening for
    44  // incoming requests from Downloader
    45  //
    46  // Each jogger, which corresponds to one mountpath, has a download channel
    47  // (downloadCh) where download requests, that are dispatched from Dispatcher, are
    48  // queued. Thus, downloads occur on a per-mountpath basis and are handled one at
    49  // a time by jogger as they arrive.
    50  //
    51  // ====== Downloading ======
    52  //
    53  // After Downloader received a download job, it sends the job to Dispatcher.
// Dispatcher processes one job at a time, extracting objects to download
// from the job in batches. When joggers' queues have available space for new objects
// to download, dispatcher puts objects to download in these queues. If joggers
// are currently full, dispatcher waits to dispatch the next batch until they aren't.
    58  //
    59  // Single object's download is represented as object of `task` type, and there is at
    60  // most one active task assigned to any jogger at any given time. The
    61  // tasks are created when dispatcher wants to schedule download of an object
    62  // for jogger and are destroyed when the download is aborted, finished or
    63  // fails.
    64  //
    65  // After a task is created, a separate goroutine is spun up to make the
    66  // actual GET request (jogger's download method). The goroutine for the jogger
    67  // sits idle awaiting an abort(failed or aborted by user) or finish message from the
    68  // goroutine responsible for the actual download.
    69  //
    70  // ====== Aborting ======
    71  //
    72  // When Dispatcher receives an abort request, it aborts running task or
    73  // if the task is scheduled but is not yet processed, then it is removed
    74  // from queue (see: put, get). If the task is running, `cancel` function is
    75  // invoked to abort task's request.
    76  //
    77  // ====== Status Updates ======
    78  //
// Status updates are made possible by progressReader, which wraps an
// io.Reader so that each Read additionally notifies a reporter func.
    81  // The notification includes the number of bytes read so far from the GET
    82  // response body.
    83  //
// When Dispatcher receives a status update request, it dispatches to a separate
// jogger goroutine that checks if the download has completed. Otherwise it checks
    86  // if it is currently being downloaded. If it is being currently downloaded, it
    87  // returns the progress. Otherwise, it returns that the object hasn't been
    88  // downloaded yet. Now, the file may never be downloaded if the download (request)
    89  // was never queued to the downloadCh.
    90  //
    91  // Status updates are either reported in terms of size or size and percentage.
    92  // When downloading an object from a server, we attempt to obtain the object size
    93  // using the "Content-Length" field returned in the Header.
// NOTE: Not all servers respond with a "Content-Length" response header.
    95  // In these cases progress percentage is not returned, just the current
    96  // downloaded size (in bytes).
    97  //
    98  // ====== Notes ======
    99  //
   100  // Downloader assumes that any type of download request is first sent to a proxy
   101  // and then redirected to the correct target's Downloader (the proxy uses the
   102  // HRW algorithm to determine the target). It is not possible to directly hit a
   103  // Target's download endpoint to force an object to be downloaded to that
// Target; all requests must go through a proxy first.
   105  //
   106  // ================================ Summary ====================================
   107  
// Action names carried in request.action to tell the dispatcher which admin
// operation is being requested.
const (
	actRemove = "REMOVE"
	actAbort  = "ABORT"
	actStatus = "STATUS"
	actList   = "LIST"
)
   114  
type (
	// factory is the xreg.Renewable for the download xaction: it builds the
	// xaction in Start and hands it out via Get.
	factory struct {
		xreg.RenewBase
		// xaction created by Start
		xctn *Xact
		// bucket taken from args.Custom in New
		bck  *meta.Bck
	}
	// Xact is the download xaction. It owns a dispatcher to which download
	// jobs and admin requests are forwarded.
	Xact struct {
		xact.DemandBase
		// factory that created this xaction
		p          *factory
		// receives jobs (workCh) and admin requests (adminReq)
		dispatcher *dispatcher
	}

	// The result of calling one of Downloader's public methods is encapsulated
	// in a response object, which is used to communicate the outcome of the
	// request.
	response struct {
		value      any
		err        error
		statusCode int
	}

	// Calling Downloader's public methods results in creation of a request
	// for admin related tasks (i.e. aborting and status updates). These
	// objects are used by Downloader to process the request, and are then
	// dispatched to the correct jogger to be handled.
	request struct {
		action     string         // one of: actAbort, actList, actStatus, actRemove
		id         string         // id of the job task
		regex      *regexp.Regexp // regex of descriptions to return if id is empty
		response   *response      // where the outcome of the request is written
		onlyActive bool           // request status of only active tasks
	}

	// progressReader wraps an io.Reader and calls reporter after every Read
	// with the number of bytes that Read returned.
	progressReader struct {
		r        io.Reader
		reporter func(n int64)
	}
)
   153  
// interface guard: compile-time assertions that the types below implement
// the interfaces they are expected to satisfy.
var (
	_ xact.Demand    = (*Xact)(nil)
	_ xreg.Renewable = (*factory)(nil)
	_ io.ReadCloser  = (*progressReader)(nil)
)
   160  
   161  /////////////
   162  // factory //
   163  /////////////
   164  
   165  func (*factory) New(args xreg.Args, _ *meta.Bck) xreg.Renewable {
   166  	return &factory{RenewBase: xreg.RenewBase{Args: args}, bck: args.Custom.(*meta.Bck)}
   167  }
   168  
   169  func (p *factory) Start() error {
   170  	xdl := newXact(p)
   171  	p.xctn = xdl
   172  	go xdl.Run(nil)
   173  	return nil
   174  }
   175  
   176  func (*factory) Kind() string     { return apc.ActDownload }
   177  func (p *factory) Get() core.Xact { return p.xctn }
   178  
   179  func (*factory) WhenPrevIsRunning(xreg.Renewable) (xreg.WPR, error) {
   180  	return xreg.WprKeepAndStartNew, nil
   181  }
   182  
   183  //////////
   184  // Xact //
   185  //////////
   186  
   187  func newXact(p *factory) (xld *Xact) {
   188  	xld = &Xact{p: p}
   189  	xld.dispatcher = newDispatcher(xld)
   190  	xld.DemandBase.Init(p.UUID(), apc.Download, p.bck, 0 /*use default*/)
   191  	return
   192  }
   193  
   194  func (xld *Xact) Run(*sync.WaitGroup) {
   195  	nlog.Infoln("starting", xld.Name())
   196  	err := xld.dispatcher.run()
   197  	if err != nil {
   198  		xld.AddErr(err)
   199  	}
   200  	xld.stop()
   201  }
   202  
   203  // stop terminates the downloader and all dependent entities.
   204  func (xld *Xact) stop() {
   205  	xld.DemandBase.Stop()
   206  	xld.Finish()
   207  }
   208  
   209  func (xld *Xact) Download(job jobif) (resp any, statusCode int, err error) {
   210  	xld.IncPending()
   211  	defer xld.DecPending()
   212  
   213  	dljob := g.store.setJob(job)
   214  
   215  	select {
   216  	case xld.dispatcher.workCh <- job:
   217  		return dljob.id, http.StatusOK, nil // TODO -- FIXME: dljob.clone() all the way to client (+below)
   218  	default:
   219  		select {
   220  		case xld.dispatcher.workCh <- job:
   221  			return dljob.id, http.StatusOK, nil
   222  		case <-time.After(cmn.Rom.CplaneOperation()):
   223  			return "downloader job queue is full", http.StatusTooManyRequests, nil
   224  		}
   225  	}
   226  }
   227  
   228  func (xld *Xact) AbortJob(id string) (resp any, statusCode int, err error) {
   229  	xld.IncPending()
   230  	req := &request{action: actAbort, id: id}
   231  	resp, statusCode, err = xld.dispatcher.adminReq(req)
   232  	xld.DecPending()
   233  	return
   234  }
   235  
   236  func (xld *Xact) RemoveJob(id string) (resp any, statusCode int, err error) {
   237  	xld.IncPending()
   238  	req := &request{action: actRemove, id: id}
   239  	resp, statusCode, err = xld.dispatcher.adminReq(req)
   240  	xld.DecPending()
   241  	return
   242  }
   243  
   244  func (xld *Xact) JobStatus(id string, onlyActive bool) (resp any, statusCode int, err error) {
   245  	xld.IncPending()
   246  	req := &request{action: actStatus, id: id, onlyActive: onlyActive}
   247  	resp, statusCode, err = xld.dispatcher.adminReq(req)
   248  	xld.DecPending()
   249  	return
   250  }
   251  
   252  func (xld *Xact) Snap() (snap *core.Snap) {
   253  	snap = &core.Snap{}
   254  	xld.ToSnap(snap)
   255  
   256  	snap.IdleX = xld.IsIdle()
   257  	return
   258  }
   259  
   260  /////////////
   261  // request //
   262  /////////////
   263  
   264  func (req *request) rsp(value any, err error, statusCode int) {
   265  	req.response = &response{value: value, err: err, statusCode: statusCode}
   266  }
   267  func (req *request) errRsp(err error, statusCode int) { req.rsp(nil, err, statusCode) }
   268  func (req *request) okRsp(value any)                  { req.rsp(value, nil, http.StatusOK) }
   269  
   270  ////////////////////
   271  // progressReader //
   272  ////////////////////
   273  
   274  func (pr *progressReader) Read(p []byte) (n int, err error) {
   275  	n, err = pr.r.Read(p)
   276  	pr.reporter(int64(n))
   277  	return
   278  }
   279  
   280  func (pr *progressReader) Close() error {
   281  	pr.r = nil
   282  	pr.reporter = nil
   283  	return nil
   284  }