github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/xs/lrit.go (about)

     1  // Package xs is a collection of eXtended actions (xactions), including multi-object
     2  // operations, list-objects, (cluster) rebalance and (target) resilver, ETL, and more.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package xs
     7  
     8  import (
     9  	"sync"
    10  
    11  	"github.com/NVIDIA/aistore/api/apc"
    12  	"github.com/NVIDIA/aistore/cmn"
    13  	"github.com/NVIDIA/aistore/cmn/cos"
    14  	"github.com/NVIDIA/aistore/cmn/nlog"
    15  	"github.com/NVIDIA/aistore/core"
    16  	"github.com/NVIDIA/aistore/core/meta"
    17  	"github.com/NVIDIA/aistore/fs"
    18  	"github.com/NVIDIA/aistore/xact/xreg"
    19  )
    20  
    21  // Assorted multi-object (list/range templated) xactions: evict, delete, prefetch multiple objects
    22  //
    23  // Supported range syntax includes:
    24  //   1. bash-extension style: `file-{0..100}`
    25  //   2. at-style: `file-@100`
    26  //   3. if none of the above, fall back to just prefix matching
    27  
    28  // TODO:
    29  // - user-assigned (configurable) num-workers
    30  // - jogger(s) per mountpath type concurrency
    31  
    32  const (
    33  	lrpList = iota + 1
    34  	lrpRange
    35  	lrpPrefix
    36  )
    37  
    38  // common for all list-range
    39  type (
    40  	// one multi-object operation work item
    41  	lrwi interface {
    42  		do(*core.LOM, *lriterator)
    43  	}
    44  	// a strict subset of core.Xact, includes only the methods
    45  	// lriterator needs for itself
    46  	lrxact interface {
    47  		IsAborted() bool
    48  		Finished() bool
    49  	}
    50  
    51  	// running concurrency
    52  	lrpair struct {
    53  		lom *core.LOM
    54  		wi  lrwi
    55  	}
    56  	lrworker struct {
    57  		lrit *lriterator
    58  	}
    59  
    60  	// common multi-object operation context and list|range|prefix logic
    61  	lriterator struct {
    62  		parent lrxact
    63  		msg    *apc.ListRange
    64  		bck    *meta.Bck
    65  		pt     *cos.ParsedTemplate
    66  		prefix string
    67  		lrp    int // { lrpList, ... } enum
    68  
    69  		// running concurrency
    70  		workCh  chan lrpair
    71  		workers []*lrworker
    72  		wg      sync.WaitGroup
    73  	}
    74  )
    75  
    76  // concrete list-range type xactions (see also: archive.go)
    77  type (
    78  	TestXFactory struct{ prfFactory } // tests only
    79  )
    80  
    81  // interface guard
    82  var (
    83  	_ core.Xact = (*evictDelete)(nil)
    84  	_ core.Xact = (*prefetch)(nil)
    85  
    86  	_ xreg.Renewable = (*evdFactory)(nil)
    87  	_ xreg.Renewable = (*prfFactory)(nil)
    88  
    89  	_ lrwi = (*evictDelete)(nil)
    90  	_ lrwi = (*prefetch)(nil)
    91  )
    92  
    93  ////////////////
    94  // lriterator //
    95  ////////////////
    96  
    97  func (r *lriterator) init(xctn lrxact, msg *apc.ListRange, bck *meta.Bck, blocking ...bool) error {
    98  	avail := fs.GetAvail()
    99  	l := len(avail)
   100  	if l == 0 {
   101  		return cmn.ErrNoMountpaths
   102  	}
   103  	r.parent = xctn
   104  	r.msg = msg
   105  	r.bck = bck
   106  
   107  	// list is the simplest and always single-threaded
   108  	if msg.IsList() {
   109  		r.lrp = lrpList
   110  		return nil
   111  	}
   112  	if err := r._inipr(msg); err != nil {
   113  		return err
   114  	}
   115  	if l == 1 {
   116  		return nil
   117  	}
   118  	if len(blocking) > 0 && blocking[0] {
   119  		return nil
   120  	}
   121  
   122  	// num-workers == num-mountpaths but note:
   123  	// these are not _joggers_
   124  	r.workers = make([]*lrworker, 0, l)
   125  	for range avail {
   126  		r.workers = append(r.workers, &lrworker{r})
   127  	}
   128  	r.workCh = make(chan lrpair, l)
   129  	return nil
   130  }
   131  
   132  // [NOTE] treating an empty ("") or wildcard ('*') template
   133  // as an empty prefix, to facilitate all-objects scope, e.g.:
   134  // - "copy entire source bucket", or even
   135  // - "archive entire bucket as a single shard" (caution!)
   136  
   137  func (r *lriterator) _inipr(msg *apc.ListRange) error {
   138  	pt, err := cos.NewParsedTemplate(msg.Template)
   139  	if err != nil {
   140  		if err == cos.ErrEmptyTemplate {
   141  			pt.Prefix = cos.EmptyMatchAll
   142  			goto pref
   143  		}
   144  		return err
   145  	}
   146  	if err := cmn.ValidatePrefix(pt.Prefix); err != nil {
   147  		nlog.Errorln(err)
   148  		return err
   149  	}
   150  	if len(pt.Ranges) != 0 {
   151  		r.pt = &pt
   152  		r.lrp = lrpRange
   153  		return nil
   154  	}
   155  pref:
   156  	r.prefix = pt.Prefix
   157  	r.lrp = lrpPrefix
   158  	return nil
   159  }
   160  
   161  func (r *lriterator) run(wi lrwi, smap *meta.Smap) (err error) {
   162  	for _, worker := range r.workers {
   163  		r.wg.Add(1)
   164  		go worker.run()
   165  	}
   166  	switch r.lrp {
   167  	case lrpList:
   168  		err = r._list(wi, smap)
   169  	case lrpRange:
   170  		err = r._range(wi, smap)
   171  	case lrpPrefix:
   172  		err = r._prefix(wi, smap)
   173  	}
   174  	return err
   175  }
   176  
   177  func (r *lriterator) wait() {
   178  	if r.workers == nil {
   179  		return
   180  	}
   181  	close(r.workCh)
   182  	r.wg.Wait()
   183  }
   184  
   185  func (r *lriterator) done() bool { return r.parent.IsAborted() || r.parent.Finished() }
   186  
   187  func (r *lriterator) _list(wi lrwi, smap *meta.Smap) error {
   188  	r.lrp = lrpList
   189  	for _, objName := range r.msg.ObjNames {
   190  		if r.done() {
   191  			break
   192  		}
   193  		lom := core.AllocLOM(objName)
   194  		done, err := r.do(lom, wi, smap)
   195  		if err != nil {
   196  			core.FreeLOM(lom)
   197  			return err
   198  		}
   199  		if done {
   200  			core.FreeLOM(lom)
   201  		}
   202  	}
   203  	return nil
   204  }
   205  
   206  func (r *lriterator) _range(wi lrwi, smap *meta.Smap) error {
   207  	r.pt.InitIter()
   208  	for objName, hasNext := r.pt.Next(); hasNext; objName, hasNext = r.pt.Next() {
   209  		if r.done() {
   210  			return nil
   211  		}
   212  		lom := core.AllocLOM(objName)
   213  		done, err := r.do(lom, wi, smap)
   214  		if err != nil {
   215  			core.FreeLOM(lom)
   216  			return err
   217  		}
   218  		if done {
   219  			core.FreeLOM(lom)
   220  		}
   221  	}
   222  	return nil
   223  }
   224  
   225  // (compare with ais/plstcx)
   226  func (r *lriterator) _prefix(wi lrwi, smap *meta.Smap) error {
   227  	var (
   228  		err     error
   229  		ecode   int
   230  		lst     *cmn.LsoRes
   231  		msg     = &apc.LsoMsg{Prefix: r.prefix, Props: apc.GetPropsStatus}
   232  		npg     = newNpgCtx(r.bck, msg, noopCb, nil /*core.LsoInvCtx bucket inventory*/)
   233  		bremote = r.bck.IsRemote()
   234  	)
   235  	if err := r.bck.Init(core.T.Bowner()); err != nil {
   236  		return err
   237  	}
   238  	if !bremote {
   239  		smap = nil // not needed
   240  	}
   241  	for {
   242  		if r.done() {
   243  			break
   244  		}
   245  		if bremote {
   246  			lst = &cmn.LsoRes{Entries: allocLsoEntries()}
   247  			ecode, err = core.T.Backend(r.bck).ListObjects(r.bck, msg, lst) // (TODO comment above)
   248  		} else {
   249  			npg.page.Entries = allocLsoEntries()
   250  			err = npg.nextPageA()
   251  			lst = &npg.page
   252  		}
   253  		if err != nil {
   254  			nlog.Errorln(core.T.String()+":", err, ecode)
   255  			freeLsoEntries(lst.Entries)
   256  			return err
   257  		}
   258  		for _, be := range lst.Entries {
   259  			if !be.IsStatusOK() {
   260  				continue
   261  			}
   262  			if be.IsDir() || cos.IsLastB(be.Name, '/') { // skip virtual dir (apc.EntryIsDir)
   263  				continue
   264  			}
   265  			if r.done() {
   266  				freeLsoEntries(lst.Entries)
   267  				return nil
   268  			}
   269  			lom := core.AllocLOM(be.Name)
   270  			done, err := r.do(lom, wi, smap)
   271  			if err != nil {
   272  				core.FreeLOM(lom)
   273  				freeLsoEntries(lst.Entries)
   274  				return err
   275  			}
   276  			if done {
   277  				core.FreeLOM(lom)
   278  			}
   279  		}
   280  		freeLsoEntries(lst.Entries)
   281  		// last page listed
   282  		if lst.ContinuationToken == "" {
   283  			break
   284  		}
   285  		// token for the next page
   286  		msg.ContinuationToken = lst.ContinuationToken
   287  	}
   288  	return nil
   289  }
   290  
   291  func (r *lriterator) do(lom *core.LOM, wi lrwi, smap *meta.Smap) (bool /*this lom done*/, error) {
   292  	if err := lom.InitBck(r.bck.Bucket()); err != nil {
   293  		return false, err
   294  	}
   295  	// (smap != nil) to filter non-locals
   296  	if smap != nil {
   297  		_, local, err := lom.HrwTarget(smap)
   298  		if err != nil {
   299  			return false, err
   300  		}
   301  		if !local {
   302  			return true, nil
   303  		}
   304  	}
   305  
   306  	if r.workers == nil {
   307  		wi.do(lom, r)
   308  		return true, nil
   309  	}
   310  	r.workCh <- lrpair{lom, wi} // lom eventually freed below
   311  	return false, nil
   312  }
   313  
   314  //////////////
   315  // lrworker //
   316  //////////////
   317  
   318  func (worker *lrworker) run() {
   319  	for {
   320  		lrpair, ok := <-worker.lrit.workCh
   321  		if !ok {
   322  			break
   323  		}
   324  		lrpair.wi.do(lrpair.lom, worker.lrit)
   325  		core.FreeLOM(lrpair.lom)
   326  		if worker.lrit.parent.IsAborted() {
   327  			break
   328  		}
   329  	}
   330  	worker.lrit.wg.Done()
   331  }