github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/xact/api.go (about)

     1  // Package xact provides core functionality for the AIStore eXtended Actions (xactions).
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package xact
     6  
     7  import (
     8  	"errors"
     9  	"fmt"
    10  	"sort"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/core"
    19  )
    20  
// Xaction scopes: possible values of `Descriptor.Scope`.
// Numbering starts at 1 (iota + 1), so the zero value is not a valid scope.
const (
	ScopeG  = iota + 1 // cluster
	ScopeB             // bucket
	ScopeGB            // (one bucket) | (all buckets)
	ScopeT             // target
)
    27  
const (
	// ID separator (not referenced in this file)
	SepaID = ","

	// delimiters around xaction ID in the "kind[uuid]" notation
	// (see `Cname` and `ParseCname`)
	LeftID  = "["
	RightID = "]"
)
    34  
// global waiting tunables
// (used in: `api.WaitForXactionIC` and `api.WaitForXactionNode`)
const (
	DefWaitTimeShort = time.Minute        // default when `ArgsMsg.Timeout` is zero
	DefWaitTimeLong  = 7 * 24 * time.Hour // default when `ArgsMsg.Timeout` is negative
	MaxProbingFreq   = 30 * time.Second   // as the name implies
	MinPollTime      = 2 * time.Second    // ditto
	MaxPollTime      = 2 * time.Minute    // poll time can grow up to this value

	// number of consecutive 'idle' xaction states, with possible numeric
	// values translating as follows:
	// 1: fully rely on xact.IsIdle() logic with no extra checks whatsoever
	// 2: one additional IsIdle() call after MinPollTime
	// 3: two additional IsIdle() calls spaced at MinPollTime interval, and so on.
	NumConsecutiveIdle = 2
)
    51  
type (
	// ArgsMsg selects xaction(s) by ID and/or Kind, with optional qualifiers.
	// Either xaction ID or Kind must be specified.
	// The message is passed via ActMsg.Value and extracted using MorphMarshal.
	ArgsMsg struct {
		ID   string // xaction UUID
		Kind string // xaction kind _or_ name (see `xact.Table`)

		// optional parameters
		DaemonID    string        // node that runs this xaction
		Bck         cmn.Bck       // bucket
		Buckets     []cmn.Bck     // list of buckets (e.g., copy-bucket, lru-evict, etc.)
		Timeout     time.Duration // max time to wait
		Force       bool          // force
		OnlyRunning bool          // only for running xactions
	}

	// QueryMsg is a simplified JSON-tagged version of the above.
	// NOTE: `OnlyRunning` is a pointer; `QueryMsg.String` reports "only-running"
	// only when it is both non-nil and true.
	QueryMsg struct {
		OnlyRunning *bool     `json:"show_active"`
		Bck         cmn.Bck   `json:"bck"`
		ID          string    `json:"id"`
		Kind        string    `json:"kind"`
		DaemonID    string    `json:"node,omitempty"`
		Buckets     []cmn.Bck `json:"buckets,omitempty"`
	}

	// MultiSnap holds xaction snapshots grouped by the reporting target.
	// primarily: `api.QueryXactionSnaps`
	MultiSnap map[string][]*core.Snap // by target ID (tid)
)
    81  
type (
	// Descriptor contains static (per xaction kind) properties;
	// see `Table` for the Kind => Descriptor mapping.
	Descriptor struct {
		DisplayName string          // as implied; when empty, xaction kind is used instead (see `GetKindName`)
		Access      apc.AccessAttrs // access permissions (see: apc.Access*)
		Scope       int             // ScopeG (global), etc. - the enum above
		Startable   bool            // true if user can start this xaction (e.g., via `api.StartXaction`)
		Metasync    bool            // true if this xaction changes (and metasyncs) cluster metadata
		RefreshCap  bool            // refresh capacity stats upon completion

		// see xreg for "limited coexistence"
		Rebalance      bool // moves data between nodes
		Resilver       bool // moves data between mountpaths
		ConflictRebRes bool // conflicts with rebalance/resilver
		AbortRebRes    bool // gets aborted upon rebalance/resilver - currently, all `ext`-ensions

		// xaction has an intermediate `idle` state whereby it "idles" between requests
		// (see related: xact/demand.go)
		Idles bool

		// xaction returns extended xaction-specific stats
		// (see related: `Snap.Ext` in core/xaction.go)
		ExtendedStats bool
	}
)
   106  
   107  ////////////////
   108  // Descriptor //
   109  ////////////////
   110  
// `xact.Table` is a static, public, and global Kind=>[Xaction Descriptor] map that contains
// xaction kinds and static properties, such as `Startable`, `Metasync`, etc.
// In particular, "startability" is narrowly defined as ability to start xaction
// via `api.StartXaction`
// (whereby copying bucket, for instance, requires a separate `api.CopyBucket`, etc.)
// NOTE: entries that do not specify `DisplayName` are named by their respective kinds
// (see `GetKindName` and `ListDisplayNames`).
var Table = map[string]Descriptor{
	// bucket-less xactions that will typically have a 'cluster' scope (with resilver being a notable exception)
	apc.ActElection:  {DisplayName: "elect-primary", Scope: ScopeG, Startable: false},
	apc.ActRebalance: {Scope: ScopeG, Startable: true, Metasync: true, Rebalance: true},

	apc.ActETLInline: {Scope: ScopeG, Startable: false, AbortRebRes: true},

	// (one bucket) | (all buckets)
	apc.ActLRU:          {DisplayName: "lru-eviction", Scope: ScopeGB, Startable: true},
	apc.ActStoreCleanup: {DisplayName: "cleanup", Scope: ScopeGB, Startable: true},
	apc.ActSummaryBck: {
		DisplayName: "summary",
		Scope:       ScopeGB,
		Access:      apc.AceObjLIST | apc.AceBckHEAD,
		Startable:   false,
		Metasync:    false,
	},

	// single target (node)
	apc.ActResilver: {Scope: ScopeT, Startable: true, Resilver: true},

	// on-demand EC and n-way replication
	// (non-startable, triggered by PUT => erasure-coded or mirrored bucket)
	apc.ActECGet:     {Scope: ScopeB, Startable: false, Idles: true, ExtendedStats: true},
	apc.ActECPut:     {Scope: ScopeB, Startable: false, RefreshCap: true, Idles: true, ExtendedStats: true},
	apc.ActECRespond: {Scope: ScopeB, Startable: false, Idles: true},
	apc.ActPutCopies: {Scope: ScopeB, Startable: false, RefreshCap: true, Idles: true},

	//
	// on-demand multi-object (consider setting ConflictRebRes = true)
	//
	apc.ActArchive: {Scope: ScopeB, Access: apc.AccessRW, Startable: false, RefreshCap: true, Idles: true},
	apc.ActCopyObjects: {
		DisplayName: "copy-objects",
		Scope:       ScopeB,
		Access:      apc.AccessRW, // apc.AceCreateBucket is checked as well but only if ais://dst doesn't exist
		Startable:   false,
		RefreshCap:  true,
		Idles:       true,
	},
	apc.ActETLObjects: {
		DisplayName: "etl-objects",
		Scope:       ScopeB,
		Access:      apc.AccessRW, // ditto
		Startable:   false,
		RefreshCap:  true,
		Idles:       true,
		AbortRebRes: true,
	},

	apc.ActBlobDl: {Access: apc.AccessRW, Scope: ScopeB, Startable: true, AbortRebRes: true, RefreshCap: true},

	apc.ActDownload: {Access: apc.AccessRW, Scope: ScopeG, Startable: false, Idles: true, AbortRebRes: true},

	// in its own class
	apc.ActDsort: {
		DisplayName:    "dsort",
		Scope:          ScopeB,
		Access:         apc.AccessRW,
		Startable:      false,
		RefreshCap:     true,
		ConflictRebRes: true,
		ExtendedStats:  true,
		AbortRebRes:    true,
	},

	// multi-object
	apc.ActPromote: {
		DisplayName: "promote-files",
		Scope:       ScopeB,
		Access:      apc.AcePromote,
		Startable:   false,
		RefreshCap:  true,
	},
	apc.ActEvictObjects: {
		DisplayName: "evict-objects",
		Scope:       ScopeB,
		Access:      apc.AceObjDELETE,
		Startable:   false,
		RefreshCap:  true,
	},
	apc.ActDeleteObjects: {
		DisplayName: "delete-objects",
		Scope:       ScopeB,
		Access:      apc.AceObjDELETE,
		Startable:   false,
		RefreshCap:  true,
	},
	apc.ActPrefetchObjects: {
		DisplayName: "prefetch-objects",
		Scope:       ScopeB,
		Access:      apc.AccessRW,
		Startable:   true,
		RefreshCap:  true,
	},

	// entire bucket (storage svcs)
	apc.ActECEncode: {
		DisplayName:    "ec-bucket",
		Scope:          ScopeB,
		Access:         apc.AccessRW,
		Startable:      true,
		Metasync:       true,
		RefreshCap:     true,
		ConflictRebRes: true,
	},
	apc.ActMakeNCopies: {
		DisplayName: "mirror",
		Scope:       ScopeB,
		Access:      apc.AccessRW,
		Startable:   true,
		Metasync:    true,
		RefreshCap:  true,
	},
	apc.ActMoveBck: {
		DisplayName:    "rename-bucket",
		Scope:          ScopeB,
		Access:         apc.AceMoveBucket,
		Startable:      false, // executing this one cannot be done via `api.StartXaction`
		Metasync:       true,
		Rebalance:      true,
		ConflictRebRes: true,
	},
	apc.ActCopyBck: {
		DisplayName:    "copy-bucket",
		Scope:          ScopeB,
		Access:         apc.AccessRW, // apc.AceCreateBucket ditto
		Startable:      false,        // ditto
		Metasync:       true,
		RefreshCap:     true,
		ConflictRebRes: true,
	},
	apc.ActETLBck: {
		DisplayName: "etl-bucket",
		Scope:       ScopeB,
		Access:      apc.AccessRW, // ditto
		Startable:   false,        // ditto
		Metasync:    true,
		RefreshCap:  true,
		AbortRebRes: true,
	},

	apc.ActList: {Scope: ScopeB, Access: apc.AceObjLIST, Startable: false, Metasync: false, Idles: true},

	// cache management, internal usage
	apc.ActLoadLomCache:   {DisplayName: "warm-up-metadata", Scope: ScopeB, Startable: true},
	apc.ActInvalListCache: {Scope: ScopeB, Access: apc.AceObjLIST, Startable: false},
}
   264  
   265  func IsValidKind(kind string) bool {
   266  	_, ok := Table[kind]
   267  	return ok
   268  }
   269  
   270  func GetDescriptor(kindOrName string) (string, Descriptor, error) {
   271  	kind, dtor := getDtor(kindOrName)
   272  	if dtor == nil {
   273  		return "", Descriptor{}, fmt.Errorf("not found xaction %q", kindOrName)
   274  	}
   275  	return kind, *dtor, nil
   276  }
   277  
   278  func GetKindName(kindOrName string) (kind, name string) {
   279  	if kindOrName == "" {
   280  		return
   281  	}
   282  	var dtor *Descriptor
   283  	kind, dtor = getDtor(kindOrName)
   284  	if dtor == nil {
   285  		return
   286  	}
   287  	name = dtor.DisplayName
   288  	if name == "" {
   289  		name = kind
   290  	}
   291  	return
   292  }
   293  
   294  func Cname(kind, uuid string) string { return kind + LeftID + uuid + RightID }
   295  
   296  func ParseCname(cname string) (xactKind, xactID string, _ error) {
   297  	const efmt = "invalid name %q"
   298  	l := len(cname)
   299  	if l == 0 || cname[l-1] != RightID[0] {
   300  		return "", "", fmt.Errorf(efmt, cname)
   301  	}
   302  	i := strings.IndexByte(cname, LeftID[0])
   303  	if i < 0 {
   304  		return "", "", fmt.Errorf(efmt, cname)
   305  	}
   306  	xactKind, xactID = cname[:i], cname[i+1:l-1]
   307  	return xactKind, xactID, nil
   308  }
   309  
   310  func IdlesBeforeFinishing(kindOrName string) bool {
   311  	_, dtor := getDtor(kindOrName)
   312  	debug.Assert(dtor != nil)
   313  	return dtor.Idles
   314  }
   315  
   316  func ListDisplayNames(onlyStartable bool) (names []string) {
   317  	names = make([]string, 0, len(Table))
   318  	for kind, dtor := range Table {
   319  		if onlyStartable && !dtor.Startable {
   320  			continue
   321  		}
   322  		if dtor.DisplayName != "" {
   323  			names = append(names, dtor.DisplayName)
   324  		} else {
   325  			names = append(names, kind)
   326  		}
   327  	}
   328  	sort.Strings(names)
   329  	return
   330  }
   331  
   332  func IsSameScope(kindOrName string, scs ...int) bool {
   333  	_, dtor := getDtor(kindOrName)
   334  	if dtor == nil {
   335  		return false
   336  	}
   337  	scope, scope2 := scs[0], 0
   338  	if len(scs) > 1 {
   339  		scope2 = scs[1]
   340  	}
   341  	return dtor.Scope == scope || dtor.Scope == scope2
   342  }
   343  
   344  func getDtor(kindOrName string) (string, *Descriptor) {
   345  	if dtor, ok := Table[kindOrName]; ok {
   346  		return kindOrName, &dtor
   347  	}
   348  	for kind, dtor := range Table {
   349  		if dtor.DisplayName == kindOrName {
   350  			return kind, &dtor
   351  		}
   352  	}
   353  	return "", nil
   354  }
   355  
   356  /////////////
   357  // ArgsMsg //
   358  /////////////
   359  
   360  func (args *ArgsMsg) String() (s string) {
   361  	if args.ID == "" {
   362  		s = "x-" + args.Kind
   363  	} else {
   364  		s = fmt.Sprintf("x-%s[%s]", args.Kind, args.ID)
   365  	}
   366  	if !args.Bck.IsEmpty() {
   367  		s += "-" + args.Bck.String()
   368  	}
   369  	if args.Timeout > 0 {
   370  		s += "-" + args.Timeout.String()
   371  	}
   372  	if args.DaemonID != "" {
   373  		s += "-node[" + args.DaemonID + "]"
   374  	}
   375  	return
   376  }
   377  
   378  //////////////
   379  // QueryMsg //
   380  //////////////
   381  
   382  func (msg *QueryMsg) String() (s string) {
   383  	if msg.ID == "" {
   384  		s = "x-" + msg.Kind
   385  	} else {
   386  		s = fmt.Sprintf("x-%s[%s]", msg.Kind, msg.ID)
   387  	}
   388  	if !msg.Bck.IsEmpty() {
   389  		s += "-" + msg.Bck.String()
   390  	}
   391  	if msg.DaemonID != "" {
   392  		s += "-node[" + msg.DaemonID + "]"
   393  	}
   394  	if msg.OnlyRunning != nil && *msg.OnlyRunning {
   395  		s += "-only-running"
   396  	}
   397  	return
   398  }
   399  
   400  ///////////////
   401  // MultiSnap //
   402  ///////////////
   403  
   404  // NOTE: when xaction UUID is not specified: require the same kind _and_
   405  // a single running uuid (otherwise, IsAborted() et al. can only produce ambiguous results)
   406  func (xs MultiSnap) checkEmptyID(xid string) error {
   407  	var kind, uuid string
   408  	if xid != "" {
   409  		debug.Assert(IsValidUUID(xid), xid)
   410  		return nil
   411  	}
   412  	for _, snaps := range xs {
   413  		for _, xsnap := range snaps {
   414  			if kind == "" {
   415  				kind = xsnap.Kind
   416  			} else if kind != xsnap.Kind {
   417  				return fmt.Errorf("invalid multi-snap Kind: %q vs %q", kind, xsnap.Kind)
   418  			}
   419  			if xsnap.Running() {
   420  				if uuid == "" {
   421  					uuid = xsnap.ID
   422  				} else if uuid != xsnap.ID {
   423  					return fmt.Errorf("invalid multi-snap UUID: %q vs %q", uuid, xsnap.ID)
   424  				}
   425  			}
   426  		}
   427  	}
   428  	return nil
   429  }
   430  
   431  func (xs MultiSnap) GetUUIDs() []string {
   432  	uuids := make(cos.StrSet, 2)
   433  	for _, snaps := range xs {
   434  		for _, xsnap := range snaps {
   435  			uuids[xsnap.ID] = struct{}{}
   436  		}
   437  	}
   438  	return uuids.ToSlice()
   439  }
   440  
   441  func (xs MultiSnap) RunningTarget(xid string) (string /*tid*/, *core.Snap, error) {
   442  	if err := xs.checkEmptyID(xid); err != nil {
   443  		return "", nil, err
   444  	}
   445  	for tid, snaps := range xs {
   446  		for _, xsnap := range snaps {
   447  			if (xid == xsnap.ID || xid == "") && xsnap.Running() {
   448  				return tid, xsnap, nil
   449  			}
   450  		}
   451  	}
   452  	return "", nil, nil
   453  }
   454  
   455  func (xs MultiSnap) IsAborted(xid string) (bool, error) {
   456  	if err := xs.checkEmptyID(xid); err != nil {
   457  		return false, err
   458  	}
   459  	for _, snaps := range xs {
   460  		for _, xsnap := range snaps {
   461  			if (xid == xsnap.ID || xid == "") && xsnap.IsAborted() {
   462  				return true, nil
   463  			}
   464  		}
   465  	}
   466  	return false, nil
   467  }
   468  
   469  // (all targets, all xactions)
   470  func (xs MultiSnap) IsIdle(xid string) (aborted, running, notstarted bool) {
   471  	if xid != "" {
   472  		debug.Assert(IsValidUUID(xid), xid)
   473  		return xs._get(xid)
   474  	}
   475  	uuids := xs.GetUUIDs()
   476  	for _, xid = range uuids {
   477  		a, r, ns := xs._get(xid)
   478  		aborted = aborted || a
   479  		notstarted = notstarted || ns
   480  		running = running || r
   481  	}
   482  	return aborted, running, notstarted
   483  }
   484  
   485  // (all targets, given xaction)
   486  func (xs MultiSnap) _get(xid string) (aborted, running, notstarted bool) {
   487  	var nt, nr, ns, nf int
   488  	for _, snaps := range xs {
   489  		nt++
   490  		for _, xsnap := range snaps {
   491  			if xid != xsnap.ID {
   492  				continue
   493  			}
   494  			nf++
   495  			// (one target, one xaction)
   496  			switch {
   497  			case xsnap.IsAborted():
   498  				return true, false, false
   499  			case !xsnap.Started():
   500  				ns++
   501  			case !xsnap.IsIdle():
   502  				nr++
   503  			}
   504  			break
   505  		}
   506  	}
   507  	running = nr > 0
   508  	notstarted = ns > 0 || nf == 0
   509  	return
   510  }
   511  
   512  func (xs MultiSnap) ObjCounts(xid string) (locObjs, outObjs, inObjs int64) {
   513  	if xid == "" {
   514  		uuids := xs.GetUUIDs()
   515  		debug.Assert(len(uuids) == 1, uuids)
   516  		xid = uuids[0]
   517  	}
   518  	for _, snaps := range xs {
   519  		for _, xsnap := range snaps {
   520  			if xid == xsnap.ID {
   521  				locObjs += xsnap.Stats.Objs
   522  				outObjs += xsnap.Stats.OutObjs
   523  				inObjs += xsnap.Stats.InObjs
   524  			}
   525  		}
   526  	}
   527  	return
   528  }
   529  
   530  func (xs MultiSnap) ByteCounts(xid string) (locBytes, outBytes, inBytes int64) {
   531  	if xid == "" {
   532  		uuids := xs.GetUUIDs()
   533  		debug.Assert(len(uuids) == 1, uuids)
   534  		xid = uuids[0]
   535  	}
   536  	for _, snaps := range xs {
   537  		for _, xsnap := range snaps {
   538  			if xid == xsnap.ID {
   539  				locBytes += xsnap.Stats.Bytes
   540  				outBytes += xsnap.Stats.OutBytes
   541  				inBytes += xsnap.Stats.InBytes
   542  			}
   543  		}
   544  	}
   545  	return
   546  }
   547  
   548  func (xs MultiSnap) TotalRunningTime(xid string) (time.Duration, error) {
   549  	debug.Assert(IsValidUUID(xid), xid)
   550  	var (
   551  		start, end     time.Time
   552  		found, running bool
   553  	)
   554  	for _, snaps := range xs {
   555  		for _, xsnap := range snaps {
   556  			if xid == xsnap.ID {
   557  				found = true
   558  				running = running || xsnap.Running()
   559  				if !xsnap.StartTime.IsZero() {
   560  					if start.IsZero() || xsnap.StartTime.Before(start) {
   561  						start = xsnap.StartTime
   562  					}
   563  				}
   564  				if !xsnap.EndTime.IsZero() && xsnap.EndTime.After(end) {
   565  					end = xsnap.EndTime
   566  				}
   567  			}
   568  		}
   569  	}
   570  	if !found {
   571  		return 0, errors.New("xaction [" + xid + "] not found")
   572  	}
   573  	if running {
   574  		end = time.Now()
   575  	}
   576  	return end.Sub(start), nil
   577  }