github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/cmn/bck.go (about)

     1  // Package cmn provides common constants, types, and utilities for AIS clients
     2  // and AIStore.
     3  /*
     4   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     5   */
     6  package cmn
     7  
     8  import (
     9  	"errors"
    10  	"fmt"
    11  	"net/url"
    12  	"path/filepath"
    13  	"sort"
    14  	"strings"
    15  
    16  	"github.com/NVIDIA/aistore/api/apc"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  )
    20  
type (
	// Ns (or Namespace) adds additional layer for scoping the data under
	// the same provider. It allows to have same dataset and bucket names
	// under different namespaces what allows for easy data manipulation without
	// affecting data in different namespaces.
	Ns struct {
		// UUID of other remote AIS cluster (for now only used for AIS). Note
		// that we can have different namespaces which refer to same UUID (cluster).
		// This means that in a sense UUID is a parent of the actual namespace.
		UUID string `json:"uuid" yaml:"uuid"`
		// Name uniquely identifies a namespace under the same UUID (which may
		// be empty) and is used in building FQN for the objects.
		Name string `json:"name" yaml:"name"`
	}

	// Bck identifies a bucket by its name, backend provider, and namespace.
	Bck struct {
		// Props is excluded from JSON (`json:"-"`); when non-nil, its BackendBck
		// field is what the Backend() method consults.
		Props    *Bprops `json:"-"`
		Name     string  `json:"name" yaml:"name"`
		Provider string  `json:"provider" yaml:"provider"` // NOTE: see api/apc/provider.go for supported enum
		Ns       Ns      `json:"namespace" yaml:"namespace" list:"omitempty"`
	}

	// Represents the AIS bucket, object and URL associated with a HTTP resource
	HTTPBckObj struct {
		Bck        Bck
		ObjName    string
		OrigURLBck string // HTTP URL of the bucket (object name excluded)
	}

	// QueryBcks has the same structure as Bck but is a distinct type:
	// a Bck that _can_ have an empty Name (i.e., a query over buckets).
	QueryBcks Bck

	// Bcks is a slice of buckets; it implements sort.Interface.
	Bcks []Bck
)
    54  
const (
	// NsGlobalUname is hardcoded here to avoid allocating it via Uname()
	// (the most common use case). It is the value that Ns.Uname() returns
	// for the global namespace, and the value ParseNsUname() maps back to NsGlobal.
	NsGlobalUname = "@#"
)
    60  
var (
	// NsGlobal represents *this* cluster's global namespace that is used by default when
	// no specific namespace was defined or provided by the user.
	NsGlobal = Ns{}
	// NsAnyRemote represents any remote cluster. As such, NsAnyRemote applies
	// exclusively to AIS (provider) given that other Backend providers are remote by definition.
	NsAnyRemote = Ns{UUID: string(apc.NsUUIDPrefix)}
)
    69  
    70  // A note on validation logic: cmn.Bck vs cmn.QueryBcks - same structures,
    71  // different types.
    72  //
    73  //  1. Validation of a concrete bucket checks that bucket name is set and is valid.
    74  //     If the provider is not set the default will be used, see `NormalizeProvider`.
    75  //     This case is handled in `newBckFromQuery` and `newBckFromQueryUname`. The
    76  //     CLI counterpart is `parseBckURI`.
    77  //  2. Validation of query buckets. Here all parts of the structure are optional.
    78  //     This case is handled in `newQueryBcksFromQuery`. The CLI counterpart is
    79  //     `parseQueryBckURI`.
    80  // These 2 cases have a slightly different logic for the validation but the
    81  // validation functions are always the same. Bucket name (`bck.ValidateName`)
    82  // and bucket namespace (`bck.Ns.Validate`) validation is straightforward
    83  // as we only need to check that the strings contain only valid characters. Bucket
    84  // provider validation, on the other hand, is a little bit more tricky as we have so
    85  // called "normalized providers" and their aliases. Normalized providers are the
    86  // providers registered in `Providers` set. Almost any provider that is being
    87  // validated goes through `NormalizeProvider` which converts aliases to
    88  // normalized form or sets default provider if the provider is empty. But there
    89  // are cases where we already expect **only** the normalized providers, for
    90  // example in FQN parsing. For this case `IsProvider` function must be
    91  // used.
    92  //
    93  // Similar concepts are applied when bucket is provided as URI,
    94  // eg. `ais://@uuid#ns/bucket_name`. URI form is heavily used by CLI. Parsing
    95  // is handled by `ParseBckObjectURI` which by itself doesn't do much validation.
    96  // The validation happens in aforementioned CLI specific parse functions.
    97  
    98  func NormalizeProvider(provider string) (p string, err error) {
    99  	if p = apc.NormalizeProvider(provider); p == "" {
   100  		err = &ErrInvalidBackendProvider{Bck{Provider: provider}}
   101  	}
   102  	return
   103  }
   104  
   105  ////////
   106  // Ns //
   107  ////////
   108  
   109  // Parses [@uuid][#namespace]. It does a little bit more than just parsing
   110  // a string from `Uname` so that logic can be reused in different places.
   111  func ParseNsUname(s string) (n Ns) {
   112  	if s == NsGlobalUname {
   113  		return NsGlobal // to speedup the common case (here and elsewhere)
   114  	}
   115  	if s != "" && s[0] == apc.NsUUIDPrefix {
   116  		s = s[1:]
   117  	}
   118  	idx := strings.IndexByte(s, apc.NsNamePrefix)
   119  	if idx == -1 {
   120  		n.UUID = s
   121  	} else {
   122  		n.UUID = s[:idx]
   123  		n.Name = s[idx+1:]
   124  	}
   125  	return
   126  }
   127  
   128  func (n Ns) String() (res string) {
   129  	if n.IsGlobal() {
   130  		return
   131  	}
   132  	if n.IsAnyRemote() {
   133  		return string(apc.NsUUIDPrefix)
   134  	}
   135  	if n.UUID != "" {
   136  		res += string(apc.NsUUIDPrefix) + n.UUID
   137  	}
   138  	if n.Name != "" {
   139  		res += string(apc.NsNamePrefix) + n.Name
   140  	}
   141  	return
   142  }
   143  
   144  func (n Ns) Len() int {
   145  	if n.IsGlobal() {
   146  		return len(NsGlobalUname)
   147  	}
   148  	return 2 + len(n.UUID) + len(n.Name)
   149  }
   150  
   151  func (n Ns) Uname() string {
   152  	if n.IsGlobal() {
   153  		return NsGlobalUname
   154  	}
   155  	l := n.Len()
   156  	b := make([]byte, l)
   157  	n._copy(b, l)
   158  	return cos.UnsafeS(b)
   159  }
   160  
   161  func (n Ns) _copy(b []byte, l int) int {
   162  	b[0] = apc.NsUUIDPrefix
   163  	off := 1
   164  	off += copy(b[off:], cos.UnsafeB(n.UUID))
   165  	b[off] = apc.NsNamePrefix
   166  	off++
   167  	off += copy(b[off:], cos.UnsafeB(n.Name))
   168  	debug.Assert(off == l)
   169  	return off
   170  }
   171  
   172  func (n Ns) validate() error {
   173  	if n.IsGlobal() {
   174  		return nil
   175  	}
   176  	if cos.IsAlphaNice(n.UUID) && cos.IsAlphaPlus(n.Name) {
   177  		return nil
   178  	}
   179  	return fmt.Errorf(fmtErrNamespace, n.UUID, n.Name)
   180  }
   181  
   182  func (n Ns) contains(other Ns) bool {
   183  	if n.IsGlobal() {
   184  		// If query is empty (ie., global) we accept any non-remote namespace
   185  		return !other.IsRemote()
   186  	}
   187  	if n.IsAnyRemote() {
   188  		return other.IsRemote()
   189  	}
   190  	if n.UUID == other.UUID && n.Name == "" {
   191  		return true
   192  	}
   193  	return n == other
   194  }
   195  
   196  /////////
   197  // Bck (value)
   198  /////////
   199  
   200  func (b Bck) Equal(other *Bck) bool {
   201  	return b.Name == other.Name && b.Provider == other.Provider && b.Ns == other.Ns
   202  }
   203  
   204  func (b Bck) String() (s string) {
   205  	if b.Ns.IsGlobal() {
   206  		if b.Provider == "" {
   207  			return b.Name
   208  		}
   209  		s = apc.ToScheme(b.Provider) + apc.BckProviderSeparator + b.Name
   210  	} else {
   211  		s = apc.ToScheme(b.Provider) + apc.BckProviderSeparator + b.Ns.String() + "/" + b.Name
   212  	}
   213  	if back := b.Backend(); back != nil {
   214  		s += "->" + back.String()
   215  	}
   216  	return s
   217  }
   218  
   219  // unique name => Bck (use MakeUname above to perform the reverse translation)
   220  func ParseUname(uname string) (b Bck, objName string) {
   221  	var prev, itemIdx int
   222  	for i := range len(uname) {
   223  		if uname[i] != filepath.Separator {
   224  			continue
   225  		}
   226  
   227  		item := uname[prev:i]
   228  		switch itemIdx {
   229  		case 0:
   230  			b.Provider = item
   231  		case 1:
   232  			b.Ns = ParseNsUname(item)
   233  		case 2:
   234  			b.Name = item
   235  			objName = uname[i+1:]
   236  			return
   237  		}
   238  
   239  		itemIdx++
   240  		prev = i + 1
   241  	}
   242  	return
   243  }
   244  
   245  /////////
   246  // Bck (ref)
   247  /////////
   248  
   249  func (b *Bck) Copy(src *Bck) { *b = *src }
   250  
   251  func (b *Bck) Less(other *Bck) bool {
   252  	if QueryBcks(*b).Contains(other) {
   253  		return true
   254  	}
   255  	if b.Provider != other.Provider {
   256  		return b.Provider < other.Provider
   257  	}
   258  	sb, so := b.Ns.String(), other.Ns.String()
   259  	if sb != so {
   260  		return sb < so
   261  	}
   262  	return b.Name < other.Name
   263  }
   264  
   265  func (b *Bck) Validate() (err error) {
   266  	err = b.ValidateName()
   267  	if err == nil {
   268  		err = b.Ns.validate()
   269  	}
   270  	return
   271  }
   272  
   273  func (b *Bck) ValidateName() (err error) {
   274  	if b.Name == "" {
   275  		return errors.New("bucket name is missing")
   276  	}
   277  	if b.Name == "." {
   278  		return fmt.Errorf(fmtErrBckName, b.Name)
   279  	}
   280  	if !cos.IsAlphaPlus(b.Name) {
   281  		err = fmt.Errorf(fmtErrBckName, b.Name)
   282  	}
   283  	return
   284  }
   285  
   286  // ditto
   287  func ValidatePrefix(s string) error {
   288  	if !strings.Contains(s, "../") {
   289  		return nil
   290  	}
   291  	return fmt.Errorf("invalid prefix %q", s)
   292  }
   293  
   294  // canonical name, with or without object
   295  func (b *Bck) Cname(objname string) (s string) {
   296  	sch := apc.ToScheme(b.Provider)
   297  	if b.Ns.IsGlobal() {
   298  		s = sch + apc.BckProviderSeparator + b.Name
   299  	} else {
   300  		s = fmt.Sprintf("%s%s%s/%s", sch, apc.BckProviderSeparator, b.Ns, b.Name)
   301  	}
   302  	if objname == "" {
   303  		return
   304  	}
   305  	return s + cos.PathSeparator + objname
   306  }
   307  
   308  func (b *Bck) IsEmpty() bool {
   309  	return b == nil || (b.Name == "" && b.Provider == "" && b.Ns == NsGlobal)
   310  }
   311  
   312  // QueryBcks (see below) is a Bck that _can_ have an empty Name.
   313  func (b *Bck) IsQuery() bool { return b.Name == "" }
   314  
   315  func (b *Bck) LenUnameGlob(objName string) int {
   316  	return len(b.Provider) + 1 + len(NsGlobalUname) + 1 + len(b.Name) + 1 + len(objName) // compare with the below
   317  }
   318  
   319  // Bck => unique name (use ParseUname below to translate back)
   320  func (b *Bck) MakeUname(objName string) string {
   321  	var (
   322  		// TODO: non-global case can be optimized via b.Ns._copy(buf)
   323  		nsUname = b.Ns.Uname()
   324  		l       = len(b.Provider) + 1 + len(nsUname) + 1 + len(b.Name) + 1 + len(objName) // compare with the above
   325  		buf     = make([]byte, 0, l)
   326  	)
   327  	return b.ubuf(buf, nsUname, objName)
   328  }
   329  
   330  func (b *Bck) ubuf(buf []byte, nsUname, objName string) string {
   331  	buf = append(buf, b.Provider...)
   332  	buf = append(buf, filepath.Separator)
   333  	buf = append(buf, nsUname...)
   334  	buf = append(buf, filepath.Separator)
   335  	buf = append(buf, b.Name...)
   336  	buf = append(buf, filepath.Separator)
   337  	buf = append(buf, objName...)
   338  	return cos.UnsafeS(buf)
   339  }
   340  
   341  //
   342  // Is-Whats
   343  //
   344  
   345  func (n Ns) IsGlobal() bool    { return n == NsGlobal }
   346  func (n Ns) IsAnyRemote() bool { return n == NsAnyRemote }
   347  func (n Ns) IsRemote() bool    { return n.UUID != "" }
   348  
   349  func (b *Bck) Backend() *Bck {
   350  	bprops := b.Props
   351  	if bprops == nil {
   352  		return nil
   353  	}
   354  	if bprops.BackendBck.Name == "" {
   355  		return nil
   356  	}
   357  	return &bprops.BackendBck
   358  }
   359  
   360  func (b *Bck) RemoteBck() *Bck {
   361  	if bck := b.Backend(); bck != nil {
   362  		return bck
   363  	}
   364  	if apc.IsRemoteProvider(b.Provider) || b.IsRemoteAIS() {
   365  		return b
   366  	}
   367  	return nil
   368  }
   369  
   370  func (b *Bck) IsAIS() bool {
   371  	return b.Provider == apc.AIS && !b.Ns.IsRemote() && b.Backend() == nil
   372  }
   373  
   374  func (b *Bck) IsRemoteAIS() bool { return b.Provider == apc.AIS && b.Ns.IsRemote() }
   375  func (b *Bck) IsHTTP() bool      { return b.Provider == apc.HTTP }
   376  
   377  func (b *Bck) IsRemote() bool {
   378  	return apc.IsRemoteProvider(b.Provider) || b.IsRemoteAIS() || b.Backend() != nil
   379  }
   380  
   381  //
   382  // NOTE: for more Is* accessors (e.g. IsRemoteS3), see also: core/meta/bck.go
   383  //
   384  
   385  func (b *Bck) IsCloud() bool {
   386  	if apc.IsCloudProvider(b.Provider) {
   387  		return true
   388  	}
   389  	backend := b.Backend()
   390  	if backend == nil {
   391  		return false
   392  	}
   393  	return apc.IsCloudProvider(backend.Provider)
   394  }
   395  
   396  // A subset of remote backends that maintain assorted items of versioning information -
   397  // the items including ETag, checksum, etc. - that, in turn, can be used to populate `ObjAttrs`
   398  // * see related: `ObjAttrs.Equal`
   399  func (b *Bck) HasVersioningMD() bool { return b.IsCloud() || b.IsRemoteAIS() }
   400  
   401  func (b *Bck) HasProvider() bool { return b.Provider != "" }
   402  
   403  //
   404  // useful helpers
   405  //
   406  
   407  func (b *Bck) NewQuery() (q url.Values) {
   408  	q = make(url.Values, 1)
   409  	if b.Provider != "" {
   410  		q.Set(apc.QparamProvider, b.Provider)
   411  	}
   412  	if !b.Ns.IsGlobal() {
   413  		q.Set(apc.QparamNamespace, b.Ns.Uname())
   414  	}
   415  	return
   416  }
   417  
   418  func (b *Bck) AddToQuery(query url.Values) url.Values {
   419  	if b.Provider != "" {
   420  		if query == nil {
   421  			query = make(url.Values, 1)
   422  		}
   423  		query.Set(apc.QparamProvider, b.Provider)
   424  	}
   425  	if !b.Ns.IsGlobal() {
   426  		if query == nil {
   427  			query = make(url.Values)
   428  		}
   429  		query.Set(apc.QparamNamespace, b.Ns.Uname())
   430  	}
   431  	return query
   432  }
   433  
   434  func (b *Bck) AddUnameToQuery(query url.Values, uparam string) url.Values {
   435  	if query == nil {
   436  		query = make(url.Values)
   437  	}
   438  	uname := b.MakeUname("")
   439  	query.Set(uparam, uname)
   440  	return query
   441  }
   442  
   443  func DelBckFromQuery(query url.Values) url.Values {
   444  	query.Del(apc.QparamProvider)
   445  	query.Del(apc.QparamNamespace)
   446  	return query
   447  }
   448  
   449  ///////////////
   450  // QueryBcks //
   451  ///////////////
   452  
   453  // QueryBcks is a Bck that _can_ have an empty Name. (TODO: extend to support prefix and regex.)
   454  func (qbck *QueryBcks) IsBucket() bool { return !(*Bck)(qbck).IsQuery() }
   455  
   456  func (qbck QueryBcks) String() string {
   457  	if qbck.IsEmpty() {
   458  		return ""
   459  	}
   460  	if qbck.Name == "" {
   461  		p := qbck.Provider
   462  		if p == "" {
   463  			p = apc.AIS // querying default = apc.NormalizeProvider("")
   464  		}
   465  		if qbck.Ns.IsGlobal() {
   466  			return apc.ToScheme(p) + apc.BckProviderSeparator
   467  		}
   468  		return fmt.Sprintf("%s%s%s", apc.ToScheme(p), apc.BckProviderSeparator, qbck.Ns)
   469  	}
   470  	b := Bck(qbck)
   471  	return b.String()
   472  }
   473  
   474  func (qbck *QueryBcks) IsAIS() bool       { b := (*Bck)(qbck); return b.IsAIS() }
   475  func (qbck *QueryBcks) IsHTTP() bool      { b := (*Bck)(qbck); return b.IsHTTP() }
   476  func (qbck *QueryBcks) IsRemoteAIS() bool { b := (*Bck)(qbck); return b.IsRemoteAIS() }
   477  func (qbck *QueryBcks) IsCloud() bool     { return apc.IsCloudProvider(qbck.Provider) }
   478  
   479  func (qbck *QueryBcks) IsEmpty() bool { b := (*Bck)(qbck); return b.IsEmpty() }
   480  
   481  func (qbck *QueryBcks) NewQuery() url.Values {
   482  	bck := (*Bck)(qbck)
   483  	return bck.NewQuery()
   484  }
   485  
   486  func (qbck *QueryBcks) AddToQuery(query url.Values) {
   487  	bck := (*Bck)(qbck)
   488  	_ = bck.AddToQuery(query)
   489  }
   490  
   491  func (qbck *QueryBcks) Validate() (err error) {
   492  	if qbck.Name != "" {
   493  		bck := Bck(*qbck)
   494  		if err := bck.ValidateName(); err != nil {
   495  			return err
   496  		}
   497  	}
   498  	if qbck.Provider != "" {
   499  		qbck.Provider, err = NormalizeProvider(qbck.Provider)
   500  		if err != nil {
   501  			return err
   502  		}
   503  	}
   504  	if qbck.Ns != NsGlobal && qbck.Ns != NsAnyRemote {
   505  		return qbck.Ns.validate()
   506  	}
   507  	return nil
   508  }
   509  
   510  func (qbck QueryBcks) Equal(bck *Bck) bool { return Bck(qbck).Equal(bck) }
   511  
   512  func (qbck QueryBcks) Contains(other *Bck) bool {
   513  	if qbck.Name != "" {
   514  		// NOTE: named bucket with no provider is assumed to be ais://
   515  		if other.Provider == "" {
   516  			other.Provider = apc.AIS
   517  		}
   518  		if qbck.Provider == "" {
   519  			qbck.Provider = other.Provider //nolint:revive // if not set we match the expected
   520  		}
   521  		return qbck.Equal(other)
   522  	}
   523  	ok := qbck.Provider == other.Provider || qbck.Provider == ""
   524  	return ok && qbck.Ns.contains(other.Ns)
   525  }
   526  
   527  //////////
   528  // Bcks //
   529  //////////
   530  
// interface guard: compile-time check that Bcks satisfies sort.Interface
var _ sort.Interface = (*Bcks)(nil)
   533  
   534  func (bcks Bcks) Len() int {
   535  	return len(bcks)
   536  }
   537  
   538  func (bcks Bcks) Less(i, j int) bool {
   539  	return bcks[i].Less(&bcks[j])
   540  }
   541  
   542  func (bcks Bcks) Swap(i, j int) {
   543  	bcks[i], bcks[j] = bcks[j], bcks[i]
   544  }
   545  
   546  func (bcks Bcks) Select(query QueryBcks) (filtered Bcks) {
   547  	for i := range bcks {
   548  		if query.Contains(&bcks[i]) {
   549  			filtered = append(filtered, bcks[i])
   550  		}
   551  	}
   552  	return filtered
   553  }
   554  
   555  func (bcks Bcks) Equal(other Bcks) bool {
   556  	if len(bcks) != len(other) {
   557  		return false
   558  	}
   559  	for i := range bcks {
   560  		var found bool
   561  		for j := range other {
   562  			if bcks[i].Equal(&other[j]) {
   563  				found = true
   564  				break
   565  			}
   566  		}
   567  		if !found {
   568  			return false
   569  		}
   570  	}
   571  	return true
   572  }
   573  
   574  ////////////////
   575  // HTTPBckObj //
   576  ////////////////
   577  
   578  func NewHTTPObj(u *url.URL) *HTTPBckObj {
   579  	hbo := &HTTPBckObj{
   580  		Bck: Bck{
   581  			Provider: apc.HTTP,
   582  			Ns:       NsGlobal,
   583  		},
   584  	}
   585  	hbo.OrigURLBck, hbo.ObjName = filepath.Split(u.Path)
   586  	hbo.OrigURLBck = u.Scheme + apc.BckProviderSeparator + u.Host + hbo.OrigURLBck
   587  	hbo.Bck.Name = OrigURLBck2Name(hbo.OrigURLBck)
   588  	return hbo
   589  }
   590  
   591  func NewHTTPObjPath(rawURL string) (*HTTPBckObj, error) {
   592  	urlObj, err := url.ParseRequestURI(rawURL)
   593  	if err != nil {
   594  		return nil, err
   595  	}
   596  	return NewHTTPObj(urlObj), nil
   597  }