github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/api/ls.go (about)

     1  // Package api provides native Go-based API/SDK over HTTP(S).
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package api
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"net/http"
    11  	"net/url"
    12  	"strconv"
    13  	"time"
    14  
    15  	"github.com/NVIDIA/aistore/api/apc"
    16  	"github.com/NVIDIA/aistore/cmn"
    17  	"github.com/NVIDIA/aistore/cmn/cos"
    18  	"github.com/NVIDIA/aistore/cmn/debug"
    19  	"github.com/NVIDIA/aistore/cmn/mono"
    20  )
    21  
    22  const (
    23  	maxListPageRetries = 3
    24  
    25  	msgpBufSize = 16 * cos.KiB
    26  )
    27  
    28  type (
    29  	LsoCounter struct {
    30  		startTime int64 // time operation started
    31  		callAfter int64 // callback after
    32  		callback  LsoCB
    33  		count     int
    34  		done      bool
    35  	}
    36  	LsoCB func(*LsoCounter)
    37  
    38  	// additional and optional list-objects args (compare with: GetArgs, PutArgs)
    39  	ListArgs struct {
    40  		Callback  LsoCB
    41  		CallAfter time.Duration
    42  		Header    http.Header // to optimize listing very large buckets, e.g.: Header.Set(apc.HdrInventory, "true")
    43  		Limit     int64
    44  	}
    45  )
    46  
    47  // ListBuckets returns buckets for provided query, where
    48  // - `fltPresence` is one of { apc.FltExists, apc.FltPresent, ... } - see api/apc/query.go
    49  // - ListBuckets utilizes `cmn.QueryBcks` - control structure that's practically identical to `cmn.Bck`,
    50  // except for the fact that some or all its fields can be empty (to facilitate the corresponding query).
    51  // See also: QueryBuckets, ListObjects
    52  func ListBuckets(bp BaseParams, qbck cmn.QueryBcks, fltPresence int) (cmn.Bcks, error) {
    53  	q := make(url.Values, 4)
    54  	q.Set(apc.QparamFltPresence, strconv.Itoa(fltPresence))
    55  	qbck.AddToQuery(q)
    56  
    57  	bp.Method = http.MethodGet
    58  	reqParams := AllocRp()
    59  	{
    60  		reqParams.BaseParams = bp
    61  		reqParams.Path = apc.URLPathBuckets.S
    62  		// NOTE: bucket name
    63  		// - qbck.IsBucket() to differentiate between list-objects and list-buckets (operations)
    64  		// - list-buckets own correctness (see QueryBuckets below)
    65  		reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActList, Name: qbck.Name})
    66  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
    67  		reqParams.Query = q
    68  	}
    69  	bcks := cmn.Bcks{}
    70  	_, err := reqParams.DoReqAny(&bcks)
    71  	FreeRp(reqParams)
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	return bcks, nil
    76  }
    77  
    78  // QueryBuckets is a little convenience helper. It returns true if the selection contains
    79  // at least one bucket that satisfies the (qbck) criteria.
    80  // - `fltPresence` - as per QparamFltPresence enum (see api/apc/query.go)
    81  func QueryBuckets(bp BaseParams, qbck cmn.QueryBcks, fltPresence int) (bool, error) {
    82  	bcks, err := ListBuckets(bp, qbck, fltPresence)
    83  	return len(bcks) > 0, err
    84  }
    85  
    86  // ListObjects returns a list of objects in a bucket - a slice of structures in the
    87  // `cmn.LsoRes` that look like `cmn.LsoEnt`.
    88  //
    89  // The `numObjects` argument is the maximum number of objects to be returned
    90  // (where 0 (zero) means returning all objects in the bucket).
    91  //
    92  // This API supports numerous options and flags. In particular, `apc.LsoMsg`
    93  // structure supports "opening" objects formatted as one of the supported
    94  // archival types and include contents of archived directories in generated
    95  // result sets.
    96  //
    97  // In addition, `lsmsg` (`apc.LsoMsg`) provides options (flags) to optimize
    98  // the request's latency, to list anonymous public-access Cloud buckets, and more.
    99  // Further details at `api/apc/lsmsg.go` source.
   100  //
   101  // AIS supports listing buckets that have millions of objects.
   102  // For large and very large buckets, it is strongly recommended to use the
   103  // `ListObjectsPage` API - effectively, an iterator returning _next_
   104  // listed page along with associated _continuation token_.
   105  //
   106  // See also:
   107  // - docs/cli/* for CLI usage examples
   108  // - `apc.LsoMsg`
   109  // - `api.ListObjectsPage`
   110  func ListObjects(bp BaseParams, bck cmn.Bck, lsmsg *apc.LsoMsg, args ListArgs) (*cmn.LsoRes, error) {
   111  	reqParams := lsoReq(bp, bck, &args)
   112  	if lsmsg == nil {
   113  		lsmsg = &apc.LsoMsg{}
   114  	} else {
   115  		lsmsg.UUID, lsmsg.ContinuationToken = "", "" // new
   116  	}
   117  	lst, err := lso(reqParams, lsmsg, args)
   118  
   119  	freeMbuf(reqParams.buf)
   120  	FreeRp(reqParams)
   121  	return lst, err
   122  }
   123  
   124  func lsoReq(bp BaseParams, bck cmn.Bck, args *ListArgs) *ReqParams {
   125  	hdr := args.Header
   126  	if hdr == nil {
   127  		hdr = make(http.Header, 2)
   128  	}
   129  
   130  	// NOTE:
   131  	// unlike S3 API (that aistore also provides), native Go-based API always utilizes
   132  	// message pack serialization (of the list-objects results), with performance
   133  	// improvement that proved to be _significant_, esp. in large-scale benchmarks
   134  
   135  	hdr.Set(cos.HdrAccept, cos.ContentMsgPack)
   136  	hdr.Set(cos.HdrContentType, cos.ContentJSON)
   137  	bp.Method = http.MethodGet
   138  	reqParams := AllocRp()
   139  	{
   140  		reqParams.BaseParams = bp
   141  		reqParams.Path = apc.URLPathBuckets.Join(bck.Name)
   142  		reqParams.Header = hdr
   143  		reqParams.Query = bck.NewQuery()
   144  		reqParams.buf = allocMbuf() // msgpack
   145  	}
   146  	return reqParams
   147  }
   148  
   149  // `toRead` holds the remaining number of objects to list (that is, unless we are listing
   150  // the entire bucket). Each iteration lists a page of objects and reduces `toRead`
   151  // accordingly. When the latter gets below page size, we perform the final
   152  // iteration for the reduced page.
   153  func lso(reqParams *ReqParams, lsmsg *apc.LsoMsg, args ListArgs) (lst *cmn.LsoRes, _ error) {
   154  	var (
   155  		ctx     *LsoCounter
   156  		toRead  = args.Limit
   157  		listAll = args.Limit == 0
   158  	)
   159  	if args.Callback != nil {
   160  		ctx = &LsoCounter{startTime: mono.NanoTime(), callback: args.Callback, count: -1}
   161  		ctx.callAfter = ctx.startTime + args.CallAfter.Nanoseconds()
   162  	}
   163  	for pageNum := 1; listAll || toRead > 0; pageNum++ {
   164  		if !listAll {
   165  			lsmsg.PageSize = toRead
   166  		}
   167  		actMsg := apc.ActMsg{Action: apc.ActList, Value: lsmsg}
   168  		reqParams.Body = cos.MustMarshal(actMsg)
   169  
   170  		page, err := lsoPage(reqParams)
   171  		if err != nil {
   172  			return nil, err
   173  		}
   174  		if pageNum == 1 {
   175  			lst = page
   176  			lsmsg.UUID = page.UUID
   177  			debug.Assert(cos.IsValidUUID(lst.UUID), lst.UUID)
   178  		} else {
   179  			lst.Entries = append(lst.Entries, page.Entries...)
   180  			lst.ContinuationToken = page.ContinuationToken
   181  			lst.Flags |= page.Flags
   182  			debug.Assert(lst.UUID == page.UUID, lst.UUID, page.UUID)
   183  		}
   184  		if ctx != nil && ctx.mustCall() {
   185  			ctx.count = len(lst.Entries)
   186  			if page.ContinuationToken == "" {
   187  				ctx.finish()
   188  			}
   189  			ctx.callback(ctx)
   190  		}
   191  		if page.ContinuationToken == "" { // listed all pages
   192  			break
   193  		}
   194  		toRead = max(toRead-int64(len(page.Entries)), 0)
   195  		lsmsg.ContinuationToken = page.ContinuationToken
   196  	}
   197  	return lst, nil
   198  }
   199  
   200  // w/ limited retry and increasing timeout
   201  func lsoPage(reqParams *ReqParams) (_ *cmn.LsoRes, err error) {
   202  	for range maxListPageRetries {
   203  		page := &cmn.LsoRes{}
   204  		if _, err = reqParams.DoReqAny(page); err == nil {
   205  			return page, nil
   206  		}
   207  		if !errors.Is(err, context.DeadlineExceeded) {
   208  			break
   209  		}
   210  		client := *reqParams.BaseParams.Client
   211  		client.Timeout += client.Timeout >> 1
   212  		reqParams.BaseParams.Client = &client
   213  	}
   214  	return nil, err
   215  }
   216  
   217  // ListObjectsPage returns the first page of bucket objects.
   218  // On success the function updates `lsmsg.ContinuationToken` which client then can reuse
   219  // to fetch the next page.
   220  // See also:
   221  // - docs/cli/* for CLI usage examples
   222  // - `apc.LsoMsg`
   223  // - `api.ListObjects`
   224  func ListObjectsPage(bp BaseParams, bck cmn.Bck, lsmsg *apc.LsoMsg, args ListArgs) (*cmn.LsoRes, error) {
   225  	reqParams := lsoReq(bp, bck, &args)
   226  	if lsmsg == nil {
   227  		lsmsg = &apc.LsoMsg{}
   228  	}
   229  	actMsg := apc.ActMsg{Action: apc.ActList, Value: lsmsg}
   230  	reqParams.Body = cos.MustMarshal(actMsg)
   231  
   232  	// no need to preallocate bucket entries slice (msgpack does it)
   233  	page := &cmn.LsoRes{}
   234  	_, err := reqParams.DoReqAny(page)
   235  	freeMbuf(reqParams.buf)
   236  	FreeRp(reqParams)
   237  	if err != nil {
   238  		return nil, err
   239  	}
   240  	lsmsg.UUID = page.UUID
   241  	lsmsg.ContinuationToken = page.ContinuationToken
   242  	return page, nil
   243  }
   244  
   245  // TODO: obsolete this function after introducing mechanism to detect remote bucket changes.
   246  func ListObjectsInvalidateCache(bp BaseParams, bck cmn.Bck) error {
   247  	var (
   248  		path = apc.URLPathBuckets.Join(bck.Name)
   249  		q    = url.Values{}
   250  	)
   251  	bp.Method = http.MethodPost
   252  	reqParams := AllocRp()
   253  	{
   254  		reqParams.Query = bck.AddToQuery(q)
   255  		reqParams.BaseParams = bp
   256  		reqParams.Path = path
   257  		reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActInvalListCache})
   258  		reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}}
   259  	}
   260  	err := reqParams.DoRequest()
   261  	FreeRp(reqParams)
   262  	return err
   263  }
   264  
   265  ////////////////
   266  // LsoCounter //
   267  ////////////////
   268  
   269  func (ctx *LsoCounter) IsFinished() bool       { return ctx.done }
   270  func (ctx *LsoCounter) Elapsed() time.Duration { return mono.Since(ctx.startTime) }
   271  func (ctx *LsoCounter) Count() int             { return ctx.count }
   272  
   273  // private
   274  
   275  func (ctx *LsoCounter) mustCall() bool {
   276  	return ctx.callAfter == ctx.startTime /*immediate*/ ||
   277  		mono.NanoTime() >= ctx.callAfter
   278  }
   279  
   280  func (ctx *LsoCounter) finish() { ctx.done = true }