// Source: github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/api/ls.go

// Package api provides native Go-based API/SDK over HTTP(S).
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package api

import (
	"context"
	"errors"
	"net/http"
	"net/url"
	"strconv"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
)

const (
	// upper bound on the number of attempts `lsoPage` makes to fetch one page
	maxListPageRetries = 3

	// NOTE(review): not referenced in this file; presumably the size of the
	// msgpack buffer handed out by `allocMbuf` - confirm at its definition
	msgpBufSize = 16 * cos.KiB
)

type (
	// LsoCounter is the progress state that list-objects passes to the user
	// callback (see `LsoCB`); it is created only when `ListArgs.Callback` is set.
	LsoCounter struct {
		startTime int64 // time operation started
		callAfter int64 // callback after
		callback  LsoCB // user callback (copied from `ListArgs.Callback`)
		count     int   // number of entries accumulated so far; -1 until the first callback
		done      bool  // set via finish() once the page without continuation token arrives
	}
	// LsoCB is the type of the progress callback; it receives the counter and
	// can query it via IsFinished, Elapsed, and Count.
	LsoCB func(*LsoCounter)

	// additional and optional list-objects args (compare with: GetArgs, PutArgs)
	ListArgs struct {
		Callback  LsoCB         // optional progress callback
		CallAfter time.Duration // delay (relative to the start of listing) before the callback may fire; zero: immediate
		Header    http.Header   // to optimize listing very large buckets, e.g.: Header.Set(apc.HdrInventory, "true")
		Limit     int64         // maximum number of objects to list; zero: list all
	}
)

// ListBuckets returns buckets for provided query, where
// - `fltPresence` is one of { apc.FltExists, apc.FltPresent, ... } - see api/apc/query.go
// - ListBuckets utilizes `cmn.QueryBcks` - control structure that's practically identical to `cmn.Bck`,
//   except for the fact that some or all its fields can be empty (to facilitate the corresponding query).
51 // See also: QueryBuckets, ListObjects 52 func ListBuckets(bp BaseParams, qbck cmn.QueryBcks, fltPresence int) (cmn.Bcks, error) { 53 q := make(url.Values, 4) 54 q.Set(apc.QparamFltPresence, strconv.Itoa(fltPresence)) 55 qbck.AddToQuery(q) 56 57 bp.Method = http.MethodGet 58 reqParams := AllocRp() 59 { 60 reqParams.BaseParams = bp 61 reqParams.Path = apc.URLPathBuckets.S 62 // NOTE: bucket name 63 // - qbck.IsBucket() to differentiate between list-objects and list-buckets (operations) 64 // - list-buckets own correctness (see QueryBuckets below) 65 reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActList, Name: qbck.Name}) 66 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 67 reqParams.Query = q 68 } 69 bcks := cmn.Bcks{} 70 _, err := reqParams.DoReqAny(&bcks) 71 FreeRp(reqParams) 72 if err != nil { 73 return nil, err 74 } 75 return bcks, nil 76 } 77 78 // QueryBuckets is a little convenience helper. It returns true if the selection contains 79 // at least one bucket that satisfies the (qbck) criteria. 80 // - `fltPresence` - as per QparamFltPresence enum (see api/apc/query.go) 81 func QueryBuckets(bp BaseParams, qbck cmn.QueryBcks, fltPresence int) (bool, error) { 82 bcks, err := ListBuckets(bp, qbck, fltPresence) 83 return len(bcks) > 0, err 84 } 85 86 // ListObjects returns a list of objects in a bucket - a slice of structures in the 87 // `cmn.LsoRes` that look like `cmn.LsoEnt`. 88 // 89 // The `numObjects` argument is the maximum number of objects to be returned 90 // (where 0 (zero) means returning all objects in the bucket). 91 // 92 // This API supports numerous options and flags. In particular, `apc.LsoMsg` 93 // structure supports "opening" objects formatted as one of the supported 94 // archival types and include contents of archived directories in generated 95 // result sets. 
96 // 97 // In addition, `lsmsg` (`apc.LsoMsg`) provides options (flags) to optimize 98 // the request's latency, to list anonymous public-access Cloud buckets, and more. 99 // Further details at `api/apc/lsmsg.go` source. 100 // 101 // AIS supports listing buckets that have millions of objects. 102 // For large and very large buckets, it is strongly recommended to use the 103 // `ListObjectsPage` API - effectively, an iterator returning _next_ 104 // listed page along with associated _continuation token_. 105 // 106 // See also: 107 // - docs/cli/* for CLI usage examples 108 // - `apc.LsoMsg` 109 // - `api.ListObjectsPage` 110 func ListObjects(bp BaseParams, bck cmn.Bck, lsmsg *apc.LsoMsg, args ListArgs) (*cmn.LsoRes, error) { 111 reqParams := lsoReq(bp, bck, &args) 112 if lsmsg == nil { 113 lsmsg = &apc.LsoMsg{} 114 } else { 115 lsmsg.UUID, lsmsg.ContinuationToken = "", "" // new 116 } 117 lst, err := lso(reqParams, lsmsg, args) 118 119 freeMbuf(reqParams.buf) 120 FreeRp(reqParams) 121 return lst, err 122 } 123 124 func lsoReq(bp BaseParams, bck cmn.Bck, args *ListArgs) *ReqParams { 125 hdr := args.Header 126 if hdr == nil { 127 hdr = make(http.Header, 2) 128 } 129 130 // NOTE: 131 // unlike S3 API (that aistore also provides), native Go-based API always utilizes 132 // message pack serialization (of the list-objects results), with performance 133 // improvement that proved to be _significant_, esp. in large-scale benchmarks 134 135 hdr.Set(cos.HdrAccept, cos.ContentMsgPack) 136 hdr.Set(cos.HdrContentType, cos.ContentJSON) 137 bp.Method = http.MethodGet 138 reqParams := AllocRp() 139 { 140 reqParams.BaseParams = bp 141 reqParams.Path = apc.URLPathBuckets.Join(bck.Name) 142 reqParams.Header = hdr 143 reqParams.Query = bck.NewQuery() 144 reqParams.buf = allocMbuf() // msgpack 145 } 146 return reqParams 147 } 148 149 // `toRead` holds the remaining number of objects to list (that is, unless we are listing 150 // the entire bucket). 
Each iteration lists a page of objects and reduces `toRead` 151 // accordingly. When the latter gets below page size, we perform the final 152 // iteration for the reduced page. 153 func lso(reqParams *ReqParams, lsmsg *apc.LsoMsg, args ListArgs) (lst *cmn.LsoRes, _ error) { 154 var ( 155 ctx *LsoCounter 156 toRead = args.Limit 157 listAll = args.Limit == 0 158 ) 159 if args.Callback != nil { 160 ctx = &LsoCounter{startTime: mono.NanoTime(), callback: args.Callback, count: -1} 161 ctx.callAfter = ctx.startTime + args.CallAfter.Nanoseconds() 162 } 163 for pageNum := 1; listAll || toRead > 0; pageNum++ { 164 if !listAll { 165 lsmsg.PageSize = toRead 166 } 167 actMsg := apc.ActMsg{Action: apc.ActList, Value: lsmsg} 168 reqParams.Body = cos.MustMarshal(actMsg) 169 170 page, err := lsoPage(reqParams) 171 if err != nil { 172 return nil, err 173 } 174 if pageNum == 1 { 175 lst = page 176 lsmsg.UUID = page.UUID 177 debug.Assert(cos.IsValidUUID(lst.UUID), lst.UUID) 178 } else { 179 lst.Entries = append(lst.Entries, page.Entries...) 
180 lst.ContinuationToken = page.ContinuationToken 181 lst.Flags |= page.Flags 182 debug.Assert(lst.UUID == page.UUID, lst.UUID, page.UUID) 183 } 184 if ctx != nil && ctx.mustCall() { 185 ctx.count = len(lst.Entries) 186 if page.ContinuationToken == "" { 187 ctx.finish() 188 } 189 ctx.callback(ctx) 190 } 191 if page.ContinuationToken == "" { // listed all pages 192 break 193 } 194 toRead = max(toRead-int64(len(page.Entries)), 0) 195 lsmsg.ContinuationToken = page.ContinuationToken 196 } 197 return lst, nil 198 } 199 200 // w/ limited retry and increasing timeout 201 func lsoPage(reqParams *ReqParams) (_ *cmn.LsoRes, err error) { 202 for range maxListPageRetries { 203 page := &cmn.LsoRes{} 204 if _, err = reqParams.DoReqAny(page); err == nil { 205 return page, nil 206 } 207 if !errors.Is(err, context.DeadlineExceeded) { 208 break 209 } 210 client := *reqParams.BaseParams.Client 211 client.Timeout += client.Timeout >> 1 212 reqParams.BaseParams.Client = &client 213 } 214 return nil, err 215 } 216 217 // ListObjectsPage returns the first page of bucket objects. 218 // On success the function updates `lsmsg.ContinuationToken` which client then can reuse 219 // to fetch the next page. 
220 // See also: 221 // - docs/cli/* for CLI usage examples 222 // - `apc.LsoMsg` 223 // - `api.ListObjects` 224 func ListObjectsPage(bp BaseParams, bck cmn.Bck, lsmsg *apc.LsoMsg, args ListArgs) (*cmn.LsoRes, error) { 225 reqParams := lsoReq(bp, bck, &args) 226 if lsmsg == nil { 227 lsmsg = &apc.LsoMsg{} 228 } 229 actMsg := apc.ActMsg{Action: apc.ActList, Value: lsmsg} 230 reqParams.Body = cos.MustMarshal(actMsg) 231 232 // no need to preallocate bucket entries slice (msgpack does it) 233 page := &cmn.LsoRes{} 234 _, err := reqParams.DoReqAny(page) 235 freeMbuf(reqParams.buf) 236 FreeRp(reqParams) 237 if err != nil { 238 return nil, err 239 } 240 lsmsg.UUID = page.UUID 241 lsmsg.ContinuationToken = page.ContinuationToken 242 return page, nil 243 } 244 245 // TODO: obsolete this function after introducing mechanism to detect remote bucket changes. 246 func ListObjectsInvalidateCache(bp BaseParams, bck cmn.Bck) error { 247 var ( 248 path = apc.URLPathBuckets.Join(bck.Name) 249 q = url.Values{} 250 ) 251 bp.Method = http.MethodPost 252 reqParams := AllocRp() 253 { 254 reqParams.Query = bck.AddToQuery(q) 255 reqParams.BaseParams = bp 256 reqParams.Path = path 257 reqParams.Body = cos.MustMarshal(apc.ActMsg{Action: apc.ActInvalListCache}) 258 reqParams.Header = http.Header{cos.HdrContentType: []string{cos.ContentJSON}} 259 } 260 err := reqParams.DoRequest() 261 FreeRp(reqParams) 262 return err 263 } 264 265 //////////////// 266 // LsoCounter // 267 //////////////// 268 269 func (ctx *LsoCounter) IsFinished() bool { return ctx.done } 270 func (ctx *LsoCounter) Elapsed() time.Duration { return mono.Since(ctx.startTime) } 271 func (ctx *LsoCounter) Count() int { return ctx.count } 272 273 // private 274 275 func (ctx *LsoCounter) mustCall() bool { 276 return ctx.callAfter == ctx.startTime /*immediate*/ || 277 mono.NanoTime() >= ctx.callAfter 278 } 279 280 func (ctx *LsoCounter) finish() { ctx.done = true }