github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/prxlso.go

// Package ais provides core functionality for the AIStore object storage.
/*
 * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
 */
package ais

import (
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/atomic"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/mono"
	"github.com/NVIDIA/aistore/hk"
)

// Brief theory of operation ================================================
//
// * BUFFER - container for a single request that keeps entries so they won't
//   be re-requested. Buffering eliminates the case where a given object is
//   requested more than once.
// * CACHE  - container shared by multiple requests identified by the same ID.
//   Caching lets us reuse the results computed for previous requests.
//
// Buffering is designed to work for a single request and is identified by
// the list-objects uuid. Each buffer consists of:
// - a *main buffer* that contains entries ready to be returned to the
//   client (user), and
// - *leftovers* - per-target structures consisting of entries that couldn't
//   be included in the *main buffer* yet.
// When a buffer doesn't contain enough entries, new entries are loaded and
// added to *leftovers*. They are then merged and moved into the *main buffer*
// so they can be returned to the client.
//
// Caching is thread-safe and is used across multiple requests (clients).
// Each request is identified by its `cacheReqID`. List-objects requests
// that share the same ID also share a common cache.
//
// The cache consists of contiguous intervals of `cmn.LsoEnt`.
// A cached response (to a request) is valid if and only if the request can be
// fulfilled by a single cache interval (otherwise, the cache cannot be
// trusted, as we don't know how many objects fit in the requested interval).
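//
// Worked example (illustrative only): with a page size of 2 and a bucket
// holding objects "a", "b", "c", "d", the first page request (token "")
// caches the interval {token: "", entries: [a, b]}; the next request
// (token "b") caches {token: "b", entries: [c, d]}. Because the second
// interval's token equals the last entry of the first, the two merge into
// a single interval {token: "", entries: [a, b, c, d]}, which can then
// serve any sub-range of these four names directly from memory.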
// internal timers (rough estimates)
const (
	cacheIntervalTTL = 10 * time.Minute // *cache interval's* time to live
	lsobjBufferTTL   = 10 * time.Minute // *lsobj buffer* time to live
	qmTimeHk         = 10 * time.Minute // housekeeping timer
	qmTimeHkMax      = time.Hour        // max HK time (when no activity whatsoever)
)

type (
	// Request buffer per target.
	lsobjBufferTarget struct {
		// Leftover entries that we keep locally so they will not be requested
		// again by the proxy. `currentBuff` is extended from these.
		entries cmn.LsoEntries
		// Determines if the target is done with listing.
		done bool
	}

	// Request buffer that corresponds to a single `uuid`.
	lsobjBuffer struct {
		// Contains the last entry that was returned to the user.
		nextToken string
		// Currently maintained buffer that keeps the entries sorted
		// and ready to be dispatched to the client.
		currentBuff cmn.LsoEntries
		// Per-target buffers that are eventually merged, with the entries
		// appended to `currentBuff`.
		leftovers map[string]*lsobjBufferTarget // targetID (string) -> target buffer
		// Timestamp of the last access to this buffer. Idle buffers get removed
		// after `lsobjBufferTTL`.
		lastAccess atomic.Int64
	}

	// Contains all lsobj buffers.
	lsobjBuffers struct {
		buffers sync.Map // request uuid (string) -> buffer (*lsobjBuffer)
	}

	// Cache request ID. It identifies requests and partitions them across
	// the multiple caches they can use.
	cacheReqID struct {
		bck    *cmn.Bck
		prefix string
	}

	// Single (contiguous) interval of `cmn.LsoEnt`.
	cacheInterval struct {
		// Contains the previous entry (`ContinuationToken`) that was requested
		// to get this interval. Thanks to this we can match and merge two
		// adjacent intervals.
		token string
		// Entries contained in this interval. They are sorted and ready
		// to be dispatched to the client.
		entries cmn.LsoEntries
		// Timestamp of the last access to this interval. Idle intervals
		// get removed after `cacheIntervalTTL`.
		lastAccess int64
		// Determines if this is the last page/interval (no more objects after
		// the last entry).
		last bool
	}

	// Contains additional parameters of an interval request.
	reqParams struct {
		prefix string
	}

	// Single cache that corresponds to a single `cacheReqID`.
	lsobjCache struct {
		mtx       sync.RWMutex
		intervals []*cacheInterval
	}

	// Contains all lsobj caches.
	lsobjCaches struct {
		caches sync.Map // cache id (cacheReqID) -> cache (*lsobjCache)
	}

	lsobjMem struct {
		b *lsobjBuffers
		c *lsobjCaches
		d time.Duration
	}
)

func (qm *lsobjMem) init() {
	qm.b = &lsobjBuffers{}
	qm.c = &lsobjCaches{}
	qm.d = qmTimeHk
	hk.Reg("lsobj-buffer-cache"+hk.NameSuffix, qm.housekeep, qmTimeHk)
}

func (qm *lsobjMem) housekeep() time.Duration {
	num := qm.b.housekeep()
	num += qm.c.housekeep()
	if num == 0 {
		qm.d = min(qm.d+qmTimeHk, qmTimeHkMax)
	} else {
		qm.d = qmTimeHk
	}
	return qm.d
}

/////////////////
// lsobjBuffer //
/////////////////

// mergeTargetBuffers merges `b.leftovers` buffers into `b.currentBuff`.
// It returns `filled == true` if there was anything to merge, otherwise `false`.
func (b *lsobjBuffer) mergeTargetBuffers() (filled bool) {
	var (
		totalCnt int
		allDone  = true
	)
	// If `b.leftovers` is empty then there was no initial `set`.
	if len(b.leftovers) == 0 {
		return false
	}
	for _, list := range b.leftovers {
		totalCnt += len(list.entries)
		allDone = allDone && list.done
	}
	// If there are no entries and some targets are not yet done, then `set`
	// hasn't been called for them yet.
	if totalCnt == 0 && !allDone {
		return false
	}

	var (
		minObj  string
		entries = make(cmn.LsoEntries, 0, totalCnt)
	)
	for _, list := range b.leftovers {
		// Trim the list at the first nil entry, if any.
		for i := range list.entries {
			if list.entries[i] == nil {
				list.entries = list.entries[:i]
				break
			}
		}
		entries = append(entries, list.entries...)

		if list.done || len(list.entries) == 0 {
			continue
		}
		// Track the smallest last-entry name among the not-yet-done targets.
		if minObj == "" || list.entries[len(list.entries)-1].Name < minObj {
			minObj = list.entries[len(list.entries)-1].Name
		}
	}

	cmn.SortLso(entries)

	// Entries past `minObj` cannot be returned yet: a target that is not done
	// may still produce names that sort before them.
	if minObj != "" {
		idx := sort.Search(len(entries), func(i int) bool {
			return entries[i].Name > minObj
		})
		entries = entries[:idx]
	}
	for id := range b.leftovers {
		b.leftovers[id].entries = nil
	}
	b.currentBuff = append(b.currentBuff, entries...)
	return true
}
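// Concrete case of the `minObj` trimming above (illustrative only): target t1
// (not done) has leftovers ["a", "d"]; target t2 (not done) has ["b", "c", "e"].
// The merged, sorted result ["a", "b", "c", "d", "e"] is cut after "d", the
// smallest last-name among the not-yet-done targets, because t1 may still
// produce names between "d" and "e"; returning "e" now could break ordering.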
func (b *lsobjBuffer) get(token string, size int64) (entries cmn.LsoEntries, hasEnough bool) {
	b.lastAccess.Store(mono.NanoTime())

	// If the user requested something before what we currently have in the
	// buffer, we just need to forget it.
	if token < b.nextToken {
		b.leftovers = nil
		b.currentBuff = nil
		b.nextToken = token
		return nil, false
	}

	filled := b.mergeTargetBuffers()

	// Move to the first object after the token.
	idx := sort.Search(len(b.currentBuff), func(i int) bool {
		return b.currentBuff[i].Name > token
	})
	entries = b.currentBuff[idx:]

	if size > int64(len(entries)) {
		// If we don't have enough entries and nothing was merged, we must
		// request more (if something was merged and we still don't have
		// enough, it is because the listing has ended).
		if !filled {
			return nil, false
		}
		size = int64(len(entries))
	}

	// Move the buffer past the returned entries.
	b.currentBuff = entries[size:]
	// Select only the entries that need to be returned to the user.
	entries = entries[:size]
	if len(entries) > 0 {
		b.nextToken = entries[len(entries)-1].Name
	}
	return entries, true
}

func (b *lsobjBuffer) set(id string, entries cmn.LsoEntries, size int64) {
	if b.leftovers == nil {
		b.leftovers = make(map[string]*lsobjBufferTarget, 5)
	}
	// A target that returned fewer entries than requested is done listing.
	b.leftovers[id] = &lsobjBufferTarget{
		entries: entries,
		done:    len(entries) < int(size),
	}
	b.lastAccess.Store(mono.NanoTime())
}

func (b *lsobjBuffers) last(id, token string) string {
	v, ok := b.buffers.LoadOrStore(id, &lsobjBuffer{})
	if !ok {
		return token
	}
	buffer := v.(*lsobjBuffer)
	if len(buffer.currentBuff) == 0 {
		return token
	}
	last := buffer.currentBuff[len(buffer.currentBuff)-1].Name
	if cmn.TokenGreaterEQ(token, last) {
		return token
	}
	return last
}

func (b *lsobjBuffers) get(id, token string, size int64) (entries cmn.LsoEntries, hasEnough bool) {
	v, _ := b.buffers.LoadOrStore(id, &lsobjBuffer{})
	return v.(*lsobjBuffer).get(token, size)
}

func (b *lsobjBuffers) set(id, targetID string, entries cmn.LsoEntries, size int64) {
	v, _ := b.buffers.LoadOrStore(id, &lsobjBuffer{})
	v.(*lsobjBuffer).set(targetID, entries, size)
}

func (b *lsobjBuffers) housekeep() (num int) {
	b.buffers.Range(func(key, value any) bool {
		buffer := value.(*lsobjBuffer)
		num++
		if mono.Since(buffer.lastAccess.Load()) > lsobjBufferTTL {
			b.buffers.Delete(key)
		}
		return true
	})
	return
}

///////////////////
// cacheInterval //
///////////////////

func (ci *cacheInterval) contains(token string) bool {
	if ci.token == token {
		return true
	}
	if len(ci.entries) > 0 {
		return ci.entries[0].Name <= token && token <= ci.entries[len(ci.entries)-1].Name
	}
	return false
}
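// For instance (illustrative only): the interval {token: "a", entries: ["b", "c"]}
// contains the token "a" (exact token match) as well as any token in the
// lexicographic range ["b", "c"]; a request continuing from token "d" falls
// past the last entry and cannot be served by this interval.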
func (ci *cacheInterval) get(token string, objCnt int64, params reqParams) (entries cmn.LsoEntries, hasEnough bool) {
	ci.lastAccess = mono.NanoTime()
	entries = ci.entries

	start := ci.find(token)
	if params.prefix != "" {
		// Move `start` to the first entry that starts with `params.prefix`.
		for ; start < uint(len(entries)); start++ {
			if strings.HasPrefix(entries[start].Name, params.prefix) {
				break
			}
			if entries[start].Name > params.prefix {
				// The prefix is fully contained in the interval but there are
				// no matching entries, examples:
				//  * interval = ["a", "z"], token = "", objCnt = 1, prefix = "b"
				//  * interval = ["a", "z"], token = "a", objCnt = 1, prefix = "b"
				return cmn.LsoEntries{}, true
			}
		}
		if !ci.last && start == uint(len(entries)) {
			// The prefix is out of the interval (right boundary), examples:
			//  * interval = ["b", "y"], token = "", objCnt = 1, prefix = "z"
			//  * interval = ["b", "y"], token = "", objCnt = 1, prefix = "ya"
			return nil, false
		}
	}
	entries = entries[start:]

	end := min(len(entries), int(objCnt))
	if params.prefix != "" {
		// Move `end-1` to the last entry that starts with `params.prefix`.
		for ; end > 0; end-- {
			if strings.HasPrefix(entries[end-1].Name, params.prefix) {
				break
			}
		}
		if !ci.last && end < len(entries) {
			// We filtered out trailing entries that don't start with
			// `params.prefix`, so all matching entries are fully contained
			// in the interval, examples:
			//  * interval = ["a", "ma", "mb", "z"], token = "", objCnt = 4, prefix = "m"
			//  * interval = ["a", "z"], token = "", objCnt = 2, prefix = "a"
			return entries[:end], true
		}
	}
	entries = entries[:end]

	if ci.last || len(entries) >= int(objCnt) {
		return entries, true
	}
	return nil, false
}

func (ci *cacheInterval) find(token string) (idx uint) {
	if ci.token == token {
		return 0
	}
	return uint(sort.Search(len(ci.entries), func(i int) bool {
		return ci.entries[i].Name > token
	}))
}

func (ci *cacheInterval) append(objs *cacheInterval) {
	idx := ci.find(objs.token)
	ci.entries = append(ci.entries[:idx], objs.entries...)
	ci.last = objs.last
	ci.lastAccess = mono.NanoTime()
}

func (ci *cacheInterval) prepend(objs *cacheInterval) {
	debug.Assert(!objs.last)
	objs.append(ci)
	*ci = *objs
}

////////////////
// lsobjCache //
////////////////

// PRECONDITION: `c.mtx` must be at least rlocked.
func (c *lsobjCache) findInterval(token string) *cacheInterval {
	// TODO: finding intervals should be faster than just walking.
	for _, interval := range c.intervals {
		if interval.contains(token) {
			return interval
		}
	}
	return nil
}

// PRECONDITION: `c.mtx` must be locked.
func (c *lsobjCache) merge(start, end, cur *cacheInterval) {
	debug.AssertRWMutexLocked(&c.mtx)

	if start == nil && end == nil {
		// `cur` overlaps with no existing interval.
		c.intervals = append(c.intervals, cur)
	} else if start != nil && end == nil {
		// `cur` extends `start` to the right.
		start.append(cur)
	} else if start == nil && end != nil {
		// `cur` extends `end` to the left.
		end.prepend(cur)
	} else if start != nil && end != nil {
		if start == end {
			// `cur` is already part of some interval.
			return
		}
		// `cur` bridges the gap between `start` and `end`.
		start.append(cur)
		start.append(end)
		c.removeInterval(end)
	} else {
		debug.Assert(false)
	}
}
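// Example of the start != nil && end != nil case above (illustrative only):
// pages cached out of order produce the disjoint intervals
// {token: "", entries: [a, b]} and {token: "d", entries: [e, f]}.
// A later set() with token "b" and entries [c, d] finds the first interval
// as `start` (it contains "b") and the second as `end` (its token is "d"),
// so all three pieces collapse into {token: "", entries: [a, b, c, d, e, f]}.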
// PRECONDITION: `c.mtx` must be locked.
func (c *lsobjCache) removeInterval(ci *cacheInterval) {
	debug.AssertRWMutexLocked(&c.mtx)

	// TODO: this should be faster.
	for idx := range c.intervals {
		if c.intervals[idx] == ci {
			ci.entries = nil
			c.intervals = append(c.intervals[:idx], c.intervals[idx+1:]...)
			return
		}
	}
}

func (c *lsobjCache) get(token string, objCnt int64, params reqParams) (entries cmn.LsoEntries, hasEnough bool) {
	c.mtx.RLock()
	if interval := c.findInterval(token); interval != nil {
		entries, hasEnough = interval.get(token, objCnt, params)
	}
	c.mtx.RUnlock()
	return
}

func (c *lsobjCache) set(token string, entries cmn.LsoEntries, size int64) {
	var (
		end *cacheInterval
		cur = &cacheInterval{
			token:      token,
			entries:    entries,
			last:       len(entries) < int(size),
			lastAccess: mono.NanoTime(),
		}
	)
	c.mtx.Lock()
	start := c.findInterval(token)
	if len(cur.entries) > 0 {
		end = c.findInterval(entries[len(entries)-1].Name)
	}
	c.merge(start, end, cur)
	c.mtx.Unlock()
}

func (c *lsobjCache) invalidate() {
	c.mtx.Lock()
	c.intervals = nil
	c.mtx.Unlock()
}

/////////////////
// lsobjCaches //
/////////////////

func (c *lsobjCaches) get(reqID cacheReqID, token string, objCnt int64) (entries cmn.LsoEntries, hasEnough bool) {
	if v, ok := c.caches.Load(reqID); ok {
		if entries, hasEnough = v.(*lsobjCache).get(token, objCnt, reqParams{}); hasEnough {
			return
		}
	}

	// When a `prefix` is requested, we must also check whether there are
	// enough entries with the given prefix in the "main" (whole bucket) cache.
	if reqID.prefix != "" {
		// We must adjust the parameters and the cache ID.
		params := reqParams{prefix: reqID.prefix}
		reqID = cacheReqID{bck: reqID.bck}

		if v, ok := c.caches.Load(reqID); ok {
			return v.(*lsobjCache).get(token, objCnt, params)
		}
	}
	return nil, false
}

func (c *lsobjCaches) set(reqID cacheReqID, token string, entries cmn.LsoEntries, size int64) {
	v, _ := c.caches.LoadOrStore(reqID, &lsobjCache{})
	v.(*lsobjCache).set(token, entries, size)
}

func (c *lsobjCaches) invalidate(bck *cmn.Bck) {
	c.caches.Range(func(key, value any) bool {
		id := key.(cacheReqID)
		if id.bck.Equal(bck) {
			value.(*lsobjCache).invalidate()
		}
		return true
	})
}

// TODO: factor-in memory pressure.
func (c *lsobjCaches) housekeep() (num int) {
	var toRemove []*cacheInterval
	c.caches.Range(func(key, value any) bool {
		cache := value.(*lsobjCache)
		cache.mtx.Lock()
		for _, interval := range cache.intervals {
			num++
			if mono.Since(interval.lastAccess) > cacheIntervalTTL {
				toRemove = append(toRemove, interval)
			}
		}
		for _, interval := range toRemove {
			cache.removeInterval(interval)
		}
		if len(cache.intervals) == 0 {
			c.caches.Delete(key)
		}
		cache.mtx.Unlock()
		toRemove = toRemove[:0]
		return true
	})
	return
}
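// Usage sketch (illustrative only; the actual wiring lives in the proxy's
// list-objects path, outside this file). `uuid`, `token`, `pageSize`, `bck`,
// `prefix`, `targetID`, and `fetched` are hypothetical caller-side values:
//
//	var qm lsobjMem
//	qm.init()
//	// Fast path: serve the next page from the per-request buffer.
//	if entries, hasEnough := qm.b.get(uuid, token, pageSize); hasEnough {
//		return entries
//	}
//	// Slow path: ask each target for a page, then buffer and cache it.
//	qm.b.set(uuid, targetID, fetched, pageSize)
//	qm.c.set(cacheReqID{bck: bck, prefix: prefix}, token, fetched, pageSize)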