github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/prxlso.go (about)

     1  // Package ais provides core functionality for the AIStore object storage.
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package ais
     6  
     7  import (
     8  	"sort"
     9  	"strings"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/atomic"
    15  	"github.com/NVIDIA/aistore/cmn/debug"
    16  	"github.com/NVIDIA/aistore/cmn/mono"
    17  	"github.com/NVIDIA/aistore/hk"
    18  )
    19  
    20  //  Brief theory of operation ================================================
    21  //
    22  //  * BUFFER - container for a single request that keeps entries so they won't
    23  //    be re-requested. Thanks to buffering, we eliminate the case when a given
    24  //    object is requested more than once.
    25  //  * CACHE  - container shared by multiple requests which are identified with
    26  //    the same id. Thanks to caching, we reuse previously calculated requests.
    27  //
    28  // Buffering is designed to work for a single request and is identified by
    29  // list-objects uuid. Each buffer consists of:
    30  // - a *main buffer* that in turn contains entries ready to be returned to the
    31  // client (user), and
    32  // - *leftovers* - per target structures consisting of entries that couldn't
    33  // be included into the *main buffer* yet.
    34  // When a buffer doesn't contain enough entries, the new entries
    35  // are loaded and added to *leftovers*. After this, they are merged and put
    36  // into the *main buffer* so they can be returned to the client.
    37  //
    38  // Caching is thread safe and is used across multiple requests (clients).
    39  // Each request is identified by its `cacheReqID`. List-objects requests
    40  // that share the same ID will also share a common cache.
    41  //
    42  // Cache consists of contiguous intervals of `cmn.LsoEnt`.
    43  // Cached response (to a request) is valid if and only if the request can be
    44  // fulfilled by a single cache interval (otherwise, cache cannot be trusted
    45  // as we don't know how many objects can fit in the requested interval).
    46  
    47  // internal timers (rough estimates)
    48  const (
    49  	cacheIntervalTTL = 10 * time.Minute // *cache interval's* time to live
    50  	lsobjBufferTTL   = 10 * time.Minute // *lsobj buffer* time to live
    51  	qmTimeHk         = 10 * time.Minute // housekeeping timer
    52  	qmTimeHkMax      = time.Hour        // max HK time (when no activity whatsoever)
    53  )
    54  
    55  type (
    56  	// Request buffer per target.
    57  	lsobjBufferTarget struct {
    58  		// Leftovers entries which we keep locally so they will not be requested
    59  		// again by the proxy. Out of these `currentBuff` is extended.
    60  		entries cmn.LsoEntries
    61  		// Determines if the target is done with listing.
    62  		done bool
    63  	}
    64  
    65  	// Request buffer that corresponds to a single `uuid`.
    66  	lsobjBuffer struct {
    67  		// Contains the last entry that was returned to the user.
    68  		nextToken string
    69  		// Currently maintained buffer that keeps the entries sorted
    70  		// and ready to be dispatched to the client.
    71  		currentBuff cmn.LsoEntries
    72  		// Buffers for each target that are finally merged and the entries are
    73  		// appended to the `currentBuff`.
    74  		leftovers map[string]*lsobjBufferTarget // targetID (string) -> target buffer
    75  		// Timestamp of the last access to this buffer. Idle buffers get removed
    76  		// after `lsobjBufferTTL`.
    77  		lastAccess atomic.Int64
    78  	}
    79  
    80  	// Contains all lsobj buffers.
    81  	lsobjBuffers struct {
    82  		buffers sync.Map // request uuid (string) -> buffer (*lsobjBuffer)
    83  	}
    84  
    85  	// Cache request ID. This identifies and splits requests into
    86  	// multiple caches that these requests can use.
    87  	cacheReqID struct {
    88  		bck    *cmn.Bck
    89  		prefix string
    90  	}
    91  
    92  	// Single (contiguous) interval of `cmn.LsoEnt`.
    93  	cacheInterval struct {
    94  		// Contains the previous entry (`ContinuationToken`) that was requested
    95  		// to get this interval. Thanks to this we can match and merge two
    96  		// adjacent intervals.
    97  		token string
    98  		// Entries that are contained in this interval. They are sorted and ready
    99  		// to be dispatched to the client.
   100  		entries cmn.LsoEntries
   101  		// Contains the timestamp of the last access to this interval. Idle interval
   102  		// gets removed after `cacheIntervalTTL`.
   103  		lastAccess int64
   104  		// Determines if this is the last page/interval (no more objects after
   105  		// the last entry).
   106  		last bool
   107  	}
   108  
   109  	// Contains additional parameters to interval request.
   110  	reqParams struct {
   111  		prefix string
   112  	}
   113  
   114  	// Single cache that corresponds to single `cacheReqID`.
   115  	lsobjCache struct {
   116  		mtx       sync.RWMutex
   117  		intervals []*cacheInterval
   118  	}
   119  
   120  	// Contains all lsobj caches.
   121  	lsobjCaches struct {
   122  		caches sync.Map // cache id (cacheReqID) -> cache (*lsobjCache)
   123  	}
   124  
   125  	lsobjMem struct {
   126  		b *lsobjBuffers
   127  		c *lsobjCaches
   128  		d time.Duration
   129  	}
   130  )
   131  
   132  func (qm *lsobjMem) init() {
   133  	qm.b = &lsobjBuffers{}
   134  	qm.c = &lsobjCaches{}
   135  	qm.d = qmTimeHk
   136  	hk.Reg("lsobj-buffer-cache"+hk.NameSuffix, qm.housekeep, qmTimeHk)
   137  }
   138  
   139  func (qm *lsobjMem) housekeep() time.Duration {
   140  	num := qm.b.housekeep()
   141  	num += qm.c.housekeep()
   142  	if num == 0 {
   143  		qm.d = min(qm.d+qmTimeHk, qmTimeHkMax)
   144  	} else {
   145  		qm.d = qmTimeHk
   146  	}
   147  	return qm.d
   148  }
   149  
   150  /////////////////
   151  // lsobjBuffer //
   152  /////////////////
   153  
   154  // mergeTargetBuffers merges `b.leftovers` buffers into `b.currentBuff`.
   155  // It returns `filled` equal to `true` if there was anything to merge, otherwise `false`.
   156  func (b *lsobjBuffer) mergeTargetBuffers() (filled bool) {
   157  	var (
   158  		totalCnt int
   159  		allDone  = true
   160  	)
   161  	// If `b.leftovers` is empty then there was no initial `set`.
   162  	if len(b.leftovers) == 0 {
   163  		return false
   164  	}
   165  	for _, list := range b.leftovers {
   166  		totalCnt += len(list.entries)
   167  		allDone = allDone && list.done
   168  	}
   169  	// If there are no entries and some targets are not yet done then there wasn't `set`.
   170  	if totalCnt == 0 && !allDone {
   171  		return false
   172  	}
   173  
   174  	var (
   175  		minObj  string
   176  		entries = make(cmn.LsoEntries, 0, totalCnt)
   177  	)
   178  	for _, list := range b.leftovers {
   179  		for i := range list.entries {
   180  			if list.entries[i] == nil {
   181  				list.entries = list.entries[:i]
   182  				break
   183  			}
   184  		}
   185  		entries = append(entries, list.entries...)
   186  
   187  		if list.done || len(list.entries) == 0 {
   188  			continue
   189  		}
   190  		if minObj == "" || list.entries[len(list.entries)-1].Name < minObj {
   191  			minObj = list.entries[len(list.entries)-1].Name
   192  		}
   193  	}
   194  
   195  	cmn.SortLso(entries)
   196  
   197  	if minObj != "" {
   198  		idx := sort.Search(len(entries), func(i int) bool {
   199  			return entries[i].Name > minObj
   200  		})
   201  		entries = entries[:idx]
   202  	}
   203  	for id := range b.leftovers {
   204  		b.leftovers[id].entries = nil
   205  	}
   206  	b.currentBuff = append(b.currentBuff, entries...)
   207  	return true
   208  }
   209  
   210  func (b *lsobjBuffer) get(token string, size int64) (entries cmn.LsoEntries, hasEnough bool) {
   211  	b.lastAccess.Store(mono.NanoTime())
   212  
   213  	// If user requested something before what we have currently in the buffer
   214  	// then we just need to forget it.
   215  	if token < b.nextToken {
   216  		b.leftovers = nil
   217  		b.currentBuff = nil
   218  		b.nextToken = token
   219  		return nil, false
   220  	}
   221  
   222  	filled := b.mergeTargetBuffers()
   223  
   224  	// Move to first object after token.
   225  	idx := sort.Search(len(b.currentBuff), func(i int) bool {
   226  		return b.currentBuff[i].Name > token
   227  	})
   228  	entries = b.currentBuff[idx:]
   229  
   230  	if size > int64(len(entries)) {
   231  		// In case we don't have enough entries and we haven't filled anything then
   232  		// we must request more (if filled then we don't have enough because it's end).
   233  		if !filled {
   234  			return nil, false
   235  		}
   236  		size = int64(len(entries))
   237  	}
   238  
   239  	// Move buffer after returned entries.
   240  	b.currentBuff = entries[size:]
   241  	// Select only the entries that need to be returned to user.
   242  	entries = entries[:size]
   243  	if len(entries) > 0 {
   244  		b.nextToken = entries[len(entries)-1].Name
   245  	}
   246  	return entries, true
   247  }
   248  
   249  func (b *lsobjBuffer) set(id string, entries cmn.LsoEntries, size int64) {
   250  	if b.leftovers == nil {
   251  		b.leftovers = make(map[string]*lsobjBufferTarget, 5)
   252  	}
   253  	b.leftovers[id] = &lsobjBufferTarget{
   254  		entries: entries,
   255  		done:    len(entries) < int(size),
   256  	}
   257  	b.lastAccess.Store(mono.NanoTime())
   258  }
   259  
   260  func (b *lsobjBuffers) last(id, token string) string {
   261  	v, ok := b.buffers.LoadOrStore(id, &lsobjBuffer{})
   262  	if !ok {
   263  		return token
   264  	}
   265  	buffer := v.(*lsobjBuffer)
   266  	if len(buffer.currentBuff) == 0 {
   267  		return token
   268  	}
   269  	last := buffer.currentBuff[len(buffer.currentBuff)-1].Name
   270  	if cmn.TokenGreaterEQ(token, last) {
   271  		return token
   272  	}
   273  	return last
   274  }
   275  
   276  func (b *lsobjBuffers) get(id, token string, size int64) (entries cmn.LsoEntries, hasEnough bool) {
   277  	v, _ := b.buffers.LoadOrStore(id, &lsobjBuffer{})
   278  	return v.(*lsobjBuffer).get(token, size)
   279  }
   280  
   281  func (b *lsobjBuffers) set(id, targetID string, entries cmn.LsoEntries, size int64) {
   282  	v, _ := b.buffers.LoadOrStore(id, &lsobjBuffer{})
   283  	v.(*lsobjBuffer).set(targetID, entries, size)
   284  }
   285  
   286  func (b *lsobjBuffers) housekeep() (num int) {
   287  	b.buffers.Range(func(key, value any) bool {
   288  		buffer := value.(*lsobjBuffer)
   289  		num++
   290  		if mono.Since(buffer.lastAccess.Load()) > lsobjBufferTTL {
   291  			b.buffers.Delete(key)
   292  		}
   293  		return true
   294  	})
   295  	return
   296  }
   297  
   298  ///////////////////
   299  // cacheInterval //
   300  ///////////////////
   301  
   302  func (ci *cacheInterval) contains(token string) bool {
   303  	if ci.token == token {
   304  		return true
   305  	}
   306  	if len(ci.entries) > 0 {
   307  		return ci.entries[0].Name <= token && token <= ci.entries[len(ci.entries)-1].Name
   308  	}
   309  	return false
   310  }
   311  
   312  func (ci *cacheInterval) get(token string, objCnt int64, params reqParams) (entries cmn.LsoEntries, hasEnough bool) {
   313  	ci.lastAccess = mono.NanoTime()
   314  	entries = ci.entries
   315  
   316  	start := ci.find(token)
   317  	if params.prefix != "" {
   318  		// Move `start` to first entry that starts with `params.prefix`.
   319  		for ; start < uint(len(entries)); start++ {
   320  			if strings.HasPrefix(entries[start].Name, params.prefix) {
   321  				break
   322  			}
   323  			if entries[start].Name > params.prefix {
   324  				// Prefix is fully contained in the interval (but there are no entries), examples:
   325  				//  * interval = ["a", "z"], token = "", objCnt = 1, prefix = "b"
   326  				//  * interval = ["a", "z"], token = "a", objCnt = 1, prefix = "b"
   327  				return cmn.LsoEntries{}, true
   328  			}
   329  		}
   330  		if !ci.last && start == uint(len(entries)) {
   331  			// Prefix is out of the interval (right boundary), examples:
   332  			//  * interval = ["b", "y"], token = "", objCnt = 1, prefix = "z"
   333  			//  * interval = ["b", "y"], token = "", objCnt = 1, prefix = "ya"
   334  			return nil, false
   335  		}
   336  	}
   337  	entries = entries[start:]
   338  
   339  	end := min(len(entries), int(objCnt))
   340  	if params.prefix != "" {
   341  		// Move `end-1` to last entry that starts with `params.prefix`.
   342  		for ; end > 0; end-- {
   343  			if strings.HasPrefix(entries[end-1].Name, params.prefix) {
   344  				break
   345  			}
   346  		}
   347  		if !ci.last && end < len(entries) {
   348  			// We filtered out entries that start with `params.prefix` and
   349  			// the entries are fully contained in the interval, examples:
   350  			//  * interval = ["a", "ma", "mb", "z"], token = "", objCnt = 4, prefix = "m"
   351  			//  * interval = ["a", "z"], token = "", objCnt = 2, prefix = "a"
   352  			return entries[:end], true
   353  		}
   354  	}
   355  	entries = entries[:end]
   356  
   357  	if ci.last || len(entries) >= int(objCnt) {
   358  		return entries, true
   359  	}
   360  	return nil, false
   361  }
   362  
   363  func (ci *cacheInterval) find(token string) (idx uint) {
   364  	if ci.token == token {
   365  		return 0
   366  	}
   367  	return uint(sort.Search(len(ci.entries), func(i int) bool {
   368  		return ci.entries[i].Name > token
   369  	}))
   370  }
   371  
   372  func (ci *cacheInterval) append(objs *cacheInterval) {
   373  	idx := ci.find(objs.token)
   374  	ci.entries = append(ci.entries[:idx], objs.entries...)
   375  	ci.last = objs.last
   376  	ci.lastAccess = mono.NanoTime()
   377  }
   378  
   379  func (ci *cacheInterval) prepend(objs *cacheInterval) {
   380  	debug.Assert(!objs.last)
   381  	objs.append(ci)
   382  	*ci = *objs
   383  }
   384  
   385  ////////////////
   386  // lsobjCache //
   387  ////////////////
   388  
   389  // PRECONDITION: `c.mtx` must be at least rlocked.
   390  func (c *lsobjCache) findInterval(token string) *cacheInterval {
   391  	// TODO: finding intervals should be faster than just walking.
   392  	for _, interval := range c.intervals {
   393  		if interval.contains(token) {
   394  			return interval
   395  		}
   396  	}
   397  	return nil
   398  }
   399  
   400  // PRECONDITION: `c.mtx` must be locked.
   401  func (c *lsobjCache) merge(start, end, cur *cacheInterval) {
   402  	debug.AssertRWMutexLocked(&c.mtx)
   403  
   404  	if start == nil && end == nil {
   405  		c.intervals = append(c.intervals, cur)
   406  	} else if start != nil && end == nil {
   407  		start.append(cur)
   408  	} else if start == nil && end != nil {
   409  		end.prepend(cur)
   410  	} else if start != nil && end != nil {
   411  		if start == end {
   412  			// `cur` is part of some interval.
   413  			return
   414  		}
   415  
   416  		start.append(cur)
   417  		start.append(end)
   418  		c.removeInterval(end)
   419  	} else {
   420  		debug.Assert(false)
   421  	}
   422  }
   423  
   424  // PRECONDITION: `c.mtx` must be locked.
   425  func (c *lsobjCache) removeInterval(ci *cacheInterval) {
   426  	debug.AssertRWMutexLocked(&c.mtx)
   427  
   428  	// TODO: this should be faster
   429  	for idx := range c.intervals {
   430  		if c.intervals[idx] == ci {
   431  			ci.entries = nil
   432  			c.intervals = append(c.intervals[:idx], c.intervals[idx+1:]...)
   433  			return
   434  		}
   435  	}
   436  }
   437  
   438  func (c *lsobjCache) get(token string, objCnt int64, params reqParams) (entries cmn.LsoEntries, hasEnough bool) {
   439  	c.mtx.RLock()
   440  	if interval := c.findInterval(token); interval != nil {
   441  		entries, hasEnough = interval.get(token, objCnt, params)
   442  	}
   443  	c.mtx.RUnlock()
   444  	return
   445  }
   446  
   447  func (c *lsobjCache) set(token string, entries cmn.LsoEntries, size int64) {
   448  	var (
   449  		end *cacheInterval
   450  		cur = &cacheInterval{
   451  			token:      token,
   452  			entries:    entries,
   453  			last:       len(entries) < int(size),
   454  			lastAccess: mono.NanoTime(),
   455  		}
   456  	)
   457  	c.mtx.Lock()
   458  	start := c.findInterval(token)
   459  	if len(cur.entries) > 0 {
   460  		end = c.findInterval(entries[len(entries)-1].Name)
   461  	}
   462  	c.merge(start, end, cur)
   463  	c.mtx.Unlock()
   464  }
   465  
   466  func (c *lsobjCache) invalidate() {
   467  	c.mtx.Lock()
   468  	c.intervals = nil
   469  	c.mtx.Unlock()
   470  }
   471  
   472  /////////////////
   473  // lsobjCaches //
   474  /////////////////
   475  
   476  func (c *lsobjCaches) get(reqID cacheReqID, token string, objCnt int64) (entries cmn.LsoEntries, hasEnough bool) {
   477  	if v, ok := c.caches.Load(reqID); ok {
   478  		if entries, hasEnough = v.(*lsobjCache).get(token, objCnt, reqParams{}); hasEnough {
   479  			return
   480  		}
   481  	}
   482  
   483  	// When `prefix` is requested we must also check if there is enough entries
   484  	// in the "main" (whole bucket) cache with given prefix.
   485  	if reqID.prefix != "" {
   486  		// We must adjust parameters and cache id.
   487  		params := reqParams{prefix: reqID.prefix}
   488  		reqID = cacheReqID{bck: reqID.bck}
   489  
   490  		if v, ok := c.caches.Load(reqID); ok {
   491  			return v.(*lsobjCache).get(token, objCnt, params)
   492  		}
   493  	}
   494  	return nil, false
   495  }
   496  
   497  func (c *lsobjCaches) set(reqID cacheReqID, token string, entries cmn.LsoEntries, size int64) {
   498  	v, _ := c.caches.LoadOrStore(reqID, &lsobjCache{})
   499  	v.(*lsobjCache).set(token, entries, size)
   500  }
   501  
   502  func (c *lsobjCaches) invalidate(bck *cmn.Bck) {
   503  	c.caches.Range(func(key, value any) bool {
   504  		id := key.(cacheReqID)
   505  		if id.bck.Equal(bck) {
   506  			value.(*lsobjCache).invalidate()
   507  		}
   508  		return true
   509  	})
   510  }
   511  
   512  // TODO: factor-in memory pressure.
   513  func (c *lsobjCaches) housekeep() (num int) {
   514  	var toRemove []*cacheInterval
   515  	c.caches.Range(func(key, value any) bool {
   516  		cache := value.(*lsobjCache)
   517  		cache.mtx.Lock()
   518  		for _, interval := range cache.intervals {
   519  			num++
   520  			if mono.Since(interval.lastAccess) > cacheIntervalTTL {
   521  				toRemove = append(toRemove, interval)
   522  			}
   523  		}
   524  		for _, interval := range toRemove {
   525  			cache.removeInterval(interval)
   526  		}
   527  		if len(cache.intervals) == 0 {
   528  			c.caches.Delete(key)
   529  		}
   530  		cache.mtx.Unlock()
   531  		toRemove = toRemove[:0]
   532  		return true
   533  	})
   534  	return
   535  }