storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/cmd/fs-tree-walk-pool.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package cmd
    18  
    19  import (
    20  	"errors"
    21  	"reflect"
    22  	"sync"
    23  	"time"
    24  )
    25  
    26  // Global lookup timeout.
    27  const (
    28  	globalLookupTimeout    = time.Minute * 30 // 30minutes.
    29  	treeWalkEntryLimit     = 50
    30  	treeWalkSameEntryLimit = 4
    31  )
    32  
// listParams - list object params used as the key of the TreeWalkPool map.
// Two list calls share pooled walks only when all four fields are equal,
// since the struct is compared as a whole when used as a map key.
type listParams struct {
	bucket    string
	recursive bool
	marker    string
	prefix    string
}
    40  
// errWalkAbort - sentinel error returned by doTreeWalk() (defined outside
// this file) if it returns prematurely.
// doTreeWalk() can return prematurely if
// 1) treeWalk is timed out by the timer go-routine.
// 2) there is an error during tree walk.
var errWalkAbort = errors.New("treeWalk abort")
    46  
// treeWalk - represents the go routine that does the file tree walk.
// One value is stored in the TreeWalkPool per Set() call.
type treeWalk struct {
	// Time at which Set() added this walk; used to find the oldest walk
	// when the pool is over its entry limit.
	added time.Time
	// Channel handed back to the caller by Release().
	resultCh   chan TreeWalkResult
	endWalkCh  chan struct{}   // To signal when treeWalk go-routine should end.
	endTimerCh chan<- struct{} // To signal when timer go-routine should end.
}
    54  
// TreeWalkPool - pool of treeWalk go routines.
// A treeWalk is added to the pool by Set() and removed either by
// doing a Release() or if the concerned timer goes off.
// treeWalkPool's purpose is to maintain active treeWalk go-routines in a map so that
// it can be looked up across related list calls.
type TreeWalkPool struct {
	// mu guards pool; Set(), Release() and the timer go-routines all take it.
	mu sync.Mutex
	// pool maps list parameters to the walks parked under them, oldest first.
	pool map[listParams][]treeWalk
	// timeOut is how long a parked walk may stay in the pool before its
	// timer go-routine removes it.
	timeOut time.Duration
}
    65  
    66  // NewTreeWalkPool - initialize new tree walk pool.
    67  func NewTreeWalkPool(timeout time.Duration) *TreeWalkPool {
    68  	tPool := &TreeWalkPool{
    69  		pool:    make(map[listParams][]treeWalk),
    70  		timeOut: timeout,
    71  	}
    72  	return tPool
    73  }
    74  
    75  // Release - selects a treeWalk from the pool based on the input
    76  // listParams, removes it from the pool, and returns the TreeWalkResult
    77  // channel.
    78  // Returns nil if listParams does not have an associated treeWalk.
    79  func (t *TreeWalkPool) Release(params listParams) (resultCh chan TreeWalkResult, endWalkCh chan struct{}) {
    80  	t.mu.Lock()
    81  	defer t.mu.Unlock()
    82  	walks, ok := t.pool[params] // Pick the valid walks.
    83  	if !ok || len(walks) == 0 {
    84  		// Release return nil if params not found.
    85  		return nil, nil
    86  	}
    87  
    88  	// Pop out the first valid walk entry.
    89  	walk := walks[0]
    90  	walks = walks[1:]
    91  	if len(walks) > 0 {
    92  		t.pool[params] = walks
    93  	} else {
    94  		delete(t.pool, params)
    95  	}
    96  	walk.endTimerCh <- struct{}{}
    97  	return walk.resultCh, walk.endWalkCh
    98  }
    99  
   100  // Set - adds a treeWalk to the treeWalkPool.
   101  // Also starts a timer go-routine that ends when:
   102  // 1) time.After() expires after t.timeOut seconds.
   103  //    The expiration is needed so that the treeWalk go-routine resources are freed after a timeout
   104  //    if the S3 client does only partial listing of objects.
   105  // 2) Release() signals the timer go-routine to end on endTimerCh.
   106  //    During listing the timer should not timeout and end the treeWalk go-routine, hence the
   107  //    timer go-routine should be ended.
   108  func (t *TreeWalkPool) Set(params listParams, resultCh chan TreeWalkResult, endWalkCh chan struct{}) {
   109  	t.mu.Lock()
   110  	defer t.mu.Unlock()
   111  	// If we are above the limit delete at least one entry from the pool.
   112  	if len(t.pool) > treeWalkEntryLimit {
   113  		age := time.Now()
   114  		var oldest listParams
   115  		for k, v := range t.pool {
   116  			if len(v) == 0 {
   117  				delete(t.pool, k)
   118  				continue
   119  			}
   120  			// The first element is the oldest, so we only check that.
   121  			e := v[0]
   122  			if e.added.Before(age) {
   123  				oldest = k
   124  				age = e.added
   125  			}
   126  		}
   127  		// Invalidate and delete oldest.
   128  		if walks, ok := t.pool[oldest]; ok && len(walks) > 0 {
   129  			endCh := walks[0].endTimerCh
   130  			endWalkCh := walks[0].endWalkCh
   131  			if len(walks) > 1 {
   132  				// Move walks forward
   133  				copy(walks, walks[1:])
   134  				walks = walks[:len(walks)-1]
   135  				t.pool[oldest] = walks
   136  			} else {
   137  				// Only entry, just delete.
   138  				delete(t.pool, oldest)
   139  			}
   140  			select {
   141  			case endCh <- struct{}{}:
   142  				close(endWalkCh)
   143  			default:
   144  			}
   145  		} else {
   146  			// Shouldn't happen, but just in case.
   147  			delete(t.pool, oldest)
   148  		}
   149  	}
   150  
   151  	// Should be a buffered channel so that Release() never blocks.
   152  	endTimerCh := make(chan struct{}, 1)
   153  	walkInfo := treeWalk{
   154  		added:      UTCNow(),
   155  		resultCh:   resultCh,
   156  		endWalkCh:  endWalkCh,
   157  		endTimerCh: endTimerCh,
   158  	}
   159  
   160  	// Append new walk info.
   161  	walks := t.pool[params]
   162  	if len(walks) < treeWalkSameEntryLimit {
   163  		t.pool[params] = append(walks, walkInfo)
   164  	} else {
   165  		// We are at limit, invalidate oldest, move list down and add new as last.
   166  		select {
   167  		case walks[0].endTimerCh <- struct{}{}:
   168  			close(walks[0].endWalkCh)
   169  		default:
   170  		}
   171  		copy(walks, walks[1:])
   172  		walks[len(walks)-1] = walkInfo
   173  	}
   174  
   175  	// Timer go-routine which times out after t.timeOut seconds.
   176  	go func(endTimerCh <-chan struct{}, walkInfo treeWalk) {
   177  		select {
   178  		// Wait until timeOut
   179  		case <-time.After(t.timeOut):
   180  			// Timeout has expired. Remove the treeWalk from treeWalkPool and
   181  			// end the treeWalk go-routine.
   182  			t.mu.Lock()
   183  			defer t.mu.Unlock()
   184  			walks, ok := t.pool[params]
   185  			if ok {
   186  				// Trick of filtering without allocating
   187  				// https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
   188  				nwalks := walks[:0]
   189  				// Look for walkInfo, remove it from the walks list.
   190  				for _, walk := range walks {
   191  					if !reflect.DeepEqual(walk, walkInfo) {
   192  						nwalks = append(nwalks, walk)
   193  					}
   194  				}
   195  				if len(nwalks) == 0 {
   196  					// No more treeWalk go-routines associated with listParams
   197  					// hence remove map entry.
   198  					delete(t.pool, params)
   199  				} else {
   200  					// There are more treeWalk go-routines associated with listParams
   201  					// hence save the list in the map.
   202  					t.pool[params] = nwalks
   203  				}
   204  			}
   205  			// Signal the treeWalk go-routine to die.
   206  			close(endWalkCh)
   207  		case <-endTimerCh:
   208  			return
   209  		}
   210  	}(endTimerCh, walkInfo)
   211  }