github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/handler-api.go

github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/cmd/handler-api.go (about)

     1  // Copyright (c) 2015-2021 MinIO, Inc.
     2  //
     3  // This file is part of MinIO Object Storage stack
     4  //
     5  // This program is free software: you can redistribute it and/or modify
     6  // it under the terms of the GNU Affero General Public License as published by
     7  // the Free Software Foundation, either version 3 of the License, or
     8  // (at your option) any later version.
     9  //
    10  // This program is distributed in the hope that it will be useful
    11  // but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  // GNU Affero General Public License for more details.
    14  //
    15  // You should have received a copy of the GNU Affero General Public License
    16  // along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  
    18  package cmd
    19  
    20  import (
    21  	"math"
    22  	"net/http"
    23  	"os"
    24  	"runtime"
    25  	"strconv"
    26  	"strings"
    27  	"sync"
    28  	"time"
    29  
    30  	"github.com/shirou/gopsutil/v3/mem"
    31  
    32  	"github.com/minio/minio/internal/config/api"
    33  	xioutil "github.com/minio/minio/internal/ioutil"
    34  	"github.com/minio/minio/internal/logger"
    35  	"github.com/minio/minio/internal/mcontext"
    36  )
    37  
    38  type apiConfig struct {
    39  	mu sync.RWMutex
    40  
    41  	requestsDeadline time.Duration
    42  	requestsPool     chan struct{}
    43  	clusterDeadline  time.Duration
    44  	listQuorum       string
    45  	corsAllowOrigins []string
    46  	// total drives per erasure set across pools.
    47  	totalDriveCount       int
    48  	replicationPriority   string
    49  	replicationMaxWorkers int
    50  	transitionWorkers     int
    51  
    52  	staleUploadsExpiry          time.Duration
    53  	staleUploadsCleanupInterval time.Duration
    54  	deleteCleanupInterval       time.Duration
    55  	enableODirect               bool
    56  	gzipObjects                 bool
    57  	rootAccess                  bool
    58  	syncEvents                  bool
    59  	objectMaxVersions           int64
    60  }
    61  
    62  const (
    63  	cgroupV1MemLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
    64  	cgroupV2MemLimitFile = "/sys/fs/cgroup/memory.max"
    65  	cgroupMemNoLimit     = 9223372036854771712
    66  )
    67  
    68  func cgroupMemLimit() (limit uint64) {
    69  	buf, err := os.ReadFile(cgroupV2MemLimitFile)
    70  	if err != nil {
    71  		buf, err = os.ReadFile(cgroupV1MemLimitFile)
    72  	}
    73  	if err != nil {
    74  		return 0
    75  	}
    76  	limit, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
    77  	if err != nil {
    78  		// The kernel can return valid but non integer values
    79  		// but still, no need to interpret more
    80  		return 0
    81  	}
    82  	if limit == cgroupMemNoLimit {
    83  		// No limit set, It's the highest positive signed 64-bit
    84  		// integer (2^63-1), rounded down to multiples of 4096 (2^12),
    85  		// the most common page size on x86 systems - for cgroup_limits.
    86  		return 0
    87  	}
    88  	return limit
    89  }
    90  
    91  func availableMemory() (available uint64) {
    92  	available = 2048 * blockSizeV2 * 2 // Default to 4 GiB when we can't find the limits.
    93  
    94  	if runtime.GOOS == "linux" {
    95  		// Useful in container mode
    96  		limit := cgroupMemLimit()
    97  		if limit > 0 {
    98  			// A valid value is found, return its 75%
    99  			available = (limit * 3) / 4
   100  			return
   101  		}
   102  	} // for all other platforms limits are based on virtual memory.
   103  
   104  	memStats, err := mem.VirtualMemory()
   105  	if err != nil {
   106  		return
   107  	}
   108  	// A valid value is available return its 75%
   109  	available = (memStats.Available * 3) / 4
   110  	return
   111  }
   112  
   113  func (t *apiConfig) init(cfg api.Config, setDriveCounts []int) {
   114  	t.mu.Lock()
   115  	defer t.mu.Unlock()
   116  
   117  	clusterDeadline := cfg.ClusterDeadline
   118  	if clusterDeadline == 0 {
   119  		clusterDeadline = 10 * time.Second
   120  	}
   121  	t.clusterDeadline = clusterDeadline
   122  	corsAllowOrigin := cfg.CorsAllowOrigin
   123  	if len(corsAllowOrigin) == 0 {
   124  		corsAllowOrigin = []string{"*"}
   125  	}
   126  	t.corsAllowOrigins = corsAllowOrigin
   127  
   128  	maxSetDrives := 0
   129  	for _, setDriveCount := range setDriveCounts {
   130  		t.totalDriveCount += setDriveCount
   131  		if setDriveCount > maxSetDrives {
   132  			maxSetDrives = setDriveCount
   133  		}
   134  	}
   135  
   136  	var apiRequestsMaxPerNode int
   137  	if cfg.RequestsMax <= 0 {
   138  		// Returns 75% of max memory allowed
   139  		maxMem := availableMemory()
   140  
   141  		// max requests per node is calculated as
   142  		// total_ram / ram_per_request
   143  		// ram_per_request is (2MiB+128KiB) * driveCount \
   144  		//    + 2 * 10MiB (default erasure block size v1) + 2 * 1MiB (default erasure block size v2)
   145  		blockSize := xioutil.BlockSizeLarge + xioutil.BlockSizeSmall
   146  		apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV1*2+blockSizeV2*2)))
   147  		if globalIsDistErasure {
   148  			logger.Info("Automatically configured API requests per node based on available memory on the system: %d", apiRequestsMaxPerNode)
   149  		}
   150  	} else {
   151  		apiRequestsMaxPerNode = cfg.RequestsMax
   152  		if n := totalNodeCount(); n > 0 {
   153  			apiRequestsMaxPerNode /= n
   154  		}
   155  	}
   156  
   157  	if cap(t.requestsPool) != apiRequestsMaxPerNode {
   158  		// Only replace if needed.
   159  		// Existing requests will use the previous limit,
   160  		// but new requests will use the new limit.
   161  		// There will be a short overlap window,
   162  		// but this shouldn't last long.
   163  		t.requestsPool = make(chan struct{}, apiRequestsMaxPerNode)
   164  	}
   165  	t.requestsDeadline = cfg.RequestsDeadline
   166  	listQuorum := cfg.ListQuorum
   167  	if listQuorum == "" {
   168  		listQuorum = "strict"
   169  	}
   170  	t.listQuorum = listQuorum
   171  	if globalReplicationPool != nil &&
   172  		(cfg.ReplicationPriority != t.replicationPriority || cfg.ReplicationMaxWorkers != t.replicationMaxWorkers) {
   173  		globalReplicationPool.ResizeWorkerPriority(cfg.ReplicationPriority, cfg.ReplicationMaxWorkers)
   174  	}
   175  	t.replicationPriority = cfg.ReplicationPriority
   176  	t.replicationMaxWorkers = cfg.ReplicationMaxWorkers
   177  
   178  	// N B api.transition_workers will be deprecated
   179  	if globalTransitionState != nil {
   180  		globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
   181  	}
   182  	t.transitionWorkers = cfg.TransitionWorkers
   183  
   184  	t.staleUploadsExpiry = cfg.StaleUploadsExpiry
   185  	t.staleUploadsCleanupInterval = cfg.StaleUploadsCleanupInterval
   186  	t.deleteCleanupInterval = cfg.DeleteCleanupInterval
   187  	t.enableODirect = cfg.EnableODirect
   188  	t.gzipObjects = cfg.GzipObjects
   189  	t.rootAccess = cfg.RootAccess
   190  	t.syncEvents = cfg.SyncEvents
   191  	t.objectMaxVersions = cfg.ObjectMaxVersions
   192  }
   193  
   194  func (t *apiConfig) odirectEnabled() bool {
   195  	t.mu.RLock()
   196  	defer t.mu.RUnlock()
   197  
   198  	return t.enableODirect
   199  }
   200  
   201  func (t *apiConfig) shouldGzipObjects() bool {
   202  	t.mu.RLock()
   203  	defer t.mu.RUnlock()
   204  
   205  	return t.gzipObjects
   206  }
   207  
   208  func (t *apiConfig) permitRootAccess() bool {
   209  	t.mu.RLock()
   210  	defer t.mu.RUnlock()
   211  
   212  	return t.rootAccess
   213  }
   214  
   215  func (t *apiConfig) getListQuorum() string {
   216  	t.mu.RLock()
   217  	defer t.mu.RUnlock()
   218  
   219  	if t.listQuorum == "" {
   220  		return "strict"
   221  	}
   222  
   223  	return t.listQuorum
   224  }
   225  
   226  func (t *apiConfig) getCorsAllowOrigins() []string {
   227  	t.mu.RLock()
   228  	defer t.mu.RUnlock()
   229  
   230  	if len(t.corsAllowOrigins) == 0 {
   231  		return []string{"*"}
   232  	}
   233  
   234  	corsAllowOrigins := make([]string, len(t.corsAllowOrigins))
   235  	copy(corsAllowOrigins, t.corsAllowOrigins)
   236  	return corsAllowOrigins
   237  }
   238  
   239  func (t *apiConfig) getStaleUploadsCleanupInterval() time.Duration {
   240  	t.mu.RLock()
   241  	defer t.mu.RUnlock()
   242  
   243  	if t.staleUploadsCleanupInterval == 0 {
   244  		return 6 * time.Hour // default 6 hours
   245  	}
   246  
   247  	return t.staleUploadsCleanupInterval
   248  }
   249  
   250  func (t *apiConfig) getStaleUploadsExpiry() time.Duration {
   251  	t.mu.RLock()
   252  	defer t.mu.RUnlock()
   253  
   254  	if t.staleUploadsExpiry == 0 {
   255  		return 24 * time.Hour // default 24 hours
   256  	}
   257  
   258  	return t.staleUploadsExpiry
   259  }
   260  
   261  func (t *apiConfig) getDeleteCleanupInterval() time.Duration {
   262  	t.mu.RLock()
   263  	defer t.mu.RUnlock()
   264  
   265  	if t.deleteCleanupInterval == 0 {
   266  		return 5 * time.Minute // every 5 minutes
   267  	}
   268  
   269  	return t.deleteCleanupInterval
   270  }
   271  
   272  func (t *apiConfig) getClusterDeadline() time.Duration {
   273  	t.mu.RLock()
   274  	defer t.mu.RUnlock()
   275  
   276  	if t.clusterDeadline == 0 {
   277  		return 10 * time.Second
   278  	}
   279  
   280  	return t.clusterDeadline
   281  }
   282  
   283  func (t *apiConfig) getRequestsPoolCapacity() int {
   284  	t.mu.RLock()
   285  	defer t.mu.RUnlock()
   286  
   287  	return cap(t.requestsPool)
   288  }
   289  
   290  func (t *apiConfig) getRequestsPool() (chan struct{}, time.Duration) {
   291  	t.mu.RLock()
   292  	defer t.mu.RUnlock()
   293  
   294  	if t.requestsPool == nil {
   295  		return nil, 10 * time.Second
   296  	}
   297  
   298  	if t.requestsDeadline <= 0 {
   299  		return t.requestsPool, 10 * time.Second
   300  	}
   301  
   302  	return t.requestsPool, t.requestsDeadline
   303  }
   304  
   305  // maxClients throttles the S3 API calls
   306  func maxClients(f http.HandlerFunc) http.HandlerFunc {
   307  	return func(w http.ResponseWriter, r *http.Request) {
   308  		globalHTTPStats.incS3RequestsIncoming()
   309  
   310  		if r.Header.Get(globalObjectPerfUserMetadata) == "" {
   311  			if val := globalServiceFreeze.Load(); val != nil {
   312  				if unlock, ok := val.(chan struct{}); ok && unlock != nil {
   313  					// Wait until unfrozen.
   314  					select {
   315  					case <-unlock:
   316  					case <-r.Context().Done():
   317  						// if client canceled we don't need to wait here forever.
   318  						return
   319  					}
   320  				}
   321  			}
   322  		}
   323  
   324  		pool, deadline := globalAPIConfig.getRequestsPool()
   325  		if pool == nil {
   326  			f.ServeHTTP(w, r)
   327  			return
   328  		}
   329  
   330  		globalHTTPStats.addRequestsInQueue(1)
   331  
   332  		if tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt); ok {
   333  			tc.FuncName = "s3.MaxClients"
   334  		}
   335  
   336  		deadlineTimer := time.NewTimer(deadline)
   337  		defer deadlineTimer.Stop()
   338  
   339  		select {
   340  		case pool <- struct{}{}:
   341  			defer func() { <-pool }()
   342  			globalHTTPStats.addRequestsInQueue(-1)
   343  			f.ServeHTTP(w, r)
   344  		case <-deadlineTimer.C:
   345  			// Send a http timeout message
   346  			writeErrorResponse(r.Context(), w,
   347  				errorCodes.ToAPIErr(ErrTooManyRequests),
   348  				r.URL)
   349  			globalHTTPStats.addRequestsInQueue(-1)
   350  			return
   351  		case <-r.Context().Done():
   352  			// When the client disconnects before getting the S3 handler
   353  			// status code response, set the status code to 499 so this request
   354  			// will be properly audited and traced.
   355  			w.WriteHeader(499)
   356  			globalHTTPStats.addRequestsInQueue(-1)
   357  			return
   358  		}
   359  	}
   360  }
   361  
   362  func (t *apiConfig) getReplicationOpts() replicationPoolOpts {
   363  	t.mu.RLock()
   364  	defer t.mu.RUnlock()
   365  
   366  	if t.replicationPriority == "" {
   367  		return replicationPoolOpts{
   368  			Priority:   "auto",
   369  			MaxWorkers: WorkerMaxLimit,
   370  		}
   371  	}
   372  
   373  	return replicationPoolOpts{
   374  		Priority:   t.replicationPriority,
   375  		MaxWorkers: t.replicationMaxWorkers,
   376  	}
   377  }
   378  
   379  func (t *apiConfig) getTransitionWorkers() int {
   380  	t.mu.RLock()
   381  	defer t.mu.RUnlock()
   382  
   383  	if t.transitionWorkers <= 0 {
   384  		return runtime.GOMAXPROCS(0) / 2
   385  	}
   386  
   387  	return t.transitionWorkers
   388  }
   389  
   390  func (t *apiConfig) isSyncEventsEnabled() bool {
   391  	t.mu.RLock()
   392  	defer t.mu.RUnlock()
   393  
   394  	return t.syncEvents
   395  }
   396  
   397  func (t *apiConfig) getObjectMaxVersions() int64 {
   398  	t.mu.RLock()
   399  	defer t.mu.RUnlock()
   400  
   401  	if t.objectMaxVersions <= 0 {
   402  		// defaults to 'IntMax' when unset.
   403  		return math.MaxInt64
   404  	}
   405  
   406  	return t.objectMaxVersions
   407  }