github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/rwstress_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"fmt"
     9  	"net/http"
    10  	"testing"
    11  
    12  	"github.com/NVIDIA/aistore/api"
    13  	"github.com/NVIDIA/aistore/cmn"
    14  	"github.com/NVIDIA/aistore/cmn/cos"
    15  	"github.com/NVIDIA/aistore/core/meta"
    16  	"github.com/NVIDIA/aistore/tools"
    17  	"github.com/NVIDIA/aistore/tools/readers"
    18  	"github.com/NVIDIA/aistore/tools/trand"
    19  )
    20  
    21  const (
    22  	rwdir    = "rwstress"
    23  	fileSize = 32 * cos.KiB
    24  )
    25  
    26  type opRes struct {
    27  	op  string
    28  	err error
    29  }
    30  
    31  // generates a list of random file names and a buffer to keep random data for filling up files
    32  func generateRandomNames(fileCount int) {
    33  	fileNames = make([]string, fileCount)
    34  	for i := range fileCount {
    35  		fileNames[i] = trand.String(20)
    36  	}
    37  }
    38  
    39  var (
    40  	fileNames []string
    41  	numLoops  int
    42  	numFiles  int
    43  	opFuncMap = map[string]func(string, string, cmn.Bck) opRes{
    44  		http.MethodPut:    opPut,
    45  		http.MethodGet:    opGet,
    46  		http.MethodDelete: opDelete,
    47  	}
    48  )
    49  
    50  func parallelOpLoop(bck cmn.Bck, cksumType string,
    51  	errCh chan opRes, opFunc func(string, string, cmn.Bck) opRes) {
    52  	var (
    53  		fileCount = len(fileNames)
    54  		wg        = cos.NewLimitedWaitGroup(40, 0)
    55  	)
    56  	for range numLoops {
    57  		for idx := range fileCount {
    58  			objName := fmt.Sprintf("%s/%s", rwdir, fileNames[idx])
    59  			wg.Add(1)
    60  			go func(objName string) {
    61  				defer wg.Done()
    62  				errCh <- opFunc(objName, cksumType, bck)
    63  			}(objName)
    64  		}
    65  	}
    66  	wg.Wait()
    67  }
    68  
    69  func opPut(objName, cksumType string, bck cmn.Bck) opRes {
    70  	r, err := readers.NewRand(fileSize, cksumType)
    71  	if err != nil {
    72  		return opRes{http.MethodPut, err}
    73  	}
    74  	putArgs := api.PutArgs{
    75  		BaseParams: baseParams,
    76  		Bck:        bck,
    77  		ObjName:    objName,
    78  		Cksum:      r.Cksum(),
    79  		Reader:     r,
    80  	}
    81  	_, err = api.PutObject(&putArgs)
    82  	return opRes{http.MethodPut, err}
    83  }
    84  
    85  func opGet(objName, _ string, bck cmn.Bck) opRes {
    86  	_, err := api.GetObject(baseParams, bck, objName, nil)
    87  	return opRes{http.MethodGet, err}
    88  }
    89  
    90  func opDelete(objName, _ string, bck cmn.Bck) opRes {
    91  	err := api.DeleteObject(baseParams, bck, objName)
    92  	return opRes{http.MethodDelete, err}
    93  }
    94  
    95  func multiOp(opNames ...string) func(string, string, cmn.Bck) opRes {
    96  	var opr opRes
    97  	for _, opName := range opNames {
    98  		opr.op += opName
    99  	}
   100  	return func(objName, cksumType string, bck cmn.Bck) opRes {
   101  		for _, opName := range opNames {
   102  			opFunc := opFuncMap[opName]
   103  			res := opFunc(objName, cksumType, bck)
   104  			if res.err != nil {
   105  				opr.err = res.err
   106  				break
   107  			}
   108  		}
   109  		return opr
   110  	}
   111  }
   112  
   113  func reportErr(t *testing.T, errCh chan opRes, ignoreStatusNotFound bool) {
   114  	const maxErrCount = 10
   115  	var i int
   116  	for opRes := range errCh {
   117  		if opRes.err == nil {
   118  			continue
   119  		}
   120  		status := api.HTTPStatus(opRes.err)
   121  		if status == http.StatusNotFound && ignoreStatusNotFound {
   122  			continue
   123  		}
   124  		i++
   125  		if i > maxErrCount {
   126  			t.Fatalf("%s failed %v", opRes.op, opRes.err)
   127  			return
   128  		}
   129  		t.Errorf("%s failed %v", opRes.op, opRes.err)
   130  	}
   131  }
   132  
   133  func initRWStress(t *testing.T, bck cmn.Bck, cksumType string) {
   134  	errChanSize := numLoops * numFiles
   135  	errCh := make(chan opRes, errChanSize)
   136  	parallelOpLoop(bck, cksumType, errCh, opPut)
   137  	close(errCh)
   138  	reportErr(t, errCh, false)
   139  }
   140  
   141  func cleanRWStress(bck cmn.Bck, cksumType string) {
   142  	errChanSize := numLoops * numFiles
   143  	errCh := make(chan opRes, errChanSize)
   144  	parallelOpLoop(bck, cksumType, errCh, opDelete)
   145  	close(errCh)
   146  	// Ignoring errors here since this is a post test cleanup
   147  }
   148  
   149  func parallelPutGetStress(t *testing.T) {
   150  	runProviderTests(t, func(t *testing.T, bck *meta.Bck) {
   151  		if bck.IsCloud() {
   152  			t.Skipf("skipping %s for Cloud bucket %s", t.Name(), bck.Bucket())
   153  		}
   154  		var (
   155  			errChanSize = numLoops * numFiles * 2
   156  			errCh       = make(chan opRes, errChanSize)
   157  			cksumType   = bck.Props.Cksum.Type
   158  			b           = bck.Clone()
   159  		)
   160  
   161  		initRWStress(t, b, cksumType)
   162  		parallelOpLoop(b, cksumType, errCh, opPut)
   163  		parallelOpLoop(b, cksumType, errCh, opGet)
   164  		close(errCh)
   165  		reportErr(t, errCh, false)
   166  		cleanRWStress(b, cksumType)
   167  	})
   168  }
   169  
   170  func multiOpStress(opNames ...string) func(t *testing.T) {
   171  	return func(t *testing.T) {
   172  		runProviderTests(t, func(t *testing.T, bck *meta.Bck) {
   173  			if bck.IsCloud() {
   174  				t.Skipf("skipping %s for Cloud bucket %s", t.Name(), bck.Bucket())
   175  			}
   176  			var (
   177  				errChanSize = numLoops * numFiles * 3
   178  				errCh       = make(chan opRes, errChanSize)
   179  				cksumType   = bck.Props.Cksum.Type
   180  				b           = bck.Clone()
   181  			)
   182  
   183  			parallelOpLoop(b, cksumType, errCh, multiOp(opNames...))
   184  			close(errCh)
   185  			reportErr(t, errCh, true)
   186  			cleanRWStress(b, cksumType)
   187  		})
   188  	}
   189  }
   190  
   191  // All sub-tests are skipped for GCP as GCP is flaky as most operations require backoff:
   192  //  1. More than only 1(one) PUT per second for a single object ends with:
   193  //     429 - backoff starts at `1 second` and increases up to `64s`
   194  //  2. Too many requests may end with:
   195  //     502 & 503 - backoff starts at `1 minute`
   196  //  3. Too quick GET(HEAD) after PUT may return 404:
   197  //     PUTGETDELETE failed {"status":404,"message":"storage: object doesn't exist","method":"GET"
   198  //     Reason: PUT needs some time to update object version and if GET comes
   199  //     in the middle, GET returns 404 because the new version is still processing
   200  //
   201  // Summing up: GCP is not suitable for any stress test, so it is skipped
   202  func rwstress(t *testing.T) {
   203  	generateRandomNames(numFiles)
   204  	m := ioContext{t: t}
   205  	m.saveCluState(tools.RandomProxyURL())
   206  	t.Run("parallelputget", parallelPutGetStress)
   207  	t.Run("putdelete", multiOpStress(http.MethodPut, http.MethodGet))
   208  	t.Run("putgetdelete", multiOpStress(http.MethodPut, http.MethodGet, http.MethodDelete))
   209  	m.checkCluState(m.smap)
   210  }
   211  
   212  func TestRWStressShort(t *testing.T) {
   213  	numLoops = 8
   214  	numFiles = 25
   215  	rwstress(t)
   216  }
   217  
   218  func TestRWStress(t *testing.T) {
   219  	tools.CheckSkip(t, &tools.SkipTestArgs{Long: true})
   220  
   221  	numLoops = 30
   222  	numFiles = 1000
   223  	rwstress(t)
   224  }