github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/rwstress_test.go (about) 1 // Package integration_test. 2 /* 3 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved. 4 */ 5 package integration_test 6 7 import ( 8 "fmt" 9 "net/http" 10 "testing" 11 12 "github.com/NVIDIA/aistore/api" 13 "github.com/NVIDIA/aistore/cmn" 14 "github.com/NVIDIA/aistore/cmn/cos" 15 "github.com/NVIDIA/aistore/core/meta" 16 "github.com/NVIDIA/aistore/tools" 17 "github.com/NVIDIA/aistore/tools/readers" 18 "github.com/NVIDIA/aistore/tools/trand" 19 ) 20 21 const ( 22 rwdir = "rwstress" 23 fileSize = 32 * cos.KiB 24 ) 25 26 type opRes struct { 27 op string 28 err error 29 } 30 31 // generates a list of random file names and a buffer to keep random data for filling up files 32 func generateRandomNames(fileCount int) { 33 fileNames = make([]string, fileCount) 34 for i := range fileCount { 35 fileNames[i] = trand.String(20) 36 } 37 } 38 39 var ( 40 fileNames []string 41 numLoops int 42 numFiles int 43 opFuncMap = map[string]func(string, string, cmn.Bck) opRes{ 44 http.MethodPut: opPut, 45 http.MethodGet: opGet, 46 http.MethodDelete: opDelete, 47 } 48 ) 49 50 func parallelOpLoop(bck cmn.Bck, cksumType string, 51 errCh chan opRes, opFunc func(string, string, cmn.Bck) opRes) { 52 var ( 53 fileCount = len(fileNames) 54 wg = cos.NewLimitedWaitGroup(40, 0) 55 ) 56 for range numLoops { 57 for idx := range fileCount { 58 objName := fmt.Sprintf("%s/%s", rwdir, fileNames[idx]) 59 wg.Add(1) 60 go func(objName string) { 61 defer wg.Done() 62 errCh <- opFunc(objName, cksumType, bck) 63 }(objName) 64 } 65 } 66 wg.Wait() 67 } 68 69 func opPut(objName, cksumType string, bck cmn.Bck) opRes { 70 r, err := readers.NewRand(fileSize, cksumType) 71 if err != nil { 72 return opRes{http.MethodPut, err} 73 } 74 putArgs := api.PutArgs{ 75 BaseParams: baseParams, 76 Bck: bck, 77 ObjName: objName, 78 Cksum: r.Cksum(), 79 Reader: r, 80 } 81 _, err = api.PutObject(&putArgs) 82 return opRes{http.MethodPut, err} 83 } 84 85 func opGet(objName, _ string, bck cmn.Bck) opRes { 86 _, err := api.GetObject(baseParams, bck, objName, nil) 87 return opRes{http.MethodGet, err} 88 } 89 90 func opDelete(objName, _ string, bck cmn.Bck) opRes { 91 err := api.DeleteObject(baseParams, bck, objName) 92 return opRes{http.MethodDelete, err} 93 } 94 95 func multiOp(opNames ...string) func(string, string, cmn.Bck) opRes { 96 var opr opRes 97 for _, opName := range opNames { 98 opr.op += opName 99 } 100 return func(objName, cksumType string, bck cmn.Bck) opRes { 101 for _, opName := range opNames { 102 opFunc := opFuncMap[opName] 103 res := opFunc(objName, cksumType, bck) 104 if res.err != nil { 105 opr.err = res.err 106 break 107 } 108 } 109 return opr 110 } 111 } 112 113 func reportErr(t *testing.T, errCh chan opRes, ignoreStatusNotFound bool) { 114 const maxErrCount = 10 115 var i int 116 for opRes := range errCh { 117 if opRes.err == nil { 118 continue 119 } 120 status := api.HTTPStatus(opRes.err) 121 if status == http.StatusNotFound && ignoreStatusNotFound { 122 continue 123 } 124 i++ 125 if i > maxErrCount { 126 t.Fatalf("%s failed %v", opRes.op, opRes.err) 127 return 128 } 129 t.Errorf("%s failed %v", opRes.op, opRes.err) 130 } 131 } 132 133 func initRWStress(t *testing.T, bck cmn.Bck, cksumType string) { 134 errChanSize := numLoops * numFiles 135 errCh := make(chan opRes, errChanSize) 136 parallelOpLoop(bck, cksumType, errCh, opPut) 137 close(errCh) 138 reportErr(t, errCh, false) 139 } 140 141 func cleanRWStress(bck cmn.Bck, cksumType string) { 142 errChanSize := numLoops * numFiles 143 errCh := make(chan opRes, errChanSize) 144 parallelOpLoop(bck, cksumType, errCh, opDelete) 145 close(errCh) 146 // Ignoring errors here since this is a post test cleanup 147 } 148 149 func parallelPutGetStress(t *testing.T) { 150 runProviderTests(t, func(t *testing.T, bck *meta.Bck) { 151 if bck.IsCloud() { 152 t.Skipf("skipping %s for Cloud bucket %s", t.Name(), bck.Bucket()) 153 } 154 var ( 155 errChanSize = numLoops * numFiles * 2 156 errCh = make(chan opRes, errChanSize) 157 cksumType = bck.Props.Cksum.Type 158 b = bck.Clone() 159 ) 160 161 initRWStress(t, b, cksumType) 162 parallelOpLoop(b, cksumType, errCh, opPut) 163 parallelOpLoop(b, cksumType, errCh, opGet) 164 close(errCh) 165 reportErr(t, errCh, false) 166 cleanRWStress(b, cksumType) 167 }) 168 } 169 170 func multiOpStress(opNames ...string) func(t *testing.T) { 171 return func(t *testing.T) { 172 runProviderTests(t, func(t *testing.T, bck *meta.Bck) { 173 if bck.IsCloud() { 174 t.Skipf("skipping %s for Cloud bucket %s", t.Name(), bck.Bucket()) 175 } 176 var ( 177 errChanSize = numLoops * numFiles * 3 178 errCh = make(chan opRes, errChanSize) 179 cksumType = bck.Props.Cksum.Type 180 b = bck.Clone() 181 ) 182 183 parallelOpLoop(b, cksumType, errCh, multiOp(opNames...)) 184 close(errCh) 185 reportErr(t, errCh, true) 186 cleanRWStress(b, cksumType) 187 }) 188 } 189 } 190 191 // All sub-tests are skipped for GCP as GCP is flaky as most operations require backoff: 192 // 1. More than only 1(one) PUT per second for a single object ends with: 193 // 429 - backoff starts at `1 second` and increases up to `64s` 194 // 2. Too many requests may end with: 195 // 502 & 503 - backoff starts at `1 minute` 196 // 3. Too quick GET(HEAD) after PUT may return 404: 197 // PUTGETDELETE failed {"status":404,"message":"storage: object doesn't exist","method":"GET" 198 // Reason: PUT needs some time to update object version and if GET comes 199 // in the middle, GET returns 404 because the new version is still processing 200 // 201 // Summing up: GCP is not suitable for any stress test, so it is skipped 202 func rwstress(t *testing.T) { 203 generateRandomNames(numFiles) 204 m := ioContext{t: t} 205 m.saveCluState(tools.RandomProxyURL()) 206 t.Run("parallelputget", parallelPutGetStress) 207 t.Run("putdelete", multiOpStress(http.MethodPut, http.MethodGet)) 208 t.Run("putgetdelete", multiOpStress(http.MethodPut, http.MethodGet, http.MethodDelete)) 209 m.checkCluState(m.smap) 210 } 211 212 func TestRWStressShort(t *testing.T) { 213 numLoops = 8 214 numFiles = 25 215 rwstress(t) 216 } 217 218 func TestRWStress(t *testing.T) { 219 tools.CheckSkip(t, &tools.SkipTestArgs{Long: true}) 220 221 numLoops = 30 222 numFiles = 1000 223 rwstress(t) 224 }