github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ais/test/etl_cp_multiobj_test.go (about)

     1  // Package integration_test.
     2  /*
     3   * Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package integration_test
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  	"time"
    12  
    13  	"github.com/NVIDIA/aistore/api"
    14  	"github.com/NVIDIA/aistore/api/apc"
    15  	"github.com/NVIDIA/aistore/cmn"
    16  	"github.com/NVIDIA/aistore/cmn/cos"
    17  	"github.com/NVIDIA/aistore/cmn/debug"
    18  	"github.com/NVIDIA/aistore/ext/etl"
    19  	"github.com/NVIDIA/aistore/tools"
    20  	"github.com/NVIDIA/aistore/tools/readers"
    21  	"github.com/NVIDIA/aistore/tools/tassert"
    22  	"github.com/NVIDIA/aistore/tools/tetl"
    23  	"github.com/NVIDIA/aistore/tools/tlog"
    24  	"github.com/NVIDIA/aistore/xact"
    25  )
    26  
    27  func TestETLMultiObj(t *testing.T) {
    28  	tools.CheckSkip(t, &tools.SkipTestArgs{RequiredDeployment: tools.ClusterTypeK8s})
    29  	tetl.CheckNoRunningETLContainers(t, baseParams)
    30  
    31  	const (
    32  		objCnt      = 100
    33  		copyCnt     = 20
    34  		rangeStart  = 10
    35  		transformer = tetl.MD5
    36  		etlCommType = etl.Hpush
    37  		objSize     = cos.KiB
    38  		cksumType   = cos.ChecksumMD5
    39  	)
    40  	var (
    41  		proxyURL   = tools.RandomProxyURL(t)
    42  		baseParams = tools.BaseAPIParams(proxyURL)
    43  		bcktests   = []struct {
    44  			srcRemote      bool
    45  			evictRemoteSrc bool
    46  			dstRemote      bool
    47  		}{
    48  			{false, false, false},
    49  			{true, false, false},
    50  			{true, true, false},
    51  			{false, false, true},
    52  		}
    53  	)
    54  
    55  	_ = tetl.InitSpec(t, baseParams, transformer, etlCommType)
    56  	t.Cleanup(func() { tetl.StopAndDeleteETL(t, baseParams, transformer) })
    57  
    58  	for _, bcktest := range bcktests {
    59  		m := ioContext{
    60  			t:         t,
    61  			num:       objCnt,
    62  			fileSize:  512,
    63  			fixedSize: true, // see checkETLStats below
    64  		}
    65  		if bcktest.srcRemote {
    66  			m.bck = cliBck
    67  			m.deleteRemoteBckObjs = true
    68  		} else {
    69  			m.bck = cmn.Bck{Name: "etlsrc_" + cos.GenTie(), Provider: apc.AIS}
    70  			tools.CreateBucket(t, proxyURL, m.bck, nil, true /*cleanup*/)
    71  		}
    72  		m.init(true /*cleanup*/)
    73  
    74  		if bcktest.srcRemote {
    75  			if bcktest.evictRemoteSrc {
    76  				tlog.Logf("evicting %s\n", m.bck)
    77  				//
    78  				// evict all _cached_ data from the "local" cluster
    79  				// keep the src bucket in the "local" BMD though
    80  				//
    81  				err := api.EvictRemoteBucket(baseParams, m.bck, true /*keep empty src bucket in the BMD*/)
    82  				tassert.CheckFatal(t, err)
    83  			}
    84  		}
    85  
    86  		tlog.Logf("PUT %d objects (size %d) => %s/test/a-*\n", objCnt, objSize, m.bck)
    87  		for i := range objCnt {
    88  			r, _ := readers.NewRand(objSize, cksumType)
    89  			_, err := api.PutObject(&api.PutArgs{
    90  				BaseParams: baseParams,
    91  				Bck:        m.bck,
    92  				ObjName:    fmt.Sprintf("test/a-%04d", i),
    93  				Reader:     r,
    94  				Size:       objSize,
    95  			})
    96  			tassert.CheckFatal(t, err)
    97  		}
    98  
    99  		for _, ty := range []string{"range", "list"} {
   100  			tname := fmt.Sprintf("%s-%s-%s", transformer, strings.TrimSuffix(etlCommType, "://"), ty)
   101  			if bcktest.srcRemote {
   102  				if bcktest.evictRemoteSrc {
   103  					tname += "/from-evicted-remote"
   104  				} else {
   105  					tname += "/from-remote"
   106  				}
   107  			} else {
   108  				debug.Assert(!bcktest.evictRemoteSrc)
   109  				tname += "/from-ais"
   110  			}
   111  			if bcktest.dstRemote {
   112  				tname += "/to-remote"
   113  			} else {
   114  				tname += "/to-ais"
   115  			}
   116  			t.Run(tname, func(t *testing.T) {
   117  				var bckTo cmn.Bck
   118  				if bcktest.dstRemote {
   119  					bckTo = cliBck
   120  					dstm := ioContext{t: t, bck: bckTo}
   121  					dstm.del()
   122  					t.Cleanup(func() { dstm.del() })
   123  				} else {
   124  					bckTo = cmn.Bck{Name: "etldst_" + cos.GenTie(), Provider: apc.AIS}
   125  					// NOTE: ais will create dst bucket on the fly
   126  
   127  					t.Cleanup(func() { tools.DestroyBucket(t, proxyURL, bckTo) })
   128  				}
   129  				template := "test/a-" +
   130  					fmt.Sprintf("{%04d..%04d}", rangeStart, rangeStart+copyCnt-1)
   131  				testETLMultiObj(t, transformer, m.bck, bckTo, template, ty, bcktest.evictRemoteSrc)
   132  			})
   133  		}
   134  	}
   135  }
   136  
   137  func testETLMultiObj(t *testing.T, etlName string, bckFrom, bckTo cmn.Bck, fileRange, opType string, evictRemoteSrc bool) {
   138  	pt, err := cos.ParseBashTemplate(fileRange)
   139  	tassert.CheckFatal(t, err)
   140  
   141  	var (
   142  		xid        string
   143  		proxyURL   = tools.RandomProxyURL(t)
   144  		baseParams = tools.BaseAPIParams(proxyURL)
   145  
   146  		objList        = pt.ToSlice()
   147  		objCnt         = len(objList)
   148  		requestTimeout = 30 * time.Second
   149  		tcomsg         = cmn.TCObjsMsg{ToBck: bckTo}
   150  	)
   151  	tcomsg.Transform.Name = etlName
   152  	tcomsg.Transform.Timeout = cos.Duration(requestTimeout)
   153  
   154  	if opType == "list" {
   155  		tcomsg.ListRange.ObjNames = objList
   156  	} else {
   157  		tcomsg.ListRange.Template = fileRange
   158  	}
   159  
   160  	tlog.Logf("Starting multi-object ETL[%s] ...\n", etlName)
   161  	if evictRemoteSrc {
   162  		xid, err = api.ETLMultiObj(baseParams, bckFrom, &tcomsg, apc.FltExists)
   163  	} else {
   164  		xid, err = api.ETLMultiObj(baseParams, bckFrom, &tcomsg)
   165  	}
   166  	tassert.CheckFatal(t, err)
   167  
   168  	tlog.Logf("Running x-etl[%s]: %s => %s ...\n", xid, bckFrom.Cname(""), bckTo.Cname(""))
   169  
   170  	wargs := xact.ArgsMsg{ID: xid, Kind: apc.ActETLObjects}
   171  	err = api.WaitForXactionIdle(baseParams, &wargs)
   172  	tassert.CheckFatal(t, err)
   173  
   174  	list, err := api.ListObjects(baseParams, bckTo, nil, api.ListArgs{})
   175  	tassert.CheckFatal(t, err)
   176  	tassert.Errorf(t, len(list.Entries) == objCnt, "expected %d objects from offline ETL, got %d", objCnt, len(list.Entries))
   177  	for _, objName := range objList {
   178  		err := api.DeleteObject(baseParams, bckTo, objName)
   179  		tassert.CheckError(t, err)
   180  		tlog.Logf("%s\n", bckTo.Cname(objName))
   181  	}
   182  }