github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/tools/tetl/etl.go (about)

     1  // Package tetl provides helpers for ETL.
     2  /*
     3   * Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
     4   */
     5  package tetl
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"net/http"
    12  	"os"
    13  	"strings"
    14  	"testing"
    15  	"time"
    16  
    17  	"github.com/NVIDIA/aistore/api"
    18  	"github.com/NVIDIA/aistore/api/apc"
    19  	"github.com/NVIDIA/aistore/cmn"
    20  	"github.com/NVIDIA/aistore/cmn/cos"
    21  	"github.com/NVIDIA/aistore/cmn/k8s"
    22  	"github.com/NVIDIA/aistore/core/meta"
    23  	"github.com/NVIDIA/aistore/ext/etl"
    24  	"github.com/NVIDIA/aistore/tools"
    25  	"github.com/NVIDIA/aistore/tools/tassert"
    26  	"github.com/NVIDIA/aistore/tools/tlog"
    27  	"github.com/NVIDIA/aistore/xact"
    28  	corev1 "k8s.io/api/core/v1"
    29  )
    30  
const (
	// Pod annotation keys read by podTransformCommType and podTransformTimeout, respectively.
	commTypeAnnotation    = "communication_type"
	waitTimeoutAnnotation = "wait_timeout"

	// Names of the predefined transformers; each one is a key in `links`.
	Tar2TF        = "tar2tf"
	Echo          = "transformer-echo"
	EchoGolang    = "echo-go"
	MD5           = "transformer-md5"
	Tar2tfFilters = "tar2tf-filters"
	// tar2tfFilter is the JSON text that GetTransformYaml substitutes for the
	// OPTION_VALUE placeholder when the requested ETL is Tar2tfFilters.
	tar2tfFilter  = `
{
  "conversions": [
    { "type": "Decode", "ext_name": "png"},
    { "type": "Rotate", "ext_name": "png"}
  ],
  "selections": [
    { "ext_name": "png" },
    { "ext_name": "cls" }
  ]
}
`
)
    53  
var (
	// links maps each predefined transformer name to the URL of its pod spec (YAML).
	// Tar2TF and Tar2tfFilters point to the same spec; GetTransformYaml fills in
	// the OPTION_KEY/OPTION_VALUE placeholders only for Tar2tfFilters.
	links = map[string]string{
		MD5:           "https://raw.githubusercontent.com/NVIDIA/ais-etl/master/transformers/md5/pod.yaml",
		Tar2TF:        "https://raw.githubusercontent.com/NVIDIA/ais-etl/master/transformers/tar2tf/pod.yaml",
		Tar2tfFilters: "https://raw.githubusercontent.com/NVIDIA/ais-etl/master/transformers/tar2tf/pod.yaml",
		Echo:          "https://raw.githubusercontent.com/NVIDIA/ais-etl/master/transformers/echo/pod.yaml",
		EchoGolang:    "https://raw.githubusercontent.com/NVIDIA/ais-etl/master/transformers/go_echo/pod.yaml",
	}

	// client is the HTTP client GetTransformYaml uses to download pod specs.
	// NOTE(review): no Timeout is configured, so a stalled connection is not bounded
	// by the client itself - confirm whether the retry wrapper is expected to cover that.
	client = &http.Client{}
)
    65  
    66  func validateETLName(name string) error {
    67  	if _, ok := links[name]; !ok {
    68  		return fmt.Errorf("%s is invalid etlName, expected predefined (%s, %s, %s)", name, Echo, Tar2TF, MD5)
    69  	}
    70  	return nil
    71  }
    72  
    73  func GetTransformYaml(etlName string) ([]byte, error) {
    74  	if err := validateETLName(etlName); err != nil {
    75  		return nil, err
    76  	}
    77  
    78  	var resp *http.Response
    79  	// with retry in case github in unavailable for a moment
    80  	err := cmn.NetworkCallWithRetry(&cmn.RetryArgs{
    81  		Call: func() (code int, err error) {
    82  			resp, err = client.Get(links[etlName]) //nolint:bodyclose // see defer close below
    83  			return
    84  		},
    85  		Action:   "get transform yaml for ETL[" + etlName + "]",
    86  		SoftErr:  3,
    87  		HardErr:  1,
    88  		IsClient: true,
    89  	})
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	defer resp.Body.Close()
    94  
    95  	b, err := io.ReadAll(resp.Body)
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  
   100  	if resp.StatusCode != http.StatusOK {
   101  		return nil, fmt.Errorf("%s: %s", resp.Status, string(b))
   102  	}
   103  
   104  	specStr := os.Expand(string(b), func(v string) string {
   105  		// Hack: Neither os.Expand, nor os.ExpandEnv supports bash env variable default-value
   106  		// syntax. The whole ${VAR:-default} is matched as v.
   107  		if strings.Contains(v, "COMMUNICATION_TYPE") {
   108  			return etl.Hpull
   109  		}
   110  		if strings.Contains(v, "DOCKER_REGISTRY_URL") {
   111  			return "aistore"
   112  		}
   113  		if etlName == Tar2tfFilters {
   114  			if strings.Contains(v, "OPTION_KEY") {
   115  				return "--spec"
   116  			}
   117  			if strings.Contains(v, "OPTION_VALUE") {
   118  				return tar2tfFilter
   119  			}
   120  		}
   121  		return ""
   122  	})
   123  
   124  	return []byte(specStr), nil
   125  }
   126  
   127  func StopAndDeleteETL(t *testing.T, bp api.BaseParams, etlName string) {
   128  	if t.Failed() {
   129  		tlog.Logln("Fetching logs from ETL containers")
   130  		if logsByTarget, err := api.ETLLogs(bp, etlName); err == nil {
   131  			for _, etlLogs := range logsByTarget {
   132  				tlog.Logln(headETLLogs(etlLogs, 10*cos.KiB))
   133  			}
   134  		} else {
   135  			tlog.Logf("Error retrieving ETL[%s] logs: %v\n", etlName, err)
   136  		}
   137  	}
   138  	tlog.Logf("Stopping ETL[%s]\n", etlName)
   139  
   140  	if err := api.ETLStop(bp, etlName); err != nil {
   141  		tlog.Logf("Stopping ETL[%s] failed; err %v\n", etlName, err)
   142  	} else {
   143  		tlog.Logf("ETL[%s] stopped\n", etlName)
   144  	}
   145  	err := api.ETLDelete(bp, etlName)
   146  	tassert.CheckFatal(t, err)
   147  }
   148  
   149  func headETLLogs(etlLogs etl.Logs, maxLen int) string {
   150  	logs, l := etlLogs.Logs, len(etlLogs.Logs)
   151  	if maxLen < l {
   152  		logs = logs[:maxLen]
   153  	}
   154  	str := fmt.Sprintf("%s logs:\n%s", meta.Tname(etlLogs.TargetID), string(logs))
   155  	if maxLen < l {
   156  		str += fmt.Sprintf("\nand %d bytes more...", l-maxLen)
   157  	}
   158  	return str
   159  }
   160  
   161  func WaitForContainersStopped(t *testing.T, bp api.BaseParams) {
   162  	tlog.Logln("Waiting for ETL containers to stop...")
   163  	var (
   164  		etls         etl.InfoList
   165  		stopDeadline = time.Now().Add(20 * time.Second)
   166  		interval     = 2 * time.Second
   167  		err          error
   168  	)
   169  
   170  	for {
   171  		etls, err = api.ETLList(bp)
   172  		tassert.CheckFatal(t, err)
   173  		if len(etls) == 0 {
   174  			tlog.Logln("ETL containers stopped successfully")
   175  			return
   176  		}
   177  		if time.Now().After(stopDeadline) {
   178  			break
   179  		}
   180  		tlog.Logf("ETLs %+v still running, waiting %s... \n", etls, interval)
   181  		time.Sleep(interval)
   182  	}
   183  
   184  	err = fmt.Errorf("expected all ETLs to stop, got %+v still running", etls)
   185  	tassert.CheckFatal(t, err)
   186  }
   187  
   188  func WaitForAborted(bp api.BaseParams, xid, kind string, timeout time.Duration) error {
   189  	tlog.Logf("Waiting for ETL x-%s[%s] to abort...\n", kind, xid)
   190  	args := xact.ArgsMsg{ID: xid, Kind: kind, Timeout: timeout /* total timeout */}
   191  	status, err := api.WaitForXactionIC(bp, &args)
   192  	if err == nil {
   193  		if !status.Aborted() {
   194  			err = fmt.Errorf("expected ETL x-%s[%s] status to indicate 'abort', got: %+v", kind, xid, status)
   195  		}
   196  		return err
   197  	}
   198  	tlog.Logf("Aborting ETL x-%s[%s]\n", kind, xid)
   199  	if abortErr := api.AbortXaction(bp, &args); abortErr != nil {
   200  		tlog.Logf("Nested error: failed to abort upon api.wait failure: %v\n", abortErr)
   201  	}
   202  	return err
   203  }
   204  
   205  // NOTE: relies on x-kind to choose the waiting method
   206  // TODO -- FIXME: remove and simplify - here and everywhere
   207  func WaitForFinished(bp api.BaseParams, xid, kind string, timeout time.Duration) (err error) {
   208  	tlog.Logf("Waiting for ETL x-%s[%s] to finish...\n", kind, xid)
   209  	args := xact.ArgsMsg{ID: xid, Kind: kind, Timeout: timeout /* total timeout */}
   210  	if xact.IdlesBeforeFinishing(kind) {
   211  		err = api.WaitForXactionIdle(bp, &args)
   212  	} else {
   213  		_, err = api.WaitForXactionIC(bp, &args)
   214  	}
   215  	if err == nil {
   216  		return
   217  	}
   218  	tlog.Logf("Aborting ETL x-%s[%s]\n", kind, xid)
   219  	if abortErr := api.AbortXaction(bp, &args); abortErr != nil {
   220  		tlog.Logf("Nested error: failed to abort upon api.wait failure: %v\n", abortErr)
   221  	}
   222  	return err
   223  }
   224  
   225  func ReportXactionStatus(bp api.BaseParams, xid string, stopCh *cos.StopCh, interval time.Duration, totalObj int) {
   226  	go func() {
   227  		var (
   228  			xactStart = time.Now()
   229  			etlTicker = time.NewTicker(interval)
   230  		)
   231  		defer etlTicker.Stop()
   232  		for {
   233  			select {
   234  			case <-etlTicker.C:
   235  				// Check number of objects transformed.
   236  				xs, err := api.QueryXactionSnaps(bp, &xact.ArgsMsg{ID: xid})
   237  				if err != nil {
   238  					tlog.Logf("Failed to get x-etl[%s] stats: %v\n", xid, err)
   239  					continue
   240  				}
   241  				locObjs, outObjs, inObjs := xs.ObjCounts(xid)
   242  				tlog.Logf("ETL[%s] progress: (objs=%d, outObjs=%d, inObjs=%d) out of %d objects\n",
   243  					xid, locObjs, outObjs, inObjs, totalObj)
   244  				locBytes, outBytes, inBytes := xs.ByteCounts(xid)
   245  				bps := float64(locBytes+outBytes) / time.Since(xactStart).Seconds()
   246  				bpsStr := cos.ToSizeIEC(int64(bps), 2) + "/s"
   247  				tlog.Logf("ETL[%s] progress: (bytes=%d, outBytes=%d, inBytes=%d), %sBps\n",
   248  					xid, locBytes, outBytes, inBytes, bpsStr)
   249  			case <-stopCh.Listen():
   250  				return
   251  			}
   252  		}
   253  	}()
   254  }
   255  
   256  func InitSpec(t *testing.T, bp api.BaseParams, etlName, comm string) (xid string) {
   257  	tlog.Logf("InitSpec ETL[%s], communicator %s\n", etlName, comm)
   258  
   259  	msg := &etl.InitSpecMsg{}
   260  	msg.IDX = etlName
   261  	msg.CommTypeX = comm
   262  	spec, err := GetTransformYaml(etlName)
   263  	tassert.CheckFatal(t, err)
   264  	msg.Spec = spec
   265  	tassert.Fatalf(t, msg.Name() == etlName, "%q vs %q", msg.Name(), etlName) // assert
   266  
   267  	xid, err = api.ETLInit(bp, msg)
   268  	tassert.CheckFatal(t, err)
   269  	tassert.Errorf(t, cos.IsValidUUID(xid), "expected valid xaction ID, got %q", xid)
   270  
   271  	tlog.Logf("ETL %q: running x-etl-spec[%s]\n", etlName, xid)
   272  
   273  	// reread `InitMsg` and compare with the specified
   274  	etlMsg, err := api.ETLGetInitMsg(bp, etlName)
   275  	tassert.CheckFatal(t, err)
   276  
   277  	initSpec := etlMsg.(*etl.InitSpecMsg)
   278  	tassert.Errorf(t, initSpec.Name() == etlName, "expected etlName %s != %s", etlName, initSpec.Name())
   279  	tassert.Errorf(t, initSpec.CommType() == comm, "expected communicator type %s != %s", comm, initSpec.CommType())
   280  	tassert.Errorf(t, bytes.Equal(spec, initSpec.Spec), "pod specs differ")
   281  
   282  	return
   283  }
   284  
   285  func InitCode(t *testing.T, bp api.BaseParams, msg *etl.InitCodeMsg) (xid string) {
   286  	id, err := api.ETLInit(bp, msg)
   287  	tassert.CheckFatal(t, err)
   288  	tassert.Errorf(t, cos.IsValidUUID(id), "expected valid xaction ID, got %q", xid)
   289  	xid = id
   290  
   291  	// reread `InitMsg` and compare with the specified
   292  	etlMsg, err := api.ETLGetInitMsg(bp, msg.Name())
   293  	tassert.CheckFatal(t, err)
   294  
   295  	initCode := etlMsg.(*etl.InitCodeMsg)
   296  	tassert.Errorf(t, initCode.Name() == msg.Name(), "expected etlName %q != %q", msg.Name(), initCode.Name())
   297  	tassert.Errorf(t, msg.CommType() == "" || initCode.CommType() == msg.CommType(),
   298  		"expected communicator type %s != %s", msg.CommType(), initCode.CommType())
   299  	tassert.Errorf(t, msg.Runtime == initCode.Runtime, "expected runtime %s != %s", msg.Runtime, initCode.Runtime)
   300  	tassert.Errorf(t, bytes.Equal(msg.Code, initCode.Code), "ETL codes differ")
   301  	tassert.Errorf(t, bytes.Equal(msg.Deps, initCode.Deps), "ETL dependencies differ")
   302  
   303  	return
   304  }
   305  
   306  func ETLBucketWithCleanup(t *testing.T, bp api.BaseParams, bckFrom, bckTo cmn.Bck, msg *apc.TCBMsg) string {
   307  	xid, err := api.ETLBucket(bp, bckFrom, bckTo, msg)
   308  	tassert.CheckFatal(t, err)
   309  
   310  	t.Cleanup(func() {
   311  		tools.DestroyBucket(t, bp.URL, bckTo)
   312  	})
   313  
   314  	tlog.Logf("ETL[%s]: running %s => %s xaction %q\n",
   315  		msg.Transform.Name, bckFrom.Cname(""), bckTo.Cname(""), xid)
   316  	return xid
   317  }
   318  
   319  func ETLShouldBeRunning(t *testing.T, params api.BaseParams, etlName string) {
   320  	etls, err := api.ETLList(params)
   321  	tassert.CheckFatal(t, err)
   322  	for _, etl := range etls {
   323  		if etlName == etl.Name {
   324  			return
   325  		}
   326  	}
   327  	t.Fatalf("etl[%s] is not running (%v)", etlName, etls)
   328  }
   329  
   330  func ETLShouldNotBeRunning(t *testing.T, params api.BaseParams, etlName string) {
   331  	etls, err := api.ETLList(params)
   332  	tassert.CheckFatal(t, err)
   333  	for _, etl := range etls {
   334  		if etlName == etl.Name {
   335  			t.Fatalf("expected etl[%s] to be stopped (%v)", etlName, etls)
   336  		}
   337  	}
   338  }
   339  
   340  func CheckNoRunningETLContainers(t *testing.T, params api.BaseParams) {
   341  	etls, err := api.ETLList(params)
   342  	tassert.CheckFatal(t, err)
   343  	tassert.Fatalf(t, len(etls) == 0, "Expected no ETL running, got %+v", etls)
   344  }
   345  
   346  func SpecToInitMsg(spec []byte /*yaml*/) (msg *etl.InitSpecMsg, err error) {
   347  	errCtx := &cmn.ETLErrCtx{}
   348  	msg = &etl.InitSpecMsg{Spec: spec}
   349  	pod, err := etl.ParsePodSpec(errCtx, msg.Spec)
   350  	if err != nil {
   351  		return msg, err
   352  	}
   353  	errCtx.ETLName = pod.GetName()
   354  	msg.IDX = pod.GetName()
   355  
   356  	if err := k8s.ValidateEtlName(msg.IDX); err != nil {
   357  		return msg, err
   358  	}
   359  	// Check annotations.
   360  	msg.CommTypeX = podTransformCommType(pod)
   361  	if msg.Timeout, err = podTransformTimeout(errCtx, pod); err != nil {
   362  		return msg, err
   363  	}
   364  
   365  	return msg, msg.Validate()
   366  }
   367  
   368  func podTransformCommType(pod *corev1.Pod) string {
   369  	if pod.Annotations == nil || pod.Annotations[commTypeAnnotation] == "" {
   370  		// By default assume `Hpush`.
   371  		return etl.Hpush
   372  	}
   373  	return pod.Annotations[commTypeAnnotation]
   374  }
   375  
   376  func podTransformTimeout(errCtx *cmn.ETLErrCtx, pod *corev1.Pod) (cos.Duration, error) {
   377  	if pod.Annotations == nil || pod.Annotations[waitTimeoutAnnotation] == "" {
   378  		return 0, nil
   379  	}
   380  
   381  	v, err := time.ParseDuration(pod.Annotations[waitTimeoutAnnotation])
   382  	if err != nil {
   383  		return cos.Duration(v), cmn.NewErrETL(errCtx, err.Error()).WithPodName(pod.Name)
   384  	}
   385  	return cos.Duration(v), nil
   386  }