github.com/smartcontractkit/chainlink-testing-framework/libs@v0.0.0-20240227141906-ec710b4eb1a3/k8s/environment/environment.go

     1  package environment
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"os"
     8  	"os/signal"
     9  	"strconv"
    10  	"strings"
    11  	"syscall"
    12  	"testing"
    13  	"time"
    14  
    15  	"github.com/cdk8s-team/cdk8s-core-go/cdk8s/v2"
    16  	"github.com/go-resty/resty/v2"
    17  	"github.com/google/uuid"
    18  	"github.com/imdario/mergo"
    19  	"github.com/rs/zerolog/log"
    20  	"github.com/stretchr/testify/require"
    21  
    22  	"github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/client"
    23  	"github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/config"
    24  	"github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/imports/k8s"
    25  	"github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/pkg"
    26  	a "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/pkg/alias"
    27  	"github.com/smartcontractkit/chainlink-testing-framework/libs/logging"
    28  	"github.com/smartcontractkit/chainlink-testing-framework/libs/utils/ptr"
    29  	"github.com/smartcontractkit/chainlink-testing-framework/libs/utils/testcontext"
    30  )
    31  
    32  const (
    33  	COVERAGE_DIR       string = "cover"
    34  	FAILED_FUND_RETURN string = "FAILED_FUND_RETURN"
    35  	TEST_FAILED        string = "TEST_FAILED"
    36  )
    37  
    38  const (
     39  	ErrInvalidOCI string = "OCI chart URL should be in the format oci://$ECR_URL/$ECR_REGISTRY_NAME/$CHART_NAME[:$CHART_VERSION], was %s"
    40  	ErrOCIPull    string = "failed to pull OCI repo: %s"
    41  )
    42  
    43  var (
    44  	defaultNamespaceAnnotations = map[string]*string{
    45  		"prometheus.io/scrape":                             ptr.Ptr("true"),
    46  		"backyards.banzaicloud.io/image-registry-access":   ptr.Ptr("true"),
    47  		"backyards.banzaicloud.io/public-dockerhub-access": ptr.Ptr("true"),
    48  	}
    49  )
    50  
     51  // ConnectedChart is an interface to interact with both cdk8s apps and helm charts
     52  type ConnectedChart interface {
     53  	// IsDeploymentNeeded reports whether the chart should be deployed:
     54  	// true - we deploy/connect and expose environment data
     55  	// false - we use an external environment, but still expose data
     56  	IsDeploymentNeeded() bool
     57  	// GetName gets the name of the deployed part
     58  	GetName() string
     59  	// GetPath gets the Helm chart path, either a repo or a local path
     60  	GetPath() string
     61  	// GetVersion gets the chart's version, empty string if none is specified
     62  	GetVersion() string
     63  	// GetProps gets code props if it's a typed environment
     64  	GetProps() any
     65  	// GetValues gets values.yaml props as a map, if it's a Helm chart
     66  	GetValues() *map[string]any
     67  	// ExportData exports the deployment part's data into the environment
     68  	ExportData(e *Environment) error
    69  }
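
// A minimal sketch of a ConnectedChart implementation, kept here for illustration only
// (the type and field names are hypothetical; real charts in this framework typically
// live in their own packages and carry typed Props):
//
//	type exampleChart struct {
//		name   string
//		path   string
//		values *map[string]any
//	}
//
//	func (c exampleChart) IsDeploymentNeeded() bool        { return true }
//	func (c exampleChart) GetName() string                 { return c.name }
//	func (c exampleChart) GetPath() string                 { return c.path }
//	func (c exampleChart) GetVersion() string              { return "" }
//	func (c exampleChart) GetProps() any                   { return nil }
//	func (c exampleChart) GetValues() *map[string]any      { return c.values }
//	func (c exampleChart) ExportData(e *Environment) error { return nil }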
    70  
     71  // Config is the common environment configuration: labels, annotations, connection types, readiness checks, etc.
    72  type Config struct {
    73  	// TTL is time to live for the environment, used with kube-janitor
    74  	TTL time.Duration
    75  	// NamespacePrefix is a static namespace prefix
    76  	NamespacePrefix string
    77  	// Namespace is full namespace name
    78  	Namespace string
    79  	// Labels is a set of labels applied to the namespace in a format of "key=value"
    80  	Labels []string
    81  	// PodLabels is a set of labels applied to every pod in the namespace
    82  	PodLabels map[string]string
    83  	// PreventPodEviction if true sets a k8s annotation safe-to-evict=false to prevent pods from being evicted
    84  	// Note: This should only be used if your test is completely incapable of handling things like K8s rebalances without failing.
    85  	// If that is the case, it's worth the effort to make your test fault-tolerant soon. The alternative is expensive and infuriating.
    86  	PreventPodEviction bool
    87  	// Allow deployment to nodes with these tolerances
    88  	Tolerations []map[string]string
    89  	// Restrict deployment to only nodes matching a particular node role
    90  	NodeSelector map[string]string
     91  	// ReadyCheckData is the settings for readiness probe checks for all deployment components,
     92  	// by default checking that all pods are ready within a 15-minute timeout:
    93  	//	&client.ReadyCheckData{
    94  	//		ReadinessProbeCheckSelector: "",
    95  	//		Timeout:                     15 * time.Minute,
    96  	//	}
    97  	ReadyCheckData *client.ReadyCheckData
    98  	// DryRun if true, app will just generate a manifest in local dir
    99  	DryRun bool
   100  	// InsideK8s used for long-running soak tests where you connect to env from the inside
   101  	InsideK8s bool
   102  	// NoManifestUpdate is a flag to skip manifest updating when connecting
   103  	NoManifestUpdate bool
   104  	// KeepConnection keeps connection until interrupted with a signal, useful when prototyping and debugging a new env
   105  	KeepConnection bool
   106  	// RemoveOnInterrupt automatically removes an environment on interrupt
   107  	RemoveOnInterrupt bool
    108  	// UpdateWaitInterval is an interval to wait after applying the manifest so a deployment update has started before readiness checks
   109  	UpdateWaitInterval time.Duration
   110  
   111  	// Remote Runner Specific Variables //
    112  	// JobImage is an image used to run the environment as a job inside k8s
    113  	JobImage string
    114  	// JobLogFunction is a function that is run on each log message from the remote runner job
    115  	JobLogFunction func(*Environment, string)
    116  	// Test is the current *testing.T of the test
    117  	Test *testing.T
   118  	// jobDeployed used to limit us to 1 remote runner deploy
   119  	jobDeployed bool
   120  	// detachRunner should we detach the remote runner after starting the test
   121  	detachRunner bool
   122  	// fundReturnFailed the status of a fund return
   123  	fundReturnFailed bool
   124  }
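
// A hedged example of a typical Config from a test's (caller's) perspective; the field
// values below are illustrative only, and `t` is assumed to be the current *testing.T:
//
//	cfg := &environment.Config{
//		NamespacePrefix:  "my-feature-test",
//		TTL:              30 * time.Minute,
//		Labels:           []string{"team=example"},
//		KeepConnection:   false,
//		NoManifestUpdate: false,
//		Test:             t,
//	}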
   125  
   126  func defaultEnvConfig() *Config {
   127  	return &Config{
   128  		TTL:                20 * time.Minute,
   129  		NamespacePrefix:    "chainlink-test-env",
   130  		UpdateWaitInterval: 1 * time.Second,
   131  		ReadyCheckData: &client.ReadyCheckData{
   132  			ReadinessProbeCheckSelector: "",
   133  			Timeout:                     15 * time.Minute,
   134  		},
   135  	}
   136  }
   137  
   138  // Environment describes a launched test environment
   139  type Environment struct {
   140  	App                  cdk8s.App
   141  	CurrentManifest      string
   142  	root                 cdk8s.Chart
    143  	Charts               []ConnectedChart  // All connected charts in the environment
   144  	Cfg                  *Config           // The environment specific config
   145  	Client               *client.K8sClient // Client connecting to the K8s cluster
   146  	Fwd                  *client.Forwarder // Used to forward ports from local machine to the K8s cluster
   147  	Artifacts            *Artifacts
   148  	Chaos                *client.Chaos
   149  	httpClient           *resty.Client
   150  	URLs                 map[string][]string    // General URLs of launched resources. Uses '_local' to delineate forwarded ports
   151  	ChainlinkNodeDetails []*ChainlinkNodeDetail // ChainlinkNodeDetails has convenient details for connecting to chainlink deployments
   152  	err                  error
   153  }
   154  
   155  // ChainlinkNodeDetail contains details about a chainlink node deployment
   156  type ChainlinkNodeDetail struct {
   157  	// ChartName details the name of the Helm chart this node uses, handy for modifying deployment values
   158  	// Note: if you are using replicas of the same chart, this will be the same for all nodes
   159  	// Use NewDeployment function for Chainlink nodes to make use of this
   160  	ChartName string
   161  	// PodName is the name of the pod running the chainlink node
   162  	PodName string
   163  	// LocalIP is the URL to connect to the node from the local machine
   164  	LocalIP string
   165  	// InternalIP is the URL to connect to the node from inside the K8s cluster
   166  	InternalIP string
   167  	// DBLocalIP is the URL to connect to the node's database from the local machine
   168  	DBLocalIP string
   169  }
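
// A hedged example of consuming these details after the environment has been run
// (output format is illustrative):
//
//	for _, node := range e.ChainlinkNodeDetails {
//		fmt.Printf("chart=%s pod=%s local=%s internal=%s\n",
//			node.ChartName, node.PodName, node.LocalIP, node.InternalIP)
//	}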
   170  
    171  // New creates a new environment
   172  func New(cfg *Config) *Environment {
   173  	logging.Init()
   174  	if cfg == nil {
   175  		cfg = &Config{}
   176  	}
   177  	targetCfg := defaultEnvConfig()
   178  	config.MustMerge(targetCfg, cfg)
   179  	ns := os.Getenv(config.EnvVarNamespace)
   180  	if ns != "" {
   181  		cfg.Namespace = ns
   182  	}
   183  	if cfg.Namespace != "" {
   184  		log.Info().Str("Namespace", cfg.Namespace).Msg("Namespace selected")
   185  		targetCfg.Namespace = cfg.Namespace
   186  	} else {
   187  		targetCfg.Namespace = fmt.Sprintf("%s-%s", targetCfg.NamespacePrefix, uuid.NewString()[0:5])
   188  		log.Info().Str("Namespace", targetCfg.Namespace).Msg("Creating new namespace")
   189  	}
   190  	jobImage := os.Getenv(config.EnvVarJobImage)
   191  	if jobImage != "" {
   192  		targetCfg.JobImage = jobImage
   193  		targetCfg.detachRunner, _ = strconv.ParseBool(os.Getenv(config.EnvVarDetachRunner))
   194  	} else {
   195  		targetCfg.InsideK8s, _ = strconv.ParseBool(os.Getenv(config.EnvVarInsideK8s))
   196  	}
   197  
   198  	c, err := client.NewK8sClient()
   199  	if err != nil {
   200  		return &Environment{err: err}
   201  	}
   202  	e := &Environment{
   203  		URLs:   make(map[string][]string),
   204  		Charts: make([]ConnectedChart, 0),
   205  		Client: c,
   206  		Cfg:    targetCfg,
   207  		Fwd:    client.NewForwarder(c, targetCfg.KeepConnection),
   208  	}
   209  	arts, err := NewArtifacts(e.Client, e.Cfg.Namespace)
   210  	if err != nil {
   211  		log.Error().Err(err).Msg("failed to create artifacts client")
   212  		return &Environment{err: err}
   213  	}
   214  	e.Artifacts = arts
   215  
   216  	config.JSIIGlobalMu.Lock()
   217  	defer config.JSIIGlobalMu.Unlock()
   218  	if err := e.initApp(); err != nil {
   219  		log.Error().Err(err).Msg("failed to apply the initial manifest to create the namespace")
   220  		return &Environment{err: err}
   221  	}
   222  	e.Chaos = client.NewChaos(c, e.Cfg.Namespace)
   223  
    224  	// set up test cleanup if this is using a remote runner,
    225  	// is not in detached mode,
    226  	// and is not using an existing environment
   227  	if targetCfg.JobImage != "" && !targetCfg.detachRunner && !targetCfg.NoManifestUpdate {
   228  		targetCfg.fundReturnFailed = false
   229  		if targetCfg.Test != nil {
   230  			targetCfg.Test.Cleanup(func() {
   231  				err := e.Shutdown()
   232  				require.NoError(targetCfg.Test, err)
   233  			})
   234  		}
   235  	}
   236  	return e
   237  }
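
// A hedged usage sketch from a test's perspective: build an environment, add a chart,
// and run it. `somechart.New(nil)` is a placeholder for any constructor returning a
// ConnectedChart, and `t` is assumed to be the current *testing.T:
//
//	e := environment.New(&environment.Config{
//		NamespacePrefix: "example",
//		Test:            t,
//	}).AddHelm(somechart.New(nil))
//	if err := e.Run(); err != nil {
//		t.Fatal(err)
//	}
//	t.Cleanup(func() {
//		_ = e.Shutdown()
//	})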
   238  
   239  func (m *Environment) initApp() error {
   240  	var err error
   241  	m.App = cdk8s.NewApp(&cdk8s.AppProps{
   242  		YamlOutputType: cdk8s.YamlOutputType_FILE_PER_APP,
   243  	})
   244  	m.Cfg.Labels = append(m.Cfg.Labels, "app.kubernetes.io/managed-by=cdk8s")
   245  	owner := os.Getenv(config.EnvVarUser)
   246  	if owner == "" {
   247  		return fmt.Errorf("missing owner environment variable, please set %s to your name or if you are seeing this in CI please set it to ${{ github.actor }}", config.EnvVarUser)
   248  	}
   249  	m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("owner=%s", owner))
   250  
   251  	if os.Getenv(config.EnvVarCLCommitSha) != "" {
   252  		m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("commit=%s", os.Getenv(config.EnvVarCLCommitSha)))
   253  	}
   254  	testTrigger := os.Getenv(config.EnvVarTestTrigger)
   255  	if testTrigger == "" {
   256  		testTrigger = "manual"
   257  	}
   258  	m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("triggered-by=%s", testTrigger))
   259  
   260  	if tolerationRole := os.Getenv(config.EnvVarToleration); tolerationRole != "" {
   261  		m.Cfg.Tolerations = []map[string]string{{
   262  			"key":      "node-role",
   263  			"operator": "Equal",
   264  			"value":    tolerationRole,
   265  			"effect":   "NoSchedule",
   266  		}}
   267  	}
   268  
   269  	if selectorRole := os.Getenv(config.EnvVarNodeSelector); selectorRole != "" {
   270  		m.Cfg.NodeSelector = map[string]string{
   271  			"node-role": selectorRole,
   272  		}
   273  	}
   274  
   275  	nsLabels, err := a.ConvertLabels(m.Cfg.Labels)
   276  	if err != nil {
   277  		return err
   278  	}
   279  	defaultNamespaceAnnotations[pkg.TTLLabelKey] = a.ShortDur(m.Cfg.TTL)
   280  	m.root = cdk8s.NewChart(m.App, ptr.Ptr(fmt.Sprintf("root-chart-%s", m.Cfg.Namespace)), &cdk8s.ChartProps{
   281  		Labels:    nsLabels,
   282  		Namespace: ptr.Ptr(m.Cfg.Namespace),
   283  	})
   284  	k8s.NewKubeNamespace(m.root, ptr.Ptr("namespace"), &k8s.KubeNamespaceProps{
   285  		Metadata: &k8s.ObjectMeta{
   286  			Name:        ptr.Ptr(m.Cfg.Namespace),
   287  			Labels:      nsLabels,
   288  			Annotations: &defaultNamespaceAnnotations,
   289  		},
   290  	})
   291  	if m.Cfg.PreventPodEviction {
   292  		zero := float64(0)
   293  		k8s.NewKubePodDisruptionBudget(m.root, ptr.Ptr("pdb"), &k8s.KubePodDisruptionBudgetProps{
   294  			Metadata: &k8s.ObjectMeta{
   295  				Name:      ptr.Ptr("clenv-pdb"),
   296  				Namespace: ptr.Ptr(m.Cfg.Namespace),
   297  			},
   298  			Spec: &k8s.PodDisruptionBudgetSpec{
   299  				MaxUnavailable: k8s.IntOrString_FromNumber(&zero),
   300  				Selector: &k8s.LabelSelector{
   301  					MatchLabels: &map[string]*string{
   302  						pkg.NamespaceLabelKey: ptr.Ptr(m.Cfg.Namespace),
   303  					},
   304  				},
   305  			},
   306  		})
   307  	}
   308  	m.CurrentManifest = *m.App.SynthYaml()
    309  	// retry applying the initial manifest (namespace and other basics) in a loop until success or timeout
   310  	ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout)
   311  	defer cancel()
   312  	startTime := time.Now()
   313  	deadline, _ := ctx.Deadline()
   314  	for {
   315  		err = m.Client.Apply(ctx, m.CurrentManifest, m.Cfg.Namespace, true)
   316  		if err == nil || ctx.Err() != nil {
   317  			break
   318  		}
   319  		elapsed := time.Since(startTime)
   320  		remaining := time.Until(deadline)
    321  		log.Debug().Err(err).Msgf("Failed to apply initial manifest, will continue to retry. Time elapsed: %s, time until timeout: %s", elapsed, remaining)
   322  		time.Sleep(5 * time.Second)
   323  	}
   324  	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
   325  		return fmt.Errorf("failed to apply manifest within %s", m.Cfg.ReadyCheckData.Timeout)
   326  	}
   327  	if m.Cfg.PodLabels == nil {
   328  		m.Cfg.PodLabels = map[string]string{}
   329  	}
   330  	m.Cfg.PodLabels[pkg.NamespaceLabelKey] = m.Cfg.Namespace
   331  	return err
   332  }
   333  
   334  // AddChart adds a chart to the deployment
   335  func (m *Environment) AddChart(f func(root cdk8s.Chart) ConnectedChart) *Environment {
   336  	if m.err != nil {
   337  		return m
   338  	}
   339  	config.JSIIGlobalMu.Lock()
   340  	defer config.JSIIGlobalMu.Unlock()
   341  	m.Charts = append(m.Charts, f(m.root))
   342  	return m
   343  }
   344  
   345  func (m *Environment) removeChart(name string) error {
   346  	chartIndex, _, err := m.findChart(name)
   347  	if err != nil {
   348  		return err
   349  	}
   350  	m.Charts = append(m.Charts[:chartIndex], m.Charts[chartIndex+1:]...)
   351  	m.root.Node().TryRemoveChild(ptr.Ptr(name))
   352  	return nil
   353  }
   354  
   355  // findChart finds a chart by name, returning the index of it in the Charts slice, and the chart itself
   356  func (m *Environment) findChart(name string) (index int, chart ConnectedChart, err error) {
   357  	for i, c := range m.Charts {
   358  		if c.GetName() == name {
   359  			return i, c, nil
   360  		}
   361  	}
   362  	return -1, nil, fmt.Errorf("chart %s not found", name)
   363  }
   364  
   365  // ReplaceHelm entirely replaces an existing helm chart with a new one
   366  // Note: you need to call Run() after this to apply the changes. If you're modifying ConfigMap values, you'll probably
    367  // need to use RolloutStatefulSets to apply the changes to the pods. https://stackoverflow.com/questions/57356521/rollingupdate-for-stateful-set-doesnt-restart-pods-and-changes-from-updated-con
   368  func (m *Environment) ReplaceHelm(name string, chart ConnectedChart) (*Environment, error) {
   369  	if m.err != nil {
   370  		return nil, m.err
   371  	}
   372  	config.JSIIGlobalMu.Lock()
   373  	defer config.JSIIGlobalMu.Unlock()
   374  	if err := m.removeChart(name); err != nil {
   375  		return nil, err
   376  	}
   377  	if m.Cfg.JobImage != "" || !chart.IsDeploymentNeeded() {
   378  		return m, fmt.Errorf("cannot modify helm chart '%s' that does not need deployment, it may be in a remote runner or detached mode", name)
   379  	}
   380  	log.Trace().
   381  		Str("Chart", chart.GetName()).
   382  		Str("Path", chart.GetPath()).
   383  		Interface("Props", chart.GetProps()).
   384  		Interface("Values", chart.GetValues()).
   385  		Msg("Chart deployment values")
   386  	h := cdk8s.NewHelm(m.root, ptr.Ptr(chart.GetName()), &cdk8s.HelmProps{
   387  		Chart: ptr.Ptr(chart.GetPath()),
   388  		HelmFlags: &[]*string{
   389  			ptr.Ptr("--namespace"),
   390  			ptr.Ptr(m.Cfg.Namespace),
   391  		},
   392  		ReleaseName: ptr.Ptr(chart.GetName()),
   393  		Values:      chart.GetValues(),
   394  	})
   395  	addDefaultPodAnnotationsAndLabels(h, markNotSafeToEvict(m.Cfg.PreventPodEviction, nil), m.Cfg.PodLabels)
   396  	m.Charts = append(m.Charts, chart)
   397  	return m, nil
   398  }
   399  
   400  func addDefaultPodAnnotationsAndLabels(h cdk8s.Helm, annotations, labels map[string]string) {
    401  	annotationsCopy := map[string]string{}
    402  	for k, v := range annotations {
    403  		annotationsCopy[k] = v
   404  	}
   405  	for _, ao := range *h.ApiObjects() {
   406  		switch *ao.Kind() {
   407  		case "Deployment", "ReplicaSet", "StatefulSet":
    408  			// annotations aren't guaranteed to exist, so we have to dig down to find any that do
    409  			// and merge them into the set we want to add
    410  			aj := *ao.Chart().ToJson()
    411  			// loop over the JSON array until we find the expected kind, then look for existing annotations
   412  			for _, dep := range aj {
   413  				l := fmt.Sprint(dep)
   414  				if !strings.Contains(l, fmt.Sprintf("kind:%s", *ao.Kind())) {
   415  					continue
   416  				}
   417  				depM := dep.(map[string]interface{})
   418  				spec, ok := depM["spec"].(map[string]interface{})
   419  				if !ok {
   420  					continue
   421  				}
   422  				template, ok := spec["template"].(map[string]interface{})
   423  				if !ok {
   424  					continue
   425  				}
   426  				metadata, ok := template["metadata"].(map[string]interface{})
   427  				if !ok {
   428  					continue
   429  				}
   430  				annot, ok := metadata["annotations"].(map[string]interface{})
   431  				if !ok {
   432  					continue
   433  				}
   434  				for k, v := range annot {
    435  					annotationsCopy[k] = v.(string)
   436  				}
   437  			}
    438  			ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr("/spec/template/metadata/annotations"), annotationsCopy))
   439  
    440  			// loop over the labels and apply them to both the pod template labels and the selectors;
    441  			// these should in theory always have at least one label/selector combo, so we don't need
    442  			// the existence check we do for the annotations
   443  			for k, v := range labels {
   444  				// Escape the keys according to JSON Pointer syntax in RFC 6901
   445  				escapedKey := strings.ReplaceAll(strings.ReplaceAll(k, "~", "~0"), "/", "~1")
   446  				ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr(fmt.Sprintf("/spec/template/metadata/labels/%s", escapedKey)), v))
   447  				ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr(fmt.Sprintf("/spec/selector/matchLabels/%s", escapedKey)), v))
   448  			}
   449  		}
   450  	}
   451  }
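
// For reference, the RFC 6901 escaping above maps a label key such as
// "app.kubernetes.io/name" to the JSON Pointer segment "app.kubernetes.io~1name",
// so the resulting patch path looks like:
//
//	/spec/template/metadata/labels/app.kubernetes.io~1name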
   452  
    453  // UpdateHelm updates a helm chart with new values. The pod will launch with an `updated=true` label if it's a Chainlink node.
    454  // Note: If you're modifying ConfigMap values, you'll probably need to use RolloutStatefulSets to apply the changes to the pods.
   455  // https://stackoverflow.com/questions/57356521/rollingupdate-for-stateful-set-doesnt-restart-pods-and-changes-from-updated-con
   456  func (m *Environment) UpdateHelm(name string, values map[string]any) (*Environment, error) {
   457  	if m.err != nil {
   458  		return nil, m.err
   459  	}
   460  	_, chart, err := m.findChart(name)
   461  	if err != nil {
   462  		return nil, err
   463  	}
   464  	if _, labelsExist := values["labels"]; !labelsExist {
   465  		values["labels"] = make(map[string]*string)
   466  	}
   467  	values["labels"].(map[string]*string)["updated"] = ptr.Ptr("true")
   468  	if err = mergo.Merge(chart.GetValues(), values, mergo.WithOverride); err != nil {
   469  		return nil, err
   470  	}
   471  	return m.ReplaceHelm(name, chart)
   472  }
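
// A hedged sketch of updating a deployed chart's values and re-running with the
// `updated=true` readiness check. The chart name, values, and pod count are
// illustrative, and the label-based check assumes the chart applies the
// `updated=true` label (Chainlink charts do, per the note above):
//
//	if _, err := e.UpdateHelm("chainlink-0", map[string]any{
//		"replicas": 2,
//	}); err != nil {
//		t.Fatal(err)
//	}
//	if err := e.RunUpdated(2); err != nil {
//		t.Fatal(err)
//	}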
   473  
   474  // AddHelmCharts adds multiple helm charts to the testing environment
   475  func (m *Environment) AddHelmCharts(charts []ConnectedChart) *Environment {
   476  	if m.err != nil {
   477  		return m
   478  	}
   479  	for _, c := range charts {
   480  		m.AddHelm(c)
   481  	}
   482  	return m
   483  }
   484  
   485  // AddHelm adds a helm chart to the testing environment
   486  func (m *Environment) AddHelm(chart ConnectedChart) *Environment {
   487  	if m.err != nil {
   488  		return m
   489  	}
   490  	if m.Cfg.JobImage != "" || !chart.IsDeploymentNeeded() {
   491  		return m
   492  	}
   493  	config.JSIIGlobalMu.Lock()
   494  	defer config.JSIIGlobalMu.Unlock()
   495  
   496  	values := &map[string]any{
   497  		"tolerations":  m.Cfg.Tolerations,
   498  		"nodeSelector": m.Cfg.NodeSelector,
   499  	}
   500  	config.MustMerge(values, chart.GetValues())
   501  	log.Trace().
   502  		Str("Chart", chart.GetName()).
   503  		Str("Path", chart.GetPath()).
   504  		Interface("Props", chart.GetProps()).
   505  		Interface("Values", values).
   506  		Msg("Chart deployment values")
   507  	helmFlags := []*string{
   508  		ptr.Ptr("--namespace"),
   509  		ptr.Ptr(m.Cfg.Namespace),
   510  		ptr.Ptr("--skip-tests"),
   511  	}
   512  	if chart.GetVersion() != "" {
   513  		helmFlags = append(helmFlags, ptr.Ptr("--version"), ptr.Ptr(chart.GetVersion()))
   514  	}
   515  	chartPath, err := m.PullOCIChart(chart)
   516  	if err != nil {
   517  		m.err = err
   518  		return m
   519  	}
   520  	h := cdk8s.NewHelm(m.root, ptr.Ptr(chart.GetName()), &cdk8s.HelmProps{
   521  		Chart:       ptr.Ptr(chartPath),
   522  		HelmFlags:   &helmFlags,
   523  		ReleaseName: ptr.Ptr(chart.GetName()),
   524  		Values:      values,
   525  	})
   526  	addDefaultPodAnnotationsAndLabels(h, markNotSafeToEvict(m.Cfg.PreventPodEviction, nil), m.Cfg.PodLabels)
   527  	m.Charts = append(m.Charts, chart)
   528  	return m
   529  }
   530  
    531  // PullOCIChart handles working with OCI-format repositories
    532  // https://helm.sh/docs/topics/registries/
    533  // The API is not compatible between classic Helm repos and OCI repos, so we download and untar the chart locally
   534  func (m *Environment) PullOCIChart(chart ConnectedChart) (string, error) {
   535  	if !strings.HasPrefix(chart.GetPath(), "oci") {
   536  		return chart.GetPath(), nil
   537  	}
   538  	cp := strings.Split(chart.GetPath(), "/")
   539  	if len(cp) != 5 {
   540  		return "", fmt.Errorf(ErrInvalidOCI, chart.GetPath())
   541  	}
   542  	sp := strings.Split(chart.GetPath(), ":")
   543  
   544  	var cmd string
   545  	var chartName string
   546  	chartName = cp[len(cp)-1]
   547  	chartDir := uuid.NewString()
   548  	switch len(sp) {
   549  	case 2:
   550  		cmd = fmt.Sprintf("helm pull %s --untar --untardir %s", chart.GetPath(), chartDir)
   551  	case 3:
   552  		chartName = strings.Split(chartName, ":")[0]
   553  		cmd = fmt.Sprintf("helm pull %s --version %s --untar --untardir %s", fmt.Sprintf("%s:%s", sp[0], sp[1]), sp[2], chartDir)
   554  	default:
   555  		return "", fmt.Errorf(ErrInvalidOCI, chart.GetPath())
   556  	}
   557  	log.Info().Str("CMD", cmd).Msg("Running helm cmd")
   558  	if err := client.ExecCmd(cmd); err != nil {
   559  		return "", fmt.Errorf(ErrOCIPull, chart.GetPath())
   560  	}
   561  	localChartPath := fmt.Sprintf("%s/%s/", chartDir, chartName)
   562  	log.Info().Str("Path", localChartPath).Msg("Local chart path")
   563  	return localChartPath, nil
   564  }
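
// For reference, a hedged example of the two accepted OCI path forms and the helm
// commands they translate to (registry host and chart names are illustrative; <uuid>
// is the generated untar directory):
//
//	oci://<account>.dkr.ecr.<region>.amazonaws.com/my-charts/mockserver
//	    -> helm pull oci://<account>.dkr.ecr.<region>.amazonaws.com/my-charts/mockserver --untar --untardir <uuid>
//	oci://<account>.dkr.ecr.<region>.amazonaws.com/my-charts/mockserver:1.0.2
//	    -> helm pull oci://<account>.dkr.ecr.<region>.amazonaws.com/my-charts/mockserver --version 1.0.2 --untar --untardir <uuid>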
   565  
    566  // PrintExportData exports each chart's data and prints the connection URLs
   567  func (m *Environment) PrintExportData() error {
   568  	m.URLs = make(map[string][]string)
   569  	for _, c := range m.Charts {
   570  		err := c.ExportData(m)
   571  		if err != nil {
   572  			return err
   573  		}
   574  	}
   575  	log.Debug().Interface("URLs", m.URLs).Msg("Connection URLs")
   576  	return nil
   577  }
   578  
    579  // DumpLogs dumps all pod logs under the given path (a default path is generated if empty)
   580  func (m *Environment) DumpLogs(path string) error {
   581  	arts, err := NewArtifacts(m.Client, m.Cfg.Namespace)
   582  	if err != nil {
   583  		return err
   584  	}
   585  	if path == "" {
   586  		path = fmt.Sprintf("logs/%s-%d", m.Cfg.Namespace, time.Now().Unix())
   587  	}
   588  	return arts.DumpTestResult(path, "chainlink")
   589  }
   590  
   591  // ResourcesSummary returns resources summary for selected pods as a map, used in reports
   592  func (m *Environment) ResourcesSummary(selector string) (map[string]map[string]string, error) {
   593  	pl, err := m.Client.ListPods(m.Cfg.Namespace, selector)
   594  	if err != nil {
   595  		return nil, err
   596  	}
   597  	if len(pl.Items) == 0 {
   598  		return nil, fmt.Errorf("no pods found for selector: %s", selector)
   599  	}
   600  	resources := make(map[string]map[string]string)
   601  	for _, p := range pl.Items {
   602  		for _, c := range p.Spec.Containers {
   603  			if resources[c.Name] == nil {
   604  				resources[c.Name] = make(map[string]string)
   605  			}
   606  			cpuRes := c.Resources.Requests["cpu"]
   607  			resources[c.Name]["cpu"] = cpuRes.String()
   608  			memRes := c.Resources.Requests["memory"]
   609  			resources[c.Name]["memory"] = memRes.String()
   610  		}
   611  	}
   612  	return resources, nil
   613  }
   614  
    615  // ClearCharts removes all charts and recreates the cdk8s app
   616  func (m *Environment) ClearCharts() error {
   617  	m.Charts = make([]ConnectedChart, 0)
   618  	if err := m.initApp(); err != nil {
   619  		log.Error().Err(err).Msg("failed to apply the initial manifest to create the namespace")
   620  		return err
   621  	}
   622  	return nil
   623  }
   624  
   625  func (m *Environment) Manifest() string {
   626  	return m.CurrentManifest
   627  }
   628  
    629  // UpdateManifest updates the current manifest based on the cdk8s app state
   630  func (m *Environment) UpdateManifest() {
   631  	config.JSIIGlobalMu.Lock()
   632  	m.CurrentManifest = *m.App.SynthYaml()
   633  	config.JSIIGlobalMu.Unlock()
   634  }
   635  
    636  // RunCustomReadyConditions runs the environment with custom ready conditions for a supplied pod count
   637  func (m *Environment) RunCustomReadyConditions(customCheck *client.ReadyCheckData, podCount int) error {
   638  	if m.err != nil {
   639  		return m.err
   640  	}
   641  	if m.Cfg.jobDeployed {
   642  		return nil
   643  	}
   644  	if m.Cfg.JobImage != "" {
   645  		if m.Cfg.Test == nil {
   646  			return fmt.Errorf("Test must be configured in the environment when using the remote runner")
   647  		}
   648  		rrSelector := map[string]*string{pkg.NamespaceLabelKey: ptr.Ptr(m.Cfg.Namespace)}
   649  		m.AddChart(NewRunner(&Props{
   650  			BaseName:           REMOTE_RUNNER_NAME,
   651  			TargetNamespace:    m.Cfg.Namespace,
   652  			Labels:             &rrSelector,
   653  			Image:              m.Cfg.JobImage,
   654  			TestName:           m.Cfg.Test.Name(),
   655  			NoManifestUpdate:   m.Cfg.NoManifestUpdate,
   656  			PreventPodEviction: m.Cfg.PreventPodEviction,
   657  		}))
   658  	}
   659  	m.UpdateManifest()
   660  	m.ChainlinkNodeDetails = []*ChainlinkNodeDetail{} // Resets potentially old details if re-deploying
   661  	if m.Cfg.DryRun {
   662  		log.Info().Msg("Dry-run mode, manifest synthesized and saved as tmp-manifest.yaml")
   663  		return nil
   664  	}
   665  	manifestUpdate := os.Getenv(config.EnvVarNoManifestUpdate)
   666  	if manifestUpdate != "" {
   667  		mu, err := strconv.ParseBool(manifestUpdate)
   668  		if err != nil {
    669  			return fmt.Errorf("manifest update env var should be a bool: true or false")
   670  		}
   671  		m.Cfg.NoManifestUpdate = mu
   672  	}
   673  	log.Debug().Bool("ManifestUpdate", !m.Cfg.NoManifestUpdate).Msg("Update mode")
   674  	if !m.Cfg.NoManifestUpdate || m.Cfg.JobImage != "" {
   675  		if err := m.DeployCustomReadyConditions(customCheck, podCount); err != nil {
   676  			log.Error().Err(err).Msg("Error deploying environment")
   677  			_ = m.Shutdown()
   678  			return err
   679  		}
   680  	}
   681  	if m.Cfg.JobImage != "" {
   682  		log.Info().Msg("Waiting for remote runner to complete")
   683  		// Do not wait for the job to complete if we are running something like a soak test in the remote runner
   684  		if m.Cfg.detachRunner {
   685  			return nil
   686  		}
   687  		if err := m.Client.WaitForJob(m.Cfg.Namespace, "remote-test-runner", func(message string) {
   688  			if m.Cfg.JobLogFunction != nil {
   689  				m.Cfg.JobLogFunction(m, message)
   690  			} else {
   691  				DefaultJobLogFunction(m, message)
   692  			}
   693  		}); err != nil {
   694  			return err
   695  		}
   696  		if m.Cfg.fundReturnFailed {
   697  			return fmt.Errorf("failed to return funds in remote runner")
   698  		}
   699  		m.Cfg.jobDeployed = true
   700  	} else {
   701  		if err := m.Fwd.Connect(m.Cfg.Namespace, "", m.Cfg.InsideK8s); err != nil {
   702  			return err
   703  		}
   704  		log.Debug().Interface("Ports", m.Fwd.Info).Msg("Forwarded ports")
   705  		m.Fwd.PrintLocalPorts()
   706  		if err := m.PrintExportData(); err != nil {
   707  			return err
   708  		}
   709  		arts, err := NewArtifacts(m.Client, m.Cfg.Namespace)
   710  		if err != nil {
   711  			log.Error().Err(err).Msg("failed to create artifacts client")
   712  			return err
   713  		}
   714  		m.Artifacts = arts
   715  		if len(m.URLs["goc"]) != 0 {
   716  			m.httpClient = resty.New().SetBaseURL(m.URLs["goc"][0])
   717  		}
   718  		if m.Cfg.KeepConnection {
   719  			log.Info().Msg("Keeping forwarder connections, press Ctrl+C to interrupt")
   720  			if m.Cfg.RemoveOnInterrupt {
   721  				log.Warn().Msg("Environment will be removed on interrupt")
   722  			}
   723  			ch := make(chan os.Signal, 1)
   724  			signal.Notify(ch, os.Interrupt, syscall.SIGTERM)
   725  			<-ch
   726  			log.Warn().Msg("Interrupted")
   727  			if m.Cfg.RemoveOnInterrupt {
   728  				return m.Client.RemoveNamespace(m.Cfg.Namespace)
   729  			}
   730  		}
   731  	}
   732  	return nil
   733  }
   734  
   735  // RunUpdated runs the environment and checks for pods with `updated=true` label
   736  func (m *Environment) RunUpdated(podCount int) error {
   737  	if m.err != nil {
   738  		return m.err
   739  	}
   740  	conds := &client.ReadyCheckData{
   741  		ReadinessProbeCheckSelector: "updated=true",
   742  		Timeout:                     10 * time.Minute,
   743  	}
   744  	return m.RunCustomReadyConditions(conds, podCount)
   745  }
   746  
    747  // Run deploys or connects to an already created environment
   748  func (m *Environment) Run() error {
   749  	if m.err != nil {
   750  		return m.err
   751  	}
   752  	return m.RunCustomReadyConditions(nil, 0)
   753  }
   754  
   755  func (m *Environment) enumerateApps() error {
   756  	apps, err := m.Client.UniqueLabels(m.Cfg.Namespace, client.AppLabel)
   757  	if err != nil {
   758  		return err
   759  	}
   760  	for _, app := range apps {
   761  		if err := m.Client.EnumerateInstances(m.Cfg.Namespace, fmt.Sprintf("app=%s", app)); err != nil {
   762  			return err
   763  		}
   764  	}
   765  	return nil
   766  }
   767  
    768  // DeployCustomReadyConditions deploys the current manifest with added custom readiness checks
   769  func (m *Environment) DeployCustomReadyConditions(customCheck *client.ReadyCheckData, customPodCount int) error {
   770  	if m.err != nil {
   771  		return m.err
   772  	}
   773  	log.Info().Str("Namespace", m.Cfg.Namespace).Msg("Deploying namespace")
   774  
   775  	if m.Cfg.DryRun {
   776  		return m.Client.DryRun(m.CurrentManifest)
   777  	}
   778  	ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout)
   779  	defer cancel()
   780  	err := m.Client.Apply(ctx, m.CurrentManifest, m.Cfg.Namespace, true)
   781  	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
   782  		return fmt.Errorf("timeout waiting for environment to be ready")
   783  	}
   784  	if err != nil {
   785  		return err
   786  	}
   787  	if int64(m.Cfg.UpdateWaitInterval) != 0 {
   788  		time.Sleep(m.Cfg.UpdateWaitInterval)
   789  	}
   790  
   791  	expectedPodCount := m.findPodCountInDeploymentManifest()
   792  
   793  	if err := m.Client.WaitPodsReady(m.Cfg.Namespace, m.Cfg.ReadyCheckData, expectedPodCount); err != nil {
   794  		return err
   795  	}
   796  	if customCheck != nil {
   797  		if err := m.Client.WaitPodsReady(m.Cfg.Namespace, customCheck, customPodCount); err != nil {
   798  			return err
   799  		}
   800  	}
   801  	return m.enumerateApps()
   802  }
   803  
    804  // Deploy deploys the current manifest and checks that pods are ready
   805  func (m *Environment) Deploy() error {
   806  	return m.DeployCustomReadyConditions(nil, 0)
   807  }
   808  
   809  // RolloutStatefulSets applies "rollout statefulset" to all existing statefulsets in our namespace
   810  func (m *Environment) RolloutStatefulSets() error {
   811  	if m.err != nil {
   812  		return m.err
   813  	}
   814  	ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout)
   815  	defer cancel()
   816  	err := m.Client.RolloutStatefulSets(ctx, m.Cfg.Namespace)
   817  	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
   818  		return fmt.Errorf("timeout waiting for rollout statefulset to complete")
   819  	}
   820  	return err
   821  }
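
// A hedged sketch of forcing pods to pick up ConfigMap changes after updated values
// have been applied with Run() (see the notes on ReplaceHelm and UpdateHelm); the
// resource and selector passed to RolloutRestartBySelector are illustrative:
//
//	if err := e.RolloutStatefulSets(); err != nil {
//		t.Fatal(err)
//	}
//	// or restart a narrower set of resources by selector:
//	if err := e.RolloutRestartBySelector("deployment", "app=my-app"); err != nil {
//		t.Fatal(err)
//	}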
   822  
   823  // RolloutRestartBySelector applies "rollout restart" to the selected resources
   824  func (m *Environment) RolloutRestartBySelector(resource string, selector string) error {
   825  	if m.err != nil {
   826  		return m.err
   827  	}
   828  	ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout)
   829  	defer cancel()
   830  	err := m.Client.RolloutRestartBySelector(ctx, m.Cfg.Namespace, resource, selector)
   831  	if errors.Is(ctx.Err(), context.DeadlineExceeded) {
   832  		return fmt.Errorf("timeout waiting for rollout restart to complete")
   833  	}
   834  	return err
   835  }
   836  
    837  // findPodCountInDeploymentManifest counts all the pods we will be deploying
   838  func (m *Environment) findPodCountInDeploymentManifest() int {
   839  	config.JSIIGlobalMu.Lock()
   840  	defer config.JSIIGlobalMu.Unlock()
   841  	podCount := 0
   842  	charts := m.App.Charts()
   843  	for _, chart := range *charts {
   844  		json := chart.ToJson()
   845  		if json == nil {
   846  			continue
   847  		}
   848  		for _, j := range *json {
   849  			m := j.(map[string]any)
    850  			// if the kind is a Deployment or StatefulSet then we want to see if it has replicas to count towards the pod count
   851  			if _, ok := m["kind"]; !ok {
   852  				continue
   853  			}
   854  			kind := m["kind"].(string)
   855  			if kind == "Deployment" || kind == "StatefulSet" {
   856  				if _, ok := m["spec"]; !ok {
   857  					continue
   858  				}
   859  				podCount += getReplicaCount(m["spec"].(map[string]any))
   860  			}
   861  		}
   862  
   863  	}
   864  	return podCount
   865  }
   866  
   867  func getReplicaCount(spec map[string]any) int {
   868  	if spec == nil {
   869  		return 0
   870  	}
   871  	if _, ok := spec["selector"]; !ok {
   872  		return 0
   873  	}
   874  	s := spec["selector"].(map[string]any)
   875  	if s == nil {
   876  		return 0
   877  	}
   878  	if _, ok := s["matchLabels"]; !ok {
   879  		return 0
   880  	}
   881  	m := s["matchLabels"].(map[string]any)
   882  	if m == nil {
   883  		return 0
   884  	}
   885  	if _, ok := m[client.AppLabel]; !ok {
   886  		return 0
   887  	}
   888  	l := m[client.AppLabel]
   889  	if l == nil {
   890  		return 0
   891  	}
   892  
   893  	replicaCount := 0
   894  	var replicas any
   895  	replicas, ok := spec["replicas"]
   896  	if ok {
   897  		replicaCount += int(replicas.(float64))
   898  	} else {
   899  		replicaCount++
   900  	}
   901  
   902  	return replicaCount
   903  }
   904  
   905  type CoverageProfileParams struct {
   906  	Force             bool     `form:"force" json:"force"`
   907  	Service           []string `form:"service" json:"service"`
   908  	Address           []string `form:"address" json:"address"`
   909  	CoverFilePatterns []string `form:"coverfile" json:"coverfile"`
   910  	SkipFilePatterns  []string `form:"skipfile" json:"skipfile"`
   911  }
   912  
   913  func (m *Environment) getCoverageList() (map[string]any, error) {
   914  	var servicesMap map[string]any
   915  	resp, err := m.httpClient.R().
   916  		SetResult(&servicesMap).
   917  		Get("v1/cover/list")
   918  	if err != nil {
   919  		return nil, err
   920  	}
   921  	if resp.Status() != "200 OK" {
   922  		return nil, fmt.Errorf("coverage service list request is not 200")
   923  	}
   924  	return servicesMap, nil
   925  }
   926  
   927  func (m *Environment) ClearCoverage() error {
   928  	servicesMap, err := m.getCoverageList()
   929  	if err != nil {
   930  		return err
   931  	}
   932  	for serviceName := range servicesMap {
   933  		r, err := m.httpClient.R().
   934  			SetBody(CoverageProfileParams{Service: []string{serviceName}}).
   935  			Post("v1/cover/clear")
   936  		if err != nil {
   937  			return err
   938  		}
   939  		if r.Status() != "200 OK" {
    940  			return fmt.Errorf("coverage service clear request is not 200")
   941  		}
   942  		log.Debug().Str("Service", serviceName).Msg("Coverage cleared")
   943  	}
   944  	return nil
   945  }
   946  
   947  func (m *Environment) SaveCoverage() error {
   948  	if err := MkdirIfNotExists(COVERAGE_DIR); err != nil {
   949  		return err
   950  	}
   951  	servicesMap, err := m.getCoverageList()
   952  	if err != nil {
   953  		return err
   954  	}
   955  	log.Debug().Interface("Services", servicesMap).Msg("Services eligible for coverage")
   956  	for serviceName := range servicesMap {
   957  		r, err := m.httpClient.R().
   958  			SetBody(CoverageProfileParams{Service: []string{serviceName}}).
   959  			Post("v1/cover/profile")
   960  		if err != nil {
   961  			return err
   962  		}
   963  		if r.Status() != "200 OK" {
    964  			return fmt.Errorf("coverage service profile request is not 200")
   965  		}
   966  		log.Debug().Str("Service", serviceName).Msg("Coverage received")
   967  		if err := os.WriteFile(fmt.Sprintf("%s/%s.cov", COVERAGE_DIR, serviceName), r.Body(), os.ModePerm); err != nil {
   968  			return err
   969  		}
   970  	}
   971  	return nil
   972  }
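
// A hedged sketch of a coverage workflow, assuming the environment exposes a "goc"
// service URL (set during Run): clear counters before the test and save profiles
// into the local "cover" directory afterwards:
//
//	if err := e.ClearCoverage(); err != nil {
//		t.Fatal(err)
//	}
//	// ... run the test ...
//	if err := e.SaveCoverage(); err != nil {
//		t.Fatal(err)
//	}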
   973  
    974  // Shutdown removes the environment namespace unless configured to keep it (see KEEP_ENVIRONMENTS)
   975  func (m *Environment) Shutdown() error {
   976  	// don't shutdown if returning of funds failed
   977  	if m.Cfg.fundReturnFailed {
   978  		return nil
   979  	}
   980  
   981  	// don't shutdown if this is a test running remotely
   982  	if m.Cfg.InsideK8s {
   983  		return nil
   984  	}
   985  
   986  	keepEnvs := os.Getenv(config.EnvVarKeepEnvironments)
   987  	if keepEnvs == "" {
   988  		keepEnvs = "NEVER"
   989  	}
   990  
   991  	shouldShutdown := false
   992  	switch strings.ToUpper(keepEnvs) {
   993  	case "ALWAYS":
   994  		return nil
   995  	case "ONFAIL":
   996  		if m.Cfg.Test != nil {
   997  			if !m.Cfg.Test.Failed() {
   998  				shouldShutdown = true
   999  			}
  1000  		}
  1001  	case "NEVER":
  1002  		shouldShutdown = true
  1003  	default:
  1004  		log.Warn().Str("Invalid Keep Value", keepEnvs).
  1005  			Msg("Invalid 'keep_environments' value, see the KEEP_ENVIRONMENTS env var")
  1006  	}
  1007  
  1008  	if shouldShutdown {
  1009  		return m.Client.RemoveNamespace(m.Cfg.Namespace)
  1010  	}
  1011  	return nil
  1012  }
  1013  
  1014  // WillUseRemoteRunner determines if we need to start the remote runner
  1015  func (m *Environment) WillUseRemoteRunner() bool {
  1016  	val, _ := os.LookupEnv(config.EnvVarJobImage)
  1017  	return val != "" && m.Cfg != nil && m.Cfg.Test != nil && m.Cfg.Test.Name() != ""
  1018  }
  1019  
  1020  func DefaultJobLogFunction(e *Environment, message string) {
  1021  	logChunks := logging.SplitStringIntoChunks(message, 50000)
  1022  	for _, chunk := range logChunks {
  1023  		e.Cfg.Test.Log(chunk)
  1024  	}
  1025  	if strings.Contains(message, FAILED_FUND_RETURN) {
  1026  		e.Cfg.fundReturnFailed = true
  1027  	}
  1028  	if strings.Contains(message, TEST_FAILED) {
  1029  		e.Cfg.Test.Fail()
  1030  	}
  1031  }
  1032  
   1033  // markNotSafeToEvict adds annotations marking pods as not safe to evict to the provided map if needed
  1034  func markNotSafeToEvict(preventPodEviction bool, m map[string]string) map[string]string {
  1035  	if m == nil {
  1036  		m = make(map[string]string)
  1037  	}
  1038  	if preventPodEviction {
  1039  		m["karpenter.sh/do-not-evict"] = "true"
  1040  		m["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false"
  1041  	}
  1042  
  1043  	return m
  1044  }