github.com/smartcontractkit/chainlink-testing-framework/libs@v0.0.0-20240227141906-ec710b4eb1a3/k8s/environment/environment.go (about) 1 package environment 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "os" 8 "os/signal" 9 "strconv" 10 "strings" 11 "syscall" 12 "testing" 13 "time" 14 15 "github.com/cdk8s-team/cdk8s-core-go/cdk8s/v2" 16 "github.com/go-resty/resty/v2" 17 "github.com/google/uuid" 18 "github.com/imdario/mergo" 19 "github.com/rs/zerolog/log" 20 "github.com/stretchr/testify/require" 21 22 "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/client" 23 "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/config" 24 "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/imports/k8s" 25 "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/pkg" 26 a "github.com/smartcontractkit/chainlink-testing-framework/libs/k8s/pkg/alias" 27 "github.com/smartcontractkit/chainlink-testing-framework/libs/logging" 28 "github.com/smartcontractkit/chainlink-testing-framework/libs/utils/ptr" 29 "github.com/smartcontractkit/chainlink-testing-framework/libs/utils/testcontext" 30 ) 31 32 const ( 33 COVERAGE_DIR string = "cover" 34 FAILED_FUND_RETURN string = "FAILED_FUND_RETURN" 35 TEST_FAILED string = "TEST_FAILED" 36 ) 37 38 const ( 39 ErrInvalidOCI string = "OCI chart url should be in format oci://$ECR_URL/$ECR_REGISTRY_NAME/$CHART_NAME:[?$CHART_VERSION], was %s" 40 ErrOCIPull string = "failed to pull OCI repo: %s" 41 ) 42 43 var ( 44 defaultNamespaceAnnotations = map[string]*string{ 45 "prometheus.io/scrape": ptr.Ptr("true"), 46 "backyards.banzaicloud.io/image-registry-access": ptr.Ptr("true"), 47 "backyards.banzaicloud.io/public-dockerhub-access": ptr.Ptr("true"), 48 } 49 ) 50 51 // ConnectedChart interface to interact both with cdk8s apps and helm charts 52 type ConnectedChart interface { 53 // IsDeploymentNeeded 54 // true - we deploy/connect and expose environment data 55 // false - we are using external environment, but still exposing data 56 IsDeploymentNeeded() bool 57 // GetName name of the deployed part 58 GetName() string 59 // GetPath get Helm chart path, repo or local path 60 GetPath() string 61 // GetVersion gets the chart's version, empty string if none is specified 62 GetVersion() string 63 // GetProps get code props if it's typed environment 64 GetProps() any 65 // GetValues get values.yml props as map, if it's Helm 66 GetValues() *map[string]any 67 // ExportData export deployment part data in the env 68 ExportData(e *Environment) error 69 } 70 71 // Config is an environment common configuration, labels, annotations, connection types, readiness check, etc. 72 type Config struct { 73 // TTL is time to live for the environment, used with kube-janitor 74 TTL time.Duration 75 // NamespacePrefix is a static namespace prefix 76 NamespacePrefix string 77 // Namespace is full namespace name 78 Namespace string 79 // Labels is a set of labels applied to the namespace in a format of "key=value" 80 Labels []string 81 // PodLabels is a set of labels applied to every pod in the namespace 82 PodLabels map[string]string 83 // PreventPodEviction if true sets a k8s annotation safe-to-evict=false to prevent pods from being evicted 84 // Note: This should only be used if your test is completely incapable of handling things like K8s rebalances without failing. 85 // If that is the case, it's worth the effort to make your test fault-tolerant soon. The alternative is expensive and infuriating. 86 PreventPodEviction bool 87 // Allow deployment to nodes with these tolerances 88 Tolerations []map[string]string 89 // Restrict deployment to only nodes matching a particular node role 90 NodeSelector map[string]string 91 // ReadyCheckData is settings for readiness probes checks for all deployment components 92 // checking that all pods are ready by default with 8 minutes timeout 93 // &client.ReadyCheckData{ 94 // ReadinessProbeCheckSelector: "", 95 // Timeout: 15 * time.Minute, 96 // } 97 ReadyCheckData *client.ReadyCheckData 98 // DryRun if true, app will just generate a manifest in local dir 99 DryRun bool 100 // InsideK8s used for long-running soak tests where you connect to env from the inside 101 InsideK8s bool 102 // NoManifestUpdate is a flag to skip manifest updating when connecting 103 NoManifestUpdate bool 104 // KeepConnection keeps connection until interrupted with a signal, useful when prototyping and debugging a new env 105 KeepConnection bool 106 // RemoveOnInterrupt automatically removes an environment on interrupt 107 RemoveOnInterrupt bool 108 // UpdateWaitInterval an interval to wait for deployment update started 109 UpdateWaitInterval time.Duration 110 111 // Remote Runner Specific Variables // 112 // JobImage an image to run environment as a job inside k8s 113 JobImage string 114 // JobLogFunction a function that will be run on each log 115 JobLogFunction func(*Environment, string) 116 // Test the testing library current Test struct 117 Test *testing.T 118 // jobDeployed used to limit us to 1 remote runner deploy 119 jobDeployed bool 120 // detachRunner should we detach the remote runner after starting the test 121 detachRunner bool 122 // fundReturnFailed the status of a fund return 123 fundReturnFailed bool 124 } 125 126 func defaultEnvConfig() *Config { 127 return &Config{ 128 TTL: 20 * time.Minute, 129 NamespacePrefix: "chainlink-test-env", 130 UpdateWaitInterval: 1 * time.Second, 131 ReadyCheckData: &client.ReadyCheckData{ 132 ReadinessProbeCheckSelector: "", 133 Timeout: 15 * time.Minute, 134 }, 135 } 136 } 137 138 // Environment describes a launched test environment 139 type Environment struct { 140 App cdk8s.App 141 CurrentManifest string 142 root cdk8s.Chart 143 Charts []ConnectedChart // All connected charts in the 144 Cfg *Config // The environment specific config 145 Client *client.K8sClient // Client connecting to the K8s cluster 146 Fwd *client.Forwarder // Used to forward ports from local machine to the K8s cluster 147 Artifacts *Artifacts 148 Chaos *client.Chaos 149 httpClient *resty.Client 150 URLs map[string][]string // General URLs of launched resources. Uses '_local' to delineate forwarded ports 151 ChainlinkNodeDetails []*ChainlinkNodeDetail // ChainlinkNodeDetails has convenient details for connecting to chainlink deployments 152 err error 153 } 154 155 // ChainlinkNodeDetail contains details about a chainlink node deployment 156 type ChainlinkNodeDetail struct { 157 // ChartName details the name of the Helm chart this node uses, handy for modifying deployment values 158 // Note: if you are using replicas of the same chart, this will be the same for all nodes 159 // Use NewDeployment function for Chainlink nodes to make use of this 160 ChartName string 161 // PodName is the name of the pod running the chainlink node 162 PodName string 163 // LocalIP is the URL to connect to the node from the local machine 164 LocalIP string 165 // InternalIP is the URL to connect to the node from inside the K8s cluster 166 InternalIP string 167 // DBLocalIP is the URL to connect to the node's database from the local machine 168 DBLocalIP string 169 } 170 171 // New creates new environment 172 func New(cfg *Config) *Environment { 173 logging.Init() 174 if cfg == nil { 175 cfg = &Config{} 176 } 177 targetCfg := defaultEnvConfig() 178 config.MustMerge(targetCfg, cfg) 179 ns := os.Getenv(config.EnvVarNamespace) 180 if ns != "" { 181 cfg.Namespace = ns 182 } 183 if cfg.Namespace != "" { 184 log.Info().Str("Namespace", cfg.Namespace).Msg("Namespace selected") 185 targetCfg.Namespace = cfg.Namespace 186 } else { 187 targetCfg.Namespace = fmt.Sprintf("%s-%s", targetCfg.NamespacePrefix, uuid.NewString()[0:5]) 188 log.Info().Str("Namespace", targetCfg.Namespace).Msg("Creating new namespace") 189 } 190 jobImage := os.Getenv(config.EnvVarJobImage) 191 if jobImage != "" { 192 targetCfg.JobImage = jobImage 193 targetCfg.detachRunner, _ = strconv.ParseBool(os.Getenv(config.EnvVarDetachRunner)) 194 } else { 195 targetCfg.InsideK8s, _ = strconv.ParseBool(os.Getenv(config.EnvVarInsideK8s)) 196 } 197 198 c, err := client.NewK8sClient() 199 if err != nil { 200 return &Environment{err: err} 201 } 202 e := &Environment{ 203 URLs: make(map[string][]string), 204 Charts: make([]ConnectedChart, 0), 205 Client: c, 206 Cfg: targetCfg, 207 Fwd: client.NewForwarder(c, targetCfg.KeepConnection), 208 } 209 arts, err := NewArtifacts(e.Client, e.Cfg.Namespace) 210 if err != nil { 211 log.Error().Err(err).Msg("failed to create artifacts client") 212 return &Environment{err: err} 213 } 214 e.Artifacts = arts 215 216 config.JSIIGlobalMu.Lock() 217 defer config.JSIIGlobalMu.Unlock() 218 if err := e.initApp(); err != nil { 219 log.Error().Err(err).Msg("failed to apply the initial manifest to create the namespace") 220 return &Environment{err: err} 221 } 222 e.Chaos = client.NewChaos(c, e.Cfg.Namespace) 223 224 // setup test cleanup if this is using a remote runner 225 // and not in detached mode 226 // and not using an existing environment 227 if targetCfg.JobImage != "" && !targetCfg.detachRunner && !targetCfg.NoManifestUpdate { 228 targetCfg.fundReturnFailed = false 229 if targetCfg.Test != nil { 230 targetCfg.Test.Cleanup(func() { 231 err := e.Shutdown() 232 require.NoError(targetCfg.Test, err) 233 }) 234 } 235 } 236 return e 237 } 238 239 func (m *Environment) initApp() error { 240 var err error 241 m.App = cdk8s.NewApp(&cdk8s.AppProps{ 242 YamlOutputType: cdk8s.YamlOutputType_FILE_PER_APP, 243 }) 244 m.Cfg.Labels = append(m.Cfg.Labels, "app.kubernetes.io/managed-by=cdk8s") 245 owner := os.Getenv(config.EnvVarUser) 246 if owner == "" { 247 return fmt.Errorf("missing owner environment variable, please set %s to your name or if you are seeing this in CI please set it to ${{ github.actor }}", config.EnvVarUser) 248 } 249 m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("owner=%s", owner)) 250 251 if os.Getenv(config.EnvVarCLCommitSha) != "" { 252 m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("commit=%s", os.Getenv(config.EnvVarCLCommitSha))) 253 } 254 testTrigger := os.Getenv(config.EnvVarTestTrigger) 255 if testTrigger == "" { 256 testTrigger = "manual" 257 } 258 m.Cfg.Labels = append(m.Cfg.Labels, fmt.Sprintf("triggered-by=%s", testTrigger)) 259 260 if tolerationRole := os.Getenv(config.EnvVarToleration); tolerationRole != "" { 261 m.Cfg.Tolerations = []map[string]string{{ 262 "key": "node-role", 263 "operator": "Equal", 264 "value": tolerationRole, 265 "effect": "NoSchedule", 266 }} 267 } 268 269 if selectorRole := os.Getenv(config.EnvVarNodeSelector); selectorRole != "" { 270 m.Cfg.NodeSelector = map[string]string{ 271 "node-role": selectorRole, 272 } 273 } 274 275 nsLabels, err := a.ConvertLabels(m.Cfg.Labels) 276 if err != nil { 277 return err 278 } 279 defaultNamespaceAnnotations[pkg.TTLLabelKey] = a.ShortDur(m.Cfg.TTL) 280 m.root = cdk8s.NewChart(m.App, ptr.Ptr(fmt.Sprintf("root-chart-%s", m.Cfg.Namespace)), &cdk8s.ChartProps{ 281 Labels: nsLabels, 282 Namespace: ptr.Ptr(m.Cfg.Namespace), 283 }) 284 k8s.NewKubeNamespace(m.root, ptr.Ptr("namespace"), &k8s.KubeNamespaceProps{ 285 Metadata: &k8s.ObjectMeta{ 286 Name: ptr.Ptr(m.Cfg.Namespace), 287 Labels: nsLabels, 288 Annotations: &defaultNamespaceAnnotations, 289 }, 290 }) 291 if m.Cfg.PreventPodEviction { 292 zero := float64(0) 293 k8s.NewKubePodDisruptionBudget(m.root, ptr.Ptr("pdb"), &k8s.KubePodDisruptionBudgetProps{ 294 Metadata: &k8s.ObjectMeta{ 295 Name: ptr.Ptr("clenv-pdb"), 296 Namespace: ptr.Ptr(m.Cfg.Namespace), 297 }, 298 Spec: &k8s.PodDisruptionBudgetSpec{ 299 MaxUnavailable: k8s.IntOrString_FromNumber(&zero), 300 Selector: &k8s.LabelSelector{ 301 MatchLabels: &map[string]*string{ 302 pkg.NamespaceLabelKey: ptr.Ptr(m.Cfg.Namespace), 303 }, 304 }, 305 }, 306 }) 307 } 308 m.CurrentManifest = *m.App.SynthYaml() 309 // loop retry applying the initial manifest with the namespace and other basics 310 ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout) 311 defer cancel() 312 startTime := time.Now() 313 deadline, _ := ctx.Deadline() 314 for { 315 err = m.Client.Apply(ctx, m.CurrentManifest, m.Cfg.Namespace, true) 316 if err == nil || ctx.Err() != nil { 317 break 318 } 319 elapsed := time.Since(startTime) 320 remaining := time.Until(deadline) 321 log.Debug().Err(err).Msgf("Failed to apply initial manifest, will continue to retry. Time elapsed: %s, Time until timeout %s\n", elapsed, remaining) 322 time.Sleep(5 * time.Second) 323 } 324 if errors.Is(ctx.Err(), context.DeadlineExceeded) { 325 return fmt.Errorf("failed to apply manifest within %s", m.Cfg.ReadyCheckData.Timeout) 326 } 327 if m.Cfg.PodLabels == nil { 328 m.Cfg.PodLabels = map[string]string{} 329 } 330 m.Cfg.PodLabels[pkg.NamespaceLabelKey] = m.Cfg.Namespace 331 return err 332 } 333 334 // AddChart adds a chart to the deployment 335 func (m *Environment) AddChart(f func(root cdk8s.Chart) ConnectedChart) *Environment { 336 if m.err != nil { 337 return m 338 } 339 config.JSIIGlobalMu.Lock() 340 defer config.JSIIGlobalMu.Unlock() 341 m.Charts = append(m.Charts, f(m.root)) 342 return m 343 } 344 345 func (m *Environment) removeChart(name string) error { 346 chartIndex, _, err := m.findChart(name) 347 if err != nil { 348 return err 349 } 350 m.Charts = append(m.Charts[:chartIndex], m.Charts[chartIndex+1:]...) 351 m.root.Node().TryRemoveChild(ptr.Ptr(name)) 352 return nil 353 } 354 355 // findChart finds a chart by name, returning the index of it in the Charts slice, and the chart itself 356 func (m *Environment) findChart(name string) (index int, chart ConnectedChart, err error) { 357 for i, c := range m.Charts { 358 if c.GetName() == name { 359 return i, c, nil 360 } 361 } 362 return -1, nil, fmt.Errorf("chart %s not found", name) 363 } 364 365 // ReplaceHelm entirely replaces an existing helm chart with a new one 366 // Note: you need to call Run() after this to apply the changes. If you're modifying ConfigMap values, you'll probably 367 // need to use RollOutStatefulSets to apply the changes to the pods. https://stackoverflow.com/questions/57356521/rollingupdate-for-stateful-set-doesnt-restart-pods-and-changes-from-updated-con 368 func (m *Environment) ReplaceHelm(name string, chart ConnectedChart) (*Environment, error) { 369 if m.err != nil { 370 return nil, m.err 371 } 372 config.JSIIGlobalMu.Lock() 373 defer config.JSIIGlobalMu.Unlock() 374 if err := m.removeChart(name); err != nil { 375 return nil, err 376 } 377 if m.Cfg.JobImage != "" || !chart.IsDeploymentNeeded() { 378 return m, fmt.Errorf("cannot modify helm chart '%s' that does not need deployment, it may be in a remote runner or detached mode", name) 379 } 380 log.Trace(). 381 Str("Chart", chart.GetName()). 382 Str("Path", chart.GetPath()). 383 Interface("Props", chart.GetProps()). 384 Interface("Values", chart.GetValues()). 385 Msg("Chart deployment values") 386 h := cdk8s.NewHelm(m.root, ptr.Ptr(chart.GetName()), &cdk8s.HelmProps{ 387 Chart: ptr.Ptr(chart.GetPath()), 388 HelmFlags: &[]*string{ 389 ptr.Ptr("--namespace"), 390 ptr.Ptr(m.Cfg.Namespace), 391 }, 392 ReleaseName: ptr.Ptr(chart.GetName()), 393 Values: chart.GetValues(), 394 }) 395 addDefaultPodAnnotationsAndLabels(h, markNotSafeToEvict(m.Cfg.PreventPodEviction, nil), m.Cfg.PodLabels) 396 m.Charts = append(m.Charts, chart) 397 return m, nil 398 } 399 400 func addDefaultPodAnnotationsAndLabels(h cdk8s.Helm, annotations, labels map[string]string) { 401 annoatationsCopy := map[string]string{} 402 for k, v := range annotations { 403 annoatationsCopy[k] = v 404 } 405 for _, ao := range *h.ApiObjects() { 406 switch *ao.Kind() { 407 case "Deployment", "ReplicaSet", "StatefulSet": 408 // we aren't guaranteed to have annotations in existence so we have to dig down to see if they exist 409 // and add any to our current list we want to add 410 aj := *ao.Chart().ToJson() 411 // loop over the json array until we get the expected kind and look for existing annotations 412 for _, dep := range aj { 413 l := fmt.Sprint(dep) 414 if !strings.Contains(l, fmt.Sprintf("kind:%s", *ao.Kind())) { 415 continue 416 } 417 depM := dep.(map[string]interface{}) 418 spec, ok := depM["spec"].(map[string]interface{}) 419 if !ok { 420 continue 421 } 422 template, ok := spec["template"].(map[string]interface{}) 423 if !ok { 424 continue 425 } 426 metadata, ok := template["metadata"].(map[string]interface{}) 427 if !ok { 428 continue 429 } 430 annot, ok := metadata["annotations"].(map[string]interface{}) 431 if !ok { 432 continue 433 } 434 for k, v := range annot { 435 annoatationsCopy[k] = v.(string) 436 } 437 } 438 ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr("/spec/template/metadata/annotations"), annoatationsCopy)) 439 440 // loop over the labels and apply them to both the labels and selectors 441 // these should in theory always have at least one label/selector combo in existence so we don't 442 // have to do the existence check like we do for the annotations 443 for k, v := range labels { 444 // Escape the keys according to JSON Pointer syntax in RFC 6901 445 escapedKey := strings.ReplaceAll(strings.ReplaceAll(k, "~", "~0"), "/", "~1") 446 ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr(fmt.Sprintf("/spec/template/metadata/labels/%s", escapedKey)), v)) 447 ao.AddJsonPatch(cdk8s.JsonPatch_Add(ptr.Ptr(fmt.Sprintf("/spec/selector/matchLabels/%s", escapedKey)), v)) 448 } 449 } 450 } 451 } 452 453 // UpdateHelm update a helm chart with new values. The pod will launch with an `updated=true` label if it's a Chainlink node. 454 // Note: If you're modifying ConfigMap values, you'll probably need to use RollOutStatefulSets to apply the changes to the pods. 455 // https://stackoverflow.com/questions/57356521/rollingupdate-for-stateful-set-doesnt-restart-pods-and-changes-from-updated-con 456 func (m *Environment) UpdateHelm(name string, values map[string]any) (*Environment, error) { 457 if m.err != nil { 458 return nil, m.err 459 } 460 _, chart, err := m.findChart(name) 461 if err != nil { 462 return nil, err 463 } 464 if _, labelsExist := values["labels"]; !labelsExist { 465 values["labels"] = make(map[string]*string) 466 } 467 values["labels"].(map[string]*string)["updated"] = ptr.Ptr("true") 468 if err = mergo.Merge(chart.GetValues(), values, mergo.WithOverride); err != nil { 469 return nil, err 470 } 471 return m.ReplaceHelm(name, chart) 472 } 473 474 // AddHelmCharts adds multiple helm charts to the testing environment 475 func (m *Environment) AddHelmCharts(charts []ConnectedChart) *Environment { 476 if m.err != nil { 477 return m 478 } 479 for _, c := range charts { 480 m.AddHelm(c) 481 } 482 return m 483 } 484 485 // AddHelm adds a helm chart to the testing environment 486 func (m *Environment) AddHelm(chart ConnectedChart) *Environment { 487 if m.err != nil { 488 return m 489 } 490 if m.Cfg.JobImage != "" || !chart.IsDeploymentNeeded() { 491 return m 492 } 493 config.JSIIGlobalMu.Lock() 494 defer config.JSIIGlobalMu.Unlock() 495 496 values := &map[string]any{ 497 "tolerations": m.Cfg.Tolerations, 498 "nodeSelector": m.Cfg.NodeSelector, 499 } 500 config.MustMerge(values, chart.GetValues()) 501 log.Trace(). 502 Str("Chart", chart.GetName()). 503 Str("Path", chart.GetPath()). 504 Interface("Props", chart.GetProps()). 505 Interface("Values", values). 506 Msg("Chart deployment values") 507 helmFlags := []*string{ 508 ptr.Ptr("--namespace"), 509 ptr.Ptr(m.Cfg.Namespace), 510 ptr.Ptr("--skip-tests"), 511 } 512 if chart.GetVersion() != "" { 513 helmFlags = append(helmFlags, ptr.Ptr("--version"), ptr.Ptr(chart.GetVersion())) 514 } 515 chartPath, err := m.PullOCIChart(chart) 516 if err != nil { 517 m.err = err 518 return m 519 } 520 h := cdk8s.NewHelm(m.root, ptr.Ptr(chart.GetName()), &cdk8s.HelmProps{ 521 Chart: ptr.Ptr(chartPath), 522 HelmFlags: &helmFlags, 523 ReleaseName: ptr.Ptr(chart.GetName()), 524 Values: values, 525 }) 526 addDefaultPodAnnotationsAndLabels(h, markNotSafeToEvict(m.Cfg.PreventPodEviction, nil), m.Cfg.PodLabels) 527 m.Charts = append(m.Charts, chart) 528 return m 529 } 530 531 // PullOCIChart handles working with OCI format repositories 532 // https://helm.sh/docs/topics/registries/ 533 // API is not compatible between helm repos and OCI repos, so we download and untar the chart 534 func (m *Environment) PullOCIChart(chart ConnectedChart) (string, error) { 535 if !strings.HasPrefix(chart.GetPath(), "oci") { 536 return chart.GetPath(), nil 537 } 538 cp := strings.Split(chart.GetPath(), "/") 539 if len(cp) != 5 { 540 return "", fmt.Errorf(ErrInvalidOCI, chart.GetPath()) 541 } 542 sp := strings.Split(chart.GetPath(), ":") 543 544 var cmd string 545 var chartName string 546 chartName = cp[len(cp)-1] 547 chartDir := uuid.NewString() 548 switch len(sp) { 549 case 2: 550 cmd = fmt.Sprintf("helm pull %s --untar --untardir %s", chart.GetPath(), chartDir) 551 case 3: 552 chartName = strings.Split(chartName, ":")[0] 553 cmd = fmt.Sprintf("helm pull %s --version %s --untar --untardir %s", fmt.Sprintf("%s:%s", sp[0], sp[1]), sp[2], chartDir) 554 default: 555 return "", fmt.Errorf(ErrInvalidOCI, chart.GetPath()) 556 } 557 log.Info().Str("CMD", cmd).Msg("Running helm cmd") 558 if err := client.ExecCmd(cmd); err != nil { 559 return "", fmt.Errorf(ErrOCIPull, chart.GetPath()) 560 } 561 localChartPath := fmt.Sprintf("%s/%s/", chartDir, chartName) 562 log.Info().Str("Path", localChartPath).Msg("Local chart path") 563 return localChartPath, nil 564 } 565 566 // PrintExportData prints export data 567 func (m *Environment) PrintExportData() error { 568 m.URLs = make(map[string][]string) 569 for _, c := range m.Charts { 570 err := c.ExportData(m) 571 if err != nil { 572 return err 573 } 574 } 575 log.Debug().Interface("URLs", m.URLs).Msg("Connection URLs") 576 return nil 577 } 578 579 // DumpLogs dumps all logs into a file 580 func (m *Environment) DumpLogs(path string) error { 581 arts, err := NewArtifacts(m.Client, m.Cfg.Namespace) 582 if err != nil { 583 return err 584 } 585 if path == "" { 586 path = fmt.Sprintf("logs/%s-%d", m.Cfg.Namespace, time.Now().Unix()) 587 } 588 return arts.DumpTestResult(path, "chainlink") 589 } 590 591 // ResourcesSummary returns resources summary for selected pods as a map, used in reports 592 func (m *Environment) ResourcesSummary(selector string) (map[string]map[string]string, error) { 593 pl, err := m.Client.ListPods(m.Cfg.Namespace, selector) 594 if err != nil { 595 return nil, err 596 } 597 if len(pl.Items) == 0 { 598 return nil, fmt.Errorf("no pods found for selector: %s", selector) 599 } 600 resources := make(map[string]map[string]string) 601 for _, p := range pl.Items { 602 for _, c := range p.Spec.Containers { 603 if resources[c.Name] == nil { 604 resources[c.Name] = make(map[string]string) 605 } 606 cpuRes := c.Resources.Requests["cpu"] 607 resources[c.Name]["cpu"] = cpuRes.String() 608 memRes := c.Resources.Requests["memory"] 609 resources[c.Name]["memory"] = memRes.String() 610 } 611 } 612 return resources, nil 613 } 614 615 // ClearCharts recreates cdk8s app 616 func (m *Environment) ClearCharts() error { 617 m.Charts = make([]ConnectedChart, 0) 618 if err := m.initApp(); err != nil { 619 log.Error().Err(err).Msg("failed to apply the initial manifest to create the namespace") 620 return err 621 } 622 return nil 623 } 624 625 func (m *Environment) Manifest() string { 626 return m.CurrentManifest 627 } 628 629 // Update current manifest based on the cdk8s app state 630 func (m *Environment) UpdateManifest() { 631 config.JSIIGlobalMu.Lock() 632 m.CurrentManifest = *m.App.SynthYaml() 633 config.JSIIGlobalMu.Unlock() 634 } 635 636 // RunCustomReadyConditions Runs the environment with custom ready conditions for a supplied pod count 637 func (m *Environment) RunCustomReadyConditions(customCheck *client.ReadyCheckData, podCount int) error { 638 if m.err != nil { 639 return m.err 640 } 641 if m.Cfg.jobDeployed { 642 return nil 643 } 644 if m.Cfg.JobImage != "" { 645 if m.Cfg.Test == nil { 646 return fmt.Errorf("Test must be configured in the environment when using the remote runner") 647 } 648 rrSelector := map[string]*string{pkg.NamespaceLabelKey: ptr.Ptr(m.Cfg.Namespace)} 649 m.AddChart(NewRunner(&Props{ 650 BaseName: REMOTE_RUNNER_NAME, 651 TargetNamespace: m.Cfg.Namespace, 652 Labels: &rrSelector, 653 Image: m.Cfg.JobImage, 654 TestName: m.Cfg.Test.Name(), 655 NoManifestUpdate: m.Cfg.NoManifestUpdate, 656 PreventPodEviction: m.Cfg.PreventPodEviction, 657 })) 658 } 659 m.UpdateManifest() 660 m.ChainlinkNodeDetails = []*ChainlinkNodeDetail{} // Resets potentially old details if re-deploying 661 if m.Cfg.DryRun { 662 log.Info().Msg("Dry-run mode, manifest synthesized and saved as tmp-manifest.yaml") 663 return nil 664 } 665 manifestUpdate := os.Getenv(config.EnvVarNoManifestUpdate) 666 if manifestUpdate != "" { 667 mu, err := strconv.ParseBool(manifestUpdate) 668 if err != nil { 669 return fmt.Errorf("manifest update should be bool: true, false") 670 } 671 m.Cfg.NoManifestUpdate = mu 672 } 673 log.Debug().Bool("ManifestUpdate", !m.Cfg.NoManifestUpdate).Msg("Update mode") 674 if !m.Cfg.NoManifestUpdate || m.Cfg.JobImage != "" { 675 if err := m.DeployCustomReadyConditions(customCheck, podCount); err != nil { 676 log.Error().Err(err).Msg("Error deploying environment") 677 _ = m.Shutdown() 678 return err 679 } 680 } 681 if m.Cfg.JobImage != "" { 682 log.Info().Msg("Waiting for remote runner to complete") 683 // Do not wait for the job to complete if we are running something like a soak test in the remote runner 684 if m.Cfg.detachRunner { 685 return nil 686 } 687 if err := m.Client.WaitForJob(m.Cfg.Namespace, "remote-test-runner", func(message string) { 688 if m.Cfg.JobLogFunction != nil { 689 m.Cfg.JobLogFunction(m, message) 690 } else { 691 DefaultJobLogFunction(m, message) 692 } 693 }); err != nil { 694 return err 695 } 696 if m.Cfg.fundReturnFailed { 697 return fmt.Errorf("failed to return funds in remote runner") 698 } 699 m.Cfg.jobDeployed = true 700 } else { 701 if err := m.Fwd.Connect(m.Cfg.Namespace, "", m.Cfg.InsideK8s); err != nil { 702 return err 703 } 704 log.Debug().Interface("Ports", m.Fwd.Info).Msg("Forwarded ports") 705 m.Fwd.PrintLocalPorts() 706 if err := m.PrintExportData(); err != nil { 707 return err 708 } 709 arts, err := NewArtifacts(m.Client, m.Cfg.Namespace) 710 if err != nil { 711 log.Error().Err(err).Msg("failed to create artifacts client") 712 return err 713 } 714 m.Artifacts = arts 715 if len(m.URLs["goc"]) != 0 { 716 m.httpClient = resty.New().SetBaseURL(m.URLs["goc"][0]) 717 } 718 if m.Cfg.KeepConnection { 719 log.Info().Msg("Keeping forwarder connections, press Ctrl+C to interrupt") 720 if m.Cfg.RemoveOnInterrupt { 721 log.Warn().Msg("Environment will be removed on interrupt") 722 } 723 ch := make(chan os.Signal, 1) 724 signal.Notify(ch, os.Interrupt, syscall.SIGTERM) 725 <-ch 726 log.Warn().Msg("Interrupted") 727 if m.Cfg.RemoveOnInterrupt { 728 return m.Client.RemoveNamespace(m.Cfg.Namespace) 729 } 730 } 731 } 732 return nil 733 } 734 735 // RunUpdated runs the environment and checks for pods with `updated=true` label 736 func (m *Environment) RunUpdated(podCount int) error { 737 if m.err != nil { 738 return m.err 739 } 740 conds := &client.ReadyCheckData{ 741 ReadinessProbeCheckSelector: "updated=true", 742 Timeout: 10 * time.Minute, 743 } 744 return m.RunCustomReadyConditions(conds, podCount) 745 } 746 747 // Run deploys or connects to already created environment 748 func (m *Environment) Run() error { 749 if m.err != nil { 750 return m.err 751 } 752 return m.RunCustomReadyConditions(nil, 0) 753 } 754 755 func (m *Environment) enumerateApps() error { 756 apps, err := m.Client.UniqueLabels(m.Cfg.Namespace, client.AppLabel) 757 if err != nil { 758 return err 759 } 760 for _, app := range apps { 761 if err := m.Client.EnumerateInstances(m.Cfg.Namespace, fmt.Sprintf("app=%s", app)); err != nil { 762 return err 763 } 764 } 765 return nil 766 } 767 768 // DeployCustomReadyConditions deploy current manifest with added custom readiness checks 769 func (m *Environment) DeployCustomReadyConditions(customCheck *client.ReadyCheckData, customPodCount int) error { 770 if m.err != nil { 771 return m.err 772 } 773 log.Info().Str("Namespace", m.Cfg.Namespace).Msg("Deploying namespace") 774 775 if m.Cfg.DryRun { 776 return m.Client.DryRun(m.CurrentManifest) 777 } 778 ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout) 779 defer cancel() 780 err := m.Client.Apply(ctx, m.CurrentManifest, m.Cfg.Namespace, true) 781 if errors.Is(ctx.Err(), context.DeadlineExceeded) { 782 return fmt.Errorf("timeout waiting for environment to be ready") 783 } 784 if err != nil { 785 return err 786 } 787 if int64(m.Cfg.UpdateWaitInterval) != 0 { 788 time.Sleep(m.Cfg.UpdateWaitInterval) 789 } 790 791 expectedPodCount := m.findPodCountInDeploymentManifest() 792 793 if err := m.Client.WaitPodsReady(m.Cfg.Namespace, m.Cfg.ReadyCheckData, expectedPodCount); err != nil { 794 return err 795 } 796 if customCheck != nil { 797 if err := m.Client.WaitPodsReady(m.Cfg.Namespace, customCheck, customPodCount); err != nil { 798 return err 799 } 800 } 801 return m.enumerateApps() 802 } 803 804 // Deploy deploy current manifest and check logs for readiness 805 func (m *Environment) Deploy() error { 806 return m.DeployCustomReadyConditions(nil, 0) 807 } 808 809 // RolloutStatefulSets applies "rollout statefulset" to all existing statefulsets in our namespace 810 func (m *Environment) RolloutStatefulSets() error { 811 if m.err != nil { 812 return m.err 813 } 814 ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout) 815 defer cancel() 816 err := m.Client.RolloutStatefulSets(ctx, m.Cfg.Namespace) 817 if errors.Is(ctx.Err(), context.DeadlineExceeded) { 818 return fmt.Errorf("timeout waiting for rollout statefulset to complete") 819 } 820 return err 821 } 822 823 // RolloutRestartBySelector applies "rollout restart" to the selected resources 824 func (m *Environment) RolloutRestartBySelector(resource string, selector string) error { 825 if m.err != nil { 826 return m.err 827 } 828 ctx, cancel := context.WithTimeout(testcontext.Get(m.Cfg.Test), m.Cfg.ReadyCheckData.Timeout) 829 defer cancel() 830 err := m.Client.RolloutRestartBySelector(ctx, m.Cfg.Namespace, resource, selector) 831 if errors.Is(ctx.Err(), context.DeadlineExceeded) { 832 return fmt.Errorf("timeout waiting for rollout restart to complete") 833 } 834 return err 835 } 836 837 // findPodsInDeploymentManifest finds all the pods we will be deploying 838 func (m *Environment) findPodCountInDeploymentManifest() int { 839 config.JSIIGlobalMu.Lock() 840 defer config.JSIIGlobalMu.Unlock() 841 podCount := 0 842 charts := m.App.Charts() 843 for _, chart := range *charts { 844 json := chart.ToJson() 845 if json == nil { 846 continue 847 } 848 for _, j := range *json { 849 m := j.(map[string]any) 850 // if the kind is a deployment then we want to see if it has replicas to count towards the app count 851 if _, ok := m["kind"]; !ok { 852 continue 853 } 854 kind := m["kind"].(string) 855 if kind == "Deployment" || kind == "StatefulSet" { 856 if _, ok := m["spec"]; !ok { 857 continue 858 } 859 podCount += getReplicaCount(m["spec"].(map[string]any)) 860 } 861 } 862 863 } 864 return podCount 865 } 866 867 func getReplicaCount(spec map[string]any) int { 868 if spec == nil { 869 return 0 870 } 871 if _, ok := spec["selector"]; !ok { 872 return 0 873 } 874 s := spec["selector"].(map[string]any) 875 if s == nil { 876 return 0 877 } 878 if _, ok := s["matchLabels"]; !ok { 879 return 0 880 } 881 m := s["matchLabels"].(map[string]any) 882 if m == nil { 883 return 0 884 } 885 if _, ok := m[client.AppLabel]; !ok { 886 return 0 887 } 888 l := m[client.AppLabel] 889 if l == nil { 890 return 0 891 } 892 893 replicaCount := 0 894 var replicas any 895 replicas, ok := spec["replicas"] 896 if ok { 897 replicaCount += int(replicas.(float64)) 898 } else { 899 replicaCount++ 900 } 901 902 return replicaCount 903 } 904 905 type CoverageProfileParams struct { 906 Force bool `form:"force" json:"force"` 907 Service []string `form:"service" json:"service"` 908 Address []string `form:"address" json:"address"` 909 CoverFilePatterns []string `form:"coverfile" json:"coverfile"` 910 SkipFilePatterns []string `form:"skipfile" json:"skipfile"` 911 } 912 913 func (m *Environment) getCoverageList() (map[string]any, error) { 914 var servicesMap map[string]any 915 resp, err := m.httpClient.R(). 916 SetResult(&servicesMap). 917 Get("v1/cover/list") 918 if err != nil { 919 return nil, err 920 } 921 if resp.Status() != "200 OK" { 922 return nil, fmt.Errorf("coverage service list request is not 200") 923 } 924 return servicesMap, nil 925 } 926 927 func (m *Environment) ClearCoverage() error { 928 servicesMap, err := m.getCoverageList() 929 if err != nil { 930 return err 931 } 932 for serviceName := range servicesMap { 933 r, err := m.httpClient.R(). 934 SetBody(CoverageProfileParams{Service: []string{serviceName}}). 935 Post("v1/cover/clear") 936 if err != nil { 937 return err 938 } 939 if r.Status() != "200 OK" { 940 return fmt.Errorf("coverage service list request is not 200") 941 } 942 log.Debug().Str("Service", serviceName).Msg("Coverage cleared") 943 } 944 return nil 945 } 946 947 func (m *Environment) SaveCoverage() error { 948 if err := MkdirIfNotExists(COVERAGE_DIR); err != nil { 949 return err 950 } 951 servicesMap, err := m.getCoverageList() 952 if err != nil { 953 return err 954 } 955 log.Debug().Interface("Services", servicesMap).Msg("Services eligible for coverage") 956 for serviceName := range servicesMap { 957 r, err := m.httpClient.R(). 958 SetBody(CoverageProfileParams{Service: []string{serviceName}}). 959 Post("v1/cover/profile") 960 if err != nil { 961 return err 962 } 963 if r.Status() != "200 OK" { 964 return fmt.Errorf("coverage service list request is not 200") 965 } 966 log.Debug().Str("Service", serviceName).Msg("Coverage received") 967 if err := os.WriteFile(fmt.Sprintf("%s/%s.cov", COVERAGE_DIR, serviceName), r.Body(), os.ModePerm); err != nil { 968 return err 969 } 970 } 971 return nil 972 } 973 974 // Shutdown environment, remove namespace 975 func (m *Environment) Shutdown() error { 976 // don't shutdown if returning of funds failed 977 if m.Cfg.fundReturnFailed { 978 return nil 979 } 980 981 // don't shutdown if this is a test running remotely 982 if m.Cfg.InsideK8s { 983 return nil 984 } 985 986 keepEnvs := os.Getenv(config.EnvVarKeepEnvironments) 987 if keepEnvs == "" { 988 keepEnvs = "NEVER" 989 } 990 991 shouldShutdown := false 992 switch strings.ToUpper(keepEnvs) { 993 case "ALWAYS": 994 return nil 995 case "ONFAIL": 996 if m.Cfg.Test != nil { 997 if !m.Cfg.Test.Failed() { 998 shouldShutdown = true 999 } 1000 } 1001 case "NEVER": 1002 shouldShutdown = true 1003 default: 1004 log.Warn().Str("Invalid Keep Value", keepEnvs). 1005 Msg("Invalid 'keep_environments' value, see the KEEP_ENVIRONMENTS env var") 1006 } 1007 1008 if shouldShutdown { 1009 return m.Client.RemoveNamespace(m.Cfg.Namespace) 1010 } 1011 return nil 1012 } 1013 1014 // WillUseRemoteRunner determines if we need to start the remote runner 1015 func (m *Environment) WillUseRemoteRunner() bool { 1016 val, _ := os.LookupEnv(config.EnvVarJobImage) 1017 return val != "" && m.Cfg != nil && m.Cfg.Test != nil && m.Cfg.Test.Name() != "" 1018 } 1019 1020 func DefaultJobLogFunction(e *Environment, message string) { 1021 logChunks := logging.SplitStringIntoChunks(message, 50000) 1022 for _, chunk := range logChunks { 1023 e.Cfg.Test.Log(chunk) 1024 } 1025 if strings.Contains(message, FAILED_FUND_RETURN) { 1026 e.Cfg.fundReturnFailed = true 1027 } 1028 if strings.Contains(message, TEST_FAILED) { 1029 e.Cfg.Test.Fail() 1030 } 1031 } 1032 1033 // markNotSafeToEvict adds the safe to evict annotation to the provided map if needed 1034 func markNotSafeToEvict(preventPodEviction bool, m map[string]string) map[string]string { 1035 if m == nil { 1036 m = make(map[string]string) 1037 } 1038 if preventPodEviction { 1039 m["karpenter.sh/do-not-evict"] = "true" 1040 m["cluster-autoscaler.kubernetes.io/safe-to-evict"] = "false" 1041 } 1042 1043 return m 1044 }