github.com/badrootd/celestia-core@v0.0.0-20240305091328-aa4207a4b25d/test/e2e/runner/perturb.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"path/filepath"
     6  	"time"
     7  
     8  	"github.com/badrootd/celestia-core/libs/log"
     9  	rpctypes "github.com/badrootd/celestia-core/rpc/core/types"
    10  	e2e "github.com/badrootd/celestia-core/test/e2e/pkg"
    11  )
    12  
    13  // Perturbs a running testnet.
    14  func Perturb(testnet *e2e.Testnet) error {
    15  	for _, node := range testnet.Nodes {
    16  		for _, perturbation := range node.Perturbations {
    17  			_, err := PerturbNode(node, perturbation)
    18  			if err != nil {
    19  				return err
    20  			}
    21  			time.Sleep(3 * time.Second) // give network some time to recover between each
    22  		}
    23  	}
    24  	return nil
    25  }
    26  
    27  // PerturbNode perturbs a node with a given perturbation, returning its status
    28  // after recovering.
    29  func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
    30  	testnet := node.Testnet
    31  	baseDir := filepath.Base(testnet.Dir)
    32  	testnetName := fmt.Sprintf("%s_%s", baseDir, testnet.Name)
    33  
    34  	out, err := execComposeOutput(testnet.Dir, "ps", "-q", node.Name)
    35  	if err != nil {
    36  		return nil, err
    37  	}
    38  	name := node.Name
    39  	upgraded := false
    40  	if len(out) == 0 {
    41  		name = name + "_u"
    42  		upgraded = true
    43  		logger.Info("perturb node", "msg",
    44  			log.NewLazySprintf("Node %v already upgraded, operating on alternate container %v",
    45  				node.Name, name))
    46  	}
    47  
    48  	switch perturbation {
    49  	case e2e.PerturbationDisconnect:
    50  		logger.Info("perturb node", "msg", log.NewLazySprintf("Disconnecting node %v...", node.Name))
    51  		if err := execDocker("network", "disconnect", testnetName, name); err != nil {
    52  			return nil, err
    53  		}
    54  		time.Sleep(10 * time.Second)
    55  		if err := execDocker("network", "connect", testnetName, name); err != nil {
    56  			return nil, err
    57  		}
    58  
    59  	case e2e.PerturbationKill:
    60  		logger.Info("perturb node", "msg", log.NewLazySprintf("Killing node %v...", node.Name))
    61  		if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", name); err != nil {
    62  			return nil, err
    63  		}
    64  		if err := execCompose(testnet.Dir, "start", name); err != nil {
    65  			return nil, err
    66  		}
    67  
    68  	case e2e.PerturbationPause:
    69  		logger.Info("perturb node", "msg", log.NewLazySprintf("Pausing node %v...", node.Name))
    70  		if err := execCompose(testnet.Dir, "pause", name); err != nil {
    71  			return nil, err
    72  		}
    73  		time.Sleep(10 * time.Second)
    74  		if err := execCompose(testnet.Dir, "unpause", name); err != nil {
    75  			return nil, err
    76  		}
    77  
    78  	case e2e.PerturbationRestart:
    79  		logger.Info("perturb node", "msg", log.NewLazySprintf("Restarting node %v...", node.Name))
    80  		if err := execCompose(testnet.Dir, "restart", name); err != nil {
    81  			return nil, err
    82  		}
    83  
    84  	case e2e.PerturbationUpgrade:
    85  		oldV := node.Version
    86  		newV := node.Testnet.UpgradeVersion
    87  		if upgraded {
    88  			return nil, fmt.Errorf("node %v can't be upgraded twice from version '%v' to version '%v'",
    89  				node.Name, oldV, newV)
    90  		}
    91  		if oldV == newV {
    92  			logger.Info("perturb node", "msg",
    93  				log.NewLazySprintf("Skipping upgrade of node %v to version '%v'; versions are equal.",
    94  					node.Name, newV))
    95  			break
    96  		}
    97  		logger.Info("perturb node", "msg",
    98  			log.NewLazySprintf("Upgrading node %v from version '%v' to version '%v'...",
    99  				node.Name, oldV, newV))
   100  
   101  		if err := execCompose(testnet.Dir, "stop", name); err != nil {
   102  			return nil, err
   103  		}
   104  		time.Sleep(10 * time.Second)
   105  		if err := execCompose(testnet.Dir, "up", "-d", name+"_u"); err != nil {
   106  			return nil, err
   107  		}
   108  
   109  	default:
   110  		return nil, fmt.Errorf("unexpected perturbation %q", perturbation)
   111  	}
   112  
   113  	status, err := waitForNode(node, 0, 20*time.Second)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  	logger.Info("perturb node",
   118  		"msg",
   119  		log.NewLazySprintf("Node %v recovered at height %v", node.Name, status.SyncInfo.LatestBlockHeight))
   120  	return status, nil
   121  }