github.com/devwanda/aphelion-staking@v0.33.9/cmd/tendermint/commands/debug/kill.go (about)

     1  package debug
     2  
     3  import (
     4  	"fmt"
     5  	"io/ioutil"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/pkg/errors"
    14  	"github.com/spf13/cobra"
    15  	"github.com/spf13/viper"
    16  
    17  	cfg "github.com/devwanda/aphelion-staking/config"
    18  	"github.com/devwanda/aphelion-staking/libs/cli"
    19  	rpchttp "github.com/devwanda/aphelion-staking/rpc/client/http"
    20  )
    21  
    22  var killCmd = &cobra.Command{
    23  	Use:   "kill [pid] [compressed-output-file]",
    24  	Short: "Kill a Tendermint process while aggregating and packaging debugging data",
    25  	Long: `Kill a Tendermint process while also aggregating Tendermint process data
    26  such as the latest node state, including consensus and networking state,
    27  go-routine state, and the node's WAL and config information. This aggregated data
    28  is packaged into a compressed archive.
    29  
    30  Example:
    31  $ tendermint debug 34255 /path/to/tm-debug.zip`,
    32  	Args: cobra.ExactArgs(2),
    33  	RunE: killCmdHandler,
    34  }
    35  
    36  func killCmdHandler(cmd *cobra.Command, args []string) error {
    37  	pid, err := strconv.ParseUint(args[0], 10, 64)
    38  	if err != nil {
    39  		return err
    40  	}
    41  
    42  	outFile := args[1]
    43  	if outFile == "" {
    44  		return errors.New("invalid output file")
    45  	}
    46  
    47  	rpc, err := rpchttp.New(nodeRPCAddr, "/websocket")
    48  	if err != nil {
    49  		return errors.Wrap(err, "failed to create new http client")
    50  	}
    51  
    52  	home := viper.GetString(cli.HomeFlag)
    53  	conf := cfg.DefaultConfig()
    54  	conf = conf.SetRoot(home)
    55  	cfg.EnsureRoot(conf.RootDir)
    56  
    57  	// Create a temporary directory which will contain all the state dumps and
    58  	// relevant files and directories that will be compressed into a file.
    59  	tmpDir, err := ioutil.TempDir(os.TempDir(), "tendermint_debug_tmp")
    60  	if err != nil {
    61  		return errors.Wrap(err, "failed to create temporary directory")
    62  	}
    63  	defer os.RemoveAll(tmpDir)
    64  
    65  	logger.Info("getting node status...")
    66  	if err := dumpStatus(rpc, tmpDir, "status.json"); err != nil {
    67  		return err
    68  	}
    69  
    70  	logger.Info("getting node network info...")
    71  	if err := dumpNetInfo(rpc, tmpDir, "net_info.json"); err != nil {
    72  		return err
    73  	}
    74  
    75  	logger.Info("getting node consensus state...")
    76  	if err := dumpConsensusState(rpc, tmpDir, "consensus_state.json"); err != nil {
    77  		return err
    78  	}
    79  
    80  	logger.Info("copying node WAL...")
    81  	if err := copyWAL(conf, tmpDir); err != nil {
    82  		return err
    83  	}
    84  
    85  	logger.Info("copying node configuration...")
    86  	if err := copyConfig(home, tmpDir); err != nil {
    87  		return err
    88  	}
    89  
    90  	logger.Info("killing Tendermint process")
    91  	if err := killProc(pid, tmpDir); err != nil {
    92  		return err
    93  	}
    94  
    95  	logger.Info("archiving and compressing debug directory...")
    96  	return zipDir(tmpDir, outFile)
    97  }
    98  
    99  // killProc attempts to kill the Tendermint process with a given PID with an
   100  // ABORT signal which should result in a goroutine stacktrace. The PID's STDERR
   101  // is tailed and piped to a file under the directory dir. An error is returned
   102  // if the output file cannot be created or the tail command cannot be started.
   103  // An error is not returned if any subsequent syscall fails.
   104  func killProc(pid uint64, dir string) error {
   105  	// pipe STDERR output from tailing the Tendermint process to a file
   106  	//
   107  	// NOTE: This will only work on UNIX systems.
   108  	cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) // nolint: gosec
   109  
   110  	outFile, err := os.Create(filepath.Join(dir, "stacktrace.out"))
   111  	if err != nil {
   112  		return err
   113  	}
   114  	defer outFile.Close()
   115  
   116  	cmd.Stdout = outFile
   117  	cmd.Stderr = outFile
   118  
   119  	if err := cmd.Start(); err != nil {
   120  		return err
   121  	}
   122  
   123  	// kill the underlying Tendermint process and subsequent tailing process
   124  	go func() {
   125  		// Killing the Tendermint process with the '-ABRT|-6' signal will result in
   126  		// a goroutine stacktrace.
   127  		p, err := os.FindProcess(int(pid))
   128  		if err != nil {
   129  			fmt.Fprintf(os.Stderr, "failed to find PID to kill Tendermint process: %s", err)
   130  		} else if err = p.Signal(syscall.SIGABRT); err != nil {
   131  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process: %s", err)
   132  		}
   133  
   134  		// allow some time to allow the Tendermint process to be killed
   135  		//
   136  		// TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it
   137  		// cannot be found). Regardless, this should be ample time.
   138  		time.Sleep(5 * time.Second)
   139  
   140  		if err := cmd.Process.Kill(); err != nil {
   141  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process output redirection: %s", err)
   142  		}
   143  	}()
   144  
   145  	if err := cmd.Wait(); err != nil {
   146  		// only return an error not invoked by a manual kill
   147  		if _, ok := err.(*exec.ExitError); !ok {
   148  			return err
   149  		}
   150  	}
   151  
   152  	return nil
   153  }