github.com/vipernet-xyz/tm@v0.34.24/cmd/tendermint/commands/debug/kill.go (about)

     1  package debug
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/spf13/cobra"
    14  	"github.com/spf13/viper"
    15  
    16  	cfg "github.com/vipernet-xyz/tm/config"
    17  	"github.com/vipernet-xyz/tm/libs/cli"
    18  	rpchttp "github.com/vipernet-xyz/tm/rpc/client/http"
    19  )
    20  
    21  var killCmd = &cobra.Command{
    22  	Use:   "kill [pid] [compressed-output-file]",
    23  	Short: "Kill a Tendermint process while aggregating and packaging debugging data",
    24  	Long: `Kill a Tendermint process while also aggregating Tendermint process data
    25  such as the latest node state, including consensus and networking state,
    26  go-routine state, and the node's WAL and config information. This aggregated data
    27  is packaged into a compressed archive.
    28  
    29  Example:
    30  $ tendermint debug 34255 /path/to/tm-debug.zip`,
    31  	Args: cobra.ExactArgs(2),
    32  	RunE: killCmdHandler,
    33  }
    34  
    35  func killCmdHandler(cmd *cobra.Command, args []string) error {
    36  	pid, err := strconv.ParseUint(args[0], 10, 64)
    37  	if err != nil {
    38  		return err
    39  	}
    40  
    41  	outFile := args[1]
    42  	if outFile == "" {
    43  		return errors.New("invalid output file")
    44  	}
    45  
    46  	rpc, err := rpchttp.New(nodeRPCAddr, "/websocket")
    47  	if err != nil {
    48  		return fmt.Errorf("failed to create new http client: %w", err)
    49  	}
    50  
    51  	home := viper.GetString(cli.HomeFlag)
    52  	conf := cfg.DefaultConfig()
    53  	conf = conf.SetRoot(home)
    54  	cfg.EnsureRoot(conf.RootDir)
    55  
    56  	// Create a temporary directory which will contain all the state dumps and
    57  	// relevant files and directories that will be compressed into a file.
    58  	tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp")
    59  	if err != nil {
    60  		return fmt.Errorf("failed to create temporary directory: %w", err)
    61  	}
    62  	defer os.RemoveAll(tmpDir)
    63  
    64  	logger.Info("getting node status...")
    65  	if err := dumpStatus(rpc, tmpDir, "status.json"); err != nil {
    66  		return err
    67  	}
    68  
    69  	logger.Info("getting node network info...")
    70  	if err := dumpNetInfo(rpc, tmpDir, "net_info.json"); err != nil {
    71  		return err
    72  	}
    73  
    74  	logger.Info("getting node consensus state...")
    75  	if err := dumpConsensusState(rpc, tmpDir, "consensus_state.json"); err != nil {
    76  		return err
    77  	}
    78  
    79  	logger.Info("copying node WAL...")
    80  	if err := copyWAL(conf, tmpDir); err != nil {
    81  		return err
    82  	}
    83  
    84  	logger.Info("copying node configuration...")
    85  	if err := copyConfig(home, tmpDir); err != nil {
    86  		return err
    87  	}
    88  
    89  	logger.Info("killing Tendermint process")
    90  	if err := killProc(pid, tmpDir); err != nil {
    91  		return err
    92  	}
    93  
    94  	logger.Info("archiving and compressing debug directory...")
    95  	return zipDir(tmpDir, outFile)
    96  }
    97  
    98  // killProc attempts to kill the Tendermint process with a given PID with an
    99  // ABORT signal which should result in a goroutine stacktrace. The PID's STDERR
   100  // is tailed and piped to a file under the directory dir. An error is returned
   101  // if the output file cannot be created or the tail command cannot be started.
   102  // An error is not returned if any subsequent syscall fails.
   103  func killProc(pid uint64, dir string) error {
   104  	// pipe STDERR output from tailing the Tendermint process to a file
   105  	//
   106  	// NOTE: This will only work on UNIX systems.
   107  	cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) //nolint: gosec
   108  
   109  	outFile, err := os.Create(filepath.Join(dir, "stacktrace.out"))
   110  	if err != nil {
   111  		return err
   112  	}
   113  	defer outFile.Close()
   114  
   115  	cmd.Stdout = outFile
   116  	cmd.Stderr = outFile
   117  
   118  	if err := cmd.Start(); err != nil {
   119  		return err
   120  	}
   121  
   122  	// kill the underlying Tendermint process and subsequent tailing process
   123  	go func() {
   124  		// Killing the Tendermint process with the '-ABRT|-6' signal will result in
   125  		// a goroutine stacktrace.
   126  		p, err := os.FindProcess(int(pid))
   127  		if err != nil {
   128  			fmt.Fprintf(os.Stderr, "failed to find PID to kill Tendermint process: %s", err)
   129  		} else if err = p.Signal(syscall.SIGABRT); err != nil {
   130  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process: %s", err)
   131  		}
   132  
   133  		// allow some time to allow the Tendermint process to be killed
   134  		//
   135  		// TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it
   136  		// cannot be found). Regardless, this should be ample time.
   137  		time.Sleep(5 * time.Second)
   138  
   139  		if err := cmd.Process.Kill(); err != nil {
   140  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process output redirection: %s", err)
   141  		}
   142  	}()
   143  
   144  	if err := cmd.Wait(); err != nil {
   145  		// only return an error not invoked by a manual kill
   146  		if _, ok := err.(*exec.ExitError); !ok {
   147  			return err
   148  		}
   149  	}
   150  
   151  	return nil
   152  }