github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/cmd/tendermint/commands/debug/kill.go (about)

     1  package debug
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"os/exec"
     8  	"path/filepath"
     9  	"strconv"
    10  	"syscall"
    11  	"time"
    12  
    13  	"github.com/spf13/cobra"
    14  	"github.com/spf13/viper"
    15  
    16  	"github.com/ari-anchor/sei-tendermint/config"
    17  	"github.com/ari-anchor/sei-tendermint/libs/cli"
    18  	"github.com/ari-anchor/sei-tendermint/libs/log"
    19  	rpchttp "github.com/ari-anchor/sei-tendermint/rpc/client/http"
    20  )
    21  
    22  func getKillCmd(logger log.Logger) *cobra.Command {
    23  	cmd := &cobra.Command{
    24  		Use:   "kill [pid] [compressed-output-file]",
    25  		Short: "Kill a Tendermint process while aggregating and packaging debugging data",
    26  		Long: `Kill a Tendermint process while also aggregating Tendermint process data
    27  such as the latest node state, including consensus and networking state,
    28  go-routine state, and the node's WAL and config information. This aggregated data
    29  is packaged into a compressed archive.
    30  
    31  Example:
    32  $ tendermint debug kill 34255 /path/to/tm-debug.zip`,
    33  		Args: cobra.ExactArgs(2),
    34  		RunE: func(cmd *cobra.Command, args []string) error {
    35  			ctx := cmd.Context()
    36  			pid, err := strconv.ParseInt(args[0], 10, 64)
    37  			if err != nil {
    38  				return err
    39  			}
    40  
    41  			outFile := args[1]
    42  			if outFile == "" {
    43  				return errors.New("invalid output file")
    44  			}
    45  			nodeRPCAddr, err := cmd.Flags().GetString(flagNodeRPCAddr)
    46  			if err != nil {
    47  				return fmt.Errorf("flag %q not defined: %w", flagNodeRPCAddr, err)
    48  			}
    49  
    50  			rpc, err := rpchttp.New(nodeRPCAddr)
    51  			if err != nil {
    52  				return fmt.Errorf("failed to create new http client: %w", err)
    53  			}
    54  
    55  			home := viper.GetString(cli.HomeFlag)
    56  			conf := config.DefaultConfig()
    57  			conf = conf.SetRoot(home)
    58  			config.EnsureRoot(conf.RootDir)
    59  
    60  			// Create a temporary directory which will contain all the state dumps and
    61  			// relevant files and directories that will be compressed into a file.
    62  			tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp")
    63  			if err != nil {
    64  				return fmt.Errorf("failed to create temporary directory: %w", err)
    65  			}
    66  			defer os.RemoveAll(tmpDir)
    67  
    68  			logger.Info("getting node status...")
    69  			if err := dumpStatus(ctx, rpc, tmpDir, "status.json"); err != nil {
    70  				return err
    71  			}
    72  
    73  			logger.Info("getting node network info...")
    74  			if err := dumpNetInfo(ctx, rpc, tmpDir, "net_info.json"); err != nil {
    75  				return err
    76  			}
    77  
    78  			logger.Info("getting node consensus state...")
    79  			if err := dumpConsensusState(ctx, rpc, tmpDir, "consensus_state.json"); err != nil {
    80  				return err
    81  			}
    82  
    83  			logger.Info("copying node WAL...")
    84  			if err := copyWAL(conf, tmpDir); err != nil {
    85  				if !os.IsNotExist(err) {
    86  					return err
    87  				}
    88  
    89  				logger.Info("node WAL does not exist; continuing...")
    90  			}
    91  
    92  			logger.Info("copying node configuration...")
    93  			if err := copyConfig(home, tmpDir); err != nil {
    94  				return err
    95  			}
    96  
    97  			logger.Info("killing Tendermint process")
    98  			if err := killProc(int(pid), tmpDir); err != nil {
    99  				return err
   100  			}
   101  
   102  			logger.Info("archiving and compressing debug directory...")
   103  			return zipDir(tmpDir, outFile)
   104  		},
   105  	}
   106  
   107  	return cmd
   108  }
   109  
   110  // killProc attempts to kill the Tendermint process with a given PID with an
   111  // ABORT signal which should result in a goroutine stacktrace. The PID's STDERR
   112  // is tailed and piped to a file under the directory dir. An error is returned
   113  // if the output file cannot be created or the tail command cannot be started.
   114  // An error is not returned if any subsequent syscall fails.
   115  func killProc(pid int, dir string) error {
   116  	// pipe STDERR output from tailing the Tendermint process to a file
   117  	//
   118  	// NOTE: This will only work on UNIX systems.
   119  	cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) // nolint: gosec
   120  
   121  	outFile, err := os.Create(filepath.Join(dir, "stacktrace.out"))
   122  	if err != nil {
   123  		return err
   124  	}
   125  	defer outFile.Close()
   126  
   127  	cmd.Stdout = outFile
   128  	cmd.Stderr = outFile
   129  
   130  	if err := cmd.Start(); err != nil {
   131  		return err
   132  	}
   133  
   134  	// kill the underlying Tendermint process and subsequent tailing process
   135  	go func() {
   136  		// Killing the Tendermint process with the '-ABRT|-6' signal will result in
   137  		// a goroutine stacktrace.
   138  		p, err := os.FindProcess(pid)
   139  		if err != nil {
   140  			fmt.Fprintf(os.Stderr, "failed to find PID to kill Tendermint process: %s", err)
   141  		} else if err = p.Signal(syscall.SIGABRT); err != nil {
   142  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process: %s", err)
   143  		}
   144  
   145  		// allow some time to allow the Tendermint process to be killed
   146  		//
   147  		// TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it
   148  		// cannot be found). Regardless, this should be ample time.
   149  		time.Sleep(5 * time.Second)
   150  
   151  		if err := cmd.Process.Kill(); err != nil {
   152  			fmt.Fprintf(os.Stderr, "failed to kill Tendermint process output redirection: %s", err)
   153  		}
   154  	}()
   155  
   156  	if err := cmd.Wait(); err != nil {
   157  		// only return an error not invoked by a manual kill
   158  		if _, ok := err.(*exec.ExitError); !ok {
   159  			return err
   160  		}
   161  	}
   162  
   163  	return nil
   164  }