github.com/ari-anchor/sei-tendermint@v0.0.0-20230519144642-dc826b7b56bb/cmd/tendermint/commands/debug/kill.go (about) 1 package debug 2 3 import ( 4 "errors" 5 "fmt" 6 "os" 7 "os/exec" 8 "path/filepath" 9 "strconv" 10 "syscall" 11 "time" 12 13 "github.com/spf13/cobra" 14 "github.com/spf13/viper" 15 16 "github.com/ari-anchor/sei-tendermint/config" 17 "github.com/ari-anchor/sei-tendermint/libs/cli" 18 "github.com/ari-anchor/sei-tendermint/libs/log" 19 rpchttp "github.com/ari-anchor/sei-tendermint/rpc/client/http" 20 ) 21 22 func getKillCmd(logger log.Logger) *cobra.Command { 23 cmd := &cobra.Command{ 24 Use: "kill [pid] [compressed-output-file]", 25 Short: "Kill a Tendermint process while aggregating and packaging debugging data", 26 Long: `Kill a Tendermint process while also aggregating Tendermint process data 27 such as the latest node state, including consensus and networking state, 28 go-routine state, and the node's WAL and config information. This aggregated data 29 is packaged into a compressed archive. 30 31 Example: 32 $ tendermint debug kill 34255 /path/to/tm-debug.zip`, 33 Args: cobra.ExactArgs(2), 34 RunE: func(cmd *cobra.Command, args []string) error { 35 ctx := cmd.Context() 36 pid, err := strconv.ParseInt(args[0], 10, 64) 37 if err != nil { 38 return err 39 } 40 41 outFile := args[1] 42 if outFile == "" { 43 return errors.New("invalid output file") 44 } 45 nodeRPCAddr, err := cmd.Flags().GetString(flagNodeRPCAddr) 46 if err != nil { 47 return fmt.Errorf("flag %q not defined: %w", flagNodeRPCAddr, err) 48 } 49 50 rpc, err := rpchttp.New(nodeRPCAddr) 51 if err != nil { 52 return fmt.Errorf("failed to create new http client: %w", err) 53 } 54 55 home := viper.GetString(cli.HomeFlag) 56 conf := config.DefaultConfig() 57 conf = conf.SetRoot(home) 58 config.EnsureRoot(conf.RootDir) 59 60 // Create a temporary directory which will contain all the state dumps and 61 // relevant files and directories that will be compressed into a file. 62 tmpDir, err := os.MkdirTemp(os.TempDir(), "tendermint_debug_tmp") 63 if err != nil { 64 return fmt.Errorf("failed to create temporary directory: %w", err) 65 } 66 defer os.RemoveAll(tmpDir) 67 68 logger.Info("getting node status...") 69 if err := dumpStatus(ctx, rpc, tmpDir, "status.json"); err != nil { 70 return err 71 } 72 73 logger.Info("getting node network info...") 74 if err := dumpNetInfo(ctx, rpc, tmpDir, "net_info.json"); err != nil { 75 return err 76 } 77 78 logger.Info("getting node consensus state...") 79 if err := dumpConsensusState(ctx, rpc, tmpDir, "consensus_state.json"); err != nil { 80 return err 81 } 82 83 logger.Info("copying node WAL...") 84 if err := copyWAL(conf, tmpDir); err != nil { 85 if !os.IsNotExist(err) { 86 return err 87 } 88 89 logger.Info("node WAL does not exist; continuing...") 90 } 91 92 logger.Info("copying node configuration...") 93 if err := copyConfig(home, tmpDir); err != nil { 94 return err 95 } 96 97 logger.Info("killing Tendermint process") 98 if err := killProc(int(pid), tmpDir); err != nil { 99 return err 100 } 101 102 logger.Info("archiving and compressing debug directory...") 103 return zipDir(tmpDir, outFile) 104 }, 105 } 106 107 return cmd 108 } 109 110 // killProc attempts to kill the Tendermint process with a given PID with an 111 // ABORT signal which should result in a goroutine stacktrace. The PID's STDERR 112 // is tailed and piped to a file under the directory dir. An error is returned 113 // if the output file cannot be created or the tail command cannot be started. 114 // An error is not returned if any subsequent syscall fails. 115 func killProc(pid int, dir string) error { 116 // pipe STDERR output from tailing the Tendermint process to a file 117 // 118 // NOTE: This will only work on UNIX systems. 119 cmd := exec.Command("tail", "-f", fmt.Sprintf("/proc/%d/fd/2", pid)) // nolint: gosec 120 121 outFile, err := os.Create(filepath.Join(dir, "stacktrace.out")) 122 if err != nil { 123 return err 124 } 125 defer outFile.Close() 126 127 cmd.Stdout = outFile 128 cmd.Stderr = outFile 129 130 if err := cmd.Start(); err != nil { 131 return err 132 } 133 134 // kill the underlying Tendermint process and subsequent tailing process 135 go func() { 136 // Killing the Tendermint process with the '-ABRT|-6' signal will result in 137 // a goroutine stacktrace. 138 p, err := os.FindProcess(pid) 139 if err != nil { 140 fmt.Fprintf(os.Stderr, "failed to find PID to kill Tendermint process: %s", err) 141 } else if err = p.Signal(syscall.SIGABRT); err != nil { 142 fmt.Fprintf(os.Stderr, "failed to kill Tendermint process: %s", err) 143 } 144 145 // allow some time to allow the Tendermint process to be killed 146 // 147 // TODO: We should 'wait' for a kill to succeed (e.g. poll for PID until it 148 // cannot be found). Regardless, this should be ample time. 149 time.Sleep(5 * time.Second) 150 151 if err := cmd.Process.Kill(); err != nil { 152 fmt.Fprintf(os.Stderr, "failed to kill Tendermint process output redirection: %s", err) 153 } 154 }() 155 156 if err := cmd.Wait(); err != nil { 157 // only return an error not invoked by a manual kill 158 if _, ok := err.(*exec.ExitError); !ok { 159 return err 160 } 161 } 162 163 return nil 164 }