gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/tools/stucktasks/stucktasks.go (about)

     1  // Copyright 2022 The gVisor Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package main implements a tool to help troubleshoot watchdog dumps.
    16  package main
    17  
    18  import (
    19  	"bufio"
    20  	"fmt"
    21  	"io"
    22  	"os"
    23  	"path/filepath"
    24  	"regexp"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  
    29  	"gvisor.dev/gvisor/runsc/flag"
    30  )
    31  
    32  var (
    33  	flagStacks = flag.String("stacks", "", "path to log file containing stuck task stacks.")
    34  	flagOut    = flag.String("out", "", "path to output file (default: STDERR).")
    35  )
    36  
    37  func main() {
    38  	flag.Parse()
    39  
    40  	// Mandatory fields missing, print usage.
    41  	if len(*flagStacks) == 0 {
    42  		fmt.Fprintln(os.Stderr, "Usage:")
    43  		fmt.Fprintf(os.Stderr, "\t%s --stacks=<path> [--out=<path>]\n", filepath.Base(os.Args[0]))
    44  		os.Exit(1)
    45  	}
    46  
    47  	in, err := os.Open(*flagStacks)
    48  	if err != nil {
    49  		fatal(err)
    50  	}
    51  	defer in.Close()
    52  
    53  	var out io.Writer = os.Stdout
    54  	if len(*flagOut) > 0 {
    55  		f, err := os.Create(*flagOut)
    56  		if err != nil {
    57  			fatal(err)
    58  		}
    59  		defer f.Close()
    60  		out = f
    61  	}
    62  
    63  	if err := analyze(in, out); err != nil {
    64  		fatal(err)
    65  	}
    66  }
    67  
    68  func fatal(err error) {
    69  	fatalf("%v", err)
    70  }
    71  
    72  func fatalf(format string, args ...any) {
    73  	fmt.Fprintf(os.Stderr, format+"\n", args...)
    74  	os.Exit(1)
    75  }
    76  
    77  func analyze(in io.Reader, out io.Writer) error {
    78  	scanner := bufio.NewScanner(in)
    79  	for scanner.Scan() {
    80  		line := scanner.Text()
    81  		if strings.Contains(line, "stuck task(s)") {
    82  			return analyzeStuckTasks(scanner, out)
    83  		}
    84  		if strings.Contains(line, "Watchdog goroutine is stuck") {
    85  			return analyzeStackDump(scanner, out, nil)
    86  		}
    87  		// Skip all lines before the watchdog dump.
    88  	}
    89  	return fmt.Errorf("watchdog header not found")
    90  }
    91  
    92  func analyzeStuckTasks(scanner *bufio.Scanner, out io.Writer) error {
    93  	// Look for stuck tasks goroutine. The output has the folowing format:
    94  	//	Task tid: 123 (goroutine 45), entered RunSys state 3m28.77s ago.
    95  	ids := make(map[uint]struct{})
    96  	for scanner.Scan() {
    97  		line := scanner.Text()
    98  		id, err := parseGoroutineID(line)
    99  		if err != nil {
   100  			// All stuck tasks were collected, the log is followed by the stack dump.
   101  			return analyzeStackDump(scanner, out, ids)
   102  		}
   103  		ids[id] = struct{}{}
   104  	}
   105  	return fmt.Errorf("not able to find stuck task IDs")
   106  }
   107  
   108  func analyzeStackDump(scanner *bufio.Scanner, out io.Writer, stuckIds map[uint]struct{}) error {
   109  	stacks, err := collectStacks(scanner)
   110  	if err != nil {
   111  		return nil
   112  	}
   113  
   114  	// Create histogram with all unique stacks.
   115  	type counter struct {
   116  		count int
   117  		ids   []uint
   118  		*stack
   119  	}
   120  	uniq := make(map[string]*counter)
   121  	for _, stack := range stacks {
   122  		c := uniq[stack.signature]
   123  		if c == nil {
   124  			c = &counter{stack: stack}
   125  			uniq[stack.signature] = c
   126  		}
   127  		c.count++
   128  		c.ids = append(c.ids, stack.id)
   129  	}
   130  
   131  	// Sort them in reverse order, to print most occurring at the top.
   132  	var sorted []*counter
   133  	for _, c := range uniq {
   134  		sorted = append(sorted, c)
   135  	}
   136  	sort.Slice(sorted, func(i, j int) bool {
   137  		// Reverse sort
   138  		return sorted[i].count > sorted[j].count
   139  	})
   140  
   141  	fmt.Fprintf(out, "Stacks: %d, unique: %d\n\n", len(stacks), len(sorted))
   142  	for _, c := range sorted {
   143  		fmt.Fprintf(out, "=== Stack (count: %d) ===\ngoroutine IDs: %v\n", c.count, c.ids)
   144  		var stucks []uint
   145  		for _, id := range c.ids {
   146  			if _, ok := stuckIds[id]; ok {
   147  				stucks = append(stucks, id)
   148  			}
   149  		}
   150  		if len(stucks) > 0 {
   151  			fmt.Fprintf(out, "*** Stuck goroutines: %v ***\n", stucks)
   152  		}
   153  		fmt.Fprintln(out)
   154  		for _, line := range c.lines {
   155  			fmt.Fprintln(out, line)
   156  		}
   157  		fmt.Fprintln(out)
   158  	}
   159  
   160  	return nil
   161  }
   162  
   163  // collectStacks parses the input to find stack dump. Expected format is:
   164  //
   165  //	goroutine ID [reason, time]:
   166  //	package.function(args)
   167  //		GOROOT/path/file.go:line +offset
   168  //	<blank line between stacks>
   169  func collectStacks(scanner *bufio.Scanner) ([]*stack, error) {
   170  	var stacks []*stack
   171  	var block []string
   172  	for scanner.Scan() {
   173  		line := scanner.Text()
   174  
   175  		// Expect the first line of a block to be the goroutine header:
   176  		//   goroutine 43 [select, 19 minutes]:
   177  		if len(block) == 0 {
   178  			if _, err := parseGoroutineID(line); err != nil {
   179  				// If not the header and no stacks have been found yet, skip the line
   180  				// until the start of stack dump is found.
   181  				if len(stacks) == 0 {
   182  					continue
   183  				}
   184  				// if stacks has been found, it means we reached the end of the dump and
   185  				// more logging lines exist in the file.
   186  				break
   187  			}
   188  		}
   189  
   190  		// A blank line means that we reached the end of the block
   191  		if len(strings.TrimSpace(line)) > 0 {
   192  			block = append(block, line)
   193  			continue
   194  		}
   195  		stack, err := parseBlock(block)
   196  		if err != nil {
   197  			return nil, err
   198  		}
   199  		stacks = append(stacks, stack)
   200  		block = nil
   201  	}
   202  	return stacks, nil
   203  }
   204  
   205  func parseBlock(block []string) (*stack, error) {
   206  	id, err := parseGoroutineID(block[0])
   207  	if err != nil {
   208  		return nil, err
   209  	}
   210  
   211  	var signature string
   212  	for i, line := range block[1:] {
   213  		if i%2 == 1 {
   214  			signature += line + "\n"
   215  		}
   216  	}
   217  
   218  	return &stack{
   219  		id:        uint(id),
   220  		signature: signature,
   221  		lines:     block[1:],
   222  	}, nil
   223  }
   224  
   225  func parseGoroutineID(line string) (uint, error) {
   226  	r := regexp.MustCompile(`goroutine (\d+)`)
   227  	matches := r.FindStringSubmatch(line)
   228  	if len(matches) != 2 {
   229  		return 0, fmt.Errorf("invalid goroutine ID line: %q", line)
   230  	}
   231  	id, err := strconv.Atoi(matches[1])
   232  	if err != nil {
   233  		return 0, fmt.Errorf("parsing goroutine ID, line: %q: %w", line, err)
   234  	}
   235  	return uint(id), nil
   236  }
   237  
   238  type stack struct {
   239  	id        uint
   240  	signature string
   241  	lines     []string
   242  }