gvisor.dev/gvisor@v0.0.0-20240520182842-f9d4d51c7e0f/tools/stucktasks/stucktasks.go (about) 1 // Copyright 2022 The gVisor Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package main implements a tool to help troubleshoot watchdog dumps. 16 package main 17 18 import ( 19 "bufio" 20 "fmt" 21 "io" 22 "os" 23 "path/filepath" 24 "regexp" 25 "sort" 26 "strconv" 27 "strings" 28 29 "gvisor.dev/gvisor/runsc/flag" 30 ) 31 32 var ( 33 flagStacks = flag.String("stacks", "", "path to log file containing stuck task stacks.") 34 flagOut = flag.String("out", "", "path to output file (default: STDERR).") 35 ) 36 37 func main() { 38 flag.Parse() 39 40 // Mandatory fields missing, print usage. 41 if len(*flagStacks) == 0 { 42 fmt.Fprintln(os.Stderr, "Usage:") 43 fmt.Fprintf(os.Stderr, "\t%s --stacks=<path> [--out=<path>]\n", filepath.Base(os.Args[0])) 44 os.Exit(1) 45 } 46 47 in, err := os.Open(*flagStacks) 48 if err != nil { 49 fatal(err) 50 } 51 defer in.Close() 52 53 var out io.Writer = os.Stdout 54 if len(*flagOut) > 0 { 55 f, err := os.Create(*flagOut) 56 if err != nil { 57 fatal(err) 58 } 59 defer f.Close() 60 out = f 61 } 62 63 if err := analyze(in, out); err != nil { 64 fatal(err) 65 } 66 } 67 68 func fatal(err error) { 69 fatalf("%v", err) 70 } 71 72 func fatalf(format string, args ...any) { 73 fmt.Fprintf(os.Stderr, format+"\n", args...) 74 os.Exit(1) 75 } 76 77 func analyze(in io.Reader, out io.Writer) error { 78 scanner := bufio.NewScanner(in) 79 for scanner.Scan() { 80 line := scanner.Text() 81 if strings.Contains(line, "stuck task(s)") { 82 return analyzeStuckTasks(scanner, out) 83 } 84 if strings.Contains(line, "Watchdog goroutine is stuck") { 85 return analyzeStackDump(scanner, out, nil) 86 } 87 // Skip all lines before the watchdog dump. 88 } 89 return fmt.Errorf("watchdog header not found") 90 } 91 92 func analyzeStuckTasks(scanner *bufio.Scanner, out io.Writer) error { 93 // Look for stuck tasks goroutine. The output has the folowing format: 94 // Task tid: 123 (goroutine 45), entered RunSys state 3m28.77s ago. 95 ids := make(map[uint]struct{}) 96 for scanner.Scan() { 97 line := scanner.Text() 98 id, err := parseGoroutineID(line) 99 if err != nil { 100 // All stuck tasks were collected, the log is followed by the stack dump. 101 return analyzeStackDump(scanner, out, ids) 102 } 103 ids[id] = struct{}{} 104 } 105 return fmt.Errorf("not able to find stuck task IDs") 106 } 107 108 func analyzeStackDump(scanner *bufio.Scanner, out io.Writer, stuckIds map[uint]struct{}) error { 109 stacks, err := collectStacks(scanner) 110 if err != nil { 111 return nil 112 } 113 114 // Create histogram with all unique stacks. 115 type counter struct { 116 count int 117 ids []uint 118 *stack 119 } 120 uniq := make(map[string]*counter) 121 for _, stack := range stacks { 122 c := uniq[stack.signature] 123 if c == nil { 124 c = &counter{stack: stack} 125 uniq[stack.signature] = c 126 } 127 c.count++ 128 c.ids = append(c.ids, stack.id) 129 } 130 131 // Sort them in reverse order, to print most occurring at the top. 132 var sorted []*counter 133 for _, c := range uniq { 134 sorted = append(sorted, c) 135 } 136 sort.Slice(sorted, func(i, j int) bool { 137 // Reverse sort 138 return sorted[i].count > sorted[j].count 139 }) 140 141 fmt.Fprintf(out, "Stacks: %d, unique: %d\n\n", len(stacks), len(sorted)) 142 for _, c := range sorted { 143 fmt.Fprintf(out, "=== Stack (count: %d) ===\ngoroutine IDs: %v\n", c.count, c.ids) 144 var stucks []uint 145 for _, id := range c.ids { 146 if _, ok := stuckIds[id]; ok { 147 stucks = append(stucks, id) 148 } 149 } 150 if len(stucks) > 0 { 151 fmt.Fprintf(out, "*** Stuck goroutines: %v ***\n", stucks) 152 } 153 fmt.Fprintln(out) 154 for _, line := range c.lines { 155 fmt.Fprintln(out, line) 156 } 157 fmt.Fprintln(out) 158 } 159 160 return nil 161 } 162 163 // collectStacks parses the input to find stack dump. Expected format is: 164 // 165 // goroutine ID [reason, time]: 166 // package.function(args) 167 // GOROOT/path/file.go:line +offset 168 // <blank line between stacks> 169 func collectStacks(scanner *bufio.Scanner) ([]*stack, error) { 170 var stacks []*stack 171 var block []string 172 for scanner.Scan() { 173 line := scanner.Text() 174 175 // Expect the first line of a block to be the goroutine header: 176 // goroutine 43 [select, 19 minutes]: 177 if len(block) == 0 { 178 if _, err := parseGoroutineID(line); err != nil { 179 // If not the header and no stacks have been found yet, skip the line 180 // until the start of stack dump is found. 181 if len(stacks) == 0 { 182 continue 183 } 184 // if stacks has been found, it means we reached the end of the dump and 185 // more logging lines exist in the file. 186 break 187 } 188 } 189 190 // A blank line means that we reached the end of the block 191 if len(strings.TrimSpace(line)) > 0 { 192 block = append(block, line) 193 continue 194 } 195 stack, err := parseBlock(block) 196 if err != nil { 197 return nil, err 198 } 199 stacks = append(stacks, stack) 200 block = nil 201 } 202 return stacks, nil 203 } 204 205 func parseBlock(block []string) (*stack, error) { 206 id, err := parseGoroutineID(block[0]) 207 if err != nil { 208 return nil, err 209 } 210 211 var signature string 212 for i, line := range block[1:] { 213 if i%2 == 1 { 214 signature += line + "\n" 215 } 216 } 217 218 return &stack{ 219 id: uint(id), 220 signature: signature, 221 lines: block[1:], 222 }, nil 223 } 224 225 func parseGoroutineID(line string) (uint, error) { 226 r := regexp.MustCompile(`goroutine (\d+)`) 227 matches := r.FindStringSubmatch(line) 228 if len(matches) != 2 { 229 return 0, fmt.Errorf("invalid goroutine ID line: %q", line) 230 } 231 id, err := strconv.Atoi(matches[1]) 232 if err != nil { 233 return 0, fmt.Errorf("parsing goroutine ID, line: %q: %w", line, err) 234 } 235 return uint(id), nil 236 } 237 238 type stack struct { 239 id uint 240 signature string 241 lines []string 242 }