// github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/diagnostic/dump/dump.go

// Copyright 2019 GRAIL, Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

// Package dump provides the endpoint "debug/dump.zip", registered with
// http.DefaultServeMux, which returns a dump of useful diagnostic information
// as a zip archive. The base configuration includes several useful diagnostics
// (see init). You may also register your own dump parts to be included, e.g.:
//
//	Register("mystuff", func(ctx context.Context, w io.Writer) error {
//		w.Write([]byte("mystuff diagnostic data"))
//		return nil
//	})
//
// The endpoint responds with a zip archive. The Content-Disposition of the
// response suggests a pseudo-unique filename to make it easier to deal with
// multiple dumps. Use curl flags to accept the suggested filename
// (recommended).
//
//	curl -OJ http://example:1234/debug/dump.zip
//
// Note that it will take at least 30 seconds to respond, as some of the parts
// of the base configuration are 30-second profiles.
package dump

import (
	"archive/zip"
	"context"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"net/http"
	"os"
	"strings"
	"sync"
	"time"

	"github.com/Schaudge/grailbase/log"
	"github.com/Schaudge/grailbase/traverse"
)

// init registers commonly useful parts in the registry and configures
// http.DefaultServeMux with the endpoint "/debug/dump.zip" for getting the dump.
45 func init() { 46 Register("cmdline", dumpCmdline) 47 Register("cpuinfo", dumpCpuinfo) 48 Register("loadinfo", dumpLoadinfo) 49 Register("meminfo", dumpMeminfo) 50 Register("pprof-goroutine", dumpGoroutine) 51 Register("pprof-heap", dumpPprofHeap) 52 Register("pprof-mutex", dumpPprofMutex) 53 Register("pprof-profile", dumpPprofProfile) 54 Register("vars", dumpVars) 55 http.Handle("/debug/dump.zip", DefaultRegistry) 56 } 57 58 // ErrSkipPart signals that we should skip a part. Return this from your 59 // Func to silently ignore the part for the current dump. If your Func 60 // returns anything else non-nil, it will be logged as an error. This is 61 // mostly useful for keeping logs quiet for parts that are sometimes 62 // unavailable for non-error reasons. 63 var ErrSkipPart = errors.New("skip part") 64 65 // part is one part of a dump. It is ultimately expressed as a single file that 66 // is part the tarball archive dump. 67 type part struct { 68 // name is the name of this part of the dump. It is used as the filename in 69 // the dump tarball. 70 name string 71 // f is called to produce the contents of this part of the dump. 72 f Func 73 } 74 75 // Func is the function to be called when producing a dump for a part. 76 type Func func(ctx context.Context, w io.Writer) error 77 78 // Registry maintains the set of parts that will compose the dump. 79 type Registry struct { 80 mu sync.Mutex 81 // id is the identifier of this registry, which eventually becomes part of 82 // the suggested filename for the dump. 83 id string 84 parts []part 85 86 // createTime is the time at which this Registry was created with 87 // NewRegistry. 88 createTime time.Time 89 } 90 91 // NewRegistry returns a new registry for the parts to be included in the dump. 
92 func NewRegistry(id string) *Registry { 93 return &Registry{id: id, createTime: time.Now()} 94 } 95 96 // Name returns a name for reg that is convenient for naming dump files, as it 97 // is pseudo-unique and includes the registry ID, the time at which the registry 98 // was created, and the duration from that creation time. 99 func (reg *Registry) Name() string { 100 sinceCreate := time.Since(reg.createTime) 101 ss := []string{reg.id, reg.createTime.Format(createTimeFormat), formatDuration(sinceCreate)} 102 return strings.Join(ss, ".") 103 } 104 105 // Register registers a new part to be included in the dump of reg. Name will 106 // become the filename of the part file in the dump tarball. Func f will be 107 // called to produce the contents of that file. 108 func (reg *Registry) Register(name string, f Func) { 109 reg.mu.Lock() 110 defer reg.mu.Unlock() 111 for _, part := range reg.parts { 112 if part.name == name { 113 panic(fmt.Sprintf("duplicate part name %q", name)) 114 } 115 } 116 reg.parts = append(reg.parts, part{name: name, f: f}) 117 } 118 119 // partFile is used by worker goroutines to communicate results back to the main 120 // dumping thread. Only one of err and file will be non-nil. 121 type partFile struct { 122 // part is the part to which this partFile applies. 123 part part 124 // err will be non-nil if there was an error producing the file of the part 125 // of the dump. 126 err error 127 // file will be non-nil in a successful result and will be the file that 128 // will be included in the dump tarball. 129 file *os.File 130 } 131 132 // processPart is called by worker goroutines to process a single part. 
133 func processPart(ctx context.Context, part part) partFile { 134 tmpfile, err := ioutil.TempFile("", "dump") 135 if err != nil { 136 return partFile{ 137 part: part, 138 err: fmt.Errorf("error creating temp file: %v", err), 139 } 140 } 141 if err := os.Remove(tmpfile.Name()); err != nil { 142 log.Printf("dump: error removing temp file %s: %v", tmpfile.Name(), err) 143 } 144 if err := part.f(ctx, tmpfile); err != nil { 145 _ = tmpfile.Close() 146 if err == ErrSkipPart { 147 return partFile{part: part, err: err} 148 } 149 return partFile{ 150 part: part, 151 err: fmt.Errorf("error writing part contents: %v", err), 152 } 153 } 154 if _, err := tmpfile.Seek(0, 0); err != nil { 155 _ = tmpfile.Close() 156 return partFile{ 157 part: part, 158 err: fmt.Errorf("error seeking to read temp file for dump: %v", err), 159 } 160 } 161 // The returned file will be closed downstream after its contents have been 162 // written to the dump. 163 return partFile{part: part, file: tmpfile} 164 } 165 166 // writeFile writes a file to zw with filename name. 167 func writeFile(name string, f *os.File, zw *zip.Writer) error { 168 fi, err := f.Stat() 169 if err != nil { 170 return fmt.Errorf("error getting file stat of %q: %v", f.Name(), err) 171 } 172 hdr, err := zip.FileInfoHeader(fi) 173 if err != nil { 174 return fmt.Errorf("error building zip header of %q: %v", f.Name(), err) 175 } 176 hdr.Name = name 177 zfw, err := zw.CreateHeader(hdr) 178 if err != nil { 179 return fmt.Errorf("error writing zip header in diagnostic dump: %v", err) 180 } 181 if _, err = io.Copy(zfw, f); err != nil { 182 return fmt.Errorf("error writing diagnostic dump: %v", err) 183 } 184 return nil 185 } 186 187 // writePart writes a single part to zw. pfx is the path that will be prepended 188 // to the part name to construct the full path of the entry in the archive. 
189 func writePart(pfx string, p partFile, zw *zip.Writer) (err error) { 190 if p.err != nil { 191 if p.err == ErrSkipPart { 192 return nil 193 } 194 return fmt.Errorf("error dumping %s: %v", p.part.name, p.err) 195 } 196 defer func() { 197 closeErr := p.file.Close() 198 if err == nil && closeErr != nil { 199 err = fmt.Errorf("error closing temp file %q: %v", p.file.Name(), closeErr) 200 } 201 }() 202 if fileErr := writeFile(pfx+"/"+p.part.name, p.file, zw); fileErr != nil { 203 return fmt.Errorf("error writing %s to archive: %v", p.part.name, fileErr) 204 } 205 return nil 206 } 207 208 // WriteDump writes the dump to w. pfx is prepended to the names of the parts of 209 // the dump, e.g. if pfx == "dump-123" and part name == "cpu", "dump-123/cpu" 210 // will be written into the archive. It returns no error, as it is best-effort. 211 func (reg *Registry) WriteDump(ctx context.Context, pfx string, zw *zip.Writer) { 212 reg.mu.Lock() 213 // Snapshot reg.parts to release the lock quickly. 214 parts := reg.parts 215 reg.mu.Unlock() 216 const concurrency = 8 217 partFileC := make(chan partFile, concurrency) 218 go func() { 219 defer close(partFileC) 220 err := traverse.Parallel.Each(len(parts), func(i int) error { 221 partCtx, partCtxCancel := context.WithTimeout(ctx, 2*time.Minute) 222 partFile := processPart(partCtx, parts[i]) 223 partCtxCancel() 224 partFileC <- partFile 225 return nil 226 }) 227 if err != nil { 228 log.Error.Printf("dump: error processing parts: %v", err) 229 return 230 } 231 }() 232 for p := range partFileC { 233 if err := writePart(pfx, p, zw); err != nil { 234 log.Error.Printf("dump: error processing part %s: %v", p.part.name, err) 235 } 236 } 237 } 238 239 var createTimeFormat = "2006-01-02-1504" 240 241 func formatDuration(d time.Duration) string { 242 d = d.Round(time.Second) 243 h := d / time.Hour 244 d -= h * time.Hour 245 m := d / time.Minute 246 d -= m * time.Minute 247 s := d / time.Second 248 return fmt.Sprintf("%02dh%02dm%02ds", h, m, s) 
249 } 250 251 // ServeHTTP serves the dump with a Content-Disposition set with a unique filename. 252 func (reg *Registry) ServeHTTP(w http.ResponseWriter, r *http.Request) { 253 w.Header().Set("Content-Type", "application/zip") 254 pfx := Name() 255 filename := pfx + ".zip" 256 w.Header().Set("Content-Disposition", "attachment; filename="+filename) 257 zw := zip.NewWriter(w) 258 defer zw.Close() // nolint: errcheck 259 reg.WriteDump(r.Context(), pfx, zw) 260 }