github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/diagnostic/dump/dump.go (about)

     1  // Copyright 2019 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package dump provides the endpoint "/debug/dump.zip", registered with
     6  // http.DefaultServeMux, which returns a dump of useful diagnostic information
     7  // as a zip archive. The base configuration includes several useful diagnostics
     8  // (see init). You may also register your own dump parts to be included, e.g.:
     9  //
    10  //  Register("mystuff", func(ctx context.Context, w io.Writer) error {
    11  //      w.Write([]byte("mystuff diagnostic data"))
    12  //      return nil
    13  //  })
    14  //
    15  // The endpoint responds with a zip archive. The Content-Disposition of the
    16  // response suggests a pseudo-unique filename to make it easier to deal with
    17  // multiple dumps. Use curl flags to accept the suggested filename
    18  // (recommended).
    19  //
    20  //  curl -OJ http://example:1234/debug/dump.zip
    21  //
    22  // Note that it will take at least 30 seconds to respond, as some of the parts
    23  // of the base configuration are 30-second profiles.
    24  package dump
    25  
    26  import (
    27  	"archive/zip"
    28  	"context"
    29  	"errors"
    30  	"fmt"
    31  	"io"
    32  	"io/ioutil"
    33  	"net/http"
    34  	"os"
    35  	"strings"
    36  	"sync"
    37  	"time"
    38  
    39  	"github.com/Schaudge/grailbase/log"
    40  	"github.com/Schaudge/grailbase/traverse"
    41  )
    42  
    43  // init registers commonly useful parts in the registry and configures
    44  // http.DefaultServeMux with the endpoint "/debug/dump" for getting the dump.
    45  func init() {
    46  	Register("cmdline", dumpCmdline)
    47  	Register("cpuinfo", dumpCpuinfo)
    48  	Register("loadinfo", dumpLoadinfo)
    49  	Register("meminfo", dumpMeminfo)
    50  	Register("pprof-goroutine", dumpGoroutine)
    51  	Register("pprof-heap", dumpPprofHeap)
    52  	Register("pprof-mutex", dumpPprofMutex)
    53  	Register("pprof-profile", dumpPprofProfile)
    54  	Register("vars", dumpVars)
    55  	http.Handle("/debug/dump.zip", DefaultRegistry)
    56  }
    57  
// ErrSkipPart signals that we should skip a part. Return this from your
// Func to silently omit the part from the current dump. If your Func
// returns any other non-nil error, it will be logged as an error and the
// part will not be written. This is mostly useful for keeping logs quiet for
// parts that are sometimes unavailable for non-error reasons.
var ErrSkipPart = errors.New("skip part")
    64  
// part is one part of a dump. It is ultimately expressed as a single file that
// is part of the dump archive.
type part struct {
	// name is the name of this part of the dump. It is used as the filename
	// of the part in the dump archive, below the dump's path prefix.
	name string
	// f is called to produce the contents of this part of the dump.
	f Func
}
    74  
// Func is the function to be called when producing a dump for a part. It
// writes the contents of the part to w. When invoked through WriteDump, ctx
// carries a two-minute timeout derived from the dump's context. Returning
// ErrSkipPart omits the part silently; any other non-nil error is logged and
// the part is left out of the dump.
type Func func(ctx context.Context, w io.Writer) error
    77  
// Registry maintains the set of parts that will compose the dump. Parts are
// added with Register and kept in registration order.
type Registry struct {
	// mu guards parts.
	mu sync.Mutex
	// id is the identifier of this registry, which eventually becomes part of
	// the suggested filename for the dump.
	id    string
	parts []part

	// createTime is the time at which this Registry was created with
	// NewRegistry.
	createTime time.Time
}
    90  
    91  // NewRegistry returns a new registry for the parts to be included in the dump.
    92  func NewRegistry(id string) *Registry {
    93  	return &Registry{id: id, createTime: time.Now()}
    94  }
    95  
    96  // Name returns a name for reg that is convenient for naming dump files, as it
    97  // is pseudo-unique and includes the registry ID, the time at which the registry
    98  // was created, and the duration from that creation time.
    99  func (reg *Registry) Name() string {
   100  	sinceCreate := time.Since(reg.createTime)
   101  	ss := []string{reg.id, reg.createTime.Format(createTimeFormat), formatDuration(sinceCreate)}
   102  	return strings.Join(ss, ".")
   103  }
   104  
   105  // Register registers a new part to be included in the dump of reg. Name will
   106  // become the filename of the part file in the dump tarball. Func f will be
   107  // called to produce the contents of that file.
   108  func (reg *Registry) Register(name string, f Func) {
   109  	reg.mu.Lock()
   110  	defer reg.mu.Unlock()
   111  	for _, part := range reg.parts {
   112  		if part.name == name {
   113  			panic(fmt.Sprintf("duplicate part name %q", name))
   114  		}
   115  	}
   116  	reg.parts = append(reg.parts, part{name: name, f: f})
   117  }
   118  
// partFile is used by worker goroutines to communicate results back to the main
// dumping thread. Only one of err and file will be non-nil.
type partFile struct {
	// part is the part to which this partFile applies.
	part part
	// err will be non-nil if there was an error producing the file of the part
	// of the dump. It is ErrSkipPart when the part asked to be skipped.
	err error
	// file will be non-nil in a successful result and will be the file whose
	// contents will be included in the dump archive. It is closed by writePart.
	file *os.File
}
   131  
   132  // processPart is called by worker goroutines to process a single part.
   133  func processPart(ctx context.Context, part part) partFile {
   134  	tmpfile, err := ioutil.TempFile("", "dump")
   135  	if err != nil {
   136  		return partFile{
   137  			part: part,
   138  			err:  fmt.Errorf("error creating temp file: %v", err),
   139  		}
   140  	}
   141  	if err := os.Remove(tmpfile.Name()); err != nil {
   142  		log.Printf("dump: error removing temp file %s: %v", tmpfile.Name(), err)
   143  	}
   144  	if err := part.f(ctx, tmpfile); err != nil {
   145  		_ = tmpfile.Close()
   146  		if err == ErrSkipPart {
   147  			return partFile{part: part, err: err}
   148  		}
   149  		return partFile{
   150  			part: part,
   151  			err:  fmt.Errorf("error writing part contents: %v", err),
   152  		}
   153  	}
   154  	if _, err := tmpfile.Seek(0, 0); err != nil {
   155  		_ = tmpfile.Close()
   156  		return partFile{
   157  			part: part,
   158  			err:  fmt.Errorf("error seeking to read temp file for dump: %v", err),
   159  		}
   160  	}
   161  	// The returned file will be closed downstream after its contents have been
   162  	// written to the dump.
   163  	return partFile{part: part, file: tmpfile}
   164  }
   165  
   166  // writeFile writes a file to zw with filename name.
   167  func writeFile(name string, f *os.File, zw *zip.Writer) error {
   168  	fi, err := f.Stat()
   169  	if err != nil {
   170  		return fmt.Errorf("error getting file stat of %q: %v", f.Name(), err)
   171  	}
   172  	hdr, err := zip.FileInfoHeader(fi)
   173  	if err != nil {
   174  		return fmt.Errorf("error building zip header of %q: %v", f.Name(), err)
   175  	}
   176  	hdr.Name = name
   177  	zfw, err := zw.CreateHeader(hdr)
   178  	if err != nil {
   179  		return fmt.Errorf("error writing zip header in diagnostic dump: %v", err)
   180  	}
   181  	if _, err = io.Copy(zfw, f); err != nil {
   182  		return fmt.Errorf("error writing diagnostic dump: %v", err)
   183  	}
   184  	return nil
   185  }
   186  
   187  // writePart writes a single part to zw. pfx is the path that will be prepended
   188  // to the part name to construct the full path of the entry in the archive.
   189  func writePart(pfx string, p partFile, zw *zip.Writer) (err error) {
   190  	if p.err != nil {
   191  		if p.err == ErrSkipPart {
   192  			return nil
   193  		}
   194  		return fmt.Errorf("error dumping %s: %v", p.part.name, p.err)
   195  	}
   196  	defer func() {
   197  		closeErr := p.file.Close()
   198  		if err == nil && closeErr != nil {
   199  			err = fmt.Errorf("error closing temp file %q: %v", p.file.Name(), closeErr)
   200  		}
   201  	}()
   202  	if fileErr := writeFile(pfx+"/"+p.part.name, p.file, zw); fileErr != nil {
   203  		return fmt.Errorf("error writing %s to archive: %v", p.part.name, fileErr)
   204  	}
   205  	return nil
   206  }
   207  
   208  // WriteDump writes the dump to w. pfx is prepended to the names of the parts of
   209  // the dump, e.g. if pfx == "dump-123" and part name == "cpu", "dump-123/cpu"
   210  // will be written into the archive. It returns no error, as it is best-effort.
   211  func (reg *Registry) WriteDump(ctx context.Context, pfx string, zw *zip.Writer) {
   212  	reg.mu.Lock()
   213  	// Snapshot reg.parts to release the lock quickly.
   214  	parts := reg.parts
   215  	reg.mu.Unlock()
   216  	const concurrency = 8
   217  	partFileC := make(chan partFile, concurrency)
   218  	go func() {
   219  		defer close(partFileC)
   220  		err := traverse.Parallel.Each(len(parts), func(i int) error {
   221  			partCtx, partCtxCancel := context.WithTimeout(ctx, 2*time.Minute)
   222  			partFile := processPart(partCtx, parts[i])
   223  			partCtxCancel()
   224  			partFileC <- partFile
   225  			return nil
   226  		})
   227  		if err != nil {
   228  			log.Error.Printf("dump: error processing parts: %v", err)
   229  			return
   230  		}
   231  	}()
   232  	for p := range partFileC {
   233  		if err := writePart(pfx, p, zw); err != nil {
   234  			log.Error.Printf("dump: error processing part %s: %v", p.part.name, err)
   235  		}
   236  	}
   237  }
   238  
// createTimeFormat is the time layout (year-month-day-hourminute) that
// Registry.Name uses to render a registry's creation time.
var createTimeFormat = "2006-01-02-1504"
   240  
   241  func formatDuration(d time.Duration) string {
   242  	d = d.Round(time.Second)
   243  	h := d / time.Hour
   244  	d -= h * time.Hour
   245  	m := d / time.Minute
   246  	d -= m * time.Minute
   247  	s := d / time.Second
   248  	return fmt.Sprintf("%02dh%02dm%02ds", h, m, s)
   249  }
   250  
   251  // ServeHTTP serves the dump with a Content-Disposition set with a unique filename.
   252  func (reg *Registry) ServeHTTP(w http.ResponseWriter, r *http.Request) {
   253  	w.Header().Set("Content-Type", "application/zip")
   254  	pfx := Name()
   255  	filename := pfx + ".zip"
   256  	w.Header().Set("Content-Disposition", "attachment; filename="+filename)
   257  	zw := zip.NewWriter(w)
   258  	defer zw.Close() // nolint: errcheck
   259  	reg.WriteDump(r.Context(), pfx, zw)
   260  }