github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/server/goroutinedumper/goroutinedumper_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package goroutinedumper
    12  
    13  import (
    14  	"compress/gzip"
    15  	"context"
    16  	"fmt"
    17  	"io/ioutil"
    18  	"os"
    19  	"path/filepath"
    20  	"testing"
    21  	"time"
    22  
    23  	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
    24  	"github.com/cockroachdb/cockroach/pkg/testutils"
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/stretchr/testify/assert"
    27  )
    28  
    29  type goroutinesVal struct {
    30  	secs       time.Duration // the time at which this goroutines value was emitted
    31  	threshold  int64
    32  	goroutines int64
    33  }
    34  
    35  func TestHeuristic(t *testing.T) {
    36  	const dumpDir = "dump_dir"
    37  	st := &cluster.Settings{}
    38  
    39  	cases := []struct {
    40  		name          string
    41  		heuristics    []heuristic
    42  		vals          []goroutinesVal
    43  		dumpsToFail   []string
    44  		expectedDumps []string
    45  	}{
    46  		// N is the number of goroutines
    47  		{
    48  			name:       "Use only doubleSinceLastDumpHeuristic",
    49  			heuristics: []heuristic{doubleSinceLastDumpHeuristic},
    50  			vals: []goroutinesVal{
    51  				{0, 100, 30},    // not trigger since N < numGoroutinesThreshold
    52  				{10, 100, 40},   // not trigger since N < numGoroutinesThreshold
    53  				{20, 100, 120},  // trigger since N >= numGoroutinesThreshold
    54  				{50, 100, 35},   // not trigger since N has not doubled since last dump
    55  				{70, 100, 150},  // not trigger since N has not doubled since last dump
    56  				{80, 100, 250},  // trigger since N has doubled since last dump
    57  				{100, 100, 135}, // not trigger since N has not doubled since last dump
    58  				{180, 100, 30},  // not trigger since N has not doubled since last dump
    59  				{190, 100, 80},  // not trigger since N has not doubled since last dump
    60  				{220, 100, 500}, // trigger since N has doubled since last dump
    61  			},
    62  			expectedDumps: []string{
    63  				"goroutine_dump.2019-01-01T00_00_20.double_since_last_dump.000000120",
    64  				"goroutine_dump.2019-01-01T00_01_20.double_since_last_dump.000000250",
    65  				"goroutine_dump.2019-01-01T00_03_40.double_since_last_dump.000000500",
    66  			},
    67  		},
    68  		{
    69  			name: "Fail some dumps when doubleSinceLastDumpHeuristic is used",
    70  			heuristics: []heuristic{
    71  				doubleSinceLastDumpHeuristic,
    72  			},
    73  			vals: []goroutinesVal{
    74  				{0, 100, 20},    // not trigger since N < numGoroutinesThreshold
    75  				{10, 100, 35},   // not trigger since N < numGoroutinesThreshold
    76  				{20, 100, 110},  // trigger since N >= numGoroutinesThreshold
    77  				{50, 100, 150},  // not trigger since N has not doubled since last dump
    78  				{70, 100, 170},  // not trigger since N has not doubled since last dump
    79  				{80, 100, 230},  // trigger but dump will fail
    80  				{100, 100, 220}, // trigger since N has doubled since last dump
    81  				{180, 100, 85},  // not trigger since N has not doubled since last dump
    82  				{200, 100, 450}, // trigger since N has doubled since last dump
    83  				{220, 100, 500}, // not trigger since N has not doubled since last dump
    84  			},
    85  			expectedDumps: []string{
    86  				"goroutine_dump.2019-01-01T00_00_20.double_since_last_dump.000000110",
    87  				"goroutine_dump.2019-01-01T00_01_40.double_since_last_dump.000000220",
    88  				"goroutine_dump.2019-01-01T00_03_20.double_since_last_dump.000000450",
    89  			},
    90  			dumpsToFail: []string{
    91  				"goroutine_dump.2019-01-01T00_01_20.double_since_last_dump.000000230",
    92  			},
    93  		},
    94  		{
    95  			name:       "Change in threshold resets the maxGoroutinesDumped",
    96  			heuristics: []heuristic{doubleSinceLastDumpHeuristic},
    97  			vals: []goroutinesVal{
    98  				{0, 100, 30},    // not trigger since N < numGoroutinesThreshold
    99  				{10, 100, 40},   // not trigger since N < numGoroutinesThreshold
   100  				{20, 100, 120},  // trigger since N >= numGoroutinesThreshold
   101  				{50, 100, 135},  // not trigger since N has not doubled since last dump
   102  				{70, 100, 150},  // not trigger since N has not doubled since last dump
   103  				{80, 200, 150},  // update numGoroutinesThreshold, which resets maxGoroutinesDumped
   104  				{90, 200, 210},  // trigger since maxGoroutinesDumped was reset and N >= threshold
   105  				{100, 200, 235}, // not trigger since N has not doubled since last dump
   106  				{180, 200, 230}, // not trigger since N has not doubled since last dump
   107  				{190, 200, 280}, // not trigger since N has not doubled since last dump
   108  				{220, 200, 500}, // trigger since N has doubled since last dump
   109  			},
   110  			expectedDumps: []string{
   111  				"goroutine_dump.2019-01-01T00_00_20.double_since_last_dump.000000120",
   112  				"goroutine_dump.2019-01-01T00_01_30.double_since_last_dump.000000210",
   113  				"goroutine_dump.2019-01-01T00_03_40.double_since_last_dump.000000500",
   114  			},
   115  		},
   116  		{
   117  			name:       "No heuristic is used",
   118  			heuristics: []heuristic{},
   119  			vals: []goroutinesVal{
   120  				{0, 100, 10},
   121  				{10, 100, 15},
   122  				{20, 100, 50},
   123  				{50, 100, 35},
   124  				{70, 100, 80},
   125  				{80, 100, 150},
   126  				{100, 100, 120},
   127  				{180, 100, 85},
   128  				{200, 100, 130},
   129  				{220, 100, 500},
   130  			},
   131  			expectedDumps: nil,
   132  		},
   133  	}
   134  
   135  	for _, c := range cases {
   136  		t.Run(c.name, func(t *testing.T) {
   137  			baseTime := time.Date(2019, time.January, 1, 0, 0, 0, 0, time.UTC)
   138  			var dumps []string
   139  			var currentTime time.Time
   140  			gd := GoroutineDumper{
   141  				maxGoroutinesDumped: 0,
   142  				heuristics:          c.heuristics,
   143  				currentTime: func() time.Time {
   144  					return currentTime
   145  				},
   146  				takeGoroutineDump: func(dir string, filename string) error {
   147  					assert.Equal(t, dumpDir, dir)
   148  					for _, d := range c.dumpsToFail {
   149  						if filename == d {
   150  							return errors.New("this dump is set to fail")
   151  						}
   152  					}
   153  					dumps = append(dumps, filename)
   154  					return nil
   155  				},
   156  				gc:  func(ctx context.Context, dir string, sizeLimit int64) {},
   157  				dir: dumpDir,
   158  			}
   159  
   160  			ctx := context.Background()
   161  			for _, v := range c.vals {
   162  				currentTime = baseTime.Add(v.secs * time.Second)
   163  				numGoroutinesThreshold.Override(&st.SV, v.threshold)
   164  				gd.MaybeDump(ctx, st, v.goroutines)
   165  			}
   166  			assert.Equal(t, c.expectedDumps, dumps)
   167  		})
   168  	}
   169  }
   170  
   171  func TestNewGoroutineDumper(t *testing.T) {
   172  	t.Run("fails because no directory is specified", func(t *testing.T) {
   173  		_, err := NewGoroutineDumper("")
   174  		assert.EqualError(t, err, "directory to store dumps could not be determined")
   175  	})
   176  
   177  	t.Run("succeeds", func(t *testing.T) {
   178  		tempDir, dirCleanupFn := testutils.TempDir(t)
   179  		defer dirCleanupFn()
   180  		gd, err := NewGoroutineDumper(tempDir)
   181  		assert.NoError(t, err, "unexpected error in NewGoroutineDumper")
   182  		assert.Equal(t, int64(0), gd.goroutinesThreshold)
   183  		assert.Equal(t, int64(0), gd.maxGoroutinesDumped)
   184  		assert.Equal(t, tempDir, gd.dir)
   185  	})
   186  }
   187  
   188  func TestGC(t *testing.T) {
   189  	type file struct {
   190  		name string
   191  		size int64
   192  	}
   193  	cases := []struct {
   194  		name      string
   195  		files     []file
   196  		sizeLimit int64
   197  		expected  []string
   198  	}{
   199  		{
   200  			name: "total size smaller than size limit",
   201  			files: []file{
   202  				{name: "goroutine_dump.2019-01-01T00_00_00", size: 1},
   203  				{name: "goroutine_dump.2019-01-01T00_10_00", size: 1},
   204  				{name: "goroutine_dump.2019-01-01T00_20_00", size: 1},
   205  			},
   206  			sizeLimit: 5,
   207  			expected: []string{
   208  				"goroutine_dump.2019-01-01T00_00_00",
   209  				"goroutine_dump.2019-01-01T00_10_00",
   210  				"goroutine_dump.2019-01-01T00_20_00",
   211  			},
   212  		},
   213  		{
   214  			name: "total size smaller than size limit and unknown files are removed",
   215  			files: []file{
   216  				{name: "goroutine_dump.2019-01-01T00_00_00", size: 1},
   217  				{name: "goroutine_dump.2019-01-01T00_10_00", size: 1},
   218  				{name: "goroutine_dump.2019-01-01T00_20_00", size: 1},
   219  				{name: "unknown_file", size: 1},
   220  				{name: "unknown_file2", size: 1},
   221  			},
   222  			sizeLimit: 5,
   223  			expected: []string{
   224  				"goroutine_dump.2019-01-01T00_00_00",
   225  				"goroutine_dump.2019-01-01T00_10_00",
   226  				"goroutine_dump.2019-01-01T00_20_00",
   227  			},
   228  		},
   229  		{
   230  			name: "total size larger than size limit",
   231  			files: []file{
   232  				{name: "goroutine_dump.2019-01-01T00_00_00", size: 1},
   233  				{name: "goroutine_dump.2019-01-01T00_10_00", size: 1},
   234  				{name: "goroutine_dump.2019-01-01T00_20_00", size: 1},
   235  				{name: "unknown_file", size: 1},
   236  			},
   237  			sizeLimit: 2,
   238  			expected: []string{
   239  				"goroutine_dump.2019-01-01T00_10_00",
   240  				"goroutine_dump.2019-01-01T00_20_00",
   241  			},
   242  		},
   243  		{
   244  			name: "newest dump is already larger than size limit",
   245  			files: []file{
   246  				{name: "goroutine_dump.2019-01-01T00_00_00", size: 1},
   247  				{name: "goroutine_dump.2019-01-01T00_10_00", size: 1},
   248  				{name: "goroutine_dump.2019-01-01T00_20_00", size: 10},
   249  				{name: "unknown_file", size: 1},
   250  			},
   251  			sizeLimit: 5,
   252  			expected: []string{
   253  				"goroutine_dump.2019-01-01T00_20_00",
   254  			},
   255  		},
   256  		{
   257  			name: "no dump in directory",
   258  			files: []file{
   259  				{name: "unknown_file", size: 1},
   260  			},
   261  			sizeLimit: 5,
   262  			expected:  nil,
   263  		},
   264  		{
   265  			name:      "no file in directory",
   266  			files:     []file{},
   267  			sizeLimit: 5,
   268  			expected:  nil,
   269  		},
   270  	}
   271  
   272  	for _, c := range cases {
   273  		t.Run(c.name, func(t *testing.T) {
   274  			tempDir, dirCleanupFn := testutils.TempDir(t)
   275  			defer dirCleanupFn()
   276  
   277  			for _, f := range c.files {
   278  				path := filepath.Join(tempDir, f.name)
   279  				fi, err := os.Create(path)
   280  				assert.NoError(t, err, "unexpected error when creating file %s", path)
   281  				err = fi.Close()
   282  				assert.NoError(t, err, "unexpected error when closing file %s", path)
   283  				err = os.Truncate(path, f.size)
   284  				assert.NoError(t, err, "unexpected error when truncating file %s", path)
   285  			}
   286  			ctx := context.Background()
   287  			gc(ctx, tempDir, c.sizeLimit)
   288  			files, err := ioutil.ReadDir(tempDir)
   289  			assert.NoError(t, err, "unexpected error when listing files in %s", tempDir)
   290  			var actual []string
   291  			for _, f := range files {
   292  				actual = append(actual, f.Name())
   293  			}
   294  			assert.Equal(t, c.expected, actual)
   295  		})
   296  	}
   297  }
   298  
   299  func TestTakeGoroutineDump(t *testing.T) {
   300  	t.Run("fails because dump already exists as a directory", func(t *testing.T) {
   301  		tempDir, dirCleanupFn := testutils.TempDir(t)
   302  		defer dirCleanupFn()
   303  		path := filepath.Join(tempDir, "goroutine_dump.txt.gz")
   304  		err := os.Mkdir(path, 0755)
   305  		assert.NoError(t, err, "failed to make dump directory %s", path)
   306  
   307  		filename := "goroutine_dump"
   308  		err = takeGoroutineDump(tempDir, filename)
   309  		assert.Error(t, err)
   310  		assert.Contains(
   311  			t,
   312  			err.Error(),
   313  			fmt.Sprintf("error creating file %s for goroutine dump", path),
   314  		)
   315  	})
   316  
   317  	t.Run("succeeds writing a goroutine dump in gzip format", func(t *testing.T) {
   318  		tempDir, dirCleanupFn := testutils.TempDir(t)
   319  		defer dirCleanupFn()
   320  
   321  		err := takeGoroutineDump(tempDir, "goroutine_dump")
   322  		assert.NoError(t, err, "unexpected error when dumping goroutines")
   323  
   324  		expectedFile := filepath.Join(tempDir, "goroutine_dump.txt.gz")
   325  		f, err := os.Open(expectedFile)
   326  		if err != nil {
   327  			t.Fatalf("could not open goroutine dump file %s: %s", expectedFile, err)
   328  		}
   329  		defer f.Close()
   330  		// Test file is in gzip format.
   331  		r, err := gzip.NewReader(f)
   332  		if err != nil {
   333  			t.Fatalf("could not create gzip reader for file %s: %s", expectedFile, err)
   334  		}
   335  		if _, err = ioutil.ReadAll(r); err != nil {
   336  			t.Fatalf("could not read goroutine dump file %s with gzip: %s", expectedFile, err)
   337  		}
   338  		if err = r.Close(); err != nil {
   339  			t.Fatalf("error closing gzip reader: %s", err)
   340  		}
   341  	})
   342  }