istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tests/util/leak/check.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
// Package leak checks for goroutine leaks in tests.
// This is (heavily) inspired by https://github.com/grpc/grpc-go/blob/master/internal/leakcheck/leakcheck.go
// and https://github.com/fortytw2/leaktest
    18  package leak
    19  
    20  import (
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"runtime"
    25  	"sort"
    26  	"strconv"
    27  	"strings"
    28  	"time"
    29  
    30  	"go.uber.org/atomic"
    31  
    32  	"istio.io/istio/pkg/log"
    33  	"istio.io/istio/pkg/test"
    34  	"istio.io/istio/pkg/test/util/retry"
    35  )
    36  
// goroutinesToIgnore lists stack-trace substrings that mark a goroutine as uninteresting.
// interestingGoroutine skips any goroutine whose stack contains one of these strings.
var goroutinesToIgnore = []string{
	// "Global" goroutines we always initialize. Maybe we shouldn't always initialize these, but for now
	// every test fails with these.
	"k8s.io/klog/v2.(*loggingT).flushDaemon",      // k8s logging
	"go.opencensus.io/stats/view.(*worker).start", // metrics runs on init. We are *almost* off opencensus, but transitively import it.

	// Goroutines belonging to the test framework.
	"testing.Main(",
	"testing.tRunner(",
	"testing.(*M).",

	// Go runtime.
	"runtime.goexit",
	"created by runtime.gc",
	"runtime.MHeap_Scavenger",
	"signal.signal_recv",
	"sigterm.handler",
	"runtime_mcall",

	// Created by the leak checker.
	"created by runtime/trace.Start",
	"interestingGoroutines",

	// This is not technically required. However, it's a loop that is outside our control that runs
	// every 500ms, so we skip it to avoid delayed tests.
	"workqueue.(*Type).updateUnfinishedWorkLoop",
}
    64  
    65  // TestingM is the minimal subset of testing.M that we use.
    66  type TestingM interface {
    67  	Run() int
    68  }
    69  
    70  type TestingTB interface {
    71  	Cleanup(func())
    72  	Errorf(format string, args ...any)
    73  }
    74  
    75  var gracePeriod = time.Second * 5
    76  
    77  func check(filter func(in []*goroutine) []*goroutine) error {
    78  	// Loop, waiting for goroutines to shut down.
    79  	// Wait up to timeout, but finish as quickly as possible.
    80  	// The timeout here is not super sensitive, since if we hit this we will fail; a happy case will finish quickly
    81  	deadline := time.Now().Add(gracePeriod)
    82  	var leaked []*goroutine
    83  	var err error
    84  	delay := time.Duration(0)
    85  	for time.Now().Before(deadline) {
    86  		leaked, err = interestingGoroutines()
    87  		if err != nil {
    88  			return fmt.Errorf("failed to fetch post-test goroutines: %v", err)
    89  		}
    90  		if filter != nil {
    91  			leaked = filter(leaked)
    92  		}
    93  		if len(leaked) == 0 {
    94  			return nil
    95  		}
    96  		time.Sleep(delay)
    97  		delay += time.Millisecond * 10
    98  	}
    99  	errString := strings.Builder{}
   100  	for _, g := range leaked {
   101  		errString.WriteString(fmt.Sprintf("Leaked goroutine: %v\n", g.stack))
   102  	}
   103  	return errors.New(errString.String())
   104  }
   105  
   106  // Check adds a check to a test to ensure there are no leaked goroutines
   107  // To use, simply call leak.Check(t) at the start of a test; Do not call it in defer.
   108  // It is recommended to call this as the first step, as Cleanup is called in LIFO order; this ensures any
   109  // Cleanup's called in the test happen first.
   110  // Any existing goroutines before the test starts are filtered out. This ensures a single test failing doesn't
   111  // cause all future tests to fail. However, it is still possible another test influences the result when t.Parallel is used.
   112  // Where possible, CheckMain is preferred.
   113  func Check(t TestingTB) {
   114  	existingRaw, err := interestingGoroutines()
   115  	if err != nil {
   116  		t.Errorf("failed to fetch pre-test goroutines: %v", err)
   117  		return
   118  	}
   119  	existing := map[uint64]struct{}{}
   120  	for _, g := range existingRaw {
   121  		existing[g.id] = struct{}{}
   122  	}
   123  	filter := func(in []*goroutine) []*goroutine {
   124  		res := make([]*goroutine, 0, len(in))
   125  		for _, i := range in {
   126  			if _, f := existing[i.id]; !f {
   127  				// This was not in the goroutines list when the test started
   128  				res = append(res, i)
   129  			}
   130  		}
   131  		return res
   132  	}
   133  	t.Cleanup(func() {
   134  		if err := check(filter); err != nil {
   135  			t.Errorf("goroutine leak: %v", err)
   136  		}
   137  	})
   138  }
   139  
   140  // CheckMain asserts that no goroutines are leaked after a test package exits.
   141  // This can be used with the following code:
   142  //
   143  //	func TestMain(m *testing.M) {
   144  //	    leak.CheckMain(m)
   145  //	}
   146  //
   147  // Failures here are scoped to the package, not a specific test. To determine the source of the failure,
   148  // you can use the tool `go test -exec $PWD/tools/go-ordered-test ./my/package`. This runs each test individually.
   149  // If there are some tests that are leaky, you the Check method can be used on individual tests.
   150  func CheckMain(m TestingM) {
   151  	exitCode := m.Run()
   152  
   153  	if exitCode == 0 {
   154  		if err := check(nil); err != nil {
   155  			log.Errorf("fatal: %v", err)
   156  			exitCode = 1
   157  		}
   158  	}
   159  
   160  	os.Exit(exitCode)
   161  }
   162  
   163  // MustGarbageCollect asserts that an object was garbage collected by the end of the test.
   164  // The input must be a pointer to an object.
   165  func MustGarbageCollect(tb test.Failer, i any) {
   166  	tb.Helper()
   167  	collected := atomic.NewBool(false)
   168  	runtime.SetFinalizer(i, func(x any) {
   169  		collected.Store(true)
   170  	})
   171  	tb.Cleanup(func() {
   172  		retry.UntilOrFail(tb, func() bool {
   173  			// Trigger GC explicitly, otherwise we may need to wait a long time for it to run
   174  			runtime.GC()
   175  			return collected.Load()
   176  		}, retry.Timeout(time.Second*5), retry.Message("object was not garbage collected"))
   177  	})
   178  }
   179  
   180  type goroutine struct {
   181  	id    uint64
   182  	stack string
   183  }
   184  
   185  type goroutineByID []*goroutine
   186  
   187  func (g goroutineByID) Len() int           { return len(g) }
   188  func (g goroutineByID) Less(i, j int) bool { return g[i].id < g[j].id }
   189  func (g goroutineByID) Swap(i, j int)      { g[i], g[j] = g[j], g[i] }
   190  
   191  func interestingGoroutine(g string) (*goroutine, error) {
   192  	sl := strings.SplitN(g, "\n", 2)
   193  	if len(sl) != 2 {
   194  		return nil, fmt.Errorf("error parsing stack: %q", g)
   195  	}
   196  	stack := strings.TrimSpace(sl[1])
   197  	if strings.HasPrefix(stack, "testing.RunTests") {
   198  		return nil, nil
   199  	}
   200  
   201  	for _, s := range goroutinesToIgnore {
   202  		if strings.Contains(stack, s) {
   203  			return nil, nil
   204  		}
   205  	}
   206  
   207  	// Parse the goroutine's ID from the header line.
   208  	h := strings.SplitN(sl[0], " ", 3)
   209  	if len(h) < 3 {
   210  		return nil, fmt.Errorf("error parsing stack header: %q", sl[0])
   211  	}
   212  	id, err := strconv.ParseUint(h[1], 10, 64)
   213  	if err != nil {
   214  		return nil, fmt.Errorf("error parsing goroutine id: %s", err)
   215  	}
   216  
   217  	return &goroutine{id: id, stack: strings.TrimSpace(g)}, nil
   218  }
   219  
   220  // interestingGoroutines returns all goroutines we care about for the purpose
   221  // of leak checking. It excludes testing or runtime ones.
   222  func interestingGoroutines() ([]*goroutine, error) {
   223  	buf := make([]byte, 2<<20)
   224  	buf = buf[:runtime.Stack(buf, true)]
   225  	var gs []*goroutine
   226  	for _, g := range strings.Split(string(buf), "\n\n") {
   227  		gr, err := interestingGoroutine(g)
   228  		if err != nil {
   229  			return nil, err
   230  		} else if gr == nil {
   231  			continue
   232  		}
   233  		gs = append(gs, gr)
   234  	}
   235  	sort.Sort(goroutineByID(gs))
   236  	return gs, nil
   237  }