istio.io/istio@v0.0.0-20240520182934-d79c90f27776/tests/util/leak/check.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // leak checks for goroutine leaks in tests 16 // This is (heavily) inspired by https://github.com/grpc/grpc-go/blob/master/internal/leakcheck/leakcheck.go 17 // and https://github.com/fortytw2/leaktest 18 package leak 19 20 import ( 21 "errors" 22 "fmt" 23 "os" 24 "runtime" 25 "sort" 26 "strconv" 27 "strings" 28 "time" 29 30 "go.uber.org/atomic" 31 32 "istio.io/istio/pkg/log" 33 "istio.io/istio/pkg/test" 34 "istio.io/istio/pkg/test/util/retry" 35 ) 36 37 var goroutinesToIgnore = []string{ 38 // "global" goroutines we always initialize. Maybe we shouldn't always initialize these, but for now every 39 // test fails with these 40 "k8s.io/klog/v2.(*loggingT).flushDaemon", // k8s logging 41 "go.opencensus.io/stats/view.(*worker).start", // metrics runs on init. We are *almost* off opencensus, but transitively import it. 42 43 // goroutines for test 44 "testing.Main(", 45 "testing.tRunner(", 46 "testing.(*M).", 47 48 // go runtime 49 "runtime.goexit", 50 "created by runtime.gc", 51 "runtime.MHeap_Scavenger", 52 "signal.signal_recv", 53 "sigterm.handler", 54 "runtime_mcall", 55 56 // created by leak checker 57 "created by runtime/trace.Start", 58 "interestingGoroutines", 59 60 // This is not technically required. However, its a loop that is outside our control that runs every 500ms 61 // so we skip it to avoid delayed tests 62 "workqueue.(*Type).updateUnfinishedWorkLoop", 63 } 64 65 // TestingM is the minimal subset of testing.M that we use. 66 type TestingM interface { 67 Run() int 68 } 69 70 type TestingTB interface { 71 Cleanup(func()) 72 Errorf(format string, args ...any) 73 } 74 75 var gracePeriod = time.Second * 5 76 77 func check(filter func(in []*goroutine) []*goroutine) error { 78 // Loop, waiting for goroutines to shut down. 79 // Wait up to timeout, but finish as quickly as possible. 80 // The timeout here is not super sensitive, since if we hit this we will fail; a happy case will finish quickly 81 deadline := time.Now().Add(gracePeriod) 82 var leaked []*goroutine 83 var err error 84 delay := time.Duration(0) 85 for time.Now().Before(deadline) { 86 leaked, err = interestingGoroutines() 87 if err != nil { 88 return fmt.Errorf("failed to fetch post-test goroutines: %v", err) 89 } 90 if filter != nil { 91 leaked = filter(leaked) 92 } 93 if len(leaked) == 0 { 94 return nil 95 } 96 time.Sleep(delay) 97 delay += time.Millisecond * 10 98 } 99 errString := strings.Builder{} 100 for _, g := range leaked { 101 errString.WriteString(fmt.Sprintf("Leaked goroutine: %v\n", g.stack)) 102 } 103 return errors.New(errString.String()) 104 } 105 106 // Check adds a check to a test to ensure there are no leaked goroutines 107 // To use, simply call leak.Check(t) at the start of a test; Do not call it in defer. 108 // It is recommended to call this as the first step, as Cleanup is called in LIFO order; this ensures any 109 // Cleanup's called in the test happen first. 110 // Any existing goroutines before the test starts are filtered out. This ensures a single test failing doesn't 111 // cause all future tests to fail. However, it is still possible another test influences the result when t.Parallel is used. 112 // Where possible, CheckMain is preferred. 113 func Check(t TestingTB) { 114 existingRaw, err := interestingGoroutines() 115 if err != nil { 116 t.Errorf("failed to fetch pre-test goroutines: %v", err) 117 return 118 } 119 existing := map[uint64]struct{}{} 120 for _, g := range existingRaw { 121 existing[g.id] = struct{}{} 122 } 123 filter := func(in []*goroutine) []*goroutine { 124 res := make([]*goroutine, 0, len(in)) 125 for _, i := range in { 126 if _, f := existing[i.id]; !f { 127 // This was not in the goroutines list when the test started 128 res = append(res, i) 129 } 130 } 131 return res 132 } 133 t.Cleanup(func() { 134 if err := check(filter); err != nil { 135 t.Errorf("goroutine leak: %v", err) 136 } 137 }) 138 } 139 140 // CheckMain asserts that no goroutines are leaked after a test package exits. 141 // This can be used with the following code: 142 // 143 // func TestMain(m *testing.M) { 144 // leak.CheckMain(m) 145 // } 146 // 147 // Failures here are scoped to the package, not a specific test. To determine the source of the failure, 148 // you can use the tool `go test -exec $PWD/tools/go-ordered-test ./my/package`. This runs each test individually. 149 // If there are some tests that are leaky, you the Check method can be used on individual tests. 150 func CheckMain(m TestingM) { 151 exitCode := m.Run() 152 153 if exitCode == 0 { 154 if err := check(nil); err != nil { 155 log.Errorf("fatal: %v", err) 156 exitCode = 1 157 } 158 } 159 160 os.Exit(exitCode) 161 } 162 163 // MustGarbageCollect asserts that an object was garbage collected by the end of the test. 164 // The input must be a pointer to an object. 165 func MustGarbageCollect(tb test.Failer, i any) { 166 tb.Helper() 167 collected := atomic.NewBool(false) 168 runtime.SetFinalizer(i, func(x any) { 169 collected.Store(true) 170 }) 171 tb.Cleanup(func() { 172 retry.UntilOrFail(tb, func() bool { 173 // Trigger GC explicitly, otherwise we may need to wait a long time for it to run 174 runtime.GC() 175 return collected.Load() 176 }, retry.Timeout(time.Second*5), retry.Message("object was not garbage collected")) 177 }) 178 } 179 180 type goroutine struct { 181 id uint64 182 stack string 183 } 184 185 type goroutineByID []*goroutine 186 187 func (g goroutineByID) Len() int { return len(g) } 188 func (g goroutineByID) Less(i, j int) bool { return g[i].id < g[j].id } 189 func (g goroutineByID) Swap(i, j int) { g[i], g[j] = g[j], g[i] } 190 191 func interestingGoroutine(g string) (*goroutine, error) { 192 sl := strings.SplitN(g, "\n", 2) 193 if len(sl) != 2 { 194 return nil, fmt.Errorf("error parsing stack: %q", g) 195 } 196 stack := strings.TrimSpace(sl[1]) 197 if strings.HasPrefix(stack, "testing.RunTests") { 198 return nil, nil 199 } 200 201 for _, s := range goroutinesToIgnore { 202 if strings.Contains(stack, s) { 203 return nil, nil 204 } 205 } 206 207 // Parse the goroutine's ID from the header line. 208 h := strings.SplitN(sl[0], " ", 3) 209 if len(h) < 3 { 210 return nil, fmt.Errorf("error parsing stack header: %q", sl[0]) 211 } 212 id, err := strconv.ParseUint(h[1], 10, 64) 213 if err != nil { 214 return nil, fmt.Errorf("error parsing goroutine id: %s", err) 215 } 216 217 return &goroutine{id: id, stack: strings.TrimSpace(g)}, nil 218 } 219 220 // interestingGoroutines returns all goroutines we care about for the purpose 221 // of leak checking. It excludes testing or runtime ones. 222 func interestingGoroutines() ([]*goroutine, error) { 223 buf := make([]byte, 2<<20) 224 buf = buf[:runtime.Stack(buf, true)] 225 var gs []*goroutine 226 for _, g := range strings.Split(string(buf), "\n\n") { 227 gr, err := interestingGoroutine(g) 228 if err != nil { 229 return nil, err 230 } else if gr == nil { 231 continue 232 } 233 gs = append(gs, gr) 234 } 235 sort.Sort(goroutineByID(gs)) 236 return gs, nil 237 }