sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/interrupts/interrupts.go (about)

/*
Copyright 2019 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

// Package interrupts exposes helpers for graceful handling of interrupt signals
package interrupts

import (
	"context"
	"net/http"
	"os"
	"os/signal"
	"sync"
	"syscall"
	"time"

	"github.com/sirupsen/logrus"
)

// only one instance of the manager ever exists
var single *manager

func init() {
	m := sync.Mutex{}
	single = &manager{
		c:  sync.NewCond(&m),
		wg: sync.WaitGroup{},
	}
	go handleInterrupt()
}

type manager struct {
	// only one signal handler should be installed, so we use a cond to
	// broadcast to workers that an interrupt has occurred
	c *sync.Cond
	// we record whether we've broadcast in the past
	seenSignal bool
	// we want to ensure that all registered servers and workers get a
	// chance to gracefully shut down
	wg sync.WaitGroup
}

// handleInterrupt turns an interrupt into a broadcast for our condition.
// This must be called _first_, before any work is registered with the
// manager, or there will be a deadlock.
func handleInterrupt() {
	signalsLock.Lock()
	sigChan := signals()
	signalsLock.Unlock()
	s := <-sigChan
	logrus.WithField("signal", s).Info("Received signal.")
	single.c.L.Lock()
	single.seenSignal = true
	single.c.Broadcast()
	single.c.L.Unlock()
}

// test initialization will set the signals channel in another goroutine,
// so we need to synchronize that in order to not trigger the race detector,
// even though we know that init() calls will be serial and the test init()
// will fire first
var signalsLock = sync.Mutex{}

var signalChannel = make(chan os.Signal, 1)

// Terminate can be called to trigger a termination
// of the current process.
func Terminate() {
	signalChannel <- os.Interrupt
}

// signals allows for injection of mock signals in testing
var signals = func() <-chan os.Signal {
	signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
	return signalChannel
}

// wait executes the cancel when an interrupt is seen or if one has already
// been handled
func wait(cancel func()) {
	single.c.L.Lock()
	if !single.seenSignal {
		single.c.Wait()
	}
	single.c.L.Unlock()
	cancel()
}

var gracePeriod = 1 * time.Minute

// WaitForGracefulShutdown waits until all registered servers and workers
// have had time to gracefully shut down, or times out. This function is
// blocking.
func WaitForGracefulShutdown() {
	wait(func() {
		logrus.Info("Interrupt received.")
	})
	finished := make(chan struct{})
	go func() {
		single.wg.Wait()
		close(finished)
	}()
	select {
	case <-finished:
		logrus.Info("All workers gracefully terminated, exiting.")
	case <-time.After(gracePeriod):
		logrus.Warn("Timed out waiting for workers to gracefully terminate, exiting.")
	}
}

// Context returns a context that is cancelled when an interrupt hits.
// Using this context is a weak guarantee that your work will finish before
// process exit, as callers cannot signal that they are finished. Prefer to use
// Run().
func Context() context.Context {
	ctx, cancel := context.WithCancel(context.Background())
	single.wg.Add(1)
	go wait(func() {
		cancel()
		single.wg.Done()
	})

	return ctx
}

// Run will do work until an interrupt is received, then signal the
// worker. This function is not blocking. Callers are expected to exit
// only after WaitForGracefulShutdown returns to ensure all workers have
// had time to shut down. This is preferable to getting the raw Context,
// as we can ensure that the work is finished before releasing our share
// of the wait group on shutdown.
func Run(work func(ctx context.Context)) {
	ctx, cancel := context.WithCancel(context.Background())
	single.wg.Add(1)
	go func() {
		defer single.wg.Done()
		work(ctx)
	}()

	go wait(cancel)
}
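// exampleRunAndWait is an illustrative usage sketch, not part of the upstream
// file. It assumes a hypothetical worker loop and shows the calling pattern
// the doc comments above describe: register work with Run, then block in
// WaitForGracefulShutdown so the process only exits once the worker has had
// a chance to observe the cancelled context. The function name, interval,
// and log messages are invented for illustration.
func exampleRunAndWait() {
	Run(func(ctx context.Context) {
		for {
			select {
			case <-ctx.Done():
				// An interrupt was received and the context was cancelled.
				logrus.Info("Worker shutting down.")
				return
			case <-time.After(10 * time.Second):
				logrus.Info("Doing periodic work.")
			}
		}
	})

	// Block until an interrupt (or a call to Terminate) is seen and all
	// registered workers finish, or the grace period expires.
	WaitForGracefulShutdown()
}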
// ListenAndServer is typically an http.Server
type ListenAndServer interface {
	Shutdownable
	ListenAndServe() error
}

// ListenAndServe runs the HTTP server and handles shutting it down
// gracefully on interrupts. This function is not blocking. Callers
// are expected to exit only after WaitForGracefulShutdown returns to
// ensure all servers have had time to shut down.
func ListenAndServe(server ListenAndServer, gracePeriod time.Duration) {
	single.wg.Add(1)
	go func() {
		defer single.wg.Done()
		logrus.WithError(server.ListenAndServe()).Info("Server exited.")
	}()

	go wait(shutdown(server, gracePeriod))
}

// ListenAndServeTLS runs the HTTP server and handles shutting it down
// gracefully on interrupts. This function is not blocking. Callers
// are expected to exit only after WaitForGracefulShutdown returns to
// ensure all servers have had time to shut down.
func ListenAndServeTLS(server *http.Server, certFile, keyFile string, gracePeriod time.Duration) {
	single.wg.Add(1)
	go func() {
		defer single.wg.Done()
		logrus.WithError(server.ListenAndServeTLS(certFile, keyFile)).Info("Server exited.")
	}()

	go wait(shutdown(server, gracePeriod))
}

// Shutdownable is typically an http.Server
type Shutdownable interface {
	Shutdown(context.Context) error
}

// shutdown will shut down the server
func shutdown(server Shutdownable, gracePeriod time.Duration) func() {
	return func() {
		logrus.Info("Server shutting down...")
		ctx, cancel := context.WithTimeout(context.Background(), gracePeriod)
		if err := server.Shutdown(ctx); err != nil {
			logrus.WithError(err).Info("Error shutting down server...")
		}
		cancel()
	}
}
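// exampleListenAndServe is an illustrative usage sketch, not part of the
// upstream file. It assumes a hypothetical address, handler, and 5-second
// grace period, and shows how an *http.Server (which satisfies the
// ListenAndServer interface above) would typically be registered so that an
// interrupt triggers a bounded, graceful Shutdown.
func exampleListenAndServe() {
	server := &http.Server{
		Addr: "127.0.0.1:8080",
		Handler: http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			w.Write([]byte("ok"))
		}),
	}
	ListenAndServe(server, 5*time.Second)

	// The caller still blocks here so the server gets its grace period
	// before the process exits.
	WaitForGracefulShutdown()
}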
// Tick will do work on a dynamically determined interval until an
// interrupt is received. This function is not blocking. Callers are
// expected to exit only after WaitForGracefulShutdown returns to
// ensure all workers have had time to shut down.
func Tick(work func(), interval func() time.Duration) {
	before := time.Time{} // we want to do work right away
	sig := make(chan int, 1)
	single.wg.Add(1)
	go func() {
		defer single.wg.Done()
		for {
			nextInterval := interval()
			nextTick := before.Add(nextInterval)
			sleep := time.Until(nextTick)
			logrus.WithFields(logrus.Fields{
				"before":   before,
				"interval": nextInterval,
				"sleep":    sleep,
			}).Debug("Resolved next tick interval.")
			select {
			case <-time.After(sleep):
				before = time.Now()
				work()
			case <-sig:
				logrus.Info("Worker shutting down...")
				return
			}
		}
	}()

	go wait(func() {
		sig <- 1
	})
}

// TickLiteral runs Tick with an unchanging interval.
func TickLiteral(work func(), interval time.Duration) {
	Tick(work, func() time.Duration {
		return interval
	})
}

// OnInterrupt ensures that work is done when an interrupt is fired
// and that we wait for the work to be finished before we consider
// the process cleaned up. This function is not blocking.
func OnInterrupt(work func()) {
	single.wg.Add(1)
	go wait(func() {
		work()
		single.wg.Done()
	})
}
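// exampleTickAndCleanup is an illustrative usage sketch, not part of the
// upstream file. It assumes a hypothetical periodic sync and a final flush
// step, combining TickLiteral for recurring work with OnInterrupt for
// cleanup that must complete before WaitForGracefulShutdown returns. The
// interval and log messages are invented for illustration.
func exampleTickAndCleanup() {
	// Run the sync every 30 seconds until an interrupt arrives.
	TickLiteral(func() {
		logrus.Info("Syncing state.")
	}, 30*time.Second)

	// On interrupt, flush once; the wait group ensures this finishes
	// before WaitForGracefulShutdown gives up.
	OnInterrupt(func() {
		logrus.Info("Flushing state before exit.")
	})

	WaitForGracefulShutdown()
}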