sigs.k8s.io/prow@v0.0.0-20240503223140-c5e374dc7eb1/pkg/interrupts/interrupts.go (about)

     1  /*
     2  Copyright 2019 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  // Package interrupts exposes helpers for graceful handling of interrupt signals
    18  package interrupts
    19  
    20  import (
    21  	"context"
    22  	"net/http"
    23  	"os"
    24  	"os/signal"
    25  	"sync"
    26  	"syscall"
    27  	"time"
    28  
    29  	"github.com/sirupsen/logrus"
    30  )
    31  
    32  // only one instance of the manager ever exists
    33  var single *manager
    34  
    35  func init() {
    36  	m := sync.Mutex{}
    37  	single = &manager{
    38  		c:  sync.NewCond(&m),
    39  		wg: sync.WaitGroup{},
    40  	}
    41  	go handleInterrupt()
    42  }
    43  
// manager holds the shared state used to fan a single interrupt out to
// every registered worker and to track when those workers have finished.
type manager struct {
	// only one signal handler should be installed, so we use a cond to
	// broadcast to workers that an interrupt has occurred
	c *sync.Cond
	// we record whether we've broadcast in the past; read and written
	// while holding c.L
	seenSignal bool
	// we want to ensure that all registered servers and workers get a
	// chance to gracefully shut down
	wg sync.WaitGroup
}
    54  
    55  // handleInterrupt turns an interrupt into a broadcast for our condition.
    56  // This must be called _first_ before any work is registered with the
    57  // manager, or there will be a deadlock.
    58  func handleInterrupt() {
    59  	signalsLock.Lock()
    60  	sigChan := signals()
    61  	signalsLock.Unlock()
    62  	s := <-sigChan
    63  	logrus.WithField("signal", s).Info("Received signal.")
    64  	single.c.L.Lock()
    65  	single.seenSignal = true
    66  	single.c.Broadcast()
    67  	single.c.L.Unlock()
    68  }
    69  
// signalsLock guards access to the signals function variable.
// Test initialization will set the signals channel in another goroutine,
// so we need to synchronize that in order to not trigger the race detector,
// even though we know that init() calls will be serial and the test init()
// will fire first.
var signalsLock = sync.Mutex{}
    75  
    76  var signalChannel = make(chan os.Signal, 1)
    77  
    78  // Terminate can be called to trigger a termination
    79  // to the current process.
    80  func Terminate() {
    81  	signalChannel <- os.Interrupt
    82  }
    83  
// signals allows for injection of mock signals in testing. The default
// implementation registers signalChannel to receive os.Interrupt and
// SIGTERM and returns it as a receive-only channel.
var signals = func() <-chan os.Signal {
	signal.Notify(signalChannel, os.Interrupt, syscall.SIGTERM)
	return signalChannel
}
    89  
    90  // wait executes the cancel when an interrupt is seen or if one has already
    91  // been handled
    92  func wait(cancel func()) {
    93  	single.c.L.Lock()
    94  	if !single.seenSignal {
    95  		single.c.Wait()
    96  	}
    97  	single.c.L.Unlock()
    98  	cancel()
    99  }
   100  
// gracePeriod is the maximum time WaitForGracefulShutdown waits for
// registered work to finish once an interrupt has been seen.
var gracePeriod = 1 * time.Minute
   102  
   103  // WaitForGracefulShutdown waits until all registered servers and workers
   104  // have had time to gracefully shut down, or times out. This function is
   105  // blocking.
   106  func WaitForGracefulShutdown() {
   107  	wait(func() {
   108  		logrus.Info("Interrupt received.")
   109  	})
   110  	finished := make(chan struct{})
   111  	go func() {
   112  		single.wg.Wait()
   113  		close(finished)
   114  	}()
   115  	select {
   116  	case <-finished:
   117  		logrus.Info("All workers gracefully terminated, exiting.")
   118  	case <-time.After(gracePeriod):
   119  		logrus.Warn("Timed out waiting for workers to gracefully terminate, exiting.")
   120  	}
   121  }
   122  
   123  // Context returns a context that is cancelled when an interrupt hits.
   124  // Using this context is a weak guarantee that your work will finish before
   125  // process exit as callers cannot signal that they are finished. Prefer to use
   126  // Run().
   127  func Context() context.Context {
   128  	ctx, cancel := context.WithCancel(context.Background())
   129  	single.wg.Add(1)
   130  	go wait(func() {
   131  		cancel()
   132  		single.wg.Done()
   133  	})
   134  
   135  	return ctx
   136  }
   137  
   138  // Run will do work until an interrupt is received, then signal the
   139  // worker. This function is not blocking. Callers are expected to exit
   140  // only after WaitForGracefulShutdown returns to ensure all workers have
   141  // had time to shut down. This is preferable to getting the raw Context
   142  // as we can ensure that the work is finished before releasing our share
   143  // of the wait group on shutdown.
   144  func Run(work func(ctx context.Context)) {
   145  	ctx, cancel := context.WithCancel(context.Background())
   146  	single.wg.Add(1)
   147  	go func() {
   148  		defer single.wg.Done()
   149  		work(ctx)
   150  	}()
   151  
   152  	go wait(cancel)
   153  }
   154  
// ListenAndServer is typically an http.Server. It is anything that can be
// started with ListenAndServe and stopped through the embedded
// Shutdownable interface.
type ListenAndServer interface {
	Shutdownable
	ListenAndServe() error
}
   160  
   161  // ListenAndServe runs the HTTP server and handles shutting it down
   162  // gracefully on interrupts. This function is not blocking. Callers
   163  // are expected to exit only after WaitForGracefulShutdown returns to
   164  // ensure all servers have had time to shut down.
   165  func ListenAndServe(server ListenAndServer, gracePeriod time.Duration) {
   166  	single.wg.Add(1)
   167  	go func() {
   168  		defer single.wg.Done()
   169  		logrus.WithError(server.ListenAndServe()).Info("Server exited.")
   170  	}()
   171  
   172  	go wait(shutdown(server, gracePeriod))
   173  }
   174  
   175  // ListenAndServeTLS runs the HTTP server and handles shutting it down
   176  // gracefully on interrupts. This function is not blocking. Callers
   177  // are expected to exit only after WaitForGracefulShutdown returns to
   178  // ensure all servers have had time to shut down.
   179  func ListenAndServeTLS(server *http.Server, certFile, keyFile string, gracePeriod time.Duration) {
   180  	single.wg.Add(1)
   181  	go func() {
   182  		defer single.wg.Done()
   183  		logrus.WithError(server.ListenAndServeTLS(certFile, keyFile)).Info("Server exited.")
   184  	}()
   185  
   186  	go wait(shutdown(server, gracePeriod))
   187  }
   188  
// Shutdownable is typically an http.Server. It is anything that can be
// asked to shut down, with the supplied context passed to Shutdown.
type Shutdownable interface {
	Shutdown(context.Context) error
}
   193  
   194  // shutdown will shut down the server
   195  func shutdown(server Shutdownable, gracePeriod time.Duration) func() {
   196  	return func() {
   197  		logrus.Info("Server shutting down...")
   198  		ctx, cancel := context.WithTimeout(context.Background(), gracePeriod)
   199  		if err := server.Shutdown(ctx); err != nil {
   200  			logrus.WithError(err).Info("Error shutting down server...")
   201  		}
   202  		cancel()
   203  	}
   204  }
   205  
   206  // Tick will do work on a dynamically determined interval until an
   207  // interrupt is received. This function is not blocking. Callers are
   208  // expected to exit only after WaitForGracefulShutdown returns to
   209  // ensure all workers have had time to shut down.
   210  func Tick(work func(), interval func() time.Duration) {
   211  	before := time.Time{} // we want to do work right away
   212  	sig := make(chan int, 1)
   213  	single.wg.Add(1)
   214  	go func() {
   215  		defer single.wg.Done()
   216  		for {
   217  			nextInterval := interval()
   218  			nextTick := before.Add(nextInterval)
   219  			sleep := time.Until(nextTick)
   220  			logrus.WithFields(logrus.Fields{
   221  				"before":   before,
   222  				"interval": nextInterval,
   223  				"sleep":    sleep,
   224  			}).Debug("Resolved next tick interval.")
   225  			select {
   226  			case <-time.After(sleep):
   227  				before = time.Now()
   228  				work()
   229  			case <-sig:
   230  				logrus.Info("Worker shutting down...")
   231  				return
   232  			}
   233  		}
   234  	}()
   235  
   236  	go wait(func() {
   237  		sig <- 1
   238  	})
   239  }
   240  
   241  // TickLiteral runs Tick with an unchanging interval.
   242  func TickLiteral(work func(), interval time.Duration) {
   243  	Tick(work, func() time.Duration {
   244  		return interval
   245  	})
   246  }
   247  
   248  // OnInterrupt ensures that work is done when an interrupt is fired
   249  // and that we wait for the work to be finished before we consider
   250  // the process cleaned up. This function is not blocking.
   251  func OnInterrupt(work func()) {
   252  	single.wg.Add(1)
   253  	go wait(func() {
   254  		work()
   255  		single.wg.Done()
   256  	})
   257  }