go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/lucictx/deadline.go (about)

     1  // Copyright 2020 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package lucictx
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"math"
    21  	"os"
    22  	"os/signal"
    23  	"time"
    24  
    25  	"go.chromium.org/luci/common/clock"
    26  	"go.chromium.org/luci/common/errors"
    27  	"go.chromium.org/luci/common/logging"
    28  	"go.chromium.org/luci/common/system/signals"
    29  )
    30  
    31  // signalNotify/signalStop are used to mock signal.Notify in tests.
    32  var signalNotify = signal.Notify
    33  var signalStop = signal.Stop
    34  
    35  // DefaultGracePeriod is the value of Deadline.grace_period to assume
    36  // if Deadline is entirely missing in LUCI_CONTEXT.
    37  const DefaultGracePeriod = 30 * time.Second
    38  
    39  // DeadlineEvent is the type pushed into the cleanup channel returned by
    40  // AdjustDeadline.
    41  type DeadlineEvent int
    42  
    43  // The cleanup channel, when it unblocks, will have an infinite supply of one of
    44  // the following event types.
    45  const (
    46  	// ClosureEvent occurs when the context returned by AdjustDeadline is Done.
    47  	// This is the value you'll get from `cleanup` when it's closed.
    48  	ClosureEvent DeadlineEvent = iota
    49  
    50  	// InterruptEvent occurs when a SIGTERM/os.Interrupt was handled to unblock
    51  	// the cleanup channel.
    52  	InterruptEvent
    53  
    54  	// TimeoutEvent occurs when the cleanup channel was unblocked due to
    55  	// a timeout on deadline-gracePeriod.
    56  	TimeoutEvent
    57  )
    58  
    59  func (de DeadlineEvent) String() string {
    60  	switch de {
    61  	case ClosureEvent:
    62  		return "ClosureEvent"
    63  	case InterruptEvent:
    64  		return "InterruptEvent"
    65  	case TimeoutEvent:
    66  		return "TimeoutEvent"
    67  	default:
    68  		panic(fmt.Sprintf("impossible DeadlineEvent %d", de))
    69  	}
    70  }
    71  
    72  // earlier returns the earlier of a and b, treating "zero" as "infinity".
    73  func earlier(a, b time.Time) time.Time {
    74  	if a.IsZero() {
    75  		return b
    76  	}
    77  	if b.IsZero() {
    78  		return a
    79  	}
    80  	if a.Before(b) {
    81  		return a
    82  	}
    83  	return b
    84  }
    85  
    86  var softDeadlineKey = "holds <-chan DeadlineEvent"
    87  
    88  // TrackSoftDeadline returns a context containing a channel for the
    89  // `SoftDeadlineDone` function in this package.
    90  //
    91  // The "soft" deadline is somewhat like context.WithDeadline, except that it
    92  // participates in the LUCI_CONTEXT['deadline'] protocol as well. On hitting the
    93  // soft deadline (or on an external interrupt signal), the SoftDeadlineDone()
    94  // channel will produce a stream of DeadlineEvents. Once this happens, the
    95  // program has LUCI_CONTEXT['deadline']['grace_period'] seconds until
    96  // ctx.Done(). This is meant to give your program time to do cleanup actions
    97  // with a non-canceled Context.
    98  //
    99  // The soft deadline expires based on the earlier of:
   100  //   - LUCI_CONTEXT['deadline']['soft_deadline']
   101  //   - ctx.Deadline() - LUCI_CONTEXT['deadline']['grace_period']
   102  //
   103  // If LUCI_CONTEXT['deadline'] is missing, it is assumed to be:
   104  //
   105  //	{soft_deadline: infinity, grace_period: 30}
   106  //
   107  // This function additionally allows you to reserve a portion of the
   108  // grace_period with `reserveGracePeriod`. This will have the effect of
   109  // adjusting LUCI_CONTEXT['deadline']['grace_period'] in the returned
   110  // context, as well as canceling the returned context that much earlier.
   111  //
   112  // NOTE: If you want to reduce LUCI_CONTEXT['deadline']['soft_deadline'],
   113  // you should do so by applying a Deadline/Timeout to the context prior
   114  // to invoking this function.
   115  //
   116  // Panics if:
   117  //   - reserveGracePeriod < 0.
   118  //   - LUCI_CONTEXT['deadline']['grace_period'] (or its default 30s value)
   119  //     is insufficient to cover reserveGracePeriod.
   120  //
   121  // Example:
   122  //
   123  //	func MainFunc(ctx context.Context) {
   124  //	  // ctx.Deadline  = <unset>
   125  //	  // soft_deadline = t0 + 5:00
   126  //	  // grace_period  = 40
   127  //
   128  //	  newCtx, shutdown := lucictx.TrackSoftDeadline(ctx, 500*time.Millisecond)
   129  //	  defer shutdown()
   130  //	  ScopedFunction(newCtx)
   131  //	}
   132  //
   133  //	func ScopedFunction(newCtx context.Context) {
   134  //	  // hard deadline is (soft_deadline + grace_period - reserveGracePeriod)
   135  //	  // newCtx.Deadline  = unix(t0+5:39.5)
   136  //	  //
   137  //	  // soft_deadline is unchanged
   138  //	  // soft_deadline = t0 + 5:00
   139  //	  //
   140  //	  // grace_period is reduced by reserveGracePeriod
   141  //	  // grace_period = 39.5
   142  //
   143  //	  go func() {
   144  //	    // unblocked at SIGTERM, soft_deadline or shutdown(), whichever is first.
   145  //	    <-lucictx.SoftDeadlineDone()
   146  //	    // have 39.5s to do something (say, send SIGTERM to a child, or start
   147  //	    // tearing down work in-process) before newCtx.Done().
   148  //	  }()
   149  //	}
   150  //
   151  // NOTE: In the event that `ctx` is canceled from outside, `newCtx` will also
   152  // immediately cancel, and SoftDeadlineDone will also move to the ClosureEvent
   153  // state.
   154  func TrackSoftDeadline(ctx context.Context, reserveGracePeriod time.Duration) (newCtx context.Context, shutdown func()) {
   155  	if reserveGracePeriod < 0 {
   156  		panic(errors.Reason("reserveGracePeriod(%d) < 0", reserveGracePeriod).Err())
   157  	}
   158  
   159  	d := GetDeadline(ctx)
   160  	if d == nil {
   161  		logging.Warningf(
   162  			ctx, "AdjustDeadline without Deadline in LUCI_CONTEXT. "+
   163  				"Assuming Deadline={grace_period: %.2f}", DefaultGracePeriod.Seconds())
   164  		d = &Deadline{GracePeriod: DefaultGracePeriod.Seconds()}
   165  	}
   166  
   167  	needSet := false // set to true if we need to do a write to LUCI_CONTEXT
   168  
   169  	// find current soft deadline
   170  	var ctxSoftDeadline time.Time
   171  	if ctxDl, ok := ctx.Deadline(); ok {
   172  		// synthesize the current soft deadline by applying the current grace period
   173  		// to the context's existing hard deadline.
   174  		ctxSoftDeadline = ctxDl.Add(-d.GracePeriodDuration())
   175  	}
   176  	lucictxSoftDeadline := d.SoftDeadlineTime()
   177  	softDeadline := earlier(ctxSoftDeadline, lucictxSoftDeadline)
   178  
   179  	// adjust grace period.
   180  	adjustedGrace := d.GracePeriodDuration()
   181  	if reserveGracePeriod > 0 {
   182  		adjustedGrace -= reserveGracePeriod
   183  		if adjustedGrace < 0 {
   184  			panic(errors.Reason(
   185  				"reserveGracePeriod(%s) > gracePeriod(%s)", reserveGracePeriod, d.GracePeriodDuration()).Err())
   186  		}
   187  		d.GracePeriod = adjustedGrace.Seconds()
   188  		needSet = true
   189  	}
   190  
   191  	var newCtxCancel func()
   192  	var enforceSoftDeadline bool
   193  
   194  	// set the new hard deadline, if any, on newCtx
   195  	if softDeadline.IsZero() /* a.k.a. "infinity" */ {
   196  		// need cancel func here so that newCtx can hit hard closure after
   197  		// a signal/shutdown, even though it won't have a deadline.
   198  		newCtx, newCtxCancel = context.WithCancel(ctx)
   199  	} else {
   200  		if !softDeadline.Equal(lucictxSoftDeadline) {
   201  			d.SetSoftDeadline(softDeadline)
   202  			needSet = true
   203  			enforceSoftDeadline = true
   204  		}
   205  		newCtx, newCtxCancel = clock.WithDeadline(ctx, softDeadline.Add(adjustedGrace))
   206  	}
   207  
   208  	if needSet {
   209  		newCtx = SetDeadline(newCtx, d)
   210  	}
   211  
   212  	cleanup, shutdown := runDeadlineMonitor(newCtx, newCtxCancel, softDeadline, enforceSoftDeadline, adjustedGrace)
   213  	newCtx = context.WithValue(newCtx, &softDeadlineKey, cleanup)
   214  
   215  	return
   216  }
   217  
   218  // SoftDeadlineDone is the counterpart of TrackSoftDeadline, and returns
   219  // a channel which unblocks when the soft deadline of ctx is met, or `ctx` has
   220  // been shut down/interrupted.
   221  //
   222  // If ctx does not come from TrackSoftDeadline(), this returns a nil
   223  // channel (i.e. blocks forever)
   224  func SoftDeadlineDone(ctx context.Context) (ret <-chan DeadlineEvent) {
   225  	ret, _ = ctx.Value(&softDeadlineKey).(<-chan DeadlineEvent)
   226  	return
   227  }
   228  
   229  func runDeadlineMonitor(ctx context.Context, cancel func(), softDeadline time.Time, enforceSoftDeadline bool, gracePeriod time.Duration) (<-chan DeadlineEvent, func()) {
   230  	cleanupCh := make(chan DeadlineEvent)
   231  	// buffer 1 is essential; otherwise signals will be missed if our goroutine
   232  	// isn't currently blocked on sigCh. With a buffer of 1, sigCh is always ready
   233  	// to send.
   234  	sigCh := make(chan os.Signal, 1)
   235  	signalNotify(sigCh, signals.Interrupts()...)
   236  
   237  	var timeoutC <-chan clock.TimerResult
   238  	if enforceSoftDeadline {
   239  		timeoutC = clock.After(ctx, clock.Until(ctx, softDeadline))
   240  	}
   241  
   242  	go func() {
   243  		defer cancel()
   244  
   245  		evt := func() DeadlineEvent {
   246  			defer signalStop(sigCh)
   247  
   248  			select {
   249  			case <-ctx.Done():
   250  				return ClosureEvent
   251  
   252  			case <-timeoutC:
   253  				// clock timer channels unblock on ctx.Done; It's a race whether the
   254  				// previous select case or this one will activate, so check to see if
   255  				// the context has Err set, and return accordingly.
   256  				if ctx.Err() != nil {
   257  					return ClosureEvent
   258  				}
   259  				return TimeoutEvent
   260  
   261  			case <-sigCh:
   262  				if !softDeadline.IsZero() && clock.Now(ctx).After(softDeadline) {
   263  					return TimeoutEvent
   264  				}
   265  				return InterruptEvent
   266  			}
   267  		}()
   268  
   269  		// Note we do this before signaling cleanupCh so that tests can force
   270  		// `clock.After` to run before incrementing the test clock.
   271  		if evt == InterruptEvent {
   272  			timeoutC = clock.After(ctx, gracePeriod)
   273  		} else {
   274  			timeoutC = nil
   275  		}
   276  
   277  		if evt == ClosureEvent {
   278  			close(cleanupCh)
   279  		} else {
   280  			go func() {
   281  				for {
   282  					cleanupCh <- evt
   283  				}
   284  			}()
   285  		}
   286  
   287  		select {
   288  		case <-timeoutC:
   289  		case <-ctx.Done():
   290  		}
   291  		// note `defer cancel()` at the top; at this point ctx has either timed out
   292  		// from its internal deadline, or we're about to cancel it.
   293  	}()
   294  
   295  	return cleanupCh, func() {
   296  		// shutdown func just interrupts on sigCh; multiple calls will have no
   297  		// effect since we only listen to sigCh exactly once.
   298  		select {
   299  		case sigCh <- os.Interrupt:
   300  		default:
   301  		}
   302  	}
   303  }
   304  
   305  // GetDeadline retrieves the raw Deadline information from the context.
   306  //
   307  // You probably want to use AdjustDeadline instead.
   308  func GetDeadline(ctx context.Context) *Deadline {
   309  	t := Deadline{}
   310  	ok, err := Lookup(ctx, "deadline", &t)
   311  	if err != nil {
   312  		panic(err)
   313  	}
   314  	if !ok {
   315  		return nil
   316  	}
   317  	return &t
   318  }
   319  
   320  // SetDeadline sets the raw Deadline information in the context.
   321  //
   322  // If d is nil, sets a default deadline of:
   323  //
   324  //	{grace_period: DefaultGracePeriod}
   325  //
   326  // If d.deadline == 0, adjusts it to ctx.Deadline() - d.grace_period.
   327  //
   328  // You probably want to use AdjustDeadline instead.
   329  func SetDeadline(ctx context.Context, d *Deadline) context.Context {
   330  	if d == nil {
   331  		d = &Deadline{GracePeriod: DefaultGracePeriod.Seconds()}
   332  	}
   333  	if deadline, ok := ctx.Deadline(); ok && d.SoftDeadline == 0 {
   334  		d.SetSoftDeadline(deadline)
   335  		d.SoftDeadline -= d.GracePeriod
   336  	}
   337  	return Set(ctx, "deadline", d)
   338  }
   339  
   340  // SoftDeadlineTime returns the SoftDeadline as a time.Time.
   341  //
   342  // If SoftDeadline is 0 (or *Deadline is nil) this returns a Zero Time.
   343  func (d *Deadline) SoftDeadlineTime() time.Time {
   344  	if d.GetSoftDeadline() == 0 {
   345  		return time.Time{}
   346  	}
   347  
   348  	int, frac := math.Modf(d.SoftDeadline)
   349  	return time.Unix(int64(int), int64(frac*1e9)).UTC()
   350  }
   351  
   352  // SetSoftDeadline sets the SoftDeadline from a time.Time.
   353  //
   354  // If t.IsZero, this sets the SoftDeadline to 0 as well.
   355  func (d *Deadline) SetSoftDeadline(t time.Time) {
   356  	if t.IsZero() {
   357  		d.SoftDeadline = 0
   358  	} else {
   359  		d.SoftDeadline = float64(t.Unix()) + (float64(t.Nanosecond()) / 1e9)
   360  	}
   361  }
   362  
   363  // GracePeriodDuration returns the GracePeriod as a time.Duration.
   364  //
   365  // If d == nil, returns DefaultGracePeriod.
   366  func (d *Deadline) GracePeriodDuration() time.Duration {
   367  	if d == nil {
   368  		return DefaultGracePeriod
   369  	}
   370  	return time.Duration(d.GracePeriod * float64(time.Second))
   371  }