go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/cron/dispatcher.go (about)

     1  // Copyright 2021 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package cron
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"net/http"
    21  	"regexp"
    22  	"sort"
    23  	"strings"
    24  	"sync"
    25  
    26  	"go.chromium.org/luci/common/clock"
    27  	"go.chromium.org/luci/common/errors"
    28  	"go.chromium.org/luci/common/retry/transient"
    29  	"go.chromium.org/luci/common/tsmon/distribution"
    30  	"go.chromium.org/luci/common/tsmon/field"
    31  	"go.chromium.org/luci/common/tsmon/metric"
    32  	"go.chromium.org/luci/common/tsmon/types"
    33  
    34  	"go.chromium.org/luci/server/internal"
    35  	"go.chromium.org/luci/server/router"
    36  )
    37  
var (
	// callsCounter counts cron requests seen by the dispatcher, partitioned by
	// the handler ID and by the outcome of the request.
	//
	// It is incremented both for executed handlers (OK, transient, fatal,
	// panic) and for requests that never reached a handler (no_handler, auth).
	callsCounter = metric.NewCounter(
		"cron/server/calls",
		"Count of handled cron job invocations",
		nil,
		field.String("id"),     // cron handler ID
		field.String("result"), // OK | transient | fatal | panic | no_handler | auth
	)

	// callsDurationMS tracks how long registered handlers ran, in milliseconds,
	// partitioned by the handler ID and by the outcome of the invocation.
	//
	// It is only updated when a handler was found and invoked, which is why its
	// "result" field has fewer possible values than the one in callsCounter.
	callsDurationMS = metric.NewCumulativeDistribution(
		"cron/server/duration",
		"Duration of handling of recognized handlers",
		&types.MetricMetadata{Units: types.Milliseconds},
		distribution.DefaultBucketer,
		field.String("id"),     // cron handler ID
		field.String("result"), // OK | transient | fatal | panic
	)
)
    56  
// Handler is called to handle a cron job invocation.
//
// Returning nil results in a reply with "OK" as its body.
//
// Transient errors are transformed into HTTP 500 replies to Cloud Scheduler,
// which may trigger a retry based on the job's retry configuration. Returning a
// non-transient error results in an error-level logging message and HTTP 202
// reply, which does not trigger a retry.
type Handler func(ctx context.Context) error
    64  
// Dispatcher routes requests from Cloud Scheduler to registered handlers.
//
// Handlers are added via RegisterHandler and exposed over HTTP via
// InstallCronRoutes.
type Dispatcher struct {
	// AuthorizedCallers is a list of service accounts Cloud Scheduler may use to
	// call cron HTTP endpoints.
	//
	// See https://cloud.google.com/scheduler/docs/http-target-auth for details.
	//
	// Can be empty on Appengine, since there calls are authenticated using
	// "X-Appengine-Cron" header.
	AuthorizedCallers []string

	// GAE is true when running on Appengine.
	//
	// It alters how incoming HTTP requests are authenticated.
	GAE bool

	// DisableAuth can be used to disable authentication on HTTP endpoints.
	//
	// This is useful when running in development mode on localhost or in tests.
	DisableAuth bool

	// m protects h.
	m sync.RWMutex
	// h maps a handler ID to the registered handler.
	//
	// It is nil until the first RegisterHandler call allocates it.
	h map[string]Handler
}
    89  
// handlerIDRe is used to validate handler IDs.
//
// A valid ID is 1 to 100 characters long and consists only of ASCII letters,
// digits, underscores, dashes and dots.
var handlerIDRe = regexp.MustCompile(`^[a-zA-Z0-9_\-.]{1,100}$`)
    92  
    93  // RegisterHandler registers a callback called to handle a cron job invocation.
    94  //
    95  // The handler can be invoked via GET requests to "<serving-prefix>/<id>",
    96  // (usually "/internal/cron/<id>"). This URL path should be used when
    97  // configuring Cloud Scheduler jobs or in cron.yaml when running on Appengine.
    98  //
    99  // The ID must match `[a-zA-Z0-9_\-.]{1,100}`. Panics otherwise. Panics if a
   100  // handler with such ID is already registered.
   101  func (d *Dispatcher) RegisterHandler(id string, h Handler) {
   102  	if !handlerIDRe.MatchString(id) {
   103  		panic(fmt.Sprintf("bad cron handler ID %q", id))
   104  	}
   105  	d.m.Lock()
   106  	defer d.m.Unlock()
   107  	if d.h == nil {
   108  		d.h = make(map[string]Handler, 1)
   109  	}
   110  	if _, ok := d.h[id]; ok {
   111  		panic(fmt.Sprintf("cron handler with ID %q is already registered", id))
   112  	}
   113  	d.h[id] = h
   114  }
   115  
   116  // InstallCronRoutes installs routes that handle requests from Cloud Scheduler.
   117  func (d *Dispatcher) InstallCronRoutes(r *router.Router, prefix string) {
   118  	if prefix == "" {
   119  		prefix = "/internal/cron/"
   120  	} else if !strings.HasPrefix(prefix, "/") {
   121  		panic("the prefix should start with /")
   122  	}
   123  
   124  	route := strings.TrimRight(prefix, "/") + "/*handler"
   125  	handlerID := func(c *router.Context) string {
   126  		return strings.TrimPrefix(c.Params.ByName("handler"), "/")
   127  	}
   128  
   129  	var mw router.MiddlewareChain
   130  	if !d.DisableAuth {
   131  		header := ""
   132  		if d.GAE {
   133  			header = "X-Appengine-Cron"
   134  		}
   135  		mw = internal.CloudAuthMiddleware(d.AuthorizedCallers, header,
   136  			func(c *router.Context) {
   137  				callsCounter.Add(c.Request.Context(), 1, handlerID(c), "auth")
   138  			},
   139  		)
   140  	}
   141  
   142  	r.GET(route, mw, func(c *router.Context) {
   143  		id := handlerID(c)
   144  		if err := d.executeHandlerByID(c.Request.Context(), id); err != nil {
   145  			status := 0
   146  			if transient.Tag.In(err) {
   147  				err = errors.Annotate(err, "transient error in cron handler %q", id).Err()
   148  				status = 500
   149  			} else {
   150  				err = errors.Annotate(err, "fatal error in cron handler %q", id).Err()
   151  				status = 202
   152  			}
   153  			errors.Log(c.Request.Context(), err)
   154  			http.Error(c.Writer, err.Error(), status)
   155  		} else {
   156  			c.Writer.Write([]byte("OK"))
   157  		}
   158  	})
   159  }
   160  
   161  // handlerIDs returns a sorted list of registered handler IDs.
   162  func (d *Dispatcher) handlerIDs() []string {
   163  	d.m.RLock()
   164  	defer d.m.RUnlock()
   165  	ids := make([]string, 0, len(d.h))
   166  	for id := range d.h {
   167  		ids = append(ids, id)
   168  	}
   169  	sort.Strings(ids)
   170  	return ids
   171  }
   172  
   173  // executeHandlerByID executes a registered cron handler.
   174  func (d *Dispatcher) executeHandlerByID(ctx context.Context, id string) error {
   175  	d.m.RLock()
   176  	h := d.h[id]
   177  	d.m.RUnlock()
   178  	if h == nil {
   179  		callsCounter.Add(ctx, 1, id, "no_handler")
   180  		return errors.Reason("no cron handler with ID %q is registered", id).Err()
   181  	}
   182  
   183  	start := clock.Now(ctx)
   184  	result := "panic"
   185  	defer func() {
   186  		callsCounter.Add(ctx, 1, id, result)
   187  		callsDurationMS.Add(ctx, float64(clock.Since(ctx, start).Milliseconds()), id, result)
   188  	}()
   189  
   190  	err := h(ctx)
   191  	switch {
   192  	case err == nil:
   193  		result = "OK"
   194  	case transient.Tag.In(err):
   195  		result = "transient"
   196  	default:
   197  		result = "fatal"
   198  	}
   199  	return err
   200  }