go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/cron/dispatcher.go (about) 1 // Copyright 2021 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cron 16 17 import ( 18 "context" 19 "fmt" 20 "net/http" 21 "regexp" 22 "sort" 23 "strings" 24 "sync" 25 26 "go.chromium.org/luci/common/clock" 27 "go.chromium.org/luci/common/errors" 28 "go.chromium.org/luci/common/retry/transient" 29 "go.chromium.org/luci/common/tsmon/distribution" 30 "go.chromium.org/luci/common/tsmon/field" 31 "go.chromium.org/luci/common/tsmon/metric" 32 "go.chromium.org/luci/common/tsmon/types" 33 34 "go.chromium.org/luci/server/internal" 35 "go.chromium.org/luci/server/router" 36 ) 37 38 var ( 39 callsCounter = metric.NewCounter( 40 "cron/server/calls", 41 "Count of handled cron job invocations", 42 nil, 43 field.String("id"), // cron handler ID 44 field.String("result"), // OK | transient | fatal | panic | no_handler | auth 45 ) 46 47 callsDurationMS = metric.NewCumulativeDistribution( 48 "cron/server/duration", 49 "Duration of handling of recognized handlers", 50 &types.MetricMetadata{Units: types.Milliseconds}, 51 distribution.DefaultBucketer, 52 field.String("id"), // cron handler ID 53 field.String("result"), // OK | transient | fatal | panic 54 ) 55 ) 56 57 // Handler is called to handle a cron job invocation. 
//
// Returning nil makes the dispatcher reply with "OK". An error tagged as
// transient is turned into an HTTP 500 reply to Cloud Scheduler, which may
// retry the job depending on its retry configuration. Any other error is
// turned into an HTTP 202 reply, which does not trigger a retry. Both kinds
// of errors are logged.
type Handler func(ctx context.Context) error

// Dispatcher maps incoming Cloud Scheduler requests onto registered handlers.
//
// The zero value is ready to use: handlers are added via RegisterHandler and
// HTTP routes are exposed via InstallCronRoutes.
type Dispatcher struct {
	// AuthorizedCallers lists service accounts that Cloud Scheduler is allowed
	// to use when calling cron HTTP endpoints.
	//
	// See https://cloud.google.com/scheduler/docs/http-target-auth for details.
	//
	// May be left empty on Appengine, where calls are authenticated through the
	// "X-Appengine-Cron" header instead.
	AuthorizedCallers []string

	// GAE should be set to true when running on Appengine.
	//
	// It changes how incoming HTTP requests are authenticated.
	GAE bool

	// DisableAuth turns off authentication of the HTTP endpoints.
	//
	// Intended for development mode on localhost and for tests.
	DisableAuth bool

	m sync.RWMutex       // guards h
	h map[string]Handler // registered handlers keyed by ID; allocated lazily
}

// handlerIDRe defines what a valid handler ID looks like: 1 to 100 ASCII
// letters, digits, underscores, dashes or dots.
var handlerIDRe = regexp.MustCompile(`^[a-zA-Z0-9_\-.]{1,100}$`)

// RegisterHandler registers a callback that handles invocations of a cron job.
//
// The handler is reachable via GET requests to "<serving-prefix>/<id>"
// (usually "/internal/cron/<id>"). Use this URL path when configuring Cloud
// Scheduler jobs, or in cron.yaml when running on Appengine.
//
// The ID must match `[a-zA-Z0-9_\-.]{1,100}`, otherwise the call panics. It
// also panics if a handler with the same ID has already been registered.
101 func (d *Dispatcher) RegisterHandler(id string, h Handler) { 102 if !handlerIDRe.MatchString(id) { 103 panic(fmt.Sprintf("bad cron handler ID %q", id)) 104 } 105 d.m.Lock() 106 defer d.m.Unlock() 107 if d.h == nil { 108 d.h = make(map[string]Handler, 1) 109 } 110 if _, ok := d.h[id]; ok { 111 panic(fmt.Sprintf("cron handler with ID %q is already registered", id)) 112 } 113 d.h[id] = h 114 } 115 116 // InstallCronRoutes installs routes that handle requests from Cloud Scheduler. 117 func (d *Dispatcher) InstallCronRoutes(r *router.Router, prefix string) { 118 if prefix == "" { 119 prefix = "/internal/cron/" 120 } else if !strings.HasPrefix(prefix, "/") { 121 panic("the prefix should start with /") 122 } 123 124 route := strings.TrimRight(prefix, "/") + "/*handler" 125 handlerID := func(c *router.Context) string { 126 return strings.TrimPrefix(c.Params.ByName("handler"), "/") 127 } 128 129 var mw router.MiddlewareChain 130 if !d.DisableAuth { 131 header := "" 132 if d.GAE { 133 header = "X-Appengine-Cron" 134 } 135 mw = internal.CloudAuthMiddleware(d.AuthorizedCallers, header, 136 func(c *router.Context) { 137 callsCounter.Add(c.Request.Context(), 1, handlerID(c), "auth") 138 }, 139 ) 140 } 141 142 r.GET(route, mw, func(c *router.Context) { 143 id := handlerID(c) 144 if err := d.executeHandlerByID(c.Request.Context(), id); err != nil { 145 status := 0 146 if transient.Tag.In(err) { 147 err = errors.Annotate(err, "transient error in cron handler %q", id).Err() 148 status = 500 149 } else { 150 err = errors.Annotate(err, "fatal error in cron handler %q", id).Err() 151 status = 202 152 } 153 errors.Log(c.Request.Context(), err) 154 http.Error(c.Writer, err.Error(), status) 155 } else { 156 c.Writer.Write([]byte("OK")) 157 } 158 }) 159 } 160 161 // handlerIDs returns a sorted list of registered handler IDs. 
162 func (d *Dispatcher) handlerIDs() []string { 163 d.m.RLock() 164 defer d.m.RUnlock() 165 ids := make([]string, 0, len(d.h)) 166 for id := range d.h { 167 ids = append(ids, id) 168 } 169 sort.Strings(ids) 170 return ids 171 } 172 173 // executeHandlerByID executes a registered cron handler. 174 func (d *Dispatcher) executeHandlerByID(ctx context.Context, id string) error { 175 d.m.RLock() 176 h := d.h[id] 177 d.m.RUnlock() 178 if h == nil { 179 callsCounter.Add(ctx, 1, id, "no_handler") 180 return errors.Reason("no cron handler with ID %q is registered", id).Err() 181 } 182 183 start := clock.Now(ctx) 184 result := "panic" 185 defer func() { 186 callsCounter.Add(ctx, 1, id, result) 187 callsDurationMS.Add(ctx, float64(clock.Since(ctx, start).Milliseconds()), id, result) 188 }() 189 190 err := h(ctx) 191 switch { 192 case err == nil: 193 result = "OK" 194 case transient.Tag.In(err): 195 result = "transient" 196 default: 197 result = "fatal" 198 } 199 return err 200 }