go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/frontend/handler.go (about) 1 // Copyright 2015 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Binary frontend implements GAE web server for luci-scheduler service. 16 package main 17 18 import ( 19 "context" 20 "fmt" 21 "io" 22 "net/http" 23 "sync" 24 "time" 25 26 "github.com/golang/protobuf/proto" 27 "google.golang.org/appengine" 28 29 "go.chromium.org/luci/gae/service/info" 30 31 "go.chromium.org/luci/config/appengine/gaeconfig" 32 "go.chromium.org/luci/config/server/cfgmodule" 33 "go.chromium.org/luci/config/validation" 34 "go.chromium.org/luci/grpc/discovery" 35 "go.chromium.org/luci/grpc/grpcmon" 36 "go.chromium.org/luci/grpc/grpcutil" 37 "go.chromium.org/luci/grpc/prpc" 38 "go.chromium.org/luci/server/auth" 39 "go.chromium.org/luci/server/auth/signing" 40 "go.chromium.org/luci/server/router" 41 "go.chromium.org/luci/web/rpcexplorer" 42 43 gaeserver "go.chromium.org/luci/appengine/gaeauth/server" 44 "go.chromium.org/luci/appengine/gaemiddleware" 45 "go.chromium.org/luci/appengine/gaemiddleware/standard" 46 "go.chromium.org/luci/appengine/tq" 47 48 "go.chromium.org/luci/common/errors" 49 "go.chromium.org/luci/common/logging" 50 "go.chromium.org/luci/common/proto/config" 51 "go.chromium.org/luci/common/retry/transient" 52 53 "go.chromium.org/luci/scheduler/api/scheduler/v1" 54 55 "go.chromium.org/luci/scheduler/appengine/apiservers" 56 "go.chromium.org/luci/scheduler/appengine/catalog" 57 "go.chromium.org/luci/scheduler/appengine/engine" 58 "go.chromium.org/luci/scheduler/appengine/internal" 59 "go.chromium.org/luci/scheduler/appengine/task" 60 "go.chromium.org/luci/scheduler/appengine/task/buildbucket" 61 "go.chromium.org/luci/scheduler/appengine/task/gitiles" 62 "go.chromium.org/luci/scheduler/appengine/task/noop" 63 "go.chromium.org/luci/scheduler/appengine/task/urlfetch" 64 "go.chromium.org/luci/scheduler/appengine/ui" 65 ) 66 67 //// Global state. See main(). 68 69 const adminGroup = "administrators" 70 71 var ( 72 globalDispatcher = tq.Dispatcher{ 73 // Default "/internal/tasks/" is already used by the old-style task queue 74 // router, so pick some other prefix to avoid collisions. 75 BaseURL: "/internal/tq/", 76 } 77 globalCatalog catalog.Catalog 78 globalEngine engine.EngineInternal 79 80 // Known kinds of tasks. 81 managers = []task.Manager{ 82 &buildbucket.TaskManager{}, 83 &gitiles.TaskManager{}, 84 &noop.TaskManager{}, 85 &urlfetch.TaskManager{}, 86 } 87 ) 88 89 //// Helpers. 90 91 // requestContext is used to add helper methods. 92 type requestContext router.Context 93 94 // fail writes error message to the log and the response and sets status code. 95 func (c *requestContext) fail(code int, msg string, args ...any) { 96 body := fmt.Sprintf(msg, args...) 97 logging.Errorf(c.Request.Context(), "HTTP %d: %s", code, body) 98 http.Error(c.Writer, body, code) 99 } 100 101 // err sets status to 409 on tq.Retry errors, 500 on transient errors and 202 on 102 // fatal ones. Returning status code in range [200–299] is the only way to tell 103 // PubSub to stop redelivering the task. 104 func (c *requestContext) err(e error, msg string, args ...any) { 105 code := 0 106 switch { 107 case e == nil: 108 panic("nil") 109 case tq.Retry.In(e): 110 code = 409 111 case transient.Tag.In(e): 112 code = 500 113 default: 114 code = 202 // fatal error, don't need a redelivery 115 } 116 args = append(args, e) 117 c.fail(code, msg+" - %s", args...) 118 } 119 120 // ok sets status to 200 and puts "OK" in response. 121 func (c *requestContext) ok() { 122 c.Writer.Header().Set("Content-Type", "text/plain; charset=utf-8") 123 c.Writer.WriteHeader(200) 124 fmt.Fprintln(c.Writer, "OK") 125 } 126 127 /// 128 129 var globalInit sync.Once 130 131 // initializeGlobalState does one time initialization for stuff that needs 132 // active GAE context. 133 func initializeGlobalState(c context.Context) { 134 if info.IsDevAppServer(c) { 135 // Dev app server doesn't preserve the state of task queues across restarts, 136 // need to reset datastore state accordingly, otherwise everything gets stuck. 137 if err := globalEngine.ResetAllJobsOnDevServer(c); err != nil { 138 logging.Errorf(c, "Failed to reset jobs: %s", err) 139 } 140 } 141 } 142 143 //// Routes. 144 145 func main() { 146 // Register tasks handled here. 'NewEngine' call below will register more. 147 globalDispatcher.RegisterTask(&internal.ReadProjectConfigTask{}, readProjectConfig, "read-project-config", nil) 148 149 // Setup global singletons. 150 globalCatalog = catalog.New() 151 for _, m := range managers { 152 if err := globalCatalog.RegisterTaskManager(m); err != nil { 153 panic(err) 154 } 155 } 156 globalCatalog.RegisterConfigRules(&validation.Rules) 157 158 globalEngine = engine.NewEngine(engine.Config{ 159 Catalog: globalCatalog, 160 Dispatcher: &globalDispatcher, 161 PubSubPushPath: "/pubsub", 162 }) 163 164 // Do global init before handling requests. 165 base := standard.Base().Extend( 166 func(c *router.Context, next router.Handler) { 167 globalInit.Do(func() { initializeGlobalState(c.Request.Context()) }) 168 next(c) 169 }, 170 ) 171 172 // Setup HTTP routes. 173 r := router.New() 174 175 standard.InstallHandlersWithMiddleware(r, base) 176 globalDispatcher.InstallRoutes(r, base) 177 rpcexplorer.Install(r, nil) 178 179 ui.InstallHandlers(r, base, ui.Config{ 180 Engine: globalEngine.PublicAPI(), 181 Catalog: globalCatalog, 182 TemplatesPath: "templates", 183 }) 184 185 r.POST("/pubsub", base, pubsubPushHandler) // auth is via custom tokens 186 r.GET("/internal/cron/read-config", base.Extend(gaemiddleware.RequireCron), readConfigCron) 187 188 // Devserver can't accept PubSub pushes, so allow manual pulls instead to 189 // simplify local development. 190 if appengine.IsDevAppServer() { 191 r.GET("/pubsub/pull/:ManagerName/:Publisher", base, pubsubPullHandler) 192 } 193 194 // Install RPC servers. 195 api := prpc.Server{ 196 UnaryServerInterceptor: grpcutil.ChainUnaryServerInterceptors( 197 grpcmon.UnaryServerInterceptor, 198 auth.AuthenticatingInterceptor([]auth.Method{ 199 &gaeserver.OAuth2Method{Scopes: []string{gaeserver.EmailScope}}, 200 }).Unary(), 201 ), 202 } 203 scheduler.RegisterSchedulerServer(&api, &apiservers.SchedulerServer{ 204 Engine: globalEngine.PublicAPI(), 205 Catalog: globalCatalog, 206 }) 207 internal.RegisterAdminServer(&api, &apiservers.AdminServer{ 208 Engine: globalEngine, 209 Catalog: globalCatalog, 210 AdminGroup: adminGroup, 211 }) 212 config.RegisterConsumerServer(&api, &cfgmodule.ConsumerServer{ 213 Rules: &validation.Rules, 214 GetConfigServiceAccountFn: func(ctx context.Context) (string, error) { 215 settings, err := gaeconfig.FetchCachedSettings(ctx) 216 switch { 217 case err != nil: 218 return "", err 219 case settings.ConfigServiceHost == "": 220 return "", errors.New("can not find config service host from settings") 221 } 222 info, err := signing.FetchServiceInfoFromLUCIService(ctx, "https://"+settings.ConfigServiceHost) 223 if err != nil { 224 return "", err 225 } 226 return info.ServiceAccountName, nil 227 }, 228 }) 229 discovery.Enable(&api) 230 api.InstallHandlers(r, base) 231 232 http.DefaultServeMux.Handle("/", r) 233 appengine.Main() 234 } 235 236 // pubsubPushHandler handles incoming PubSub messages. 237 func pubsubPushHandler(c *router.Context) { 238 rc := requestContext(*c) 239 body, err := io.ReadAll(rc.Request.Body) 240 if err != nil { 241 rc.fail(500, "Failed to read the request: %s", err) 242 return 243 } 244 if err = globalEngine.ProcessPubSubPush(rc.Request.Context(), body, rc.Request.URL.Query()); err != nil { 245 rc.err(err, "Failed to process incoming PubSub push") 246 return 247 } 248 rc.ok() 249 } 250 251 // pubsubPullHandler is called on dev server by developer to pull pubsub 252 // messages from a topic created for a publisher. 253 func pubsubPullHandler(c *router.Context) { 254 rc := requestContext(*c) 255 if !appengine.IsDevAppServer() { 256 rc.fail(403, "Not a dev server") 257 return 258 } 259 err := globalEngine.PullPubSubOnDevServer( 260 rc.Request.Context(), rc.Params.ByName("ManagerName"), rc.Params.ByName("Publisher")) 261 if err != nil { 262 rc.err(err, "Failed to pull PubSub messages") 263 } else { 264 rc.ok() 265 } 266 } 267 268 // readConfigCron grabs a list of projects from the catalog and datastore and 269 // dispatches task queue tasks to update each project's cron jobs. 270 func readConfigCron(c *router.Context) { 271 rc := requestContext(*c) 272 projectsToVisit := map[string]bool{} 273 274 // Visit all projects in the catalog. 275 ctx, cancel := context.WithTimeout(rc.Request.Context(), 150*time.Second) 276 defer cancel() 277 projects, err := globalCatalog.GetAllProjects(ctx) 278 if err != nil { 279 rc.err(err, "Failed to grab a list of project IDs from catalog") 280 return 281 } 282 for _, id := range projects { 283 projectsToVisit[id] = true 284 } 285 286 // Also visit all registered projects that do not show up in the catalog 287 // listing anymore. It will unregister all jobs belonging to them. 288 existing, err := globalEngine.GetAllProjects(rc.Request.Context()) 289 if err != nil { 290 rc.err(err, "Failed to grab a list of project IDs from datastore") 291 return 292 } 293 for _, id := range existing { 294 projectsToVisit[id] = true 295 } 296 297 // Handle each project in its own task to avoid "bad" projects (e.g. ones with 298 // lots of jobs) to slow down "good" ones. 299 tasks := make([]*tq.Task, 0, len(projectsToVisit)) 300 for projectID := range projectsToVisit { 301 tasks = append(tasks, &tq.Task{ 302 Payload: &internal.ReadProjectConfigTask{ProjectId: projectID}, 303 }) 304 } 305 if err = globalDispatcher.AddTask(rc.Request.Context(), tasks...); err != nil { 306 rc.err(err, "Failed to add tasks to task queue") 307 } else { 308 rc.ok() 309 } 310 } 311 312 // readProjectConfig grabs a list of jobs in a project from catalog, updates 313 // all changed jobs, adds new ones, disables old ones. 314 func readProjectConfig(c context.Context, task proto.Message) error { 315 projectID := task.(*internal.ReadProjectConfigTask).ProjectId 316 317 ctx, cancel := context.WithTimeout(c, 150*time.Second) 318 defer cancel() 319 320 jobs, err := globalCatalog.GetProjectJobs(ctx, projectID) 321 if err != nil { 322 logging.WithError(err).Errorf(c, "Failed to query for a list of jobs") 323 return err 324 } 325 326 if err := globalEngine.UpdateProjectJobs(ctx, projectID, jobs); err != nil { 327 logging.WithError(err).Errorf(c, "Failed to update some jobs") 328 return err 329 } 330 331 return nil 332 }