go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/scheduler/appengine/frontend/handler.go (about)

     1  // Copyright 2015 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Binary frontend implements GAE web server for luci-scheduler service.
    16  package main
    17  
    18  import (
    19  	"context"
    20  	"fmt"
    21  	"io"
    22  	"net/http"
    23  	"sync"
    24  	"time"
    25  
    26  	"github.com/golang/protobuf/proto"
    27  	"google.golang.org/appengine"
    28  
    29  	"go.chromium.org/luci/gae/service/info"
    30  
    31  	"go.chromium.org/luci/config/appengine/gaeconfig"
    32  	"go.chromium.org/luci/config/server/cfgmodule"
    33  	"go.chromium.org/luci/config/validation"
    34  	"go.chromium.org/luci/grpc/discovery"
    35  	"go.chromium.org/luci/grpc/grpcmon"
    36  	"go.chromium.org/luci/grpc/grpcutil"
    37  	"go.chromium.org/luci/grpc/prpc"
    38  	"go.chromium.org/luci/server/auth"
    39  	"go.chromium.org/luci/server/auth/signing"
    40  	"go.chromium.org/luci/server/router"
    41  	"go.chromium.org/luci/web/rpcexplorer"
    42  
    43  	gaeserver "go.chromium.org/luci/appengine/gaeauth/server"
    44  	"go.chromium.org/luci/appengine/gaemiddleware"
    45  	"go.chromium.org/luci/appengine/gaemiddleware/standard"
    46  	"go.chromium.org/luci/appengine/tq"
    47  
    48  	"go.chromium.org/luci/common/errors"
    49  	"go.chromium.org/luci/common/logging"
    50  	"go.chromium.org/luci/common/proto/config"
    51  	"go.chromium.org/luci/common/retry/transient"
    52  
    53  	"go.chromium.org/luci/scheduler/api/scheduler/v1"
    54  
    55  	"go.chromium.org/luci/scheduler/appengine/apiservers"
    56  	"go.chromium.org/luci/scheduler/appengine/catalog"
    57  	"go.chromium.org/luci/scheduler/appengine/engine"
    58  	"go.chromium.org/luci/scheduler/appengine/internal"
    59  	"go.chromium.org/luci/scheduler/appengine/task"
    60  	"go.chromium.org/luci/scheduler/appengine/task/buildbucket"
    61  	"go.chromium.org/luci/scheduler/appengine/task/gitiles"
    62  	"go.chromium.org/luci/scheduler/appengine/task/noop"
    63  	"go.chromium.org/luci/scheduler/appengine/task/urlfetch"
    64  	"go.chromium.org/luci/scheduler/appengine/ui"
    65  )
    66  
    67  //// Global state. See main().
    68  
// adminGroup is the group name handed to the admin RPC server as its
// AdminGroup (see main).
const adminGroup = "administrators"

var (
	// globalDispatcher is the task queue dispatcher. main registers the
	// read-project-config task on it, passes it to the engine and installs its
	// routes on the HTTP router.
	globalDispatcher = tq.Dispatcher{
		// Default "/internal/tasks/" is already used by the old-style task queue
		// router, so pick some other prefix to avoid collisions.
		BaseURL: "/internal/tq/",
	}
	// globalCatalog and globalEngine are assigned in main (via catalog.New and
	// engine.NewEngine respectively) before any route is installed.
	globalCatalog catalog.Catalog
	globalEngine  engine.EngineInternal

	// Known kinds of tasks. main registers each of these managers with
	// globalCatalog.
	managers = []task.Manager{
		&buildbucket.TaskManager{},
		&gitiles.TaskManager{},
		&noop.TaskManager{},
		&urlfetch.TaskManager{},
	}
)
    88  
    89  //// Helpers.
    90  
// requestContext is router.Context under a package-local name, which lets this
// file attach helper methods (fail, err, ok) to it. Handlers obtain one by
// converting the *router.Context they receive.
type requestContext router.Context
    93  
    94  // fail writes error message to the log and the response and sets status code.
    95  func (c *requestContext) fail(code int, msg string, args ...any) {
    96  	body := fmt.Sprintf(msg, args...)
    97  	logging.Errorf(c.Request.Context(), "HTTP %d: %s", code, body)
    98  	http.Error(c.Writer, body, code)
    99  }
   100  
   101  // err sets status to 409 on tq.Retry errors, 500 on transient errors and 202 on
   102  // fatal ones. Returning status code in range [200–299] is the only way to tell
   103  // PubSub to stop redelivering the task.
   104  func (c *requestContext) err(e error, msg string, args ...any) {
   105  	code := 0
   106  	switch {
   107  	case e == nil:
   108  		panic("nil")
   109  	case tq.Retry.In(e):
   110  		code = 409
   111  	case transient.Tag.In(e):
   112  		code = 500
   113  	default:
   114  		code = 202 // fatal error, don't need a redelivery
   115  	}
   116  	args = append(args, e)
   117  	c.fail(code, msg+" - %s", args...)
   118  }
   119  
   120  // ok sets status to 200 and puts "OK" in response.
   121  func (c *requestContext) ok() {
   122  	c.Writer.Header().Set("Content-Type", "text/plain; charset=utf-8")
   123  	c.Writer.WriteHeader(200)
   124  	fmt.Fprintln(c.Writer, "OK")
   125  }
   126  
   127  ///
   128  
// globalInit makes sure initializeGlobalState runs at most once; it is
// triggered from the base middleware chain that main builds.
var globalInit sync.Once
   130  
   131  // initializeGlobalState does one time initialization for stuff that needs
   132  // active GAE context.
   133  func initializeGlobalState(c context.Context) {
   134  	if info.IsDevAppServer(c) {
   135  		// Dev app server doesn't preserve the state of task queues across restarts,
   136  		// need to reset datastore state accordingly, otherwise everything gets stuck.
   137  		if err := globalEngine.ResetAllJobsOnDevServer(c); err != nil {
   138  			logging.Errorf(c, "Failed to reset jobs: %s", err)
   139  		}
   140  	}
   141  }
   142  
   143  //// Routes.
   144  
// main wires the process together: it registers task queue handlers, builds
// the global catalog and engine, installs HTTP routes and pRPC services on a
// single router, mounts that router on the default serve mux and finally hands
// control to appengine.Main.
func main() {
	// Register tasks handled here. 'NewEngine' call below will register more.
	globalDispatcher.RegisterTask(&internal.ReadProjectConfigTask{}, readProjectConfig, "read-project-config", nil)

	// Setup global singletons.
	globalCatalog = catalog.New()
	for _, m := range managers {
		// Failing to register a task manager is treated as fatal at startup.
		if err := globalCatalog.RegisterTaskManager(m); err != nil {
			panic(err)
		}
	}
	globalCatalog.RegisterConfigRules(&validation.Rules)

	// The engine shares the catalog and dispatcher created above; its PubSub
	// push path matches the "/pubsub" route installed below.
	globalEngine = engine.NewEngine(engine.Config{
		Catalog:        globalCatalog,
		Dispatcher:     &globalDispatcher,
		PubSubPushPath: "/pubsub",
	})

	// Do global init before handling requests.
	base := standard.Base().Extend(
		func(c *router.Context, next router.Handler) {
			// sync.Once guarantees initializeGlobalState runs a single time, using
			// the context of whichever request gets here first.
			globalInit.Do(func() { initializeGlobalState(c.Request.Context()) })
			next(c)
		},
	)

	// Setup HTTP routes.
	r := router.New()

	standard.InstallHandlersWithMiddleware(r, base)
	globalDispatcher.InstallRoutes(r, base)
	rpcexplorer.Install(r, nil)

	ui.InstallHandlers(r, base, ui.Config{
		Engine:        globalEngine.PublicAPI(),
		Catalog:       globalCatalog,
		TemplatesPath: "templates",
	})

	r.POST("/pubsub", base, pubsubPushHandler) // auth is via custom tokens
	// The config-reading cron endpoint additionally goes through
	// gaemiddleware.RequireCron.
	r.GET("/internal/cron/read-config", base.Extend(gaemiddleware.RequireCron), readConfigCron)

	// Devserver can't accept PubSub pushes, so allow manual pulls instead to
	// simplify local development.
	if appengine.IsDevAppServer() {
		r.GET("/pubsub/pull/:ManagerName/:Publisher", base, pubsubPullHandler)
	}

	// Install RPC servers.
	// Unary calls pass through the grpcmon interceptor first and then through
	// an authenticating interceptor configured with the OAuth2 method (email
	// scope).
	api := prpc.Server{
		UnaryServerInterceptor: grpcutil.ChainUnaryServerInterceptors(
			grpcmon.UnaryServerInterceptor,
			auth.AuthenticatingInterceptor([]auth.Method{
				&gaeserver.OAuth2Method{Scopes: []string{gaeserver.EmailScope}},
			}).Unary(),
		),
	}
	// The public scheduler API only gets the engine's public API surface.
	scheduler.RegisterSchedulerServer(&api, &apiservers.SchedulerServer{
		Engine:  globalEngine.PublicAPI(),
		Catalog: globalCatalog,
	})
	// The admin API gets the full internal engine and the admin group name.
	internal.RegisterAdminServer(&api, &apiservers.AdminServer{
		Engine:     globalEngine,
		Catalog:    globalCatalog,
		AdminGroup: adminGroup,
	})
	config.RegisterConsumerServer(&api, &cfgmodule.ConsumerServer{
		Rules: &validation.Rules,
		// GetConfigServiceAccountFn resolves the service account name of the
		// config service: it reads the config service host from the cached
		// settings and then fetches the service info from https://<host>.
		GetConfigServiceAccountFn: func(ctx context.Context) (string, error) {
			settings, err := gaeconfig.FetchCachedSettings(ctx)
			switch {
			case err != nil:
				return "", err
			case settings.ConfigServiceHost == "":
				return "", errors.New("can not find config service host from settings")
			}
			info, err := signing.FetchServiceInfoFromLUCIService(ctx, "https://"+settings.ConfigServiceHost)
			if err != nil {
				return "", err
			}
			return info.ServiceAccountName, nil
		},
	})
	discovery.Enable(&api)
	api.InstallHandlers(r, base)

	// Serve everything through the router built above.
	http.DefaultServeMux.Handle("/", r)
	appengine.Main()
}
   235  
   236  // pubsubPushHandler handles incoming PubSub messages.
   237  func pubsubPushHandler(c *router.Context) {
   238  	rc := requestContext(*c)
   239  	body, err := io.ReadAll(rc.Request.Body)
   240  	if err != nil {
   241  		rc.fail(500, "Failed to read the request: %s", err)
   242  		return
   243  	}
   244  	if err = globalEngine.ProcessPubSubPush(rc.Request.Context(), body, rc.Request.URL.Query()); err != nil {
   245  		rc.err(err, "Failed to process incoming PubSub push")
   246  		return
   247  	}
   248  	rc.ok()
   249  }
   250  
   251  // pubsubPullHandler is called on dev server by developer to pull pubsub
   252  // messages from a topic created for a publisher.
   253  func pubsubPullHandler(c *router.Context) {
   254  	rc := requestContext(*c)
   255  	if !appengine.IsDevAppServer() {
   256  		rc.fail(403, "Not a dev server")
   257  		return
   258  	}
   259  	err := globalEngine.PullPubSubOnDevServer(
   260  		rc.Request.Context(), rc.Params.ByName("ManagerName"), rc.Params.ByName("Publisher"))
   261  	if err != nil {
   262  		rc.err(err, "Failed to pull PubSub messages")
   263  	} else {
   264  		rc.ok()
   265  	}
   266  }
   267  
   268  // readConfigCron grabs a list of projects from the catalog and datastore and
   269  // dispatches task queue tasks to update each project's cron jobs.
   270  func readConfigCron(c *router.Context) {
   271  	rc := requestContext(*c)
   272  	projectsToVisit := map[string]bool{}
   273  
   274  	// Visit all projects in the catalog.
   275  	ctx, cancel := context.WithTimeout(rc.Request.Context(), 150*time.Second)
   276  	defer cancel()
   277  	projects, err := globalCatalog.GetAllProjects(ctx)
   278  	if err != nil {
   279  		rc.err(err, "Failed to grab a list of project IDs from catalog")
   280  		return
   281  	}
   282  	for _, id := range projects {
   283  		projectsToVisit[id] = true
   284  	}
   285  
   286  	// Also visit all registered projects that do not show up in the catalog
   287  	// listing anymore. It will unregister all jobs belonging to them.
   288  	existing, err := globalEngine.GetAllProjects(rc.Request.Context())
   289  	if err != nil {
   290  		rc.err(err, "Failed to grab a list of project IDs from datastore")
   291  		return
   292  	}
   293  	for _, id := range existing {
   294  		projectsToVisit[id] = true
   295  	}
   296  
   297  	// Handle each project in its own task to avoid "bad" projects (e.g. ones with
   298  	// lots of jobs) to slow down "good" ones.
   299  	tasks := make([]*tq.Task, 0, len(projectsToVisit))
   300  	for projectID := range projectsToVisit {
   301  		tasks = append(tasks, &tq.Task{
   302  			Payload: &internal.ReadProjectConfigTask{ProjectId: projectID},
   303  		})
   304  	}
   305  	if err = globalDispatcher.AddTask(rc.Request.Context(), tasks...); err != nil {
   306  		rc.err(err, "Failed to add tasks to task queue")
   307  	} else {
   308  		rc.ok()
   309  	}
   310  }
   311  
   312  // readProjectConfig grabs a list of jobs in a project from catalog, updates
   313  // all changed jobs, adds new ones, disables old ones.
   314  func readProjectConfig(c context.Context, task proto.Message) error {
   315  	projectID := task.(*internal.ReadProjectConfigTask).ProjectId
   316  
   317  	ctx, cancel := context.WithTimeout(c, 150*time.Second)
   318  	defer cancel()
   319  
   320  	jobs, err := globalCatalog.GetProjectJobs(ctx, projectID)
   321  	if err != nil {
   322  		logging.WithError(err).Errorf(c, "Failed to query for a list of jobs")
   323  		return err
   324  	}
   325  
   326  	if err := globalEngine.UpdateProjectJobs(ctx, projectID, jobs); err != nil {
   327  		logging.WithError(err).Errorf(c, "Failed to update some jobs")
   328  		return err
   329  	}
   330  
   331  	return nil
   332  }