// Copyright 2015 The LUCI Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package catalog implements a part that talks to luci-config service to fetch
// and parse job definitions. Catalog knows about all task types and can
// instantiate task.Manager's.
package catalog

import (
	"context"
	"fmt"
	"reflect"
	"regexp"
	"sort"
	"strings"

	"github.com/golang/protobuf/proto"

	"go.chromium.org/luci/common/data/stringset"
	"go.chromium.org/luci/common/logging"
	"go.chromium.org/luci/config"
	"go.chromium.org/luci/config/cfgclient"
	"go.chromium.org/luci/config/validation"
	"go.chromium.org/luci/server/auth/realms"

	"go.chromium.org/luci/scheduler/appengine/engine/policy"
	"go.chromium.org/luci/scheduler/appengine/messages"
	"go.chromium.org/luci/scheduler/appengine/schedule"
	"go.chromium.org/luci/scheduler/appengine/task"
)

var (
	// jobIDRe is used to validate job ID field.
	//
	// It accepts 1 to 100 characters, each being an ASCII letter, a digit, '_',
	// '-', '.', a space or a parenthesis.
	jobIDRe = regexp.MustCompile(`^[0-9A-Za-z_\-\. \)\(]{1,100}$`)
)

const (
	// defaultJobSchedule is default value of 'schedule' field of Job proto.
	defaultJobSchedule = "triggered"
	// defaultTriggerSchedule is default value of 'schedule' field of Trigger
	// proto.
	defaultTriggerSchedule = "with 30s interval"
)

// Catalog knows how to enumerate all scheduler configs across all projects.
// Methods return errors.Transient on non-fatal errors. Any other error means
// that retry won't help.
type Catalog interface {
	// RegisterTaskManager registers a manager that knows how to deal with
	// a particular kind of tasks (as specified by its ProtoMessageType method,
	// e.g. SwarmingTask proto).
	RegisterTaskManager(m task.Manager) error

	// GetTaskManager takes pointer to a proto message describing some task config
	// (e.g. SwarmingTask proto) and returns corresponding TaskManager
	// implementation (or nil).
	GetTaskManager(m proto.Message) task.Manager

	// GetTaskManagerByName returns a registered task manager given its name.
	//
	// Returns nil if there's no such task manager.
	GetTaskManagerByName(name string) task.Manager

	// UnmarshalTask takes a serialized task definition (as in Definition.Task),
	// unmarshals and validates it, and returns proto.Message that represent
	// the concrete task to run (e.g. SwarmingTask proto). It can be passed to
	// corresponding task.Manager.
	UnmarshalTask(c context.Context, task []byte, realmID string) (proto.Message, error)

	// GetAllProjects returns a list of all known project ids.
	GetAllProjects(c context.Context) ([]string, error)

	// GetProjectJobs returns a list of scheduler jobs defined within a project or
	// empty list if no such project.
	GetProjectJobs(c context.Context, projectID string) ([]Definition, error)

	// RegisterConfigRules adds the config validation rules that verify job config
	// files.
	RegisterConfigRules(r *validation.RuleSet)
}

// JobFlavor describes a category of jobs.
type JobFlavor int

const (
	// JobFlavorPeriodic is a regular job (Swarming, Buildbucket) that runs on
	// a schedule or via a trigger.
	//
	// Defined via 'job {...}' config stanza with 'schedule' field.
	JobFlavorPeriodic JobFlavor = iota

	// JobFlavorTriggered is a regular job (Swarming, Buildbucket) that runs only
	// when triggered.
	//
	// Defined via 'job {...}' config stanza with no 'schedule' field (or with
	// the field explicitly set to "triggered").
	JobFlavorTriggered

	// JobFlavorTrigger is a job that can trigger other jobs (e.g. git poller).
	//
	// Defined via 'trigger {...}' config stanza.
	JobFlavorTrigger
)

// Definition wraps definition of a scheduler job fetched from the config.
type Definition struct {
	// JobID is globally unique job identifier: "<ProjectID>/<JobName>".
	JobID string

	// RealmID is a global realm name (i.e. "<ProjectID>:...") the job belongs to.
	RealmID string

	// Flavor describes what category of jobs this is, see the enum.
	Flavor JobFlavor

	// Revision is config revision this definition was fetched from.
	Revision string

	// RevisionURL is URL to human readable page with config file.
	RevisionURL string

	// Schedule is job's schedule: the value of the 'schedule' config field, or,
	// when that field is empty, the default for the stanza kind ("triggered" for
	// jobs, "with 30s interval" for triggers).
	Schedule string

	// Task is serialized representation of scheduler job. It can be fed back to
	// Catalog.UnmarshalTask(...) to get proto.Message describing the task.
	//
	// Internally it is TaskDefWrapper proto message, but callers must treat it as
	// an opaque byte blob.
	Task []byte

	// TriggeringPolicy is serialized TriggeringPolicy proto that defines a
	// function that decides when to trigger invocations.
	//
	// It is taken verbatim from the config if defined there, or set to nil
	// if not there.
	TriggeringPolicy []byte

	// TriggeredJobIDs is a list of jobIDs which this job triggers.
	// It's set only for triggering jobs.
	TriggeredJobIDs []string
}

// New returns implementation of Catalog.
155 func New() Catalog { 156 return &catalog{ 157 managersByType: map[reflect.Type]task.Manager{}, 158 managersByName: map[string]task.Manager{}, 159 } 160 } 161 162 type catalog struct { 163 managersByType map[reflect.Type]task.Manager 164 managersByName map[string]task.Manager 165 } 166 167 func (cat *catalog) RegisterTaskManager(m task.Manager) error { 168 prototype := m.ProtoMessageType() 169 typ := reflect.TypeOf(prototype) 170 if typ == nil || typ.Kind() != reflect.Ptr || typ.Elem().Kind() != reflect.Struct { 171 return fmt.Errorf("expecting pointer to a struct, got %T instead", prototype) 172 } 173 if _, ok := cat.managersByType[typ]; ok { 174 return fmt.Errorf("task kind %T is already registered", prototype) 175 } 176 if _, ok := cat.managersByName[m.Name()]; ok { 177 return fmt.Errorf("task manager with name %q is already registered", m.Name()) 178 } 179 cat.managersByType[typ] = m 180 cat.managersByName[m.Name()] = m 181 return nil 182 } 183 184 func (cat *catalog) GetTaskManager(msg proto.Message) task.Manager { 185 return cat.managersByType[reflect.TypeOf(msg)] 186 } 187 188 func (cat *catalog) GetTaskManagerByName(name string) task.Manager { 189 return cat.managersByName[name] 190 } 191 192 func (cat *catalog) UnmarshalTask(c context.Context, task []byte, realmID string) (proto.Message, error) { 193 msg := messages.TaskDefWrapper{} 194 if err := proto.Unmarshal(task, &msg); err != nil { 195 return nil, err 196 } 197 return cat.extractTaskProto(c, &msg, realmID) 198 } 199 200 func (cat *catalog) GetAllProjects(c context.Context) ([]string, error) { 201 return cfgclient.ProjectsWithConfig(c, "${appid}.cfg") 202 } 203 204 func (cat *catalog) GetProjectJobs(c context.Context, projectID string) ([]Definition, error) { 205 c = logging.SetField(c, "project", projectID) 206 207 configSet, err := config.ProjectSet(projectID) 208 if err != nil { 209 return nil, err 210 } 211 var ( 212 cfg messages.ProjectConfig 213 meta config.Meta 214 ) 215 switch err := 
cfgclient.Get(c, configSet, "${appid}.cfg", cfgclient.ProtoText(&cfg), &meta); err { 216 case nil: 217 break 218 case config.ErrNoConfig: 219 // Project is not using scheduler. 220 return nil, nil 221 default: 222 return nil, err 223 } 224 225 revisionURL := meta.ViewURL 226 if revisionURL != "" { 227 logging.Infof(c, "Importing %s", revisionURL) 228 } 229 230 out := make([]Definition, 0, len(cfg.Job)+len(cfg.Trigger)) 231 disabledCount := 0 232 233 // Regular jobs, triggered jobs. 234 // TODO(tandrii): consider switching to validateProjectConfig because configs 235 // provided by luci-config are known to be valid and so there is little value 236 // in finding all valid jobs/triggers vs complexity of this function. 237 for _, job := range cfg.Job { 238 if job.Disabled { 239 disabledCount++ 240 continue 241 } 242 id := "(empty)" 243 if job.Id != "" { 244 id = job.Id 245 } 246 // Create a new validation context for each job/trigger since errors 247 // persist in context but we want to find all valid jobs/trigger. 248 ctx := &validation.Context{Context: c} 249 realmID := validateRealm(ctx, projectID, job.Realm) 250 task := cat.validateJobProto(ctx, job, realmID) 251 if err := ctx.Finalize(); err != nil { 252 logging.Errorf(c, "Invalid job definition %s: %s", id, err) 253 continue 254 } 255 packed, err := cat.marshalTask(task) 256 if err != nil { 257 logging.Errorf(c, "Failed to marshal the task: %s: %s", id, err) 258 continue 259 } 260 schedule := job.Schedule 261 if schedule == "" { 262 schedule = defaultJobSchedule 263 } 264 flavor := JobFlavorTriggered 265 if schedule != "triggered" { 266 flavor = JobFlavorPeriodic 267 } 268 out = append(out, Definition{ 269 JobID: fmt.Sprintf("%s/%s", projectID, job.Id), 270 RealmID: realmID, 271 Flavor: flavor, 272 Revision: meta.Revision, 273 RevisionURL: revisionURL, 274 Schedule: schedule, 275 Task: packed, 276 TriggeringPolicy: marshalTriggeringPolicy(job.TriggeringPolicy), 277 }) 278 } 279 280 // Triggering jobs. 
281 allJobIDs := getAllJobIDs(&cfg) 282 for _, trigger := range cfg.Trigger { 283 if trigger.Disabled { 284 disabledCount++ 285 continue 286 } 287 id := "(empty)" 288 if trigger.Id != "" { 289 id = trigger.Id 290 } 291 ctx := &validation.Context{Context: c} 292 realmID := validateRealm(ctx, projectID, trigger.Realm) 293 task := cat.validateTriggerProto(ctx, trigger, realmID, allJobIDs, false) 294 if err := ctx.Finalize(); err != nil { 295 logging.Errorf(c, "Invalid trigger definition %s: %s", id, err) 296 continue 297 } 298 packed, err := cat.marshalTask(task) 299 if err != nil { 300 logging.Errorf(c, "Failed to marshal the task: %s: %s", id, err) 301 continue 302 } 303 schedule := trigger.Schedule 304 if schedule == "" { 305 schedule = defaultTriggerSchedule 306 } 307 out = append(out, Definition{ 308 JobID: fmt.Sprintf("%s/%s", projectID, trigger.Id), 309 RealmID: realmID, 310 Flavor: JobFlavorTrigger, 311 Revision: meta.Revision, 312 RevisionURL: revisionURL, 313 Schedule: schedule, 314 Task: packed, 315 TriggeringPolicy: marshalTriggeringPolicy(trigger.TriggeringPolicy), 316 TriggeredJobIDs: normalizeTriggeredJobIDs(projectID, trigger), 317 }) 318 } 319 320 // Mark project as valid even if not all its jobs/triggers are. 321 return out, nil 322 } 323 324 func (cat *catalog) RegisterConfigRules(r *validation.RuleSet) { 325 r.Add("regex:projects/.*", "${appid}.cfg", cat.validateProjectConfig) 326 } 327 328 // validateProjectConfig validates the content of a project config file. 329 // 330 // Validation errors are returned via validation.Context. Returns an error if 331 // the validation itself fails for some reason. 332 func (cat *catalog) validateProjectConfig(ctx *validation.Context, configSet, path string, content []byte) error { 333 var cfg messages.ProjectConfig 334 err := proto.UnmarshalText(string(content), &cfg) 335 if err != nil { 336 ctx.Error(err) 337 return nil 338 } 339 340 // Get the project ID to be able to construct full realm ID. 
341 if !strings.HasPrefix(configSet, "projects/") { 342 return fmt.Errorf("expecting projects/... config set, got %q", configSet) 343 } 344 projectID := strings.TrimPrefix(configSet, "projects/") 345 346 knownIDs := stringset.New(len(cfg.Job) + len(cfg.Trigger)) 347 // Jobs. 348 ctx.Enter("job") 349 for _, job := range cfg.Job { 350 id := "(empty)" 351 if job.Id != "" { 352 id = job.Id 353 } 354 ctx.Enter(id) 355 if job.Id != "" && !knownIDs.Add(job.Id) { 356 ctx.Errorf("duplicate id %q", job.Id) 357 } 358 realmID := validateRealm(ctx, projectID, job.Realm) 359 cat.validateJobProto(ctx, job, realmID) 360 ctx.Exit() 361 } 362 ctx.Exit() 363 364 // Triggers. 365 ctx.Enter("trigger") 366 allJobIDs := getAllJobIDs(&cfg) 367 for _, trigger := range cfg.Trigger { 368 id := "(empty)" 369 if trigger.Id != "" { 370 id = trigger.Id 371 } 372 ctx.Enter(id) 373 if trigger.Id != "" && !knownIDs.Add(trigger.Id) { 374 ctx.Errorf("duplicate id %q", trigger.Id) 375 } 376 realmID := validateRealm(ctx, projectID, trigger.Realm) 377 cat.validateTriggerProto(ctx, trigger, realmID, allJobIDs, true) 378 ctx.Exit() 379 } 380 ctx.Exit() 381 382 return nil 383 } 384 385 // validateJobProto validates messages.Job protobuf message. 386 // 387 // It also extracts a task definition from it (e.g. SwarmingTask proto). 388 // Errors are returned via validation.Context. 389 func (cat *catalog) validateJobProto(ctx *validation.Context, j *messages.Job, realmID string) proto.Message { 390 validateJobID(ctx, j.Id) 391 if j.Schedule != "" { 392 if _, err := schedule.Parse(j.Schedule, 0); err != nil { 393 ctx.Errorf("%s is not valid value for 'schedule' field - %s", j.Schedule, err) 394 } 395 } 396 cat.validateTriggeringPolicy(ctx, j.TriggeringPolicy) 397 return cat.validateTaskProto(ctx, j, realmID) 398 } 399 400 // validateTriggerProto validates and filters messages.Trigger protobuf message. 401 // 402 // It also extracts a task definition from it. 
403 // 404 // Takes a set of all defined job IDs, to verify the trigger triggers only 405 // declared jobs. If failOnMissing is true, referencing an undefined job is 406 // reported as a validation error. Otherwise it is logged as a warning, and the 407 // reference to the undefined job is removed. 408 // 409 // Errors are returned via validation.Context. 410 func (cat *catalog) validateTriggerProto(ctx *validation.Context, t *messages.Trigger, realmID string, jobIDs stringset.Set, failOnMissing bool) proto.Message { 411 validateJobID(ctx, t.Id) 412 if t.Schedule != "" { 413 if _, err := schedule.Parse(t.Schedule, 0); err != nil { 414 ctx.Errorf("%s is not valid value for 'schedule' field - %s", t.Schedule, err) 415 } 416 } 417 filtered := make([]string, 0, len(t.Triggers)) 418 for _, id := range t.Triggers { 419 switch { 420 case jobIDs.Has(id): 421 filtered = append(filtered, id) 422 case failOnMissing: 423 ctx.Errorf("referencing unknown job %q in 'triggers' field", id) 424 default: 425 logging.Warningf(ctx.Context, 426 "Trigger %q references unknown job %q in 'triggers' field", t.Id, id) 427 } 428 } 429 t.Triggers = filtered 430 cat.validateTriggeringPolicy(ctx, t.TriggeringPolicy) 431 return cat.validateTaskProto(ctx, t, realmID) 432 } 433 434 func validateJobID(ctx *validation.Context, id string) { 435 if id == "" { 436 ctx.Errorf("missing 'id' field'") 437 } else if !jobIDRe.MatchString(id) { 438 ctx.Errorf("%q is not valid value for 'id' field, must match %q regexp", id, jobIDRe) 439 } 440 } 441 442 // validateTaskProto visits all fields of a proto and sniffs ones that correspond 443 // to task definitions (as registered via RegisterTaskManager). It ensures 444 // there's one and only one such field, validates it, and returns it. 445 // 446 // Errors are returned via validation.Context. 
447 func (cat *catalog) validateTaskProto(ctx *validation.Context, t proto.Message, realmID string) proto.Message { 448 var taskMsg proto.Message 449 450 v := reflect.ValueOf(t) 451 if v.Kind() != reflect.Ptr { 452 ctx.Errorf("expecting a pointer to proto message, got %T", t) 453 return nil 454 } 455 v = v.Elem() 456 457 for i := 0; i < v.NumField(); i++ { 458 // Skip unset, scalar and repeated fields and fields that do not correspond 459 // to registered task types. 460 field := v.Field(i) 461 if field.Kind() != reflect.Ptr || field.IsNil() || field.Elem().Kind() != reflect.Struct { 462 continue 463 } 464 fieldVal, _ := field.Interface().(proto.Message) 465 if fieldVal != nil && cat.GetTaskManager(fieldVal) != nil { 466 if taskMsg != nil { 467 ctx.Errorf("only one field with task definition must be set, at least two are given (%T and %T)", taskMsg, fieldVal) 468 return nil 469 } 470 taskMsg = fieldVal 471 } 472 } 473 474 if taskMsg == nil { 475 ctx.Errorf("can't find a recognized task definition inside %T", t) 476 return nil 477 } 478 479 taskMan := cat.GetTaskManager(taskMsg) 480 ctx.Enter("task") 481 taskMan.ValidateProtoMessage(ctx, taskMsg, realmID) 482 ctx.Exit() 483 if ctx.Finalize() != nil { 484 return nil 485 } 486 return taskMsg 487 } 488 489 // validateTriggeringPolicy validates TriggeringPolicy proto. 490 // 491 // Errors are returned via validation.Context. 492 func (cat *catalog) validateTriggeringPolicy(ctx *validation.Context, p *messages.TriggeringPolicy) { 493 if p != nil { 494 ctx.Enter("triggering_policy") 495 policy.ValidateDefinition(ctx, p) 496 ctx.Exit() 497 } 498 } 499 500 // extractTaskProto visits all fields of a proto and sniffs ones that correspond 501 // to task definitions (as registered via RegisterTaskManager). It ensures 502 // there's one and only one such field, validates it, and returns it. 
503 func (cat *catalog) extractTaskProto(c context.Context, t proto.Message, realmID string) (proto.Message, error) { 504 ctx := &validation.Context{Context: c} 505 return cat.validateTaskProto(ctx, t, realmID), ctx.Finalize() 506 } 507 508 // marshalTask takes a concrete task definition proto (e.g. SwarmingTask), wraps 509 // it into TaskDefWrapper proto and marshals this proto. The resulting blob can 510 // be sent to UnmarshalTask to get back the task definition proto. 511 func (cat *catalog) marshalTask(task proto.Message) ([]byte, error) { 512 if cat.GetTaskManager(task) == nil { 513 return nil, fmt.Errorf("unrecognized task definition type %T", task) 514 } 515 // Enumerate all fields of the wrapper until we find a matching type. 516 taskType := reflect.TypeOf(task) 517 wrapper := messages.TaskDefWrapper{} 518 v := reflect.ValueOf(&wrapper).Elem() 519 for i := 0; i < v.NumField(); i++ { 520 field := v.Field(i) 521 if field.Type() == taskType { 522 field.Set(reflect.ValueOf(task)) 523 return proto.Marshal(&wrapper) 524 } 525 } 526 // This can happen only if TaskDefWrapper wasn't updated when a new task type 527 // was added. This is a developer's mistake, not a config mistake. 528 return nil, fmt.Errorf("could not find a field of type %T in TaskDefWrapper", task) 529 } 530 531 /// Helper functions. 532 533 // validateRealm validates validity of `realm` configs field and returns the 534 // full realm name (perhaps "<project>:@legacy" if the config doesn't have 535 // a realm). 536 func validateRealm(ctx *validation.Context, projectID, realm string) string { 537 if realm != "" { 538 if err := realms.ValidateRealmName(realm, realms.ProjectScope); err != nil { 539 ctx.Errorf("bad 'realm' field - %s", err) 540 realm = "" 541 } 542 } 543 if realm == "" { 544 realm = realms.LegacyRealm 545 } 546 return realms.Join(projectID, realm) 547 } 548 549 // getAllJobIDs returns a set of IDs of regular jobs and triggering jobs. 550 // 551 // Doesn't filter out disabled jobs. 
IDs don't include project prefixes, e.g. 552 // they are just "job" instead of "project/job". 553 func getAllJobIDs(cfg *messages.ProjectConfig) stringset.Set { 554 out := stringset.New(len(cfg.Job) + len(cfg.Trigger)) 555 for _, job := range cfg.Job { 556 if job.Id != "" { 557 out.Add(job.Id) 558 } 559 } 560 for _, job := range cfg.Trigger { 561 if job.Id != "" { 562 out.Add(job.Id) 563 } 564 } 565 return out 566 } 567 568 // normalizeTriggeredJobIDs returns sorted list without duplicates. 569 func normalizeTriggeredJobIDs(projectID string, t *messages.Trigger) []string { 570 set := stringset.New(len(t.Triggers)) 571 for _, j := range t.Triggers { 572 set.Add(projectID + "/" + j) 573 } 574 out := set.ToSlice() 575 sort.Strings(out) 576 return out 577 } 578 579 // marshalTriggeringPolicy serializes TriggeringPolicy proto. 580 func marshalTriggeringPolicy(p *messages.TriggeringPolicy) []byte { 581 if p == nil { 582 return nil 583 } 584 out, err := proto.Marshal(p) 585 if err != nil { 586 panic(fmt.Errorf("failed to marshal TriggeringPolicy - %s", err)) 587 } 588 return out 589 }