go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/server/tq/dispatcher.go (about) 1 // Copyright 2020 The LUCI Authors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tq 16 17 import ( 18 "context" 19 "crypto/sha256" 20 "encoding/hex" 21 "encoding/json" 22 "fmt" 23 "io" 24 "net/http" 25 "regexp" 26 "strconv" 27 "strings" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 taskspb "cloud.google.com/go/cloudtasks/apiv2/cloudtaskspb" 33 "cloud.google.com/go/pubsub/apiv1/pubsubpb" 34 "github.com/GoogleCloudPlatform/opentelemetry-operations-go/propagator" 35 "go.opentelemetry.io/otel" 36 "go.opentelemetry.io/otel/attribute" 37 "go.opentelemetry.io/otel/codes" 38 "go.opentelemetry.io/otel/propagation" 39 "go.opentelemetry.io/otel/trace" 40 "google.golang.org/protobuf/encoding/protojson" 41 "google.golang.org/protobuf/proto" 42 "google.golang.org/protobuf/reflect/protoreflect" 43 "google.golang.org/protobuf/reflect/protoregistry" 44 "google.golang.org/protobuf/types/known/timestamppb" 45 46 "go.chromium.org/luci/common/clock" 47 "go.chromium.org/luci/common/data/rand/cryptorand" 48 "go.chromium.org/luci/common/errors" 49 "go.chromium.org/luci/common/logging" 50 "go.chromium.org/luci/common/retry/transient" 51 52 srvinternal "go.chromium.org/luci/server/internal" 53 "go.chromium.org/luci/server/router" 54 "go.chromium.org/luci/server/tq/internal" 55 "go.chromium.org/luci/server/tq/internal/db" 56 
"go.chromium.org/luci/server/tq/internal/metrics" 57 "go.chromium.org/luci/server/tq/internal/reminder" 58 ) 59 60 const ( 61 // TraceContextHeader is name of a header that contains the trace context of 62 // a span that produced the task. 63 // 64 // This header is read only by Dispatcher itself and exists mostly for FYI 65 // purposes to help in debugging issues. 66 TraceContextHeader = "X-Luci-Tq-Trace-Context" 67 68 // ExpectedETAHeader is the name of a header that indicates when the task was 69 // originally expected to run. 70 // 71 // One use of this header is for measuring latency of task completion. 72 ExpectedETAHeader = "X-Luci-Tq-Expected-ETA" 73 ) 74 75 // Dispatcher is a registry of task classes that knows how serialize and route 76 // them. 77 // 78 // There's rarely a need to manually create instances of Dispatcher outside of 79 // Dispatcher's own tests. You should generally use the global Default 80 // dispatcher which is configured by the tq server module. Methods of the 81 // default dispatcher (such as RegisterTaskClass and AddTask) are also available 82 // as lop-level functions, prefer to use them. 83 // 84 // The dispatcher needs a way to submit tasks to Cloud Tasks or Cloud PubSub. 85 // This is the job of Submitter. It lives in the context, so that it can be 86 // mocked in tests. In production contexts (setup when using the tq server 87 // module), the submitter is initialized to be CloudSubmitter. Tests will need 88 // to provide their own submitter (usually via TestingContext). 89 // 90 // TODO(vadimsh): Support consuming PubSub tasks, not just producing them. 91 type Dispatcher struct { 92 // Sweeper knows how to sweep transactional tasks reminders. 93 // 94 // If not set, Sweep calls will fail. 95 Sweeper Sweeper 96 97 // Namespace is a namespace for tasks that use DeduplicationKey. 98 // 99 // This is needed if two otherwise independent deployments share a single 100 // Cloud Tasks instance. 
101 // 102 // Used only for Cloud Tasks tasks. Doesn't affect PubSub tasks. 103 // 104 // Must be valid per ValidateNamespace. Default is "". 105 Namespace string 106 107 // GAE is true when running on Appengine. 108 // 109 // It alters how tasks are submitted and how incoming HTTP requests are 110 // authenticated. 111 GAE bool 112 113 // DisableAuth can be used to disable authentication on HTTP endpoints. 114 // 115 // This is useful when running in development mode on localhost or in tests. 116 DisableAuth bool 117 118 // CloudProject is ID of a project to use to construct full resource names. 119 // 120 // If not set, "default" will be used, which is pretty useless outside of 121 // tests. 122 CloudProject string 123 124 // CloudRegion is a ID of a region to use to construct full resource names. 125 // 126 // If not set, "default" will be used, which is pretty useless outside of 127 // tests. 128 CloudRegion string 129 130 // DefaultRoutingPrefix is a URL prefix for produced Cloud Tasks. 131 // 132 // Used only for Cloud Tasks tasks whose TaskClass doesn't provide some custom 133 // RoutingPrefix. Doesn't affect PubSub tasks. 134 // 135 // Default is "/internal/tasks/t/". It means generated Cloud Tasks by will 136 // have target URL "/internal/tasks/t/<generated-per-task-suffix>". 137 // 138 // A non-default value may be valuable if you host multiple dispatchers in 139 // a single process. This is a niche use case. 140 DefaultRoutingPrefix string 141 142 // DefaultTargetHost is a hostname to dispatch Cloud Tasks to by default. 143 // 144 // Individual Cloud Tasks task classes may override it with their own specific 145 // host. Doesn't affect PubSub tasks. 146 // 147 // On GAE defaults to the GAE application itself. Elsewhere defaults to 148 // "127.0.0.1", which is pretty useless outside of tests. 149 DefaultTargetHost string 150 151 // PushAs is a service account email to be used for generating OIDC tokens. 152 // 153 // Used only for Cloud Tasks tasks. 
Doesn't affect PubSub tasks. 154 // 155 // The service account must be within the same project. The server account 156 // must have "iam.serviceAccounts.actAs" permission for PushAs account. 157 // 158 // Optional on GAE when submitting tasks targeting GAE. Elsewhere defaults to 159 // "default@example.com", which is pretty useless outside of tests. 160 PushAs string 161 162 // AuthorizedPushers is a list of service account emails to accept pushes from 163 // in addition to PushAs. 164 // 165 // This is handy when migrating from one PushAs account to another, or when 166 // submitting tasks from one service, but handing them in another. 167 // 168 // Optional. 169 AuthorizedPushers []string 170 171 // SweepInitiationLaunchers is a list of service account emails authorized to 172 // launch sweeps via the exposed HTTP endpoint. 173 SweepInitiationLaunchers []string 174 175 mu sync.RWMutex 176 clsByID map[string]*taskClassImpl 177 clsByTyp map[protoreflect.MessageType]*taskClassImpl 178 } 179 180 // Sweeper knows how sweep transaction tasks reminders. 181 type Sweeper interface { 182 // sweep either performs the full sweep itself or schedules a task to do it. 183 sweep(ctx context.Context, s Submitter, reminderKeySpaceBytes int) error 184 } 185 186 // TaskKind describes how a task class interoperates with transactions. 187 type TaskKind int 188 189 const ( 190 // NonTransactional is a task kind for tasks that must be enqueued outside 191 // of a transaction. 192 NonTransactional TaskKind = 1 193 194 // Transactional is a task kind for tasks that must be enqueued only from 195 // a transaction. 196 Transactional TaskKind = 2 197 198 // FollowsContext is a task kind for tasks that are enqueue transactionally 199 // if the context is transactional or non-transactionally otherwise. 200 FollowsContext TaskKind = 3 201 ) 202 203 // TaskClass defines how to treat tasks of a specific proto message type. 
//
// It assigns some stable ID to a proto message kind and also defines how tasks
// of this kind should be submitted and routed.
//
// There are two backends for tasks: Cloud Tasks and Cloud PubSub. Which one to
// use for a particular task class is defined via mutually exclusive Queue and
// Topic fields.
//
// Refer to Google Cloud documentation for all semantic differences between
// Cloud Tasks and Cloud PubSub. One important difference is that Cloud PubSub
// tasks can't be deduplicated and thus the handler must expect to receive
// duplicates.
type TaskClass struct {
	// ID is unique identifier of this class of tasks.
	//
	// Must match `[a-zA-Z0-9_\-.]{1,100}`.
	//
	// It is used to decide how to deserialize and route the task. Changing IDs of
	// existing task classes is a disruptive operation, make sure the queue is
	// drained first. The dispatcher will reject Cloud Tasks with unrecognized
	// class IDs with HTTP 404 error (causing Cloud Tasks to retry them later).
	//
	// Required.
	ID string

	// Prototype identifies a proto message type of a task payload.
	//
	// Used for its type information only. In particular it is used by AddTask
	// to discover what TaskClass matches the added task. There should be
	// one-to-one correspondence between proto message types and task classes.
	//
	// It is safe to arbitrarily change this type as long as JSONPB encoding of
	// the previous type can be decoded using the new type. The dispatcher will
	// reject Cloud Tasks with bodies it can't deserialize with HTTP 400 error
	// (causing Cloud Tasks to retry them later).
	//
	// Required.
	Prototype proto.Message

	// Kind indicates whether the task requires a transaction to be enqueued.
	//
	// Note that using transactional tasks requires setting up a sweeper first
	// and importing a module that implements transactions support for the
	// database you are using. See "Transactional tasks" section above.
	//
	// Required. Pick one of NonTransactional, Transactional or FollowsContext.
	Kind TaskKind

	// Queue is a name of Cloud Tasks queue to use for the tasks.
	//
	// If set, indicates the task should be submitted through Cloud Tasks API.
	// The queue must exist already in this case. Can't be set together with
	// Topic.
	//
	// It can either be a short name like "default" or a full name like
	// "projects/<project>/locations/<region>/queues/<name>". If it is a full
	// name, it must have the above format or RegisterTaskClass would panic.
	//
	// If it is a short queue name, the full queue name will be constructed using
	// dispatcher's CloudProject and CloudRegion if they are set.
	Queue string

	// Topic is a name of PubSub topic to use for the tasks.
	//
	// If set, indicates the task should be submitted through Cloud PubSub API.
	// The topic must exist already in this case. Can't be set together with
	// Queue.
	//
	// It can either be a short name like "tasks" or a full name like
	// "projects/<project>/topics/<name>". If it is a full name, it must have the
	// above format or RegisterTaskClass would panic.
	Topic string

	// RoutingPrefix is a URL prefix for produced Cloud Tasks.
	//
	// Can only be used for Cloud Tasks task (i.e. only if Queue is also set).
	//
	// Default is dispatcher's DefaultRoutingPrefix which itself defaults to
	// "/internal/tasks/t/". It means generated Cloud Tasks by default will have
	// target URL "/internal/tasks/t/<generated-per-task-suffix>".
	//
	// A non-default value can be used to route Cloud Tasks tasks of a particular
	// class to particular processes, assuming the load balancer is configured
	// accordingly.
	RoutingPrefix string

	// TargetHost is a hostname to dispatch Cloud Tasks to.
	//
	// Can only be used for Cloud Tasks task (i.e. only if Queue is also set).
	//
	// If unset, will use dispatcher's DefaultTargetHost.
	TargetHost string

	// Quiet, if set, instructs the dispatcher not to log bodies of tasks.
	Quiet bool

	// QuietOnError, if set, instructs the dispatcher not to log errors returned
	// by the task handler.
	//
	// This is useful if task handler wants to do its own custom error logging.
	QuietOnError bool

	// Custom, if given, will be called to generate a custom payload from the
	// task's proto payload.
	//
	// Useful for interoperability with existing code that doesn't use dispatcher
	// or if the tasks are meant to be consumed in some custom way. You'll need to
	// setup the consumer manually, the Dispatcher doesn't know how to handle
	// tasks with custom payload.
	//
	// For Cloud Tasks tasks it is possible to customize HTTP method, relative
	// URI, headers and the request body this way. Other properties of the task
	// (such as the target host, the queue, the task name, authentication headers)
	// are not customizable.
	//
	// For PubSub tasks it is possible to customize only task's body and
	// attributes (via CustomPayload.Meta). Other fields in CustomPayload are
	// ignored.
	//
	// Receives the exact same context as passed to AddTask. If returns nil
	// result, the task will be submitted as usual.
	Custom func(ctx context.Context, m proto.Message) (*CustomPayload, error)

	// Handler will be called by the dispatcher to execute the tasks.
	//
	// The handler will receive the task's payload as a proto message of the exact
	// same type as the type of Prototype. See Handler doc for more info.
	//
	// Populating this field is equivalent to calling AttachHandler after
	// registering the class. It may be left nil if the current process just wants
	// to submit tasks, but not handle them. Some other process would need to
	// attach the handler then to be able to process tasks.
	//
	// The dispatcher will permanently fail tasks if it can't find a handler for
	// them.
	Handler Handler
}

// CustomPayload is returned by TaskClass's Custom, see its doc.
type CustomPayload struct {
	Method      string            // e.g. "GET" or "POST", Cloud Tasks only
	RelativeURI string            // an URI relative to the task's target host, Cloud Tasks only
	Meta        map[string]string // HTTP headers or message attributes to attach
	Body        []byte            // serialized body of the request
}

// TaskClassRef represents a TaskClass registered in a Dispatcher.
type TaskClassRef interface {
	// AttachHandler sets a handler which will be called by the dispatcher to
	// execute the tasks.
	//
	// The handler will receive the task's payload as a proto message of the exact
	// same type as the type of TaskClass's Prototype. See Handler doc for more
	// info.
	//
	// Panics if the class has already a handler attached.
	AttachHandler(h Handler)

	// Definition returns the original task class definition.
	Definition() TaskClass
}

// Task contains task body and metadata.
type Task struct {
	// Payload is task's payload as well as indicator of its class.
	//
	// Its type will be used to find a matching registered TaskClass which defines
	// how to route and handle the task.
	Payload proto.Message

	// DeduplicationKey is optional unique key used to derive name of the task.
	//
	// If a task of a given class with a given key has already been enqueued
	// recently (within ~1h), this task will be silently ignored.
	//
	// Because there is an extra lookup cost to identify duplicate task names,
	// enqueues of named tasks have significantly increased latency.
	//
	// Can be used only with Cloud Tasks tasks, since PubSub doesn't support
	// deduplication during enqueuing.
	//
	// Named tasks can only be used outside of transactions.
	DeduplicationKey string

	// Title is optional string that identifies the task in server logs.
	//
	// For Cloud Tasks it will also show up as a suffix in task handler URL. It
	// exists exclusively to simplify reading server logs. It serves no other
	// purpose! In particular, it is *not* a task name.
	//
	// Handlers won't ever see it. Pass all information through the payload.
	Title string

	// Delay specifies the duration the Cloud Tasks service must wait before
	// attempting to execute the task.
	//
	// Can be used only with Cloud Tasks tasks. Either Delay or ETA may be set,
	// but not both.
	Delay time.Duration

	// ETA specifies the earliest time a task may be executed.
	//
	// Can be used only with Cloud Tasks tasks. Either Delay or ETA may be set,
	// but not both.
	ETA time.Time
}

var (
	// Fatal is an error tag used to indicate that the handler wants the task to
	// be dropped due to unrecoverable failure.
	//
	// See Handler doc for more details.
	Fatal = errors.BoolTag{Key: errors.NewTagKey("the task should be dropped due to fatal failure")}

	// Ignore is an error tag used to indicate that the handler wants the task to
	// be dropped as no longer needed.
	//
	// See Handler doc for more details.
	Ignore = errors.BoolTag{Key: errors.NewTagKey("the task should be dropped as no longer needed")}
)

// Used to override HTTP status of some errors.
var (
	httpStatusKey = errors.NewTagKey("http status override")
	httpStatus404 = errors.TagValue{Key: httpStatusKey, Value: 404}
	httpStatus400 = errors.TagValue{Key: httpStatusKey, Value: 400}
)

// quietOnError is an error tag used to implement TaskClass.QuietOnError.
var quietOnError = errors.BoolTag{Key: errors.NewTagKey("QuietOnError")}

// Handler is called to handle one enqueued task.
//
// If Handler returns an error tagged with Ignore tag, the task will be dropped
// with HTTP 204 reply to Cloud Tasks. This is useful when task is no longer
// needed yet it's desirable to distinguish such a case from the normal case
// for monitoring purposes (e.g. in emitted logs or tsmon metrics).
//
// If Handler returns an error tagged with Fatal tag, the task will be dropped with
// HTTP 202 reply to Cloud Tasks. This should be rarely used.
//
// Otherwise, the task will be retried later (per the queue configuration) with
// HTTP 429 reply.
//
// Errors tagged with transient.Tag result in HTTP 500 replies. They also
// trigger a retry.
type Handler func(ctx context.Context, payload proto.Message) error

// ExecutionInfo is parsed from incoming task's metadata.
//
// It is accessible from within task handlers via TaskExecutionInfo(ctx).
type ExecutionInfo struct {
	// ExecutionCount is 0 on a first delivery attempt and increased by 1 for each
	// failed attempt.
	ExecutionCount int

	// TaskID is the ID of the task in the underlying backend service.
	//
	// For Cloud Task, it is `X-CloudTasks-TaskName`.
	// For PubSub, it is `messageID`.
	TaskID string

	taskRetryReason       string    // X-CloudTasks-TaskRetryReason
	taskPreviousResponse  string    // X-CloudTasks-TaskPreviousResponse
	submitterTraceContext string    // see TraceContextHeader
	expectedETA           time.Time // see ExpectedETAHeader
}

// executionInfoKey identifies *ExecutionInfo stored in a context.
//
// Note that the address of this variable (not its string value) is what is
// used as the context key, see TaskExecutionInfo.
var executionInfoKey = "go.chromium.org/luci/server/tq.ExecutionInfo"

// TaskExecutionInfo returns information about the currently executing task.
//
// Returns nil if called not from a task handler.
func TaskExecutionInfo(ctx context.Context) *ExecutionInfo {
	// A failed type assertion (including a missing value) leaves `info` nil.
	info, _ := ctx.Value(&executionInfoKey).(*ExecutionInfo)
	return info
}

// ValidateNamespace returns an error if `n` is not a valid namespace name.
//
// An empty string is a valid namespace (denoting the default namespace). Other
// valid namespaces must start with an ASCII letter or '_', contain only
// ASCII letters, digits or '_', and be at most 50 chars in length.
func ValidateNamespace(n string) error {
	if n != "" && !namespaceRe.MatchString(n) {
		return errors.New("must start with a letter or '_' and contain only letters, numbers and '_'")
	}
	return nil
}

// RegisterTaskClass tells the dispatcher how to route and handle tasks of some
// particular type.
//
// Intended to be called during process startup. Panics if there's already
// a registered task class with the same ID or Prototype.
499 func (d *Dispatcher) RegisterTaskClass(cls TaskClass) TaskClassRef { 500 if !taskClassIDRe.MatchString(cls.ID) { 501 panic(fmt.Sprintf("bad TaskClass ID %q", cls.ID)) 502 } 503 if cls.Prototype == nil { 504 panic("TaskClass Prototype must be set") 505 } 506 if cls.RoutingPrefix != "" && !strings.HasPrefix(cls.RoutingPrefix, "/") { 507 panic("TaskClass RoutingPrefix must start with /") 508 } 509 if cls.Kind == 0 { 510 panic("TaskClass Kind is required") 511 } 512 513 var backend taskBackend 514 switch { 515 case cls.Queue == "" && cls.Topic == "": 516 panic("TaskClass must have either Queue or Topic set") 517 case cls.Queue != "" && cls.Topic != "": 518 panic("TaskClass must have either Queue or Topic set, not both") 519 case cls.Queue != "": 520 backend = backendCloudTasks 521 if strings.ContainsRune(cls.Queue, '/') && !isValidQueue(cls.Queue) { 522 panic(fmt.Sprintf("not a valid full queue name %q", cls.Queue)) 523 } 524 case cls.Topic != "": 525 backend = backendPubSub 526 if strings.ContainsRune(cls.Topic, '/') && !isValidTopic(cls.Topic) { 527 panic(fmt.Sprintf("not a valid full topic name %q", cls.Topic)) 528 } 529 if cls.RoutingPrefix != "" { 530 panic("PubSub tasks do not support RoutingPrefix") 531 } 532 if cls.TargetHost != "" { 533 panic("PubSub tasks do not support TargetHost") 534 } 535 } 536 537 typ := cls.Prototype.ProtoReflect().Type() 538 539 d.mu.Lock() 540 defer d.mu.Unlock() 541 542 if d.clsByID == nil { 543 d.clsByID = make(map[string]*taskClassImpl, 1) 544 } 545 if d.clsByTyp == nil { 546 d.clsByTyp = make(map[protoreflect.MessageType]*taskClassImpl, 1) 547 } 548 549 if _, ok := d.clsByID[cls.ID]; ok { 550 panic(fmt.Sprintf("TaskClass with ID %q is already registered", cls.ID)) 551 } 552 if _, ok := d.clsByTyp[typ]; ok { 553 panic(fmt.Sprintf("TaskClass with Prototype %q is already registered", proto.MessageName(cls.Prototype))) 554 } 555 556 impl := &taskClassImpl{ 557 TaskClass: cls, 558 disp: d, 559 protoType: typ, 560 backend: backend, 
561 } 562 d.clsByID[cls.ID] = impl 563 d.clsByTyp[typ] = impl 564 return impl 565 } 566 567 // TaskClassRef returns a task class reference given its ID or nil if no such 568 // task class is registered. 569 func (d *Dispatcher) TaskClassRef(id string) TaskClassRef { 570 impl, _, _ := d.classByID(id) 571 if impl == nil { 572 return nil 573 } 574 return impl 575 } 576 577 // AddTask submits a task for later execution. 578 // 579 // The task payload type should match some registered TaskClass. Its ID will 580 // be used to identify the task class in the serialized Cloud Tasks task body. 581 // 582 // At some later time, in some other process, the dispatcher will invoke 583 // a handler attached to the corresponding TaskClass, based on its ID extracted 584 // from the task body. 585 // 586 // If the given context is transactional, inherits the transaction if allowed 587 // according to the TaskClass's Kind. A transactional task will eventually be 588 // submitted to Cloud Tasks if and only if the transaction successfully commits. 589 // This requires a sweeper instance to be running somewhere, see ModuleOptions. 590 // Note that a failure to submit the task to Cloud Tasks will not abort 591 // the transaction. 592 // 593 // If the task has a DeduplicationKey and there already was a recent task with 594 // the same TaskClass ID and DeduplicationKey, silently ignores the added task. 595 // This works only outside of transactions. Using DeduplicationKey with 596 // transactional tasks results in an error. 597 // 598 // Annotates retriable errors with transient.Tag. 599 func (d *Dispatcher) AddTask(ctx context.Context, task *Task) (err error) { 600 sub, err := currentSubmitter(ctx) 601 if err != nil { 602 return err 603 } 604 605 // Start a span annotated with the task's class. 
606 cls, _, err := d.classByMsg(task.Payload) 607 if err != nil { 608 return err 609 } 610 ctx, span := startSpan(ctx, "go.chromium.org/luci/server/tq.AddTask", map[string]string{ 611 "cr.dev.class": cls.ID, 612 "cr.dev.title": task.Title, 613 }) 614 defer func() { 615 if err != nil { 616 span.RecordError(err) 617 span.SetStatus(codes.Error, err.Error()) 618 } 619 span.End() 620 }() 621 622 // Prepare a raw request. We'll either submit it right away (for non-tx 623 // tasks), or attach it to a reminder and store in the DB for later handling. 624 payload, err := d.prepPayload(ctx, cls, task) 625 if err != nil { 626 return err 627 } 628 629 // Examine the context to see if we are inside a transaction. 630 txndb := db.TxnDB(ctx) 631 switch cls.Kind { 632 case FollowsContext: 633 // do nothing, will use `txndb` if it is non-nil 634 case Transactional: 635 if txndb == nil { 636 if !db.Configured() { 637 return errors.Reason("enqueuing of tasks %q requires transactions support, "+ 638 "see https://pkg.go.dev/go.chromium.org/luci/server/tq#hdr-Transactional_tasks", cls.ID).Err() 639 } 640 return errors.Reason("enqueuing of tasks %q must be done from inside a transaction", cls.ID).Err() 641 } 642 case NonTransactional: 643 if txndb != nil { 644 return errors.Reason("enqueuing of tasks %q must be done outside of a transaction", cls.ID).Err() 645 } 646 default: 647 panic(fmt.Sprintf("unrecognized TaskKind %v", cls.Kind)) 648 } 649 650 // If not inside a transaction, submit the task right away. 651 if txndb == nil { 652 return internal.Submit(ctx, sub, payload, internal.TxnPathNone) 653 } 654 655 // Named transactional tasks are not supported. 656 if task.DeduplicationKey != "" { 657 return errors.Reason("when enqueuing %q: can't use DeduplicationKey for a transactional task", cls.ID).Err() 658 } 659 660 // Otherwise transactionally commit a reminder and schedule a best-effort 661 // post-transaction enqueuing of the actual task. 
If it fails, the sweeper 662 // will eventually discover the reminder and enqueue the task. Note that this 663 // modifies `payload` with the reminder's ID. 664 r, err := d.attachToReminder(ctx, payload) 665 if err != nil { 666 return errors.Annotate(err, "failed to prepare a reminder").Err() 667 } 668 span.SetAttributes(attribute.String("cr.dev.reminder", r.ID)) 669 if err := txndb.SaveReminder(ctx, r); err != nil { 670 return errors.Annotate(err, "failed to store a transactional enqueue reminder").Err() 671 } 672 673 once := int32(0) 674 txndb.Defer(ctx, func(ctx context.Context) { 675 if count := atomic.AddInt32(&once, 1); count > 1 { 676 panic("transaction defer has already been called") 677 } 678 679 // `ctx` here is an outer non-transactional context. 680 var err error 681 ctx, span := startSpan(ctx, "go.chromium.org/luci/server/tq.PostTxn", map[string]string{ 682 "cr.dev.class": cls.ID, 683 "cr.dev.title": task.Title, 684 "cr.dev.reminder": r.ID, 685 }) 686 defer func() { 687 if err != nil { 688 span.RecordError(err) 689 span.SetStatus(codes.Error, err.Error()) 690 } 691 span.End() 692 }() 693 694 // Attempt to submit the task right away if the reminder is still fresh. 695 err = internal.ProcessReminderPostTxn(ctx, sub, txndb, r) 696 }) 697 698 return nil 699 } 700 701 // Sweep initiates a sweep of transactional tasks reminders. 702 // 703 // It must be called periodically (e.g. once per minute) somewhere in the fleet. 704 func (d *Dispatcher) Sweep(ctx context.Context) error { 705 if d.Sweeper == nil { 706 return errors.New("can't sweep: the Sweeper is not set") 707 } 708 sub, err := currentSubmitter(ctx) 709 if err != nil { 710 return err 711 } 712 return d.Sweeper.sweep(ctx, sub, reminderKeySpaceBytes) 713 } 714 715 // InstallTasksRoutes installs tasks HTTP routes under the given prefix. 716 // 717 // The exposed HTTP endpoints are called by Cloud Tasks service when it is time 718 // to execute a task. 
719 func (d *Dispatcher) InstallTasksRoutes(r *router.Router, prefix string) { 720 if prefix == "" { 721 prefix = "/internal/tasks/" 722 } else if !strings.HasPrefix(prefix, "/") { 723 panic("the prefix should start with /") 724 } 725 726 var mw router.MiddlewareChain 727 if !d.DisableAuth { 728 // Tasks are primarily submitted as `PushAs`, but we also accept all 729 // `AuthorizedPushers`. 730 pushers := append([]string{d.PushAs}, d.AuthorizedPushers...) 731 // On GAE X-Appengine-* headers can be trusted. Check we are being called 732 // by Cloud Tasks. We don't care by which queue exactly though. It is 733 // easier to move tasks between queues that way. 734 header := "" 735 if d.GAE { 736 header = "X-Appengine-Queuename" 737 } 738 mw = srvinternal.CloudAuthMiddleware(pushers, header, func(c *router.Context) { 739 metrics.ServerRejectedCount.Add(c.Request.Context(), 1, "auth") 740 }) 741 } 742 743 // We don't really care about the exact format of URLs. At the same time 744 // accepting all requests under InternalRoutingPrefix is necessary for 745 // compatibility with "appengine/tq" which used totally different URL format. 746 prefix = strings.TrimRight(prefix, "/") + "/*path" 747 r.POST(prefix, mw, func(c *router.Context) { 748 body, err := io.ReadAll(c.Request.Body) 749 if err != nil { 750 httpReply(c, 500, "Failed to read the request", err) 751 } else { 752 replyWithErr(c, d.handlePush(c.Request.Context(), body, parseHeaders(c.Request.Header))) 753 } 754 }) 755 } 756 757 // InstallSweepRoute installs a route that initiates a sweep. 758 // 759 // It may be called periodically (e.g. by Cloud Scheduler) to launch sweeps. 760 func (d *Dispatcher) InstallSweepRoute(r *router.Router, path string) { 761 var mw router.MiddlewareChain 762 if !d.DisableAuth { 763 // On GAE X-Appengine-* headers can be trusted. Check we are being called 764 // by Cloud Scheduler. 
765 header := "" 766 if d.GAE { 767 header = "X-Appengine-Cron" 768 } 769 mw = srvinternal.CloudAuthMiddleware(d.SweepInitiationLaunchers, header, nil) 770 } 771 772 r.GET(path, mw, func(c *router.Context) { 773 err := d.Sweep(c.Request.Context()) 774 if err != nil && !transient.Tag.In(err) { 775 err = Fatal.Apply(err) 776 } 777 replyWithErr(c, err) 778 }) 779 } 780 781 // ReportMetrics writes gauge metrics to tsmon. 782 // 783 // This should be called before tsmon flush. By reporting them only here, we 784 // can avoid hitting tsmon state every time some gauge value changes (which 785 // can happen very often). 786 func (d *Dispatcher) ReportMetrics(ctx context.Context) { 787 d.mu.RLock() 788 defer d.mu.RUnlock() 789 for id, cls := range d.clsByID { 790 metrics.ServerRunning.Set(ctx, int64(atomic.LoadInt32(&cls.running)), id) 791 } 792 } 793 794 //////////////////////////////////////////////////////////////////////////////// 795 796 var ( 797 // namespaceRe is used to validate Dispatcher.Namespace. 798 namespaceRe = regexp.MustCompile(`^[a-zA-Z_][0-9a-zA-Z_]{0,49}$`) 799 // taskClassIDRe is used to validate TaskClass.ID. 800 taskClassIDRe = regexp.MustCompile(`^[a-zA-Z0-9_\-.]{1,100}$`) 801 // tracer is used to report tracing spans. 802 tracer = otel.Tracer("go.chromium.org/luci/server/tq") 803 ) 804 805 const ( 806 // reminderKeySpaceBytes defines the space of the Reminder Ids. 807 // 808 // Because Reminder.ID is hex-encoded, actual length is doubled. 809 // 810 // 16 is chosen is big enough to avoid collisions in practice yet small enough 811 // for easier human-debugging of key ranges in queries. 812 reminderKeySpaceBytes = 16 813 814 // happyPathMaxDuration caps how long the happy path will be waited for. 815 happyPathMaxDuration = time.Minute 816 ) 817 818 // defaultHeaders returns headers to add to all submitted tasks. 
819 func defaultHeaders() map[string]string { 820 return map[string]string{"Content-Type": "application/json"} 821 } 822 823 // startSpan starts a new span and puts `meta` into its attributes and into 824 // logger fields. 825 func startSpan(ctx context.Context, title string, meta map[string]string) (context.Context, trace.Span) { 826 attrs := make([]attribute.KeyValue, 0, len(meta)) 827 fields := make(logging.Fields, len(meta)) 828 for k, v := range meta { 829 attrs = append(attrs, attribute.String(k, v)) 830 fields[k] = v 831 } 832 return tracer.Start(logging.SetFields(ctx, fields), title, trace.WithAttributes(attrs...)) 833 } 834 835 // prepPayload converts a task into a reminder.Payload. 836 func (d *Dispatcher) prepPayload(ctx context.Context, cls *taskClassImpl, t *Task) (*reminder.Payload, error) { 837 payload := &reminder.Payload{ 838 TaskClass: cls.ID, 839 Created: clock.Now(ctx), 840 Raw: t.Payload, // used on a happy path only (essentially only in tests) 841 } 842 var err error 843 switch cls.backend { 844 case backendCloudTasks: 845 payload.CreateTaskRequest, err = d.prepCloudTasksRequest(ctx, cls, t) 846 case backendPubSub: 847 payload.PublishRequest, err = d.prepPubSubRequest(ctx, cls, t) 848 default: 849 panic("impossible") 850 } 851 return payload, err 852 } 853 854 // prepCloudTasksRequest prepares Cloud Tasks request based on a *Task. 
855 func (d *Dispatcher) prepCloudTasksRequest(ctx context.Context, cls *taskClassImpl, t *Task) (*taskspb.CreateTaskRequest, error) { 856 queueID, err := d.queueID(cls.Queue) 857 if err != nil { 858 return nil, err 859 } 860 861 taskID := "" 862 if t.DeduplicationKey != "" { 863 taskID = queueID + "/tasks/" + cls.taskName(t, d.Namespace) 864 } 865 866 var scheduleTime *timestamppb.Timestamp 867 switch { 868 case !t.ETA.IsZero(): 869 if t.Delay != 0 { 870 return nil, errors.New("bad task: either ETA or Delay should be given, not both") 871 } 872 scheduleTime = timestamppb.New(t.ETA) 873 case t.Delay > 0: 874 scheduleTime = timestamppb.New(clock.Now(ctx).Add(t.Delay)) 875 } 876 877 // E.g. ("example.com", "/internal/tasks/t/<class>[/<title>]"). 878 // Note: relativeURI is discarded when using custom payload. 879 host, relativeURI, err := d.taskTarget(cls, t) 880 if err != nil { 881 return nil, err 882 } 883 884 var payload *CustomPayload 885 if cls.Custom != nil { 886 if payload, err = cls.Custom(ctx, t.Payload); err != nil { 887 return nil, err 888 } 889 } 890 if payload == nil { 891 // This is not really a "custom" payload, we are just reusing the struct. 892 payload = &CustomPayload{ 893 Method: "POST", 894 RelativeURI: relativeURI, 895 Meta: defaultHeaders(), 896 } 897 if payload.Body, err = cls.serialize(t); err != nil { 898 return nil, err 899 } 900 } else { 901 // We'll likely be mutating the headers below, make a copy. 902 meta := make(map[string]string, len(payload.Meta)) 903 for k, v := range payload.Meta { 904 meta[k] = v 905 } 906 payload.Meta = meta 907 } 908 909 // Inject tracing headers. 910 if traceCtx := traceContext(ctx); traceCtx != "" { 911 payload.Meta[TraceContextHeader] = traceCtx 912 } 913 914 // Inject magic header with ETA. 
915 if scheduleTime == nil { 916 payload.Meta[ExpectedETAHeader] = makeETAHeader(clock.Now(ctx)) 917 } else { 918 payload.Meta[ExpectedETAHeader] = makeETAHeader(scheduleTime.AsTime()) 919 } 920 921 method := taskspb.HttpMethod(taskspb.HttpMethod_value[payload.Method]) 922 if method == 0 { 923 return nil, errors.Reason("bad HTTP method %q", payload.Method).Err() 924 } 925 if !strings.HasPrefix(payload.RelativeURI, "/") { 926 return nil, errors.Reason("bad relative URI %q", payload.RelativeURI).Err() 927 } 928 929 // We need to populate one of Task.MessageType oneof alternatives. It has 930 // unexported type, so we have to instantiate the message now and then mutate 931 // it. 932 req := &taskspb.CreateTaskRequest{ 933 Parent: queueID, 934 Task: &taskspb.Task{ 935 Name: taskID, 936 ScheduleTime: scheduleTime, 937 // TODO(vadimsh): Make DispatchDeadline configurable? 938 }, 939 } 940 941 // On GAE we by default push to the GAE itself. 942 if host == "" && d.GAE { 943 req.Task.MessageType = &taskspb.Task_AppEngineHttpRequest{ 944 AppEngineHttpRequest: &taskspb.AppEngineHttpRequest{ 945 HttpMethod: method, 946 RelativeUri: payload.RelativeURI, 947 Headers: payload.Meta, 948 Body: payload.Body, 949 }, 950 } 951 return req, nil 952 } 953 954 // Elsewhere pick up some defaults mostly used only in tests. 
955 if host == "" { 956 host = "127.0.0.1" 957 } 958 pushAs := d.PushAs 959 if d.PushAs == "" { 960 pushAs = "default@example.com" 961 } 962 963 req.Task.MessageType = &taskspb.Task_HttpRequest{ 964 HttpRequest: &taskspb.HttpRequest{ 965 HttpMethod: method, 966 Url: "https://" + host + payload.RelativeURI, 967 Headers: payload.Meta, 968 Body: payload.Body, 969 AuthorizationHeader: &taskspb.HttpRequest_OidcToken{ 970 OidcToken: &taskspb.OidcToken{ 971 ServiceAccountEmail: pushAs, 972 }, 973 }, 974 }, 975 } 976 return req, nil 977 } 978 979 // makeETAHeader converts the given time into a decimal string representing 980 // the number of seconds since the Unix epoch with microsecond resolution. 981 func makeETAHeader(t time.Time) string { 982 mics := t.UnixNano() / 1000 983 return fmt.Sprintf("%d.%06d", mics/1e6, mics%1e6) 984 } 985 986 // queueID expands `id` into a full queue name if necessary. 987 func (d *Dispatcher) queueID(id string) (string, error) { 988 if strings.HasPrefix(id, "projects/") { 989 return id, nil // already full name 990 } 991 project := d.CloudProject 992 if project == "" { 993 project = "default" 994 } 995 region := d.CloudRegion 996 if region == "" { 997 region = "default" 998 } 999 return fmt.Sprintf("projects/%s/locations/%s/queues/%s", project, region, id), nil 1000 } 1001 1002 // taskTarget constructs a target URL for a task. 1003 // 1004 // `host` will be "" if no explicit host is configured anywhere. On GAE this 1005 // means "send the task back to the GAE app". On non-GAE this indicates to use 1006 // default "127.0.0.1" which is really usable only in tests. 
1007 func (d *Dispatcher) taskTarget(cls *taskClassImpl, t *Task) (host string, relativeURI string, err error) { 1008 if cls.TargetHost != "" { 1009 host = cls.TargetHost 1010 } else { 1011 host = d.DefaultTargetHost 1012 } 1013 1014 pfx := cls.RoutingPrefix 1015 if pfx == "" { 1016 pfx = d.DefaultRoutingPrefix 1017 } 1018 if pfx == "" { 1019 pfx = "/internal/tasks/t/" 1020 } 1021 1022 if !strings.HasPrefix(pfx, "/") { 1023 return "", "", errors.Reason("bad routing prefix %q: must start with /", pfx).Err() 1024 } 1025 if !strings.HasSuffix(pfx, "/") { 1026 pfx += "/" 1027 } 1028 1029 relativeURI = pfx + cls.ID 1030 switch { 1031 case t.Title == "": 1032 return 1033 case strings.ContainsRune(t.Title, ' '): 1034 return "", "", errors.Reason("bad task title %q: must not contain spaces", t.Title).Err() 1035 case len(relativeURI)+1+len(t.Title) > 2083: 1036 return "", "", errors.Reason("bad task title %q: too long;"+ 1037 " must not exceed 2083 characters when combined with %q", t.Title, relativeURI).Err() 1038 default: 1039 relativeURI += "/" + t.Title 1040 return 1041 } 1042 } 1043 1044 // prepPubSubRequest prepares Cloud PubSub request based on a *Task. 1045 func (d *Dispatcher) prepPubSubRequest(ctx context.Context, cls *taskClassImpl, t *Task) (*pubsubpb.PublishRequest, error) { 1046 if t.DeduplicationKey != "" { 1047 return nil, errors.New("can't use DeduplicationKey with PubSub tasks") 1048 } 1049 if t.Delay != 0 || !t.ETA.IsZero() { 1050 return nil, errors.New("can't use Delay or ETA with PubSub tasks") 1051 } 1052 1053 topicID, err := d.topicID(cls.Topic) 1054 if err != nil { 1055 return nil, err 1056 } 1057 1058 var payload *CustomPayload 1059 if cls.Custom != nil { 1060 if payload, err = cls.Custom(ctx, t.Payload); err != nil { 1061 return nil, err 1062 } 1063 } 1064 if payload == nil { 1065 // This is not really a "custom" payload, we are just reusing the struct. 
1066 payload = &CustomPayload{} 1067 if payload.Body, err = cls.serialize(t); err != nil { 1068 return nil, err 1069 } 1070 } 1071 1072 msg := &pubsubpb.PubsubMessage{ 1073 Data: payload.Body, 1074 Attributes: make(map[string]string, len(payload.Meta)+1), 1075 } 1076 for k, v := range payload.Meta { 1077 msg.Attributes[k] = v 1078 } 1079 if traceCtx := traceContext(ctx); traceCtx != "" { 1080 msg.Attributes[TraceContextHeader] = traceCtx 1081 } 1082 1083 return &pubsubpb.PublishRequest{ 1084 Topic: topicID, 1085 Messages: []*pubsubpb.PubsubMessage{msg}, 1086 }, nil 1087 } 1088 1089 // topicID expands `id` into a full topic name if necessary. 1090 func (d *Dispatcher) topicID(id string) (string, error) { 1091 if strings.HasPrefix(id, "projects/") { 1092 return id, nil // already full name 1093 } 1094 project := d.CloudProject 1095 if project == "" { 1096 project = "default" 1097 } 1098 return fmt.Sprintf("projects/%s/topics/%s", project, id), nil 1099 } 1100 1101 // attachToReminder makes a reminder and attaches the payload to it, thus 1102 // mutating the payload with reminder's ID. 1103 // 1104 // Returns the constructed reminder. It will eventually be stored in the 1105 // database to remind the sweeper to submit the task if best-effort 1106 // post-transactional submit fails. 1107 func (d *Dispatcher) attachToReminder(ctx context.Context, payload *reminder.Payload) (*reminder.Reminder, error) { 1108 buf := make([]byte, reminderKeySpaceBytes) 1109 if _, err := io.ReadFull(cryptorand.Get(ctx), buf); err != nil { 1110 return nil, errors.Annotate(err, "failed to get random bytes").Tag(transient.Tag).Err() 1111 } 1112 1113 // Note: length of the generated ID here is different from the length of IDs 1114 // we generate when using DeduplicationKey, so there'll be no collisions 1115 // between two different sorts of named tasks. 1116 r := &reminder.Reminder{ID: hex.EncodeToString(buf)} 1117 1118 // Bound FreshUntil to at most current context deadline. 
1119 r.FreshUntil = clock.Now(ctx).Add(happyPathMaxDuration) 1120 if deadline, ok := ctx.Deadline(); ok && r.FreshUntil.After(deadline) { 1121 // TODO(tandrii): allow propagating custom deadline for the async happy 1122 // path which won't bind the context's deadline. 1123 r.FreshUntil = deadline 1124 } 1125 r.FreshUntil = r.FreshUntil.UTC().Truncate(reminder.FreshUntilPrecision) 1126 1127 return r, r.AttachPayload(payload) 1128 } 1129 1130 // isValidQueue is true if q looks like "projects/.../locations/.../queues/...". 1131 func isValidQueue(q string) bool { 1132 chunks := strings.Split(q, "/") 1133 return len(chunks) == 6 && 1134 chunks[0] == "projects" && 1135 chunks[1] != "" && 1136 chunks[2] == "locations" && 1137 chunks[3] != "" && 1138 chunks[4] == "queues" && 1139 chunks[5] != "" 1140 } 1141 1142 // isValidTopic is true if t looks like "projects/.../topics/...". 1143 func isValidTopic(t string) bool { 1144 chunks := strings.Split(t, "/") 1145 return len(chunks) == 4 && 1146 chunks[0] == "projects" && 1147 chunks[1] != "" && 1148 chunks[2] == "topics" && 1149 chunks[3] != "" 1150 } 1151 1152 // handlePush handles one incoming task. 1153 // 1154 // Returns errors annotated in the same style as errors from Handler, see its 1155 // doc. 1156 func (d *Dispatcher) handlePush(ctx context.Context, body []byte, info ExecutionInfo) error { 1157 // See taskClassImpl.serialize(). 1158 env := envelope{} 1159 if err := json.Unmarshal(body, &env); err != nil { 1160 metrics.ServerRejectedCount.Add(ctx, 1, "bad_request") 1161 return errors.Annotate(err, "not a valid JSON body").Tag(httpStatus400).Err() 1162 } 1163 1164 // Find the matching registered task class. Newer tasks always have `class` 1165 // set. Older ones have `type` instead. 
1166 var cls *taskClassImpl 1167 var h Handler 1168 var err error 1169 if env.Class != "" { 1170 cls, h, err = d.classByID(env.Class) 1171 } else if env.Type != "" { 1172 cls, h, err = d.classByTyp(env.Type) 1173 } else { 1174 err = errors.Reason("malformed task body, no class").Tag(httpStatus400).Err() 1175 } 1176 if err != nil { 1177 logging.Debugf(ctx, "TQ: %s", body) 1178 metrics.ServerRejectedCount.Add(ctx, 1, "unknown_class") 1179 return err 1180 } 1181 1182 if !cls.Quiet { 1183 logging.Debugf(ctx, "TQ: %s", body) 1184 if info.submitterTraceContext != "" { 1185 logging.Debugf(ctx, "TQ: submitted at %s", info.submitterTraceContext) 1186 } 1187 if info.ExecutionCount != 0 { 1188 logging.Debugf(ctx, "TQ: this is a retry: %d previous attempt(s) already failed", info.ExecutionCount) 1189 if info.taskRetryReason != "" || info.taskPreviousResponse != "" { 1190 logging.Debugf(ctx, "TQ: the previous attempt failed with %s: %s", info.taskPreviousResponse, info.taskRetryReason) 1191 } 1192 } 1193 } 1194 1195 if h == nil { 1196 metrics.ServerRejectedCount.Add(ctx, 1, "no_handler") 1197 return errors.Reason("task class %q exists, but has no handler attached", cls.ID).Tag(httpStatus404).Err() 1198 } 1199 1200 msg, err := cls.deserialize(&env) 1201 if err != nil { 1202 metrics.ServerRejectedCount.Add(ctx, 1, "bad_payload") 1203 return errors.Annotate(err, "malformed body of task class %q", cls.ID).Tag(httpStatus400).Err() 1204 } 1205 1206 atomic.AddInt32(&cls.running, 1) 1207 defer atomic.AddInt32(&cls.running, -1) 1208 1209 ctx = context.WithValue(ctx, &executionInfoKey, &info) 1210 1211 start := clock.Now(ctx) 1212 err = h(ctx, msg) 1213 dur := clock.Now(ctx).Sub(start) 1214 1215 result := "OK" 1216 switch { 1217 case Fatal.In(err): 1218 result = "fatal" 1219 case Ignore.In(err): 1220 result = "ignore" 1221 case transient.Tag.In(err): 1222 result = "transient" 1223 case err != nil: 1224 result = "retry" 1225 } 1226 1227 retry := info.ExecutionCount 1228 if retry > 
metrics.MaxRetryFieldValue { 1229 retry = metrics.MaxRetryFieldValue 1230 } 1231 1232 metrics.ServerHandledCount.Add(ctx, 1, cls.ID, result, retry) 1233 metrics.ServerDurationMS.Add(ctx, float64(dur.Milliseconds()), cls.ID, result) 1234 if !info.expectedETA.IsZero() { 1235 latency := clock.Since(ctx, info.expectedETA).Milliseconds() 1236 if latency < 0 { 1237 latency = 0 1238 } 1239 metrics.ServerTaskLatency.Add(ctx, float64(latency), cls.ID, result, retry) 1240 } 1241 1242 if err != nil && cls.QuietOnError { 1243 err = quietOnError.Apply(err) 1244 } 1245 return err 1246 } 1247 1248 // classByID returns a task class given its ID or an error if no such class. 1249 // 1250 // Reads cls.Handler while under the lock as well, since it may be concurrently 1251 // modified by AttachHandler. 1252 func (d *Dispatcher) classByID(id string) (*taskClassImpl, Handler, error) { 1253 d.mu.RLock() 1254 defer d.mu.RUnlock() 1255 if cls := d.clsByID[id]; cls != nil { 1256 return cls, cls.Handler, nil 1257 } 1258 return nil, nil, errors.Reason("no task class with ID %q is registered", id).Tag(httpStatus404).Err() 1259 } 1260 1261 // classByMsg returns a task class given proto message or an error if no 1262 // such class. 1263 // 1264 // Reads cls.Handler while under the lock as well, since it may be concurrently 1265 // modified by AttachHandler. 1266 func (d *Dispatcher) classByMsg(msg proto.Message) (*taskClassImpl, Handler, error) { 1267 typ := msg.ProtoReflect().Type() 1268 d.mu.RLock() 1269 defer d.mu.RUnlock() 1270 if cls := d.clsByTyp[typ]; cls != nil { 1271 return cls, cls.Handler, nil 1272 } 1273 return nil, nil, errors.Reason("no task class matching type %q is registered", typ.Descriptor().FullName()).Tag(httpStatus404).Err() 1274 } 1275 1276 // classByTyp returns a task class given proto message name or an error if no 1277 // such class. 1278 // 1279 // Reads cls.Handler while under the lock as well, since it may be concurrently 1280 // modified by AttachHandler. 
1281 func (d *Dispatcher) classByTyp(typ string) (*taskClassImpl, Handler, error) { 1282 msgTyp, _ := protoregistry.GlobalTypes.FindMessageByName(protoreflect.FullName(typ)) 1283 if msgTyp == nil { 1284 return nil, nil, errors.Reason("no proto message %q is registered", typ).Tag(httpStatus404).Err() 1285 } 1286 d.mu.RLock() 1287 defer d.mu.RUnlock() 1288 if cls := d.clsByTyp[msgTyp]; cls != nil { 1289 return cls, cls.Handler, nil 1290 } 1291 return nil, nil, errors.Reason("no task class matching type %q is registered", typ).Tag(httpStatus404).Err() 1292 } 1293 1294 //////////////////////////////////////////////////////////////////////////////// 1295 1296 type taskBackend int 1297 1298 const ( 1299 backendCloudTasks taskBackend = 1 1300 backendPubSub taskBackend = 2 1301 ) 1302 1303 // taskClassImpl knows how to prepare and handle tasks of a particular class. 1304 type taskClassImpl struct { 1305 TaskClass 1306 disp *Dispatcher 1307 protoType protoreflect.MessageType 1308 backend taskBackend 1309 running int32 1310 } 1311 1312 // envelope is what we put into all Cloud Tasks. 1313 type envelope struct { 1314 Class string `json:"class,omitempty"` // ID of TaskClass 1315 Type string `json:"type,omitempty"` // for compatibility with appengine/tq 1316 Body *json.RawMessage `json:"body"` // JSONPB-serialized Task.Payload 1317 } 1318 1319 // AttachHandler implements TaskClassRef interface. 1320 func (cls *taskClassImpl) AttachHandler(h Handler) { 1321 cls.disp.mu.Lock() 1322 defer cls.disp.mu.Unlock() 1323 if h == nil { 1324 panic("The handler must not be nil") 1325 } 1326 if cls.Handler != nil { 1327 panic("The task class has a handler attached already") 1328 } 1329 cls.Handler = h 1330 } 1331 1332 // Definition implements TaskClassRef interface. 1333 func (cls *taskClassImpl) Definition() TaskClass { 1334 return cls.TaskClass 1335 } 1336 1337 // taskName returns a short ID for the task to use to dedup it. 
1338 func (cls *taskClassImpl) taskName(t *Task, namespace string) string { 1339 h := sha256.New() 1340 h.Write([]byte(namespace)) 1341 h.Write([]byte{0}) 1342 h.Write([]byte(cls.ID)) 1343 h.Write([]byte{0}) 1344 h.Write([]byte(t.DeduplicationKey)) 1345 return hex.EncodeToString(h.Sum(nil)) 1346 } 1347 1348 // serialize serializes the task body into JSONPB. 1349 func (cls *taskClassImpl) serialize(t *Task) ([]byte, error) { 1350 opts := protojson.MarshalOptions{ 1351 Indent: "\t", 1352 UseEnumNumbers: true, 1353 } 1354 blob, err := opts.Marshal(t.Payload) 1355 if err != nil { 1356 return nil, errors.Annotate(err, "failed to serialize %q", proto.MessageName(t.Payload)).Err() 1357 } 1358 raw := json.RawMessage(blob) 1359 return json.MarshalIndent(envelope{ 1360 Class: cls.ID, 1361 Type: string(proto.MessageName(t.Payload)), 1362 Body: &raw, 1363 }, "", "\t") 1364 } 1365 1366 // deserialize instantiates a proto message based on its serialized body. 1367 func (cls *taskClassImpl) deserialize(env *envelope) (proto.Message, error) { 1368 if env.Body == nil { 1369 return nil, errors.Reason("no body").Err() 1370 } 1371 opts := protojson.UnmarshalOptions{ 1372 DiscardUnknown: true, 1373 } 1374 msg := cls.protoType.New().Interface() 1375 if err := opts.Unmarshal(*env.Body, msg); err != nil { 1376 return nil, err 1377 } 1378 return msg, nil 1379 } 1380 1381 //////////////////////////////////////////////////////////////////////////////// 1382 1383 // traceContext returns a tracing context for TraceContextHeader header or "". 1384 // 1385 // We use Cloud Trace propagation format. 1386 func traceContext(ctx context.Context) string { 1387 span := trace.SpanContextFromContext(ctx) 1388 if !span.IsValid() { 1389 return "" 1390 } 1391 headers := make(propagation.MapCarrier, 1) 1392 (propagator.CloudTraceFormatPropagator{}).Inject(ctx, headers) 1393 return headers[propagator.TraceContextHeaderName] 1394 } 1395 1396 // parseHeaders examines headers of the incoming Cloud Tasks push. 
1397 func parseHeaders(h http.Header) ExecutionInfo { 1398 magicHeader := func(key string) string { 1399 if val := h.Get("X-AppEngine-" + key); val != "" { 1400 return val 1401 } 1402 return h.Get("X-CloudTasks-" + key) 1403 } 1404 1405 var execCount int64 1406 if count := magicHeader("TaskExecutionCount"); count != "" { 1407 execCount, _ = strconv.ParseInt(count, 10, 32) 1408 } 1409 1410 var eta time.Time 1411 if s := h.Get(ExpectedETAHeader); s != "" { 1412 // Expected format is "<seconds(int64)>.<microseconds(int32)>". 1413 parts := strings.Split(s, ".") 1414 if len(parts) == 2 { 1415 secs, errS := strconv.ParseInt(parts[0], 10, 64) 1416 micros, errM := strconv.ParseInt(parts[1], 10, 32) 1417 if errS == nil && errM == nil { 1418 eta = time.Unix(secs, micros*1000) 1419 } 1420 } 1421 } 1422 1423 return ExecutionInfo{ 1424 ExecutionCount: int(execCount), 1425 TaskID: magicHeader("TaskName"), 1426 taskRetryReason: magicHeader("TaskRetryReason"), 1427 taskPreviousResponse: magicHeader("TaskPreviousResponse"), 1428 submitterTraceContext: h.Get(TraceContextHeader), 1429 expectedETA: eta, 1430 } 1431 } 1432 1433 // httpReply writes and logs HTTP response. 1434 // 1435 // `msg` is sent to the caller as is. `err` is logged, but not sent. 1436 func httpReply(c *router.Context, code int, msg string, err error) { 1437 if err != nil && !quietOnError.In(err) { 1438 if Ignore.In(err) { 1439 logging.Warningf(c.Request.Context(), "server/tq task %s: %s", msg, err) 1440 } else { 1441 logging.Errorf(c.Request.Context(), "server/tq task %s: %s", msg, err) 1442 } 1443 } 1444 if code == http.StatusNoContent { 1445 msg = "" 1446 } 1447 http.Error(c.Writer, msg, code) 1448 } 1449 1450 // replyWithErr calls httpReply deriving status code from `err`. 
1451 func replyWithErr(c *router.Context, err error) { 1452 switch { 1453 case err == nil: 1454 httpReply(c, http.StatusOK /* 200 */, "OK", nil) 1455 case Fatal.In(err): 1456 httpReply(c, http.StatusAccepted /* 202 */, "fatal error", err) 1457 case Ignore.In(err): 1458 httpReply(c, http.StatusNoContent /* 204 */, "ignored error", err) 1459 case transient.Tag.In(err): 1460 httpReply(c, http.StatusInternalServerError /* 500 */, "transient error", err) 1461 default: 1462 status := http.StatusTooManyRequests 1463 if code, ok := errors.TagValueIn(httpStatusKey, err); ok { 1464 status = code.(int) 1465 } 1466 httpReply(c, status, "error", err) 1467 } 1468 }