// Copyright 2023 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package adt

import (
	"math/bits"
)

// The CUE scheduler schedules tasks for evaluation.
//
// A task is a computation unit associated with a single node. Each task may
// depend on knowing certain properties of one or more fields, namely:
//
//   - whether the field exists
//   - the scalar value of a field, if any
//   - the set of all conjuncts
//   - the set of all sub fields
//   - the recursively evaluated value
//
// Each task, in turn, may mark itself as providing knowledge about one or more
// of these properties. If it is not known upfront whether a task may contribute
// to a certain property, it must mark itself as (potentially) contributing to
// this property.
//
//
// DEPENDENCY GRAPH
//
// A task may depend on zero or more fields, including the field for which it
// is defined. The graph of all dependencies is defined as follows:
//
//   - Each task and each <field, property> pair is a node in the graph.
//   - A task T for field F that (possibly) computes property P for F is
//     represented by an edge from <F, P> to T.
//   - A task T for field F that depends on property P of field G is represented
//     by an edge from <G, P> to T.
48 // 49 // It is an evaluation cycle for a task T if there is a path from any task T to 50 // itself in the dependency graph. Processing will stop in the even of such a 51 // cycle. In such case, the scheduler will commence an unblocking mechanism. 52 // 53 // As a general rule, once a node is detected to be blocking, it may no longer 54 // become more specific. In other words, it is "frozen". 55 // The unblocking consists of two phases: the scheduler will first freeze and 56 // unblock all blocked nodes for the properties marked as autoUnblock-ing in 57 // taskContext. Subsequently all tasks that are unblocked by this will run. 58 // In the next phase all remaining tasks are unblocked. 59 // See taskContext.autoUnblock for more information. 60 // 61 // Note that some tasks, like references, may depend on other fields without 62 // requiring a certain property. These do not count as dependencies. 63 64 // A taskContext manages the task memory and task stack. 65 // It is typically associated with an OpContext. 66 type taskContext struct { 67 // stack tracks the current execution of tasks. This is a stack as tasks 68 // may trigger the evaluation of other tasks to complete. 69 stack []*task 70 71 // blocking lists all tasks that were blocked during a round of evaluation. 72 // Evaluation finalized one node at a time, which includes the evaluation 73 // of all nodes necessary to evaluate that node. Any task that is blocked 74 // during such a round of evaluation is recorded here. Any mutual cycles 75 // will result in unresolved tasks. At the end of such a round, computation 76 // can be frozen and the tasks unblocked. 77 blocking []*task 78 79 // counterMask marks which conditions use counters. Other conditions are 80 // handled by signals only. 81 counterMask condition 82 83 // autoUnblock marks the flags that get unblocked automatically when there 84 // is a deadlock between nodes. 
These are properties that may become 85 // meaningful once it is known that a value may not become more specific. 86 // An example of this is the property "scalar". If something is not a scalar 87 // yet, and it is known that the value may never become more specific, it is 88 // known that this value is never will become a scalar, thus effectively 89 // making it known. 90 autoUnblock condition 91 92 // This is called upon completion of states, allowing other states to be 93 // updated atomically. 94 complete func(s *scheduler) condition 95 } 96 97 func (p *taskContext) current() *task { 98 if len(p.stack) == 0 { 99 return nil 100 } 101 return p.stack[len(p.stack)-1] 102 } 103 104 func (p *taskContext) pushTask(t *task) { 105 p.stack = append(p.stack, t) 106 } 107 108 func (p *taskContext) popTask() { 109 p.stack = p.stack[:len(p.stack)-1] 110 } 111 112 func (p *taskContext) newTask() *task { 113 // TODO: allocate from pool. 114 return &task{} 115 } 116 117 type taskState uint8 118 119 const ( 120 taskREADY taskState = iota 121 122 taskRUNNING // processing conjunct(s) 123 taskWAITING // task is blocked on a property of an arc to hold 124 taskSUCCESS 125 taskFAILED 126 taskCANCELLED 127 ) 128 129 type schedState uint8 130 131 const ( 132 schedREADY schedState = iota 133 134 schedRUNNING // processing conjunct(s) 135 schedFINALIZING // all tasks completed, run new tasks immediately 136 schedSUCCESS 137 schedFAILED 138 ) 139 140 func (s schedState) done() bool { return s >= schedSUCCESS } 141 142 func (s taskState) String() string { 143 switch s { 144 case taskREADY: 145 return "READY" 146 case taskRUNNING: 147 return "RUNNING" 148 case taskWAITING: 149 return "WAITING" 150 case taskSUCCESS: 151 return "SUCCESS" 152 case taskFAILED: 153 return "FAILED" 154 default: 155 return "UNKNOWN" 156 } 157 } 158 159 func (s schedState) String() string { 160 switch s { 161 case schedREADY: 162 return "READY" 163 case schedRUNNING: 164 return "RUNNING" 165 case schedFINALIZING: 
166 return "FINALIZING" 167 case schedSUCCESS: 168 return "SUCCESS" 169 case schedFAILED: 170 return "FAILED" 171 default: 172 return "UNKNOWN" 173 } 174 } 175 176 // runMode indicates how to proceed after a condition could not be met. 177 type runMode uint8 178 179 //go:generate go run golang.org/x/tools/cmd/stringer -type=runMode 180 181 const ( 182 // ignore indicates that the new evaluator should not do any processing. 183 // This is mostly used in the transition from old to new evaluator and 184 // should probably eventually be removed. 185 ignore runMode = 1 + iota 186 187 // attemptOnly indicates that execution should continue even if the 188 // condition is not met. 189 attemptOnly 190 191 // yield means that execution should be yielded if the condition is not met. 192 // That is, the task is marked as a dependency and control is returned to 193 // the runloop. The task will resume once the dependency is met. 194 yield 195 196 // finalize means that uncompleted tasks should be turned into errors to 197 // complete the evaluation of a Vertex. 198 finalize 199 ) 200 201 // condition is a bit mask of states that a task may depend on. 202 // 203 // There are generally two types of states: states that are met if all tasks 204 // that contribute to that state are completed (counter states), and states that 205 // are met if some global set of conditions are met. 206 type condition uint16 207 208 const ( 209 // allKnown indicates that all possible states are completed. 210 allKnown condition = 0x7fff 211 212 // neverKnown is a special condition that is never met. It can be used to 213 // mark a task as impossible to complete. 214 neverKnown condition = 0x8000 215 ) 216 217 func (c condition) meets(x condition) bool { 218 return c&x == x 219 } 220 221 const numCompletionStates = 10 // TODO: make this configurable 222 223 // A scheduler represents the set of outstanding tasks for a node. 
224 type scheduler struct { 225 ctx *OpContext 226 node *nodeContext 227 228 state schedState 229 230 // completed is bit set of completed states. 231 completed condition 232 233 // needs specifies all the states needed to complete tasks in this scheduler. 234 needs condition 235 236 // provided specifies all the states that are provided by tasks added 237 // to this scheduler. 238 provided condition // TODO: rename to "provides"? To be consistent with "needs". 239 240 // frozen indicates all states that are frozen. These bits should be checked 241 // before making a node more specific. 242 // TODO: do we need a separate field for this, or can we use completed? 243 frozen condition 244 245 // isFrozen indicates if freeze was called explicitly. 246 // 247 // TODO: rename to isExplicitlyFrozen if it turns out we need both frozen 248 // and isFrozen. We probably do not. Check once the implementation of the 249 // new evaluator is complete. 250 isFrozen bool 251 252 // counters keeps track of the number of uncompleted tasks that are 253 // outstanding for each of the possible conditions. A state is 254 // considered completed if the corresponding counter reaches zero. 255 counters [numCompletionStates]int 256 257 // tasks lists all tasks that were scheduled for this scheduler. 258 // The list only contains tasks that are associated with this node. 259 // TODO: rename to queue and taskPos to nextQueueIndex. 260 tasks []*task 261 taskPos int 262 263 // blocking is a list of tasks that are blocked on the completion of 264 // the indicate conditions. This can hold tasks from other nodes or tasks 265 // originating from this node itself. 266 blocking []*task 267 } 268 269 func (s *scheduler) clear() { 270 // TODO(perf): free tasks into task pool 271 272 // Any tasks blocked on this scheduler are unblocked once the scheduler is cleared. 273 // Otherwise they might signal a cleared scheduler, which can panic. 
274 // 275 // TODO(mvdan,mpvl): In principle, all blocks should have been removed when a scheduler 276 // is cleared. Perhaps this can happen when the scheduler is stopped prematurely. 277 // For now, this solution seems to work OK. 278 for _, t := range s.blocking { 279 t.blockedOn = nil 280 t.blockCondition = neverKnown 281 } 282 283 *s = scheduler{ 284 ctx: s.ctx, 285 tasks: s.tasks[:0], 286 blocking: s.blocking[:0], 287 } 288 } 289 290 // cloneInto initializes the state of dst to be the same as s. 291 // 292 // NOTE: this is deliberately not a pointer receiver: this approach allows 293 // cloning s into dst while preserving the buffers of dst and not having to 294 // explicitly clone any non-buffer fields. 295 func (s scheduler) cloneInto(dst *scheduler) { 296 s.tasks = append(dst.tasks, s.tasks...) 297 s.blocking = append(dst.blocking, s.blocking...) 298 299 *dst = s 300 } 301 302 // incrementCounts adds the counters for each condition. 303 // See also decrementCounts. 304 func (s *scheduler) incrementCounts(x condition) { 305 x &= s.ctx.counterMask 306 307 for { 308 n := bits.TrailingZeros16(uint16(x)) 309 if n == 16 { 310 break 311 } 312 bit := condition(1 << n) 313 x &^= bit 314 315 s.counters[n]++ 316 } 317 } 318 319 // decrementCounts decrements the counters for each condition. If a counter for 320 // a condition reaches zero, it means that condition is met and all blocking 321 // tasks depending on that state can be run. 322 func (s *scheduler) decrementCounts(x condition) { 323 x &= s.ctx.counterMask 324 325 var completed condition 326 for { 327 n := bits.TrailingZeros16(uint16(x)) 328 if n == 16 { 329 break 330 } 331 bit := condition(1 << n) 332 x &^= bit 333 334 s.counters[n]-- 335 if s.counters[n] == 0 { 336 completed |= bit 337 } 338 } 339 340 s.signal(completed) 341 } 342 343 // finalize runs all tasks and signals that the scheduler is done upon 344 // completion for the given signals. 
345 func (s *scheduler) finalize(completed condition) { 346 // Do not panic on cycle detection. Instead, post-process the tasks 347 // by collecting and marking cycle errors. 348 s.process(allKnown, finalize) 349 s.signal(completed) 350 if s.state == schedRUNNING { 351 if s.meets(s.needs) { 352 s.state = schedSUCCESS 353 } else { 354 s.state = schedFAILED 355 } 356 } 357 } 358 359 // process advances a scheduler by executing tasks that are required. 360 // Depending on mode, if the scheduler is blocked on a condition, it will 361 // forcefully unblock the tasks. 362 func (s *scheduler) process(needs condition, mode runMode) bool { 363 c := s.ctx 364 365 // Update completions, if necessary. 366 if f := c.taskContext.complete; f != nil { 367 s.signal(f(s)) 368 } 369 370 if s.ctx.LogEval > 0 && len(s.tasks) > 0 { 371 372 if v := s.tasks[0].node.node; v != nil { 373 c.Logf(v, "PROCESS(%v)", mode) 374 } 375 } 376 377 // hasRunning := false 378 s.state = schedRUNNING 379 // Use variable instead of range, because s.tasks may grow during processes. 380 381 processNextTask: 382 for s.taskPos < len(s.tasks) { 383 t := s.tasks[s.taskPos] 384 s.taskPos++ 385 386 if t.state != taskREADY { 387 // TODO(perf): Figure out how it is possible to reach this and if we 388 // should optimize. 389 // panic("task not READY") 390 } 391 392 switch { 393 case t.state == taskRUNNING: 394 // TODO: we could store the current referring node that caused 395 // the cycle and then proceed up the stack to mark all tasks 396 // that re involved in the cycle as well. Further, we could 397 // mark the cycle as a generation counter, instead of a boolean 398 // value, so that it will be trivial reconstruct a detailed cycle 399 // report when generating an error message. 
400 401 case t.state != taskREADY: 402 403 default: 404 runTask(t, mode) 405 } 406 } 407 408 switch mode { 409 default: // case attemptOnly: 410 return s.meets(needs) 411 412 case yield: 413 if s.meets(needs) { 414 return true 415 } 416 // This can happen in some cases. We "promote" to finalization if this 417 // was not triggered by a task. 418 if t := c.current(); t != nil { 419 t.waitFor(s, needs) 420 s.yield() 421 } 422 423 case finalize: 424 // remainder of function 425 } 426 427 unblockTasks: 428 // Unblocking proceeds in three stages. Each of the stages may cause 429 // formerly blocked tasks to become unblocked. To ensure that unblocking 430 // tasks do not happen in an order-dependent way, we want to ensure that we 431 // have unblocked all tasks from one phase, before commencing to the next. 432 433 // The types of the node can no longer be altered. We can unblock the 434 // relevant states first to finish up any tasks that were just waiting for 435 // types, such as lists. 436 for _, t := range c.blocking { 437 if t.blockedOn != nil { 438 t.blockedOn.signal(s.ctx.autoUnblock) 439 } 440 } 441 442 // Mark all remaining conditions as "frozen" before actually running the 443 // tasks. Doing this before running the remaining tasks ensures that we get 444 // the same errors, regardless of the order in which tasks are unblocked. 445 for _, t := range c.blocking { 446 if t.blockedOn != nil { 447 t.blockedOn.freeze(t.blockCondition) 448 t.unblocked = true 449 } 450 } 451 452 // Run the remaining blocked tasks. 453 numBlocked := len(c.blocking) 454 for _, t := range c.blocking { 455 if t.blockedOn != nil && !t.defunct { 456 n, cond := t.blockedOn, t.blockCondition 457 t.blockedOn, t.blockCondition = nil, neverKnown 458 n.signal(cond) 459 runTask(t, attemptOnly) // Does this need to be final? Probably not if we do a fixed point computation. 460 } 461 } 462 463 // The running of tasks above may result in more tasks being added to the 464 // queue. 
Process these first before continuing. 465 if s.taskPos < len(s.tasks) { 466 goto processNextTask 467 } 468 469 // Similarly, the running of tasks may result in more tasks being blocked. 470 // Ensure we processed them all. 471 if numBlocked < len(c.blocking) { 472 goto unblockTasks 473 } 474 475 c.blocking = c.blocking[:0] 476 477 return true 478 } 479 480 // yield causes the current task to be suspended until the given conditions 481 // are met. 482 func (s *scheduler) yield() { 483 panic(s) 484 } 485 486 // meets reports whether all needed completion states in s are met. 487 func (s *scheduler) meets(needs condition) bool { 488 s.node.assertInitialized() 489 490 if s.state != schedREADY { 491 // Automatically qualify for conditions that are not provided by this node. 492 // NOTE: in the evaluator this is generally not the case, as tasks my still 493 // be added during evaluation until all ancestor nodes are evaluated. This 494 // can be encoded by the scheduler by adding a state "ancestorsCompleted". 495 // which all other conditions depend on. 496 needs &= s.provided 497 } 498 return s.completed&needs == needs 499 } 500 501 // blockOn marks a state as uncompleted. 502 func (s *scheduler) blockOn(cond condition) { 503 // TODO: should we allow this to be used for counters states? 504 // if s.ctx.counterMask&cond != 0 { 505 // panic("cannot block on counter states") 506 // } 507 s.provided |= cond 508 } 509 510 // signal causes tasks that are blocking on the given completion to be run 511 // for this scheduler. Tasks are only run if the completion state was not 512 // already reached before. 513 func (s *scheduler) signal(completed condition) { 514 was := s.completed 515 s.completed |= completed 516 if was == s.completed { 517 s.frozen |= completed 518 return 519 } 520 521 s.completed |= s.ctx.complete(s) 522 s.frozen |= completed 523 524 // TODO: this could benefit from a linked list where tasks are removed 525 // from the list before being run. 
526 for _, t := range s.blocking { 527 if t.blockCondition&s.completed == t.blockCondition { 528 // Prevent task from running again. 529 t.blockCondition = neverKnown 530 t.blockedOn = nil 531 runTask(t, attemptOnly) // TODO: does this ever need to be final? 532 // TODO: should only be run once for each blocking queue. 533 } 534 } 535 } 536 537 // freeze indicates no more tasks satisfying the given condition may be added. 538 // It is also used to freeze certain elements of the task. 539 func (s *scheduler) freeze(c condition) { 540 s.frozen |= c 541 s.completed |= c 542 s.ctx.complete(s) 543 s.isFrozen = true 544 } 545 546 // signalDoneAdding signals that no more tasks will be added to this scheduler. 547 // This allows unblocking tasks that depend on states for which there are no 548 // tasks in this scheduler. 549 func (s *scheduler) signalDoneAdding() { 550 s.signal(s.needs &^ s.provided) 551 } 552 553 // runner defines properties of a type of task, including a function to run it. 554 type runner struct { 555 name string 556 557 // The mode argument indicates whether the scheduler 558 // of this field is finalizing. It is passed as a component of the required 559 // state to various evaluation methods. 560 f func(ctx *OpContext, t *task, mode runMode) 561 562 // completes indicates which states this tasks contributes to. 563 completes condition 564 565 // needes indicates which states of the corresponding node need to be 566 // completed before this task can be run. 567 needs condition 568 569 // a lower priority indicates a preference to run a task before tasks 570 // of a higher priority. 571 priority int8 572 } 573 574 type task struct { 575 state taskState 576 577 completes condition // cycles may alter the completion mask. TODO: is this still true? 578 579 // defunct indicates that this task is no longer relevant. This is the case 580 // when it has not yet been run before it is copied into a disjunction. 
581 defunct bool 582 583 // unblocked indicates this task was unblocked by force. 584 unblocked bool 585 586 // The following fields indicate what this task is blocked on, including 587 // the scheduler, which conditions it is blocking on, and the stack of 588 // tasks executed leading to the block. 589 590 // blockedOn cannot be needed in a clone for a disjunct, because as long 591 // as the disjunct is unresolved, its value cannot contribute to another 592 // scheduler. 593 blockedOn *scheduler 594 blockCondition condition 595 // blockStack []*task // TODO: use; for error reporting. 596 597 err *Bottom 598 599 // The node from which this conjunct originates. 600 node *nodeContext 601 602 run *runner // TODO: use struct to make debugging easier? 603 604 // The Conjunct processed by this task. 605 env *Environment 606 id CloseInfo // TODO: rename to closeInfo? 607 x Node // The conjunct Expression or Value. 608 609 // For Comprehensions: 610 comp *envComprehension 611 leaf *Comprehension 612 } 613 614 func (s *scheduler) insertTask(t *task) { 615 completes := t.run.completes 616 needs := t.run.needs 617 618 s.needs |= needs 619 s.provided |= completes 620 621 if needs&completes != 0 { 622 panic("task depends on its own completion") 623 } 624 t.completes = completes 625 626 if s.state == schedFINALIZING { 627 runTask(t, finalize) 628 return 629 } 630 631 s.incrementCounts(completes) 632 s.tasks = append(s.tasks, t) 633 634 // Sort by priority. This code is optimized for the case that there are 635 // very few tasks with higher priority. This loop will almost always 636 // terminate within 0 or 1 iterations. 
637 for i := len(s.tasks) - 1; i > s.taskPos; i-- { 638 if s.tasks[i-1].run.priority <= s.tasks[i].run.priority { 639 break 640 } 641 s.tasks[i], s.tasks[i-1] = s.tasks[i-1], s.tasks[i] 642 } 643 644 if s.completed&needs != needs { 645 t.waitFor(s, needs) 646 } 647 } 648 649 func runTask(t *task, mode runMode) { 650 if t.defunct { 651 if t.state != taskCANCELLED { 652 t.state = taskCANCELLED 653 } 654 return 655 } 656 ctx := t.node.ctx 657 if ctx.LogEval > 0 { 658 defer ctx.Un(ctx.Indentf(t.node.node, "RUNTASK(%v, %v)", t.run.name, t.x)) 659 } 660 661 switch t.state { 662 case taskSUCCESS, taskFAILED: 663 return 664 case taskRUNNING: 665 // TODO: should we mark this as a cycle? 666 } 667 668 defer func() { 669 if n := t.node; n.toComplete { 670 n.toComplete = false 671 n.completeNodeTasks(attemptOnly) 672 } 673 674 switch r := recover().(type) { 675 case nil: 676 case *scheduler: 677 // Task must be WAITING. 678 if t.state == taskRUNNING { 679 t.state = taskSUCCESS // XXX: something else? Do we known the dependency? 680 if t.err != nil { 681 t.state = taskFAILED 682 } 683 } 684 default: 685 panic(r) 686 } 687 }() 688 689 defer ctx.PopArc(ctx.PushArc(t.node.node)) 690 691 // TODO: merge these two mechanisms once we get rid of the old evaluator. 692 ctx.pushTask(t) 693 defer ctx.popTask() 694 if t.env != nil { 695 id := t.id 696 // This is done to avoid struct args from passing fields up. 697 // Use [task.updateCI] to get the current CloseInfo with this field 698 // restored. 699 s := ctx.PushConjunct(MakeConjunct(t.env, t.x, id)) 700 defer ctx.PopState(s) 701 } 702 703 t.state = taskRUNNING 704 // A task may have recorded an error on a previous try. Clear it. 705 t.err = nil 706 707 t.run.f(ctx, t, mode) 708 709 if t.state != taskWAITING { 710 t.blockedOn = nil 711 t.blockCondition = neverKnown 712 713 // TODO: always reporting errors in the current task would avoid us 714 // having to collect and assign errors here. 
715 t.err = CombineErrors(nil, t.err, ctx.Err()) 716 if t.err == nil { 717 t.state = taskSUCCESS 718 } else { 719 t.state = taskFAILED 720 } 721 // TODO: do not add both context and task errors. Do something more 722 // principled. 723 t.node.addBottom(t.err) 724 t.node.decrementCounts(t.completes) 725 t.completes = 0 // safety 726 } 727 } 728 729 // updateCI stitches back the closeContext that more removed from the CloseInfo 730 // before in the given CloseInfo. 731 func (t *task) updateCI(ci CloseInfo) CloseInfo { 732 return ci 733 } 734 735 // waitFor blocks task t until the needs for scheduler s are met. 736 func (t *task) waitFor(s *scheduler, needs condition) { 737 if s.meets(needs) { 738 panic("waiting for condition that already completed") 739 } 740 // TODO: this line causes the scheduler state to fail if tasks are blocking 741 // on it. Is this desirable? At the very least we should then ensure that 742 // the scheduler where the tasks originate from will fail in that case. 743 s.needs |= needs 744 745 t.state = taskWAITING 746 747 t.blockCondition = needs 748 t.blockedOn = s 749 s.blocking = append(s.blocking, t) 750 s.ctx.blocking = append(s.ctx.blocking, t) 751 }