// Copyright 2023 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package adt

import (
	"math/bits"
)

// The CUE scheduler schedules tasks for evaluation.
//
// A task is a computation unit associated with a single node. Each task may
// depend on knowing certain properties of one or more fields, namely:
//
//   - whether the field exists
//   - the scalar value of a field, if any
//   - the set of all conjuncts
//   - the set of all sub fields
//   - the recursively evaluated value
//
// Each task, in turn, may mark itself as providing knowledge about one or more
// of these properties. If it is not known upfront whether a task may contribute
// to a certain property, it must mark itself as (potentially) contributing to
// this property.
//
//
// DEPENDENCY GRAPH
//
// A task may depend on zero or more fields, including the field for which it
// is defined. The graph of all dependencies is defined as follows:
//
//   - Each task and each <field, property> pair is a node in the graph.
//   - A task T for field F that (possibly) computes property P for F is
//     represented by an edge from <F, P> to T.
//   - A task T for field F that depends on property P of field G is represented
//     by an edge from <G, P> to T.
48 // 49 // It is an evaluation cycle for a task T if there is a path from any task T to 50 // itself in the dependency graph. Processing will stop in the even of such a 51 // cycle. In such case, the scheduler will commence an unblocking mechanism. 52 // 53 // As a general rule, once a node is detected to be blocking, it may no longer 54 // become more specific. In other words, it is "frozen". 55 // The unblocking consists of two phases: the scheduler will first freeze and 56 // unblock all blocked nodes for the properties marked as autoUnblock-ing in 57 // taskContext. Subsequently all tasks that are unblocked by this will run. 58 // In the next phase all remaining tasks are unblocked. 59 // See taskContext.autoUnblock for more information. 60 // 61 // Note that some tasks, like references, may depend on other fields without 62 // requiring a certain property. These do not count as dependencies. 63 64 // A taskContext manages the task memory and task stack. 65 // It is typically associated with an OpContext. 66 type taskContext struct { 67 // stack tracks the current execution of tasks. This is a stack as tasks 68 // may trigger the evaluation of other tasks to complete. 69 stack []*task 70 71 // blocking lists all tasks that were blocked during a round of evaluation. 72 // Evaluation finalized one node at a time, which includes the evaluation 73 // of all nodes necessary to evaluate that node. Any task that is blocked 74 // during such a round of evaluation is recorded here. Any mutual cycles 75 // will result in unresolved tasks. At the end of such a round, computation 76 // can be frozen and the tasks unblocked. 77 blocking []*task 78 79 // counterMask marks which conditions use counters. Other conditions are 80 // handled by signals only. 81 counterMask condition 82 83 // autoUnblock marks the flags that get unblocked automatically when there 84 // is a deadlock between nodes. 
These are properties that may become 85 // meaningful once it is known that a value may not become more specific. 86 // An example of this is the property "scalar". If something is not a scalar 87 // yet, and it is known that the value may never become more specific, it is 88 // known that this value is never will become a scalar, thus effectively 89 // making it known. 90 autoUnblock condition 91 92 // This is called upon completion of states, allowing other states to be 93 // updated atomically. 94 complete func(s *scheduler) condition 95 } 96 97 func (p *taskContext) current() *task { 98 if len(p.stack) == 0 { 99 return nil 100 } 101 return p.stack[len(p.stack)-1] 102 } 103 104 func (p *taskContext) pushTask(t *task) { 105 p.stack = append(p.stack, t) 106 } 107 108 func (p *taskContext) popTask() { 109 p.stack = p.stack[:len(p.stack)-1] 110 } 111 112 func (p *taskContext) newTask() *task { 113 // TODO: allocate from pool. 114 return &task{} 115 } 116 117 type taskState uint8 118 119 const ( 120 taskREADY taskState = iota 121 122 taskRUNNING // processing conjunct(s) 123 taskWAITING // task is blocked on a property of an arc to hold 124 taskSUCCESS 125 taskFAILED 126 ) 127 128 type schedState uint8 129 130 const ( 131 schedREADY schedState = iota 132 133 schedRUNNING // processing conjunct(s) 134 schedFINALIZING // all tasks completed, run new tasks immediately 135 schedSUCCESS 136 schedFAILED 137 ) 138 139 func (s schedState) done() bool { return s >= schedSUCCESS } 140 141 func (s taskState) String() string { 142 switch s { 143 case taskREADY: 144 return "READY" 145 case taskRUNNING: 146 return "RUNNING" 147 case taskWAITING: 148 return "WAITING" 149 case taskSUCCESS: 150 return "SUCCESS" 151 case taskFAILED: 152 return "FAILED" 153 default: 154 return "UNKNOWN" 155 } 156 } 157 158 func (s schedState) String() string { 159 switch s { 160 case schedREADY: 161 return "READY" 162 case schedRUNNING: 163 return "RUNNING" 164 case schedFINALIZING: 165 return 
"FINALIZING" 166 case schedSUCCESS: 167 return "SUCCESS" 168 case schedFAILED: 169 return "FAILED" 170 default: 171 return "UNKNOWN" 172 } 173 } 174 175 // runMode indicates how to proceed after a condition could not be met. 176 type runMode uint8 177 178 const ( 179 // ignore indicates that the new evaluator should not do any processing. 180 // This is mostly used in the transition from old to new evaluator and 181 // should probably eventually be removed. 182 ignore runMode = 1 + iota 183 184 // attemptOnly indicates that execution should continue even if the 185 // condition is not met. 186 attemptOnly 187 188 // yield means that execution should be yielded if the condition is not met. 189 // That is, the task is marked as a dependency and control is returned to 190 // the runloop. The task will resume once the dependency is met. 191 yield 192 193 // finalize means that uncompleted tasks should be turned into errors to 194 // complete the evaluation of a Vertex. 195 finalize 196 ) 197 198 func (r runMode) String() string { 199 switch r { 200 case ignore: 201 return "ignore" 202 case attemptOnly: 203 return "attemptOnly" 204 case yield: 205 return "yield" 206 case finalize: 207 return "finalize" 208 } 209 return "unknown" 210 } 211 212 // condition is a bit mask of states that a task may depend on. 213 // 214 // There are generally two types of states: states that are met if all tasks 215 // that contribute to that state are completed (counter states), and states that 216 // are met if some global set of conditions are met. 217 type condition uint16 218 219 const ( 220 // allKnown indicates that all possible states are completed. 221 allKnown condition = 0x7fff 222 223 // neverKnown is a special condition that is never met. It can be used to 224 // mark a task as impossible to complete. 
225 neverKnown condition = 0x8000 226 ) 227 228 func (c condition) meets(x condition) bool { 229 return c&x == x 230 } 231 232 const numCompletionStates = 10 // TODO: make this configurable 233 234 // A scheduler represents the set of outstanding tasks for a node. 235 type scheduler struct { 236 ctx *OpContext 237 node *nodeContext 238 239 state schedState 240 241 // completed is bit set of completed states. 242 completed condition 243 244 // needs specifies all the states needed to complete tasks in this scheduler. 245 needs condition 246 247 // provided specifies all the states that are provided by tasks added 248 // to this scheduler. 249 provided condition // TODO: rename to "provides"? To be consistent with "needs". 250 251 // frozen indicates all states that are frozen. These bits should be checked 252 // before making a node more specific. 253 // TODO: do we need a separate field for this, or can we use completed? 254 frozen condition 255 256 // isFrozen indicates if freeze was called explicitly. 257 // 258 // TODO: rename to isExplicitlyFrozen if it turns out we need both frozen 259 // and isFrozen. We probably do not. Check once the implementation of the 260 // new evaluator is complete. 261 isFrozen bool 262 263 // counters keeps track of the number of uncompleted tasks that are 264 // outstanding for each of the possible conditions. A state is 265 // considered completed if the corresponding counter reaches zero. 266 counters [numCompletionStates]int 267 268 // tasks lists all tasks that were scheduled for this scheduler. 269 // The list only contains tasks that are associated with this node. 270 // TODO: rename to queue and taskPos to nextQueueIndex. 271 tasks []*task 272 taskPos int 273 274 // blocking is a list of tasks that are blocked on the completion of 275 // the indicate conditions. This can hold tasks from other nodes or tasks 276 // originating from this node itself. 
277 blocking []*task 278 } 279 280 func (s *scheduler) clear() { 281 // TODO(perf): free tasks into task pool 282 283 // Any tasks blocked on this scheduler are unblocked once the scheduler is cleared. 284 // Otherwise they might signal a cleared scheduler, which can panic. 285 // 286 // TODO(mvdan,mpvl): In principle, all blocks should have been removed when a scheduler 287 // is cleared. Perhaps this can happen when the scheduler is stopped prematurely. 288 // For now, this solution seems to work OK. 289 for _, t := range s.blocking { 290 t.blockedOn = nil 291 t.blockCondition = neverKnown 292 } 293 294 *s = scheduler{ 295 ctx: s.ctx, 296 tasks: s.tasks[:0], 297 blocking: s.blocking[:0], 298 } 299 } 300 301 // cloneInto initializes the state of dst to be the same as s. 302 // 303 // NOTE: this is deliberately not a pointer receiver: this approach allows 304 // cloning s into dst while preserving the buffers of dst and not having to 305 // explicitly clone any non-buffer fields. 306 func (s scheduler) cloneInto(dst *scheduler) { 307 s.tasks = append(dst.tasks, s.tasks...) 308 s.blocking = append(dst.blocking, s.blocking...) 309 310 *dst = s 311 } 312 313 // incrementCounts adds the counters for each condition. 314 // See also decrementCounts. 315 func (s *scheduler) incrementCounts(x condition) { 316 x &= s.ctx.counterMask 317 318 for { 319 n := bits.TrailingZeros16(uint16(x)) 320 if n == 16 { 321 break 322 } 323 bit := condition(1 << n) 324 x &^= bit 325 326 s.counters[n]++ 327 } 328 } 329 330 // decrementCounts decrements the counters for each condition. If a counter for 331 // a condition reaches zero, it means that condition is met and all blocking 332 // tasks depending on that state can be run. 
333 func (s *scheduler) decrementCounts(x condition) { 334 x &= s.ctx.counterMask 335 336 var completed condition 337 for { 338 n := bits.TrailingZeros16(uint16(x)) 339 if n == 16 { 340 break 341 } 342 bit := condition(1 << n) 343 x &^= bit 344 345 s.counters[n]-- 346 if s.counters[n] == 0 { 347 completed |= bit 348 } 349 } 350 351 s.signal(completed) 352 } 353 354 // finalize runs all tasks and signals that the scheduler is done upon 355 // completion for the given signals. 356 func (s *scheduler) finalize(completed condition) { 357 // Do not panic on cycle detection. Instead, post-process the tasks 358 // by collecting and marking cycle errors. 359 s.process(allKnown, finalize) 360 s.signal(completed) 361 if s.state == schedRUNNING { 362 if s.meets(s.needs) { 363 s.state = schedSUCCESS 364 } else { 365 s.state = schedFAILED 366 } 367 } 368 } 369 370 // process advances a scheduler by executing tasks that are required. 371 // Depending on mode, if the scheduler is blocked on a condition, it will 372 // forcefully unblock the tasks. 373 func (s *scheduler) process(needs condition, mode runMode) bool { 374 c := s.ctx 375 376 // Update completions, if necessary. 377 if f := c.taskContext.complete; f != nil { 378 s.signal(f(s)) 379 } 380 381 if s.ctx.LogEval > 0 && len(s.tasks) > 0 { 382 if v := s.tasks[0].node.node; v != nil { 383 c.nest++ 384 c.Logf(v, "START Process %v -- mode: %v", v.Label, mode) 385 defer func() { 386 c.Logf(v, "END Process") 387 c.nest-- 388 }() 389 } 390 } 391 392 // hasRunning := false 393 s.state = schedRUNNING 394 // Use variable instead of range, because s.tasks may grow during processes. 395 396 processNextTask: 397 for s.taskPos < len(s.tasks) { 398 t := s.tasks[s.taskPos] 399 s.taskPos++ 400 401 if t.state != taskREADY { 402 // TODO(perf): Figure out how it is possible to reach this and if we 403 // should optimize. 
404 // panic("task not READY") 405 } 406 407 switch { 408 case t.defunct: 409 continue 410 411 case t.state == taskRUNNING: 412 // TODO: we could store the current referring node that caused 413 // the cycle and then proceed up the stack to mark all tasks 414 // that re involved in the cycle as well. Further, we could 415 // mark the cycle as a generation counter, instead of a boolean 416 // value, so that it will be trivial reconstruct a detailed cycle 417 // report when generating an error message. 418 419 case t.state != taskREADY: 420 421 default: 422 runTask(t, mode) 423 } 424 } 425 426 switch mode { 427 default: // case attemptOnly: 428 return s.meets(needs) 429 430 case yield: 431 if s.meets(needs) { 432 return true 433 } 434 c.current().waitFor(s, needs) 435 s.yield() 436 panic("unreachable") 437 438 case finalize: 439 // remainder of function 440 } 441 442 unblockTasks: 443 // Unblocking proceeds in three stages. Each of the stages may cause 444 // formerly blocked tasks to become unblocked. To ensure that unblocking 445 // tasks do not happen in an order-dependent way, we want to ensure that we 446 // have unblocked all tasks from one phase, before commencing to the next. 447 448 // The types of the node can no longer be altered. We can unblock the 449 // relevant states first to finish up any tasks that were just waiting for 450 // types, such as lists. 451 for _, t := range c.blocking { 452 if t.blockedOn != nil { 453 t.blockedOn.signal(s.ctx.autoUnblock) 454 } 455 } 456 457 // Mark all remaining conditions as "frozen" before actually running the 458 // tasks. Doing this before running the remaining tasks ensures that we get 459 // the same errors, regardless of the order in which tasks are unblocked. 460 for _, t := range c.blocking { 461 if t.blockedOn != nil { 462 t.blockedOn.freeze(t.blockCondition) 463 t.unblocked = true 464 } 465 } 466 467 // Run the remaining blocked tasks. 
468 numBlocked := len(c.blocking) 469 for _, t := range c.blocking { 470 if t.blockedOn != nil && !t.defunct { 471 n, cond := t.blockedOn, t.blockCondition 472 t.blockedOn, t.blockCondition = nil, neverKnown 473 n.signal(cond) 474 runTask(t, attemptOnly) // Does this need to be final? Probably not if we do a fixed point computation. 475 } 476 } 477 478 // The running of tasks above may result in more tasks being added to the 479 // queue. Process these first before continuing. 480 if s.taskPos < len(s.tasks) { 481 goto processNextTask 482 } 483 484 // Similarly, the running of tasks may result in more tasks being blocked. 485 // Ensure we processed them all. 486 if numBlocked < len(c.blocking) { 487 goto unblockTasks 488 } 489 490 c.blocking = c.blocking[:0] 491 492 return true 493 } 494 495 // yield causes the current task to be suspended until the given conditions 496 // are met. 497 func (s *scheduler) yield() { 498 panic(s) 499 } 500 501 // meets reports whether all needed completion states in s are met. 502 func (s *scheduler) meets(needs condition) bool { 503 s.node.assertInitialized() 504 505 if s.state != schedREADY { 506 // Automatically qualify for conditions that are not provided by this node. 507 // NOTE: in the evaluator this is generally not the case, as tasks my still 508 // be added during evaluation until all ancestor nodes are evaluated. This 509 // can be encoded by the scheduler by adding a state "ancestorsCompleted". 510 // which all other conditions depend on. 511 needs &= s.provided 512 } 513 return s.completed&needs == needs 514 } 515 516 // blockOn marks a state as uncompleted. 517 func (s *scheduler) blockOn(cond condition) { 518 // TODO: should we allow this to be used for counters states? 519 // if s.ctx.counterMask&cond != 0 { 520 // panic("cannot block on counter states") 521 // } 522 s.provided |= cond 523 } 524 525 // signal causes tasks that are blocking on the given completion to be run 526 // for this scheduler. 
Tasks are only run if the completion state was not 527 // already reached before. 528 func (s *scheduler) signal(completed condition) { 529 was := s.completed 530 s.completed |= completed 531 if was == s.completed { 532 s.frozen |= completed 533 return 534 } 535 536 s.completed |= s.ctx.complete(s) 537 s.frozen |= completed 538 539 // TODO: this could benefit from a linked list where tasks are removed 540 // from the list before being run. 541 for _, t := range s.blocking { 542 if t.blockCondition&s.completed == t.blockCondition { 543 // Prevent task from running again. 544 t.blockCondition = neverKnown 545 t.blockedOn = nil 546 runTask(t, attemptOnly) // TODO: does this ever need to be final? 547 // TODO: should only be run once for each blocking queue. 548 } 549 } 550 } 551 552 // freeze indicates no more tasks satisfying the given condition may be added. 553 // It is also used to freeze certain elements of the task. 554 func (s *scheduler) freeze(c condition) { 555 s.frozen |= c 556 s.completed |= c 557 s.ctx.complete(s) 558 s.isFrozen = true 559 } 560 561 // signalDoneAdding signals that no more tasks will be added to this scheduler. 562 // This allows unblocking tasks that depend on states for which there are no 563 // tasks in this scheduler. 564 func (s *scheduler) signalDoneAdding() { 565 s.signal(s.needs &^ s.provided) 566 } 567 568 // runner defines properties of a type of task, including a function to run it. 569 type runner struct { 570 name string 571 572 // The mode argument indicates whether the scheduler 573 // of this field is finalizing. It is passed as a component of the required 574 // state to various evaluation methods. 575 f func(ctx *OpContext, t *task, mode runMode) 576 577 // completes indicates which states this tasks contributes to. 578 completes condition 579 580 // needes indicates which states of the corresponding node need to be 581 // completed before this task can be run. 
582 needs condition 583 584 // a lower priority indicates a preference to run a task before tasks 585 // of a higher priority. 586 priority int8 587 } 588 589 type task struct { 590 state taskState 591 592 completes condition // cycles may alter the completion mask. TODO: is this still true? 593 594 // defunct indicates that this task is no longer relevant. This is the case 595 // when it has not yet been run before it is copied into a disjunction. 596 defunct bool 597 598 // unblocked indicates this task was unblocked by force. 599 unblocked bool 600 601 // The following fields indicate what this task is blocked on, including 602 // the scheduler, which conditions it is blocking on, and the stack of 603 // tasks executed leading to the block. 604 605 // blockedOn cannot be needed in a clone for a disjunct, because as long 606 // as the disjunct is unresolved, its value cannot contribute to another 607 // scheduler. 608 blockedOn *scheduler 609 blockCondition condition 610 blockStack []*task // TODO: use; for error reporting. 611 612 err *Bottom 613 614 // The node from which this conjunct originates. 615 node *nodeContext 616 617 run *runner // TODO: use struct to make debugging easier? 618 619 // The Conjunct processed by this task. 620 env *Environment 621 id CloseInfo // TODO: rename to closeInfo? 622 x Node // The conjunct Expression or Value. 623 624 // For Comprehensions: 625 comp *envComprehension 626 leaf *Comprehension 627 } 628 629 func (s *scheduler) insertTask(t *task) { 630 completes := t.run.completes 631 needs := t.run.needs 632 633 s.needs |= needs 634 s.provided |= completes 635 636 if needs&completes != 0 { 637 panic("task depends on its own completion") 638 } 639 t.completes = completes 640 641 if s.state == schedFINALIZING { 642 runTask(t, finalize) 643 return 644 } 645 646 s.incrementCounts(completes) 647 if cc := t.id.cc; cc != nil { 648 // may be nil for "group" tasks, such as processLists. 
649 dep := cc.incDependent(t.node.ctx, TASK, nil) 650 if dep != nil { 651 dep.taskID = len(s.tasks) 652 dep.task = t 653 } 654 } 655 s.tasks = append(s.tasks, t) 656 657 // Sort by priority. This code is optimized for the case that there are 658 // very few tasks with higher priority. This loop will almost always 659 // terminate within 0 or 1 iterations. 660 for i := len(s.tasks) - 1; i > s.taskPos; i-- { 661 if s.tasks[i-1].run.priority <= s.tasks[i].run.priority { 662 break 663 } 664 s.tasks[i], s.tasks[i-1] = s.tasks[i-1], s.tasks[i] 665 } 666 667 if s.completed&needs != needs { 668 t.waitFor(s, needs) 669 } 670 } 671 672 func runTask(t *task, mode runMode) { 673 if t.defunct { 674 return 675 } 676 t.node.Logf("============ RUNTASK %v %v", t.run.name, t.x) 677 ctx := t.node.ctx 678 679 switch t.state { 680 case taskSUCCESS, taskFAILED: 681 return 682 case taskRUNNING: 683 // TODO: should we mark this as a cycle? 684 } 685 686 defer func() { 687 if n := t.node; n.toComplete { 688 n.toComplete = false 689 n.completeNodeTasks(attemptOnly) 690 } 691 692 switch r := recover().(type) { 693 case nil: 694 case *scheduler: 695 // Task must be WAITING. 696 if t.state == taskRUNNING { 697 t.state = taskSUCCESS // XXX: something else? Do we known the dependency? 698 if t.err != nil { 699 t.state = taskFAILED 700 } 701 } 702 default: 703 panic(r) 704 } 705 }() 706 707 defer ctx.PopArc(ctx.PushArc(t.node.node)) 708 709 // TODO: merge these two mechanisms once we get rid of the old evaluator. 710 ctx.pushTask(t) 711 defer ctx.popTask() 712 if t.env != nil { 713 id := t.id 714 id.cc = nil // this is done to avoid struct args from passing fields up. 715 s := ctx.PushConjunct(MakeConjunct(t.env, t.x, id)) 716 defer ctx.PopState(s) 717 } 718 719 t.state = taskRUNNING 720 // A task may have recorded an error on a previous try. Clear it. 
721 t.err = nil 722 723 t.run.f(ctx, t, mode) 724 725 if t.state != taskWAITING { 726 t.blockedOn = nil 727 t.blockCondition = neverKnown 728 729 // TODO: always reporting errors in the current task would avoid us 730 // having to collect and assign errors here. 731 t.err = CombineErrors(nil, t.err, ctx.Err()) 732 if t.err == nil { 733 t.state = taskSUCCESS 734 } else { 735 t.state = taskFAILED 736 } 737 t.node.addBottom(t.err) // TODO: replace with something more principled. 738 739 if t.id.cc != nil { 740 t.id.cc.decDependent(ctx, TASK, nil) 741 } 742 t.node.decrementCounts(t.completes) 743 t.completes = 0 // safety 744 } 745 } 746 747 // waitFor blocks task t until the needs for scheduler s are met. 748 func (t *task) waitFor(s *scheduler, needs condition) { 749 if s.meets(needs) { 750 panic("waiting for condition that already completed") 751 } 752 // TODO: this line causes the scheduler state to fail if tasks are blocking 753 // on it. Is this desirable? At the very least we should then ensure that 754 // the scheduler where the tasks originate from will fail in that case. 755 s.needs |= needs 756 757 t.state = taskWAITING 758 759 t.blockCondition = needs 760 t.blockedOn = s 761 s.blocking = append(s.blocking, t) 762 s.ctx.blocking = append(s.ctx.blocking, t) 763 }