cuelang.org/go@v0.13.0/tools/trim/trimv3.go (about) 1 // Copyright 2025 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package trim 16 17 // # Overview 18 // 19 // The goal of trim is to remove redundant code within the supplied 20 // CUE ASTs. 21 // 22 // This is achieved by analysis of both the ASTs and the result of 23 // evaluation: looking at conjuncts etc within vertices. For each 24 // vertex, we try to identify conjuncts which are, by subsumption, as 25 // specific as the vertex as a whole. There are three possible outcomes: 26 // 27 // a) No conjuncts on their own are found to be as specific as the 28 // vertex. In this case, we keep all the conjuncts. This is 29 // conservative, and may lead to conjuncts being kept which don't need 30 // to be, because we don't attempt to detect subsumption between 31 // subsets of a vertex's conjuncts. It is however safe. 32 // 33 // b) Exactly one conjunct is found which is as specific as the 34 // vertex. We keep this conjunct. Note that we do not currently 35 // consider that there may be other conjuncts within this vertex which 36 // have to be kept for other reasons, and in conjunction are as 37 // specific as this vertex. So again, we may end up keeping more 38 // conjuncts than strictly necessary, but it is still safe. 39 // 40 // c) Several conjuncts are found which are individually as specific 41 // as the vertex. We save this set of "winning conjuncts" for later.
42 // 43 // As we progress, we record the number of times each conjunct is seen 44 // (conjunct identity is taken as the conjunct's source node). Once we 45 // have completed traversing the vertices, we may have several sets of 46 // "winning conjuncts" each of which needs a conjunct selected to 47 // keep. We order these sets individually by seen-count (descending), 48 // and collectively by the sum of seen-counts for each set (also 49 // descending). For each set in turn, if there is no conjunct that is 50 // already kept, we choose to keep the most widely seen conjunct. If 51 // there is still a tie, we order by source code position. 52 // 53 // Additionally, if a conjunct survives, then we make sure that all 54 // references to that conjunct also survive. This helps to prevent 55 // surprises for the user: a field `x` that constrains a field `y` 56 // will always do so, even if `y` is always found to be more 57 // specific. For example: 58 // 59 // x: >5 60 // x: <10 61 // y: 7 62 // y: x 63 // 64 // Here, `y` will not be simplified to 7. By contrast, 65 // 66 // y: >5 67 // y: <10 68 // y: 7 69 // 70 // will be simplified to `y: 7`. 71 // 72 // # Ignoring conjuncts 73 // 74 // When we inspect each vertex, there may be conjuncts that we must 75 // ignore for the purposes of finding conjuncts as specific as the 76 // vertex. The danger is that such conjuncts are found to be as 77 // specific as the whole vertex, thus causing the other conjuncts to 78 // be removed. But this can alter the semantics of the CUE code. For 79 // example, conjuncts that originate from within a disjunction branch 80 // must be ignored. Consider: 81 // 82 // d: 6 | string 83 // o: d & int 84 // 85 // The vertex for `o` will contain conjuncts for 6, and int. We would 86 // find the 6 is as specific as the vertex, so it is tempting to 87 // remove the `int`. But if we do, then the value of `o` changes 88 // because the string-branch of the disjunction can no longer be 89 // dismissed. 
Processing of disjunctions cannot be done on the AST, 90 // because disjunctions may contain references which we need to 91 // resolve, in order to know which conjuncts to ignore. For example: 92 // 93 // d: c | string 94 // o: d & int 95 // c: 6 96 // 97 // Thus before we traverse the vertices to identify redundant 98 // conjuncts, we first traverse the vertices looking for disjunctions, 99 // and recording which conjuncts should be ignored. 100 // 101 // Another example is patterns: we must ignore conjuncts which are the 102 // roots of patterns. Consider: 103 // 104 // [string]: 5 105 // o: int 106 // 107 // In the vertex for `o` we would find conjuncts for 5 and `int`. We 108 // must ignore the 5, otherwise we would find that it is as specific 109 // as `o`, which could cause the entire field declaration `o: int` to 110 // be removed, which then changes the value of the CUE program. 111 // 112 // As with disjunctions, an earlier pass over the vertices identifies 113 // patterns and marks them accordingly. 114 // 115 // Finally, embedded values require special treatment. Consider: 116 // 117 // x: y: 5 118 // z: { 119 // x 120 // } 121 // 122 // Unfortunately, the evaluator doesn't track how different conjuncts 123 // arrive in a vertex: the vertex for `z` will not contain a conjunct 124 // which is a reference for `x`. All we will find in `z` is the arc 125 // for `y`. Because of this, we cannot discover that we must keep the 126 // embedded `x` -- it simply does not exist. So we take a rather blunt 127 // approach: an analysis of the AST will find where embeddings occur, 128 // which we record, and then when a vertex contains a struct which we 129 // know has an embedding, we always keep all the conjuncts in that 130 // vertex and its descendents. 
import (
	"fmt"
	"io"
	"os"
	"slices"
	"strings"

	"cuelang.org/go/cue"
	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/ast/astutil"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/token"
	"cuelang.org/go/internal/core/adt"
	"cuelang.org/go/internal/core/runtime"
	"cuelang.org/go/internal/core/subsume"
	"cuelang.org/go/internal/value"
)

// filesV3 is the entry point for the v3 trim implementation. It runs
// the analysis passes described in the package overview, in order:
// static (AST) dependencies, patterns, disjunctions, redundancy
// detection, and finally tie-breaking of the "undecided" winner sets.
// It then rewrites the given files in place. Only files within the
// build instance's directory are modified.
func filesV3(files []*ast.File, val cue.Value, cfg *Config) error {
	dir := val.BuildInstance().Dir
	// Normalize dir so it ends with exactly one path separator; it is
	// used later as a plain string prefix to select which files to
	// rewrite.
	dir = strings.TrimRight(dir, string(os.PathSeparator)) +
		string(os.PathSeparator)

	if cfg.Trace && cfg.TraceWriter == nil {
		cfg.TraceWriter = os.Stderr
	}

	r, v := value.ToInternal(val)
	ctx := adt.NewContext(r, v)
	t := &trimmerV3{
		r:     r,
		ctx:   ctx,
		nodes: make(map[ast.Node]*nodeMeta),
		trace: cfg.TraceWriter,
	}

	t.logf("\nStarting trim in dir %q with files:", dir)
	for i, file := range files {
		t.logf(" %d: %s", i, file.Filename)
	}
	// Pass order matters: patterns and disjunctions must be marked
	// (required+ignore) before redundancy detection runs; see the
	// package overview for why.
	t.logf("\nFinding static dependencies")
	t.findStaticDependencies(files)
	t.logf("\nFinding patterns")
	t.findPatterns(v)
	t.logf("\nFinding disjunctions")
	t.findDisjunctions(v)
	t.logf("\nFinding redundances")
	t.findRedundancies(v, false)
	t.logf("\nSolve undecideds")
	t.solveUndecideds()

	t.logf("\nTrimming source")
	return t.trim(files, dir)
}

// nodeMeta accumulates everything trim learns about a single AST node
// across the analysis passes.
type nodeMeta struct {
	// The static parent - i.e. parent from the AST.
	parent *nodeMeta

	// src is the AST node this metadata describes.
	src ast.Node

	// If true, then this node must not be removed, because it is not
	// redundant in at least one place where it's used.
	required bool

	// If true, then conjuncts of this node should be ignored for the
	// purpose of testing for redundant conjuncts.
	ignoreConjunct bool

	// If this is true then this node has one or more embedded values
	// (statically) - i.e. EmbedDecl has been found within this node
	// (and src will be either a File or a StructLit).
	hasEmbedding bool

	// If x.requiredBy = {y,z} then it means x must be kept if one or
	// more of {y,z} are kept. It is directional: if x must be kept for
	// other reasons, then that says nothing about whether any of {y,z}
	// must be kept.
	requiredBy []*nodeMeta

	// The number of times conjuncts of this node have been found in
	// the vertices. This is used for choosing winning conjuncts, and
	// to ensure that we never remove a node which we have only seen in
	// the AST, and not in result of evaluation.
	seenCount int
}

// incSeenCount records one more sighting of this node's conjuncts
// during vertex traversal.
func (nm *nodeMeta) incSeenCount() {
	nm.seenCount++
}

// markRequired pins this node so that it survives trimming.
func (nm *nodeMeta) markRequired() {
	nm.required = true
}

// addRequiredBy records that nm must be kept whenever e is kept. The
// slice is kept duplicate-free via a linear scan (sets are expected
// to be small).
func (nm *nodeMeta) addRequiredBy(e *nodeMeta) {
	for _, f := range nm.requiredBy {
		if f == e {
			return
		}
	}
	nm.requiredBy = append(nm.requiredBy, e)
}

// isRequiredBy reports whether a is (transitively) required by b. A
// node is always considered required by itself.
func (a *nodeMeta) isRequiredBy(b *nodeMeta) bool {
	if a == b {
		return true
	}
	return a._isRequiredBy(map[*nodeMeta]struct{}{a: {}}, b)
}

// Need to cope with cycles, hence the seen/visited-set.
func (a *nodeMeta) _isRequiredBy(seen map[*nodeMeta]struct{}, b *nodeMeta) bool {
	for _, e := range a.requiredBy {
		if e == b {
			return true
		}
		if _, found := seen[e]; found {
			continue
		}
		seen[e] = struct{}{}
		if e._isRequiredBy(seen, b) {
			return true
		}
	}
	return false
}

// True iff this node is required, or any of the nodes that require
// this node are themselves required (transitively).
262 func (nm *nodeMeta) isRequired() bool { 263 if nm.required { 264 return true 265 } 266 if len(nm.requiredBy) == 0 { 267 return false 268 } 269 return nm._isRequired(map[*nodeMeta]struct{}{nm: {}}) 270 } 271 272 func (nm *nodeMeta) _isRequired(seen map[*nodeMeta]struct{}) bool { 273 if nm.required { 274 return true 275 } 276 for _, e := range nm.requiredBy { 277 if _, found := seen[e]; found { 278 continue 279 } 280 seen[e] = struct{}{} 281 if e._isRequired(seen) { 282 nm.required = true 283 return true 284 } 285 } 286 return false 287 } 288 289 // True iff this node or any of its parent nodes (static/AST parents), 290 // have been identified as containing embedded values. 291 func (nm *nodeMeta) isEmbedded() bool { 292 for ; nm != nil; nm = nm.parent { 293 if nm.hasEmbedding { 294 return true 295 } 296 } 297 return false 298 } 299 300 // True iff a is an ancestor of b (in the static/AST parent-child 301 // sense). 302 func (a *nodeMeta) isAncestorOf(b *nodeMeta) bool { 303 if a == nil { 304 return false 305 } 306 for b != nil { 307 if b == a { 308 return true 309 } 310 b = b.parent 311 } 312 return false 313 } 314 315 type trimmerV3 struct { 316 r *runtime.Runtime 317 ctx *adt.OpContext 318 nodes map[ast.Node]*nodeMeta 319 320 undecided []nodeMetas 321 322 // depth is purely for debugging trace indentation level. 323 depth int 324 trace io.Writer 325 } 326 327 func (t *trimmerV3) logf(format string, args ...any) { 328 w := t.trace 329 if w == nil { 330 return 331 } 332 fmt.Fprintf(w, "%*s", t.depth*3, "") 333 fmt.Fprintf(w, format, args...) 
334 fmt.Fprintln(w) 335 } 336 337 func (t *trimmerV3) inc() { t.depth++ } 338 func (t *trimmerV3) dec() { t.depth-- } 339 340 func (t *trimmerV3) getNodeMeta(n ast.Node) *nodeMeta { 341 if n == nil { 342 return nil 343 } 344 d, found := t.nodes[n] 345 if !found { 346 d = &nodeMeta{src: n} 347 t.nodes[n] = d 348 } 349 return d 350 } 351 352 // Discovers findStaticDependencies between nodes by walking through the AST of 353 // the files. 354 // 355 // 1. Establishes that if a node survives then its parent must also 356 // survive. I.e. a parent is required by its children. 357 // 358 // 2. Marks the arguments for call expressions as required: no 359 // simplification can occur there. This is because we cannot discover 360 // the relationship between arguments to a function and the function's 361 // result, and so any simplification of the arguments may change the 362 // result of the function call in unknown ways. 363 // 364 // 3. The conjuncts in a adt.Vertex do not give any information as to 365 // whether they have arrived via embedding or not. But, in the AST, we 366 // do have that information. So find and record embedding information. 
func (t *trimmerV3) findStaticDependencies(files []*ast.File) {
	t.inc()
	defer t.dec()

	// ancestors is the stack of nodeMetas from the file root down to
	// the current node's parent.
	var ancestors []*nodeMeta
	// callCount > 0 means we are currently somewhere inside a call
	// expression (including its arguments).
	callCount := 0
	for _, f := range files {
		t.logf("%s", f.Filename)
		ast.Walk(f, func(n ast.Node) bool {
			t.inc()
			t.logf("%p::%T %v", n, n, n.Pos())
			nm := t.getNodeMeta(n)
			if field, ok := n.(*ast.Field); ok {
				// Fields with a "!" or "?" constraint are kept and
				// never allowed to win a redundancy contest.
				switch field.Constraint {
				case token.NOT, token.OPTION:
					t.logf(" ignoring %v", nm.src.Pos())
					nm.ignoreConjunct = true
					nm.markRequired()
				}
			}
			if l := len(ancestors); l > 0 {
				// A parent is required by its children (point 1 above).
				parent := ancestors[l-1]
				parent.addRequiredBy(nm)
				nm.parent = parent
			}
			ancestors = append(ancestors, nm)
			if _, ok := n.(*ast.CallExpr); ok {
				callCount++
			}
			if callCount > 0 {
				// This is somewhat unfortunate, but for now, as soon as
				// we're in the arguments for a function call, we prevent
				// all simplifications.
				nm.markRequired()
			}
			if _, ok := n.(*ast.EmbedDecl); ok && nm.parent != nil {
				// The parent of an EmbedDecl is always either a File or a
				// StructLit.
				nm.parent.hasEmbedding = true
			}
			return true
		}, func(n ast.Node) {
			// Post-order: unwind the call-nesting count and the
			// ancestor stack.
			if _, ok := n.(*ast.CallExpr); ok {
				callCount--
			}
			ancestors = ancestors[:len(ancestors)-1]
			t.dec()
		})
	}
}

// Discovers patterns by walking vertices and their arcs recursively.
//
// Conjuncts that originate from the pattern constraint must be
// ignored when searching for redundancies, otherwise they can be
// found to be more-or-equally-specific than the vertex in which
// they're found, and could lead to the entire field being
// removed. These conjuncts must also be kept because even if the
// pattern is not actually used, it may form part of the public API of
// the CUE, and so removing an unused pattern may alter the API.
//
// We only need to mark the conjuncts at the "top level" of the
// pattern constraint as required+ignore; we do not need to descend
// into the arcs of the pattern constraint. This is because the
// pattern only matches against a key, and not a path. So, even with:
//
//	a: [string]: x: y: z: 5
//
// we only need to mark the x as required+ignore, and not the y, z, or
// 5. This ensures we later ignore only this x when simplifying other
// conjuncts in a vertex whose label has matched this pattern. If we
// add:
//
//	b: w: x: y: {}
//	b: a
//
// This will get trimmed to:
//
//	a: [string]: x: y: z: 5
//	b: w: _
//	b: a
//
// I.e. by ignoring the pattern's "top level" conjuncts, we ensure we
// keep b: w, even though the pattern is equally specific to the
// vertex for b.w, and the explicit b: w (from line 2) is less
// specific.
func (t *trimmerV3) findPatterns(v *adt.Vertex) {
	t.inc()
	defer t.dec()

	// Breadth-first traversal over every vertex reachable from v.
	worklist := []*adt.Vertex{v}
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		t.logf("vertex %p; kind %v; value %p::%T",
			v, v.Kind(), v.BaseValue, v.BaseValue)
		t.inc()

		if patterns := v.PatternConstraints; patterns != nil {
			for i, pair := range patterns.Pairs {
				t.logf("pattern %d %p::%T", i, pair.Constraint, pair.Constraint)
				t.inc()
				pair.Constraint.VisitLeafConjuncts(func(c adt.Conjunct) bool {
					field := c.Field()
					elem := c.Elem()
					expr := c.Expr()
					t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T",
						field, field, elem, elem, expr, expr)

					// Only the pattern's top-level conjuncts are marked;
					// see the comment above for why we don't descend
					// into their arcs.
					if src := field.Source(); src != nil {
						nm := t.getNodeMeta(src)
						t.logf(" ignoring %v", nm.src.Pos())
						nm.ignoreConjunct = true
						nm.markRequired()
					}

					return true
				})
				t.dec()
			}
		}

		t.dec()

		worklist = append(worklist, v.Arcs...)
		if v, ok := v.BaseValue.(*adt.Vertex); ok {
			worklist = append(worklist, v)
		}
	}
}

// Discovers disjunctions by walking vertices and their arcs
// recursively.
//
// Disjunctions and their branches must be found before we attempt to
// simplify vertices. We must find disjunctions and mark all conjuncts
// within each branch of a disjunction, including all conjuncts that
// can be reached via resolution, as required+ignore.
//
// Failure to do this can lead to the removal of conjuncts in a vertex
// which were essential for discriminating between branches of a
// disjunction.
func (t *trimmerV3) findDisjunctions(v *adt.Vertex) {
	t.inc()
	defer t.dec()

	// Phase 1: find every disjunction reachable from v, and finalize
	// each of its branches into a synthetic stand-alone vertex.
	var branches []*adt.Vertex
	seen := make(map[*adt.Vertex]struct{})
	worklist := []*adt.Vertex{v}
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		if _, found := seen[v]; found {
			continue
		}
		seen[v] = struct{}{}

		t.logf("vertex %p; kind %v; value %p::%T",
			v, v.Kind(), v.BaseValue, v.BaseValue)
		t.inc()

		v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
			switch disj := c.Elem().(type) {
			case *adt.Disjunction:
				t.logf("found disjunction")
				for i, val := range disj.Values {
					t.logf("branch %d", i)
					branch := &adt.Vertex{
						Parent: v.Parent,
						Label:  v.Label,
					}
					c := adt.MakeConjunct(c.Env, val, c.CloseInfo)
					branch.InsertConjunct(c)
					branch.Finalize(t.ctx)
					branches = append(branches, branch)
				}

			case *adt.DisjunctionExpr:
				t.logf("found disjunctionexpr")
				for i, val := range disj.Values {
					t.logf("branch %d", i)
					branch := &adt.Vertex{
						Parent: v.Parent,
						Label:  v.Label,
					}
					c := adt.MakeConjunct(c.Env, val.Val, c.CloseInfo)
					branch.InsertConjunct(c)
					branch.Finalize(t.ctx)
					branches = append(branches, branch)
				}
			}
			return true
		})

		t.dec()

		worklist = append(worklist, v.Arcs...)
		if v, ok := v.BaseValue.(*adt.Vertex); ok {
			worklist = append(worklist, v)
		}
	}

	// Phase 2: walk every branch vertex (plus everything its
	// conjuncts resolve to), marking all conjuncts found as
	// required+ignore.
	clear(seen)
	worklist = branches
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		if _, found := seen[v]; found {
			continue
		}
		seen[v] = struct{}{}

		v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
			if src := c.Field().Source(); src != nil {
				nm := t.getNodeMeta(src)
				t.logf(" ignoring %v", nm.src.Pos())
				nm.ignoreConjunct = true
				nm.markRequired()
			}
			t.resolveElemAll(c, func(resolver adt.Resolver, resolvedTo *adt.Vertex) {
				worklist = append(worklist, resolvedTo.Arcs...)
			})
			return true
		})
		worklist = append(worklist, v.Arcs...)
	}
}

// keepAllChildren marks n and every AST node beneath it as required.
func (t *trimmerV3) keepAllChildren(n ast.Node) {
	ast.Walk(n, func(n ast.Node) bool {
		nm := t.getNodeMeta(n)
		nm.markRequired()
		return true
	}, nil)
}

// Once we have identified, and masked out, call expressions,
// embeddings, patterns, and disjunctions, we can finally work
// recursively through the vertices, testing their conjuncts to find
// redundant conjuncts.
func (t *trimmerV3) findRedundancies(v *adt.Vertex, keepAll bool) {
	// keepAll, once set (by an embedding), applies to this vertex and
	// all of its descendents.
	v = v.DerefDisjunct()
	t.logf("vertex %p (parent %p); kind %v; value %p::%T",
		v, v.Parent, v.Kind(), v.BaseValue, v.BaseValue)
	t.inc()
	defer t.dec()

	_, isDisjunct := v.BaseValue.(*adt.Disjunction)
	for _, si := range v.Structs {
		if src := si.StructLit.Src; src != nil {
			t.logf("struct lit %p src: %p::%T %v", si.StructLit, src, src, src.Pos())
			nm := t.getNodeMeta(src)
			nm.incSeenCount()
			// An embedded struct (or one inside an embedding) forces
			// keepAll from here down; see the package overview.
			keepAll = keepAll || nm.isEmbedded()
			if nm.hasEmbedding {
				t.logf(" (has embedding root)")
			}
			if nm.isEmbedded() {
				t.logf(" (isEmbedded)")
			} else if keepAll {
				t.logf(" (keepAll)")
			}

			if !isDisjunct {
				continue
			}
			// For a disjunction result, evaluate this struct lit on its
			// own so its contents are also examined for redundancies.
			v1 := &adt.Vertex{
				Parent: v.Parent,
				Label:  v.Label,
			}
			c := adt.MakeConjunct(si.Env, si.StructLit, si.CloseInfo)
			v1.InsertConjunct(c)
			v1.Finalize(t.ctx)
			t.logf("exploring disj struct lit %p (src %v): start", si, src.Pos())
			t.findRedundancies(v1, keepAll)
			t.logf("exploring disj struct lit %p (src %v): end", si, src.Pos())
		}
	}

	if keepAll {
		for _, si := range v.Structs {
			if src := si.StructLit.Src; src != nil {
				t.keepAllChildren(src)
			}
		}
	}

	if patterns := v.PatternConstraints; patterns != nil {
		for i, pair := range patterns.Pairs {
			t.logf("pattern %d %p::%T", i, pair.Constraint, pair.Constraint)
			t.findRedundancies(pair.Constraint, keepAll)
		}
	}

	// Note: the local nodeMetas shadows the nodeMetas slice type,
	// which is not needed in this function.
	var nodeMetas, winners, disjDefaultWinners []*nodeMeta
	v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
		field := c.Field()
		elem := c.Elem()
		expr := c.Expr()
		src := field.Source()
		if src == nil {
			t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T, src nil",
				field, field, elem, elem, expr, expr)
			return true
		}

		t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T, src: %v",
			field, field, elem, elem, expr, expr, src.Pos())

		nm := t.getNodeMeta(src)
		nm.incSeenCount()

		// Currently we replace redundant structs with _. If it becomes
		// desired to replace them with {} instead, then we want this
		// code instead of the block that follows:
		//
		// if exprSrc := expr.Source(); exprSrc != nil {
		// 	exprNm := t.getNodeMeta(exprSrc)
		// 	exprNm.addRequiredBy(nm)
		// }
		if exprSrc := expr.Source(); exprSrc != nil && len(v.Arcs) == 0 {
			switch expr.(type) {
			case *adt.StructLit, *adt.ListLit:
				t.logf(" saving emptyness")
				exprNm := t.getNodeMeta(exprSrc)
				exprNm.addRequiredBy(nm)
			}
		}

		if nm.ignoreConjunct {
			// Masked out by an earlier pass (pattern, disjunction
			// branch, or "!"/"?" field constraint).
			t.logf(" ignoring conjunct")
		} else {
			nodeMetas = append(nodeMetas, nm)
			if t.equallySpecific(v, c) {
				winners = append(winners, nm)
				t.logf(" equally specific: %p::%T", field, field)
			} else {
				t.logf(" redundant here: %p::%T", field, field)
			}
		}

		if disj, ok := expr.(*adt.DisjunctionExpr); ok && disj.HasDefaults {
			// Test whether *every* default branch of this disjunction
			// is as specific as the vertex.
			defaultCount := 0
			matchingDefaultCount := 0
			for _, branch := range disj.Values {
				if !branch.Default {
					continue
				}
				defaultCount++
				c := adt.MakeConjunct(c.Env, branch.Val, c.CloseInfo)
				if t.equallySpecific(v, c) {
					matchingDefaultCount++
				}
			}
			if defaultCount > 0 && defaultCount == matchingDefaultCount {
				t.logf(" found %d matching defaults in disjunction",
					matchingDefaultCount)
				disjDefaultWinners = append(disjDefaultWinners, nm)
			}
		}

		if compr, ok := elem.(*adt.Comprehension); ok {
			// The clauses of a comprehension and what they resolve to
			// are mutually dependent, hence addInverse=true.
			t.logf("comprehension found")
			for _, clause := range compr.Clauses {
				var conj adt.Conjunct
				switch clause := clause.(type) {
				case *adt.IfClause:
					conj = adt.MakeConjunct(c.Env, clause.Condition, c.CloseInfo)
				case *adt.ForClause:
					conj = adt.MakeConjunct(c.Env, clause.Src, c.CloseInfo)
				case *adt.LetClause:
					conj = adt.MakeConjunct(c.Env, clause.Expr, c.CloseInfo)
				}
				t.linkResolvers(conj, true)
			}
		}

		t.linkResolvers(c, false)
		return true
	})

	if keepAll {
		t.logf("keeping all %d nodes", len(nodeMetas))
		for _, d := range nodeMetas {
			t.logf(" %p::%T %v", d.src, d.src, d.src.Pos())
			d.markRequired()
		}

	} else {
		if len(disjDefaultWinners) != 0 {
			// For all the conjuncts that were disjunctions and contained
			// defaults, and *every* default is equally specific as the
			// vertex as a whole, then we should be able to ignore all
			// other winning conjuncts.
			winners = disjDefaultWinners
		}
		switch len(winners) {
		case 0:
			// Outcome (a) from the package overview: keep everything.
			t.logf("no winners; keeping all %d nodes", len(nodeMetas))
			for _, d := range nodeMetas {
				t.logf(" %p::%T %v", d.src, d.src, d.src.Pos())
				d.markRequired()
			}

		case 1:
			// Outcome (b): a unique winner; keep just that conjunct.
			t.logf("1 winner")
			src := winners[0].src
			t.logf(" %p::%T %v", src, src, src.Pos())
			winners[0].markRequired()

		default:
			// Outcome (c): several winners. If none is already
			// required, defer the choice to solveUndecideds.
			t.logf("%d winners found", len(winners))
			foundRequired := false
			for _, d := range winners {
				if d.isRequired() {
					foundRequired = true
					break
				}
			}
			if !foundRequired {
				t.logf("no winner already required")
				t.undecided = append(t.undecided, winners)
			}
		}
	}

	for i, a := range v.Arcs {
		t.logf("arc %d %v", i, a.Label)
		t.findRedundancies(a, keepAll)
	}

	if v, ok := v.BaseValue.(*adt.Vertex); ok && v != nil {
		t.logf("exploring base value: start")
		t.findRedundancies(v, keepAll)
		t.logf("exploring base value: end")
	}
}

// If somewhere within a conjunct, there's a *[adt.FieldReference], or
// other type of [adt.Resolver], then we need to find that, and ensure
// that:
//
// 1. if the resolver part of this conjunct survives, then the target
// of the resolver must survive too (i.e. we don't create dangling
// pointers).
// This bit is done for free, because if a vertex
// contains a conjunct for some reference `r`, then whatever `r`
// resolved to will also appear in this vertex's conjuncts.
//
// 2. if the target of the resolver survives, then we must
// survive. This enforces the basic rule that if a conjunct
// survives then all the references to that conjunct must also
// survive.
func (t *trimmerV3) linkResolvers(c adt.Conjunct, addInverse bool) {
	var origNm *nodeMeta
	if src := c.Field().Source(); src != nil {
		origNm = t.getNodeMeta(src)
	}

	t.resolveElemAll(c, func(resolver adt.Resolver, resolvedTo *adt.Vertex) {
		resolvedTo.VisitLeafConjuncts(func(resolvedToC adt.Conjunct) bool {
			src := resolvedToC.Source()
			if src == nil {
				return true
			}
			resolvedToNm := t.getNodeMeta(src)
			resolverNm := t.getNodeMeta(resolver.Source())

			// If the resolvedToC conjunct survives, then the resolver
			// itself must survive too.
			resolverNm.addRequiredBy(resolvedToNm)
			t.logf(" (regular) %v reqBy %v",
				resolverNm.src.Pos(), resolvedToNm.src.Pos())
			if addInverse {
				t.logf(" (inverse) %v reqBy %v",
					resolvedToNm.src.Pos(), resolverNm.src.Pos())
				resolvedToNm.addRequiredBy(resolverNm)
			}

			// Don't break lexical scopes. Consider:
			//
			//	c: {
			//		x: int
			//		y: x
			//	}
			//	c: x: 5
			//
			// We must make sure that if `y: x` survives, then `x:
			// int` survives (or at least the field does - it could
			// be simplified to `x: _`) *even though* there is a
			// more specific value for c.x in the final line. Thus
			// the field which we have found by resolution, is
			// required by the original element.
			if origNm != nil &&
				resolvedToNm.parent.isAncestorOf(origNm) {
				t.logf(" (extra) %v reqBy %v",
					resolvedToNm.src.Pos(), origNm.src.Pos())
				resolvedToNm.addRequiredBy(origNm)
			}
			return true
		})
	})
}

// resolveElemAll walks the expression tree of c's element and invokes
// f for every resolver found within it, together with the vertex that
// the resolver resolves to. Resolvers that fail to resolve (or have
// no source) are skipped silently.
func (t *trimmerV3) resolveElemAll(c adt.Conjunct, f func(adt.Resolver, *adt.Vertex)) {
	worklist := []adt.Elem{c.Elem()}
	for len(worklist) != 0 {
		elem := worklist[0]
		worklist = worklist[1:]

		// Push all sub-expressions that could themselves contain
		// resolvers.
		switch elemT := elem.(type) {
		case *adt.UnaryExpr:
			worklist = append(worklist, elemT.X)
		case *adt.BinaryExpr:
			worklist = append(worklist, elemT.X, elemT.Y)
		case *adt.DisjunctionExpr:
			for _, disjunct := range elemT.Values {
				worklist = append(worklist, disjunct.Val)
			}
		case *adt.Disjunction:
			for _, disjunct := range elemT.Values {
				worklist = append(worklist, disjunct)
			}
		case *adt.Ellipsis:
			worklist = append(worklist, elemT.Value)
		case *adt.BoundExpr:
			worklist = append(worklist, elemT.Expr)
		case *adt.BoundValue:
			worklist = append(worklist, elemT.Value)
		case *adt.Interpolation:
			for _, part := range elemT.Parts {
				worklist = append(worklist, part)
			}
		case *adt.Conjunction:
			for _, val := range elemT.Values {
				worklist = append(worklist, val)
			}
		case *adt.CallExpr:
			worklist = append(worklist, elemT.Fun)
			for _, arg := range elemT.Args {
				worklist = append(worklist, arg)
			}
		case *adt.Comprehension:
			for _, y := range elemT.Clauses {
				switch y := y.(type) {
				case *adt.IfClause:
					worklist = append(worklist, y.Condition)
				case *adt.LetClause:
					worklist = append(worklist, y.Expr)
				case *adt.ForClause:
					worklist = append(worklist, y.Src)
				}
			}
		case *adt.LabelReference:
			elem = &adt.ValueReference{UpCount: elemT.UpCount, Src: elemT.Src}
			t.logf(" converting LabelReference to ValueReference")
		}

		if r, ok := elem.(adt.Resolver); ok && elem.Source() != nil {
			resolvedTo, bot := t.ctx.Resolve(c, r)
			if bot != nil {
				// Resolution failed; nothing to report for this elem.
				continue
			}
			t.logf(" resolved to %p", resolvedTo)
			f(r, resolvedTo)
		}
	}
}

// Are all the cs combined, (more or) equally as specific as v?
func (t *trimmerV3) equallySpecific(v *adt.Vertex, cs ...adt.Conjunct) bool {
	t.inc()
	// t.ctx.LogEval = 1
	// Build a fresh vertex containing only the given conjuncts, then
	// test whether v subsumes it.
	conjVertex := &adt.Vertex{
		Parent: v.Parent,
		Label:  v.Label,
	}
	for _, c := range cs {
		// For a reference, test what it resolves to rather than the
		// reference itself.
		if r, ok := c.Elem().(adt.Resolver); ok {
			v1, bot := t.ctx.Resolve(c, r)
			if bot == nil {
				v1.VisitLeafConjuncts(func(c adt.Conjunct) bool {
					conjVertex.InsertConjunct(c)
					return true
				})
				continue
			}
		}
		conjVertex.InsertConjunct(c)
	}
	conjVertex.Finalize(t.ctx)
	// err == nil means the combined conjuncts are at least as
	// specific as v.
	err := subsume.Value(t.ctx, v, conjVertex)
	if err != nil {
		t.logf(" not equallySpecific")
		if t.trace != nil && t.ctx.LogEval > 0 {
			errors.Print(t.trace, err, nil)
		}
	}
	// t.ctx.LogEval = 0
	t.dec()
	return err == nil
}

// NB this is not perfect. We do not attempt to track dependencies
// *between* different sets of "winning" nodes.
//
// We could have two sets, [a, b, c] and [c, d], and decide here to
// require a from the first set, and then c from the second set. This
// preserves more nodes than strictly necessary (preserving c on its
// own is sufficient to satisfy both sets). However, doing this
// perfectly is the “Hitting Set Problem”, and it is proven
// NP-complete. Thus for efficiency, we consider each set (more or
// less) in isolation.
func (t *trimmerV3) solveUndecideds() {
	if len(t.undecided) == 0 {
		return
	}
	undecided := t.undecided
	// Sort each set individually; any set that already contains a
	// required node needs no decision, so drop it (the nil entry
	// sorts to the end below because its seen-count sum and length
	// are both 0).
	for i, ds := range undecided {
		ds.sort()
		if ds.hasRequired() {
			undecided[i] = nil
		}
	}

	// Order the sets by sum of seen-counts (descending), then by
	// length (descending), then element-wise by source position.
	slices.SortFunc(undecided, func(as, bs nodeMetas) int {
		aSum, bSum := as.seenCountSum(), bs.seenCountSum()
		if aSum != bSum {
			return bSum - aSum
		}
		aLen, bLen := len(as), len(bs)
		if aLen != bLen {
			return bLen - aLen
		}
		for i, a := range as {
			b := bs[i]
			if posCmp := a.src.Pos().Compare(b.src.Pos()); posCmp != 0 {
				return posCmp
			}
		}
		return 0
	})

	for _, nms := range undecided {
		if len(nms) == 0 {
			// once we get to length of 0, everything that follows must
			// also be length of 0
			break
		}
		t.logf("choosing winner from %v", nms)
		if nms.hasRequired() {
			// A choice made for an earlier set may already have
			// required one of this set's nodes.
			t.logf(" already contains required node")
			continue
		}

		// The set is sorted, so its head is the preferred winner.
		nms[0].markRequired()
	}
}

// nodeMetas is one set of "winning" conjunct nodes, of which at least
// one must be kept.
type nodeMetas []*nodeMeta

// Sort a single set of nodeMetas. If a set contains x and y:
//
// - if x is required by y, then x will come first;
// - otherwise whichever node has a higher seenCount comes first;
// - otherwise sort x and y by their src position.
func (nms nodeMetas) sort() {
	slices.SortFunc(nms, func(a, b *nodeMeta) int {
		if a.isRequiredBy(b) {
			return -1
		}
		if b.isRequiredBy(a) {
			return 1
		}
		aSeen, bSeen := a.seenCount, b.seenCount
		if aSeen != bSeen {
			return bSeen - aSeen
		}
		return a.src.Pos().Compare(b.src.Pos())
	})
}

// seenCountSum returns the sum of seenCounts over all nodes in the
// set.
func (nms nodeMetas) seenCountSum() (sum int) {
	for _, d := range nms {
		sum += d.seenCount
	}
	return sum
}

// hasRequired reports whether any node in the set is (transitively)
// required.
func (nms nodeMetas) hasRequired() bool {
	for _, d := range nms {
		if d.isRequired() {
			return true
		}
	}
	return false
}

// After all the analysis is complete, trim finally modifies the AST,
// removing (or simplifying) nodes which have not been found to be
// required.
func (t *trimmerV3) trim(files []*ast.File, dir string) error {
	t.inc()
	defer t.dec()

	for _, f := range files {
		// Only rewrite files within the build instance's directory.
		if !strings.HasPrefix(f.Filename, dir) {
			continue
		}
		t.logf("%s", f.Filename)
		t.inc()
		astutil.Apply(f, func(c astutil.Cursor) bool {
			n := c.Node()
			// NOTE(review): d is nil for any node that
			// findStaticDependencies never registered; these files
			// were all walked there, so in practice every visited
			// node should have an entry — confirm, as a nil d would
			// panic in isRequired below.
			d := t.nodes[n]

			// Only nodes actually seen during evaluation
			// (seenCount > 0) are candidates; nodes only ever seen
			// in the AST are left untouched.
			if !d.isRequired() && d.seenCount > 0 {
				// The astutils cursor only supports deleting nodes if the
				// node is a child of a structlit or a file. So in all
				// other cases, we must replace the child with top.
				var replacement ast.Node = ast.NewIdent("_")
				if d.parent != nil {
					switch parentN := d.parent.src.(type) {
					case *ast.File, *ast.StructLit:
						replacement = nil
					case *ast.Comprehension:
						// The value of a comprehension is replaced with
						// an empty struct rather than top.
						if n == parentN.Value {
							replacement = ast.NewStruct()
						}
					}
				}
				if replacement == nil {
					t.logf("deleting node %p::%T %v", n, n, n.Pos())
					c.Delete()
				} else {
					t.logf("replacing node %p::%T with %T %v",
						n, n, replacement, n.Pos())
					c.Replace(replacement)
				}
			}

			return true
		}, nil)
		// Sanitize repairs the AST after the removals/replacements
		// above.
		if err := astutil.Sanitize(f); err != nil {
			return err
		}
		t.dec()
	}
	return nil
}