cuelang.org/go@v0.13.0/tools/trim/trimv3.go (about) 1 // Copyright 2025 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package trim 16 17 // # Overview 18 // 19 // The goal of trim is to remove redundant code within the supplied 20 // CUE ASTs. 21 // 22 // This is achieved by analysis of both the ASTs and the result of 23 // evaluation: looking at conjuncts etc within vertices. For each 24 // vertex, we try to identify conjuncts which are, by subsumption, as 25 // specific as the vertex as a whole. There are three possible outcomes: 26 // 27 // a) No conjuncts on their own are found to be as specific as the 28 // vertex. In this case, we keep all the conjuncts. This is 29 // conservative, and may lead to conjuncts being kept which don't need 30 // to be, because we don't attempt to detect subsumption between 31 // subsets of a vertex's conjuncts. It is however safe. 32 // 33 // b) Exactly one conjunct is found which is as specific as the 34 // vertex. We keep this conjunct. Note that we do not currently 35 // consider that there may be other conjuncts within this vertex which 36 // have to be kept for other reasons, and in conjunction are as 37 // specific as this vertex. So again, we may end up keeping more 38 // conjuncts than strictly necessary, but it is still safe. 39 // 40 // c) Several conjuncts are found which are individually as specific 41 // as the vertex. We save this set of "winning conjuncts" for later.
42 // 43 // As we progress, we record the number of times each conjunct is seen 44 // (conjunct identity is taken as the conjunct's source node). Once we 45 // have completed traversing the vertices, we may have several sets of 46 // "winning conjuncts" each of which needs a conjunct selected to 47 // keep. We order these sets individually by seen-count (descending), 48 // and collectively by the sum of seen-counts for each set (also 49 // descending). For each set in turn, if there is no conjunct that is 50 // already kept, we choose to keep the most widely seen conjunct. If 51 // there is still a tie, we order by source code position. 52 // 53 // Additionally, if a conjunct survives, then we make sure that all 54 // references to that conjunct also survive. This helps to prevent 55 // surprises for the user: a field `x` that constrains a field `y` 56 // will always do so, even if `y` is always found to be more 57 // specific. For example: 58 // 59 // x: >5 60 // x: <10 61 // y: 7 62 // y: x 63 // 64 // Here, `y` will not be simplified to 7. By contrast, 65 // 66 // y: >5 67 // y: <10 68 // y: 7 69 // 70 // will be simplified to `y: 7`. 71 // 72 // # Ignoring conjuncts 73 // 74 // When we inspect each vertex, there may be conjuncts that we must 75 // ignore for the purposes of finding conjuncts as specific as the 76 // vertex. The danger is that such conjuncts are found to be as 77 // specific as the whole vertex, thus causing the other conjuncts to 78 // be removed. But this can alter the semantics of the CUE code. For 79 // example, conjuncts that originate from within a disjunction branch 80 // must be ignored. Consider: 81 // 82 // d: 6 | string 83 // o: d & int 84 // 85 // The vertex for `o` will contain conjuncts for 6, and int. We would 86 // find the 6 is as specific as the vertex, so it is tempting to 87 // remove the `int`. But if we do, then the value of `o` changes 88 // because the string-branch of the disjunction can no longer be 89 // dismissed. 
Processing of disjunctions cannot be done on the AST, 90 // because disjunctions may contain references which we need to 91 // resolve, in order to know which conjuncts to ignore. For example: 92 // 93 // d: c | string 94 // o: d & int 95 // c: 6 96 // 97 // Thus before we traverse the vertices to identify redundant 98 // conjuncts, we first traverse the vertices looking for disjunctions, 99 // and recording which conjuncts should be ignored. 100 // 101 // Another example is patterns: we must ignore conjuncts which are the 102 // roots of patterns. Consider: 103 // 104 // [string]: 5 105 // o: int 106 // 107 // In the vertex for `o` we would find conjuncts for 5 and `int`. We 108 // must ignore the 5, otherwise we would find that it is as specific 109 // as `o`, which could cause the entire field declaration `o: int` to 110 // be removed, which then changes the value of the CUE program. 111 // 112 // As with disjunctions, an earlier pass over the vertices identifies 113 // patterns and marks them accordingly. 114 // 115 // Finally, embedded values require special treatment. Consider: 116 // 117 // x: y: 5 118 // z: { 119 // x 120 // } 121 // 122 // Unfortunately, the evaluator doesn't track how different conjuncts 123 // arrive in a vertex: the vertex for `z` will not contain a conjunct 124 // which is a reference for `x`. All we will find in `z` is the arc 125 // for `y`. Because of this, we cannot discover that we must keep the 126 // embedded `x` -- it simply does not exist. So we take a rather blunt 127 // approach: an analysis of the AST will find where embeddings occur, 128 // which we record, and then when a vertex contains a struct which we 129 // know has an embedding, we always keep all the conjuncts in that 130 // vertex and its descendents. 
import (
	"fmt"
	"io"
	"os"
	"slices"
	"strings"

	"cuelang.org/go/cue"
	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/ast/astutil"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/token"
	"cuelang.org/go/internal/core/adt"
	"cuelang.org/go/internal/core/runtime"
	"cuelang.org/go/internal/core/subsume"
	"cuelang.org/go/internal/value"
)

// filesV3 is the entry point for the v3 trim implementation. It runs
// the analysis passes described in the package overview, in order:
// static (AST) dependencies, patterns, disjunctions, redundancy
// detection, and finally tie-breaking of the "undecided" winner sets.
// It then rewrites the given files in place. Only files within the
// build instance's directory are modified.
func filesV3(files []*ast.File, val cue.Value, cfg *Config) error {
	dir := val.BuildInstance().Dir
	// Normalize dir so it ends with exactly one path separator; it is
	// used later as a plain string prefix to select which files to
	// rewrite.
	dir = strings.TrimRight(dir, string(os.PathSeparator)) +
		string(os.PathSeparator)

	if cfg.Trace && cfg.TraceWriter == nil {
		cfg.TraceWriter = os.Stderr
	}

	r, v := value.ToInternal(val)
	ctx := adt.NewContext(r, v)
	t := &trimmerV3{
		r:     r,
		ctx:   ctx,
		nodes: make(map[ast.Node]*nodeMeta),
		trace: cfg.TraceWriter,
	}

	t.logf("\nStarting trim in dir %q with files:", dir)
	for i, file := range files {
		t.logf(" %d: %s", i, file.Filename)
	}
	// Pass order matters: patterns and disjunctions must be marked
	// (required+ignore) before redundancy detection runs; see the
	// package overview for why.
	t.logf("\nFinding static dependencies")
	t.findStaticDependencies(files)
	t.logf("\nFinding patterns")
	t.findPatterns(v)
	t.logf("\nFinding disjunctions")
	t.findDisjunctions(v)
	t.logf("\nFinding redundances")
	t.findRedundancies(v, false)
	t.logf("\nSolve undecideds")
	t.solveUndecideds()

	t.logf("\nTrimming source")
	return t.trim(files, dir)
}

// nodeMeta accumulates everything trim learns about a single AST node
// across the analysis passes.
type nodeMeta struct {
	// The static parent - i.e. parent from the AST.
	parent *nodeMeta

	// src is the AST node this metadata describes.
	src ast.Node

	// If true, then this node must not be removed, because it is not
	// redundant in at least one place where it's used.
	required bool

	// If true, then conjuncts of this node should be ignored for the
	// purpose of testing for redundant conjuncts.
	ignoreConjunct bool

	// If this is true then this node has one or more embedded values
	// (statically) - i.e. EmbedDecl has been found within this node
	// (and src will be either a File or a StructLit).
	hasEmbedding bool

	// If x.requiredBy = {y,z} then it means x must be kept if one or
	// more of {y,z} are kept. It is directional: if x must be kept for
	// other reasons, then that says nothing about whether any of {y,z}
	// must be kept.
	requiredBy []*nodeMeta

	// The number of times conjuncts of this node have been found in
	// the vertices. This is used for choosing winning conjuncts, and
	// to ensure that we never remove a node which we have only seen in
	// the AST, and not in result of evaluation.
	seenCount int
}

// incSeenCount records one more sighting of this node's conjuncts
// during vertex traversal.
func (nm *nodeMeta) incSeenCount() {
	nm.seenCount++
}

// markRequired pins this node so that it survives trimming.
func (nm *nodeMeta) markRequired() {
	nm.required = true
}

// addRequiredBy records that nm must be kept whenever e is kept. The
// slice is kept duplicate-free via a linear scan (sets are expected
// to be small).
func (nm *nodeMeta) addRequiredBy(e *nodeMeta) {
	for _, f := range nm.requiredBy {
		if f == e {
			return
		}
	}
	nm.requiredBy = append(nm.requiredBy, e)
}

// isRequiredBy reports whether a is (transitively) required by b. A
// node is always considered required by itself.
func (a *nodeMeta) isRequiredBy(b *nodeMeta) bool {
	if a == b {
		return true
	}
	return a._isRequiredBy(map[*nodeMeta]struct{}{a: {}}, b)
}

// Need to cope with cycles, hence the seen/visited-set.
func (a *nodeMeta) _isRequiredBy(seen map[*nodeMeta]struct{}, b *nodeMeta) bool {
	for _, e := range a.requiredBy {
		if e == b {
			return true
		}
		if _, found := seen[e]; found {
			continue
		}
		seen[e] = struct{}{}
		if e._isRequiredBy(seen, b) {
			return true
		}
	}
	return false
}

// True iff this node is required, or any of the nodes that require
// this node are themselves required (transitively).
262 func (nm *nodeMeta) isRequired() bool { 263 if nm.required { 264 return true 265 } 266 if len(nm.requiredBy) == 0 { 267 return false 268 } 269 return nm._isRequired(map[*nodeMeta]struct{}{nm: {}}) 270 } 271 272 func (nm *nodeMeta) _isRequired(seen map[*nodeMeta]struct{}) bool { 273 if nm.required { 274 return true 275 } 276 for _, e := range nm.requiredBy { 277 if _, found := seen[e]; found { 278 continue 279 } 280 seen[e] = struct{}{} 281 if e._isRequired(seen) { 282 nm.required = true 283 return true 284 } 285 } 286 return false 287 } 288 289 // True iff this node or any of its parent nodes (static/AST parents), 290 // have been identified as containing embedded values. 291 func (nm *nodeMeta) isEmbedded() bool { 292 for ; nm != nil; nm = nm.parent { 293 if nm.hasEmbedding { 294 return true 295 } 296 } 297 return false 298 } 299 300 // True iff a is an ancestor of b (in the static/AST parent-child 301 // sense). 302 func (a *nodeMeta) isAncestorOf(b *nodeMeta) bool { 303 if a == nil { 304 return false 305 } 306 for b != nil { 307 if b == a { 308 return true 309 } 310 b = b.parent 311 } 312 return false 313 } 314 315 type trimmerV3 struct { 316 r *runtime.Runtime 317 ctx *adt.OpContext 318 nodes map[ast.Node]*nodeMeta 319 320 undecided []nodeMetas 321 322 // depth is purely for debugging trace indentation level. 323 depth int 324 trace io.Writer 325 } 326 327 func (t *trimmerV3) logf(format string, args ...any) { 328 w := t.trace 329 if w == nil { 330 return 331 } 332 fmt.Fprintf(w, "%*s", t.depth*3, "") 333 fmt.Fprintf(w, format, args...) 
334 fmt.Fprintln(w) 335 } 336 337 func (t *trimmerV3) inc() { t.depth++ } 338 func (t *trimmerV3) dec() { t.depth-- } 339 340 func (t *trimmerV3) getNodeMeta(n ast.Node) *nodeMeta { 341 if n == nil { 342 return nil 343 } 344 d, found := t.nodes[n] 345 if !found { 346 d = &nodeMeta{src: n} 347 t.nodes[n] = d 348 } 349 return d 350 } 351 352 // Discovers findStaticDependencies between nodes by walking through the AST of 353 // the files. 354 // 355 // 1. Establishes that if a node survives then its parent must also 356 // survive. I.e. a parent is required by its children. 357 // 358 // 2. Marks the arguments for call expressions as required: no 359 // simplification can occur there. This is because we cannot discover 360 // the relationship between arguments to a function and the function's 361 // result, and so any simplification of the arguments may change the 362 // result of the function call in unknown ways. 363 // 364 // 3. The conjuncts in a adt.Vertex do not give any information as to 365 // whether they have arrived via embedding or not. But, in the AST, we 366 // do have that information. So find and record embedding information. 
func (t *trimmerV3) findStaticDependencies(files []*ast.File) {
	t.inc()
	defer t.dec()

	// ancestors is the stack of nodeMetas from the file root down to
	// the current node's parent.
	var ancestors []*nodeMeta
	// callCount > 0 means we are currently somewhere inside a call
	// expression (including its arguments).
	callCount := 0
	for _, f := range files {
		t.logf("%s", f.Filename)
		ast.Walk(f, func(n ast.Node) bool {
			t.inc()
			t.logf("%p::%T %v", n, n, n.Pos())
			nm := t.getNodeMeta(n)
			if field, ok := n.(*ast.Field); ok {
				// Fields with a "!" or "?" constraint are kept and
				// never allowed to win a redundancy contest.
				switch field.Constraint {
				case token.NOT, token.OPTION:
					t.logf(" ignoring %v", nm.src.Pos())
					nm.ignoreConjunct = true
					nm.markRequired()
				}
			}
			if l := len(ancestors); l > 0 {
				// A parent is required by its children (point 1 above).
				parent := ancestors[l-1]
				parent.addRequiredBy(nm)
				nm.parent = parent
			}
			ancestors = append(ancestors, nm)
			if _, ok := n.(*ast.CallExpr); ok {
				callCount++
			}
			if callCount > 0 {
				// This is somewhat unfortunate, but for now, as soon as
				// we're in the arguments for a function call, we prevent
				// all simplifications.
				nm.markRequired()
			}
			if _, ok := n.(*ast.EmbedDecl); ok && nm.parent != nil {
				// The parent of an EmbedDecl is always either a File or a
				// StructLit.
				nm.parent.hasEmbedding = true
			}
			return true
		}, func(n ast.Node) {
			// Post-order: unwind the call-nesting count and the
			// ancestor stack.
			if _, ok := n.(*ast.CallExpr); ok {
				callCount--
			}
			ancestors = ancestors[:len(ancestors)-1]
			t.dec()
		})
	}
}

// Discovers patterns by walking vertices and their arcs recursively.
//
// Conjuncts that originate from the pattern constraint must be
// ignored when searching for redundancies, otherwise they can be
// found to be more-or-equally-specific than the vertex in which
// they're found, and could lead to the entire field being
// removed. These conjuncts must also be kept because even if the
// pattern is not actually used, it may form part of the public API of
// the CUE, and so removing an unused pattern may alter the API.
//
// We only need to mark the conjuncts at the "top level" of the
// pattern constraint as required+ignore; we do not need to descend
// into the arcs of the pattern constraint. This is because the
// pattern only matches against a key, and not a path. So, even with:
//
//	a: [string]: x: y: z: 5
//
// we only need to mark the x as required+ignore, and not the y, z, or
// 5. This ensures we later ignore only this x when simplifying other
// conjuncts in a vertex whose label has matched this pattern. If we
// add:
//
//	b: w: x: y: {}
//	b: a
//
// This will get trimmed to:
//
//	a: [string]: x: y: z: 5
//	b: w: _
//	b: a
//
// I.e. by ignoring the pattern's "top level" conjuncts, we ensure we
// keep b: w, even though the pattern is equally specific to the
// vertex for b.w, and the explicit b: w (from line 2) is less
// specific.
func (t *trimmerV3) findPatterns(v *adt.Vertex) {
	t.inc()
	defer t.dec()

	// Breadth-first traversal over every vertex reachable from v.
	worklist := []*adt.Vertex{v}
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		t.logf("vertex %p; kind %v; value %p::%T",
			v, v.Kind(), v.BaseValue, v.BaseValue)
		t.inc()

		if patterns := v.PatternConstraints; patterns != nil {
			for i, pair := range patterns.Pairs {
				t.logf("pattern %d %p::%T", i, pair.Constraint, pair.Constraint)
				t.inc()
				pair.Constraint.VisitLeafConjuncts(func(c adt.Conjunct) bool {
					field := c.Field()
					elem := c.Elem()
					expr := c.Expr()
					t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T",
						field, field, elem, elem, expr, expr)

					// Only the pattern's top-level conjuncts are marked;
					// see the comment above for why we don't descend
					// into their arcs.
					if src := field.Source(); src != nil {
						nm := t.getNodeMeta(src)
						t.logf(" ignoring %v", nm.src.Pos())
						nm.ignoreConjunct = true
						nm.markRequired()
					}

					return true
				})
				t.dec()
			}
		}

		t.dec()

		worklist = append(worklist, v.Arcs...)
		if v, ok := v.BaseValue.(*adt.Vertex); ok {
			worklist = append(worklist, v)
		}
	}
}

// Discovers disjunctions by walking vertices and their arcs
// recursively.
//
// Disjunctions and their branches must be found before we attempt to
// simplify vertices. We must find disjunctions and mark all conjuncts
// within each branch of a disjunction, including all conjuncts that
// can be reached via resolution, as required+ignore.
//
// Failure to do this can lead to the removal of conjuncts in a vertex
// which were essential for discriminating between branches of a
// disjunction.
func (t *trimmerV3) findDisjunctions(v *adt.Vertex) {
	t.inc()
	defer t.dec()

	// Phase 1: find every disjunction reachable from v, and finalize
	// each of its branches into a synthetic stand-alone vertex.
	var branches []*adt.Vertex
	seen := make(map[*adt.Vertex]struct{})
	worklist := []*adt.Vertex{v}
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		if _, found := seen[v]; found {
			continue
		}
		seen[v] = struct{}{}

		t.logf("vertex %p; kind %v; value %p::%T",
			v, v.Kind(), v.BaseValue, v.BaseValue)
		t.inc()

		v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
			switch disj := c.Elem().(type) {
			case *adt.Disjunction:
				t.logf("found disjunction")
				for i, val := range disj.Values {
					t.logf("branch %d", i)
					branch := &adt.Vertex{
						Parent: v.Parent,
						Label:  v.Label,
					}
					c := adt.MakeConjunct(c.Env, val, c.CloseInfo)
					branch.InsertConjunct(c)
					branch.Finalize(t.ctx)
					branches = append(branches, branch)
				}

			case *adt.DisjunctionExpr:
				t.logf("found disjunctionexpr")
				for i, val := range disj.Values {
					t.logf("branch %d", i)
					branch := &adt.Vertex{
						Parent: v.Parent,
						Label:  v.Label,
					}
					c := adt.MakeConjunct(c.Env, val.Val, c.CloseInfo)
					branch.InsertConjunct(c)
					branch.Finalize(t.ctx)
					branches = append(branches, branch)
				}
			}
			return true
		})

		t.dec()

		worklist = append(worklist, v.Arcs...)
		if v, ok := v.BaseValue.(*adt.Vertex); ok {
			worklist = append(worklist, v)
		}
	}

	// Phase 2: walk every branch vertex (plus everything its
	// conjuncts resolve to), marking all conjuncts found as
	// required+ignore.
	clear(seen)
	worklist = branches
	for len(worklist) != 0 {
		v := worklist[0]
		worklist = worklist[1:]

		if _, found := seen[v]; found {
			continue
		}
		seen[v] = struct{}{}

		v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
			if src := c.Field().Source(); src != nil {
				nm := t.getNodeMeta(src)
				t.logf(" ignoring %v", nm.src.Pos())
				nm.ignoreConjunct = true
				nm.markRequired()
			}
			t.resolveElemAll(c, func(resolver adt.Resolver, resolvedTo *adt.Vertex) {
				worklist = append(worklist, resolvedTo.Arcs...)
			})
			return true
		})
		worklist = append(worklist, v.Arcs...)
	}
}

// keepAllChildren marks n and every AST node beneath it as required.
func (t *trimmerV3) keepAllChildren(n ast.Node) {
	ast.Walk(n, func(n ast.Node) bool {
		nm := t.getNodeMeta(n)
		nm.markRequired()
		return true
	}, nil)
}

// Once we have identified, and masked out, call expressions,
// embeddings, patterns, and disjunctions, we can finally work
// recursively through the vertices, testing their conjuncts to find
// redundant conjuncts.
func (t *trimmerV3) findRedundancies(v *adt.Vertex, keepAll bool) {
	// keepAll, once set (by an embedding), applies to this vertex and
	// all of its descendents.
	v = v.DerefDisjunct()
	t.logf("vertex %p (parent %p); kind %v; value %p::%T",
		v, v.Parent, v.Kind(), v.BaseValue, v.BaseValue)
	t.inc()
	defer t.dec()

	_, isDisjunct := v.BaseValue.(*adt.Disjunction)
	for _, si := range v.Structs {
		if src := si.StructLit.Src; src != nil {
			t.logf("struct lit %p src: %p::%T %v", si.StructLit, src, src, src.Pos())
			nm := t.getNodeMeta(src)
			nm.incSeenCount()
			// An embedded struct (or one inside an embedding) forces
			// keepAll from here down; see the package overview.
			keepAll = keepAll || nm.isEmbedded()
			if nm.hasEmbedding {
				t.logf(" (has embedding root)")
			}
			if nm.isEmbedded() {
				t.logf(" (isEmbedded)")
			} else if keepAll {
				t.logf(" (keepAll)")
			}

			if !isDisjunct {
				continue
			}
			// For a disjunction result, evaluate this struct lit on its
			// own so its contents are also examined for redundancies.
			v1 := &adt.Vertex{
				Parent: v.Parent,
				Label:  v.Label,
			}
			c := adt.MakeConjunct(si.Env, si.StructLit, si.CloseInfo)
			v1.InsertConjunct(c)
			v1.Finalize(t.ctx)
			t.logf("exploring disj struct lit %p (src %v): start", si, src.Pos())
			t.findRedundancies(v1, keepAll)
			t.logf("exploring disj struct lit %p (src %v): end", si, src.Pos())
		}
	}

	if keepAll {
		for _, si := range v.Structs {
			if src := si.StructLit.Src; src != nil {
				t.keepAllChildren(src)
			}
		}
	}

	if patterns := v.PatternConstraints; patterns != nil {
		for i, pair := range patterns.Pairs {
			t.logf("pattern %d %p::%T", i, pair.Constraint, pair.Constraint)
			t.findRedundancies(pair.Constraint, keepAll)
		}
	}

	// Note: the local nodeMetas shadows the nodeMetas slice type,
	// which is not needed in this function.
	var nodeMetas, winners, disjDefaultWinners []*nodeMeta
	v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
		field := c.Field()
		elem := c.Elem()
		expr := c.Expr()
		src := field.Source()
		if src == nil {
			t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T, src nil",
				field, field, elem, elem, expr, expr)
			return true
		}

		t.logf("conjunct field: %p::%T, elem: %p::%T, expr: %p::%T, src: %v",
			field, field, elem, elem, expr, expr, src.Pos())

		nm := t.getNodeMeta(src)
		nm.incSeenCount()

		// Currently we replace redundant structs with _. If it becomes
		// desired to replace them with {} instead, then we want this
		// code instead of the block that follows:
		//
		// if exprSrc := expr.Source(); exprSrc != nil {
		// 	exprNm := t.getNodeMeta(exprSrc)
		// 	exprNm.addRequiredBy(nm)
		// }
		if exprSrc := expr.Source(); exprSrc != nil && len(v.Arcs) == 0 {
			switch expr.(type) {
			case *adt.StructLit, *adt.ListLit:
				t.logf(" saving emptyness")
				exprNm := t.getNodeMeta(exprSrc)
				exprNm.addRequiredBy(nm)
			}
		}

		if nm.ignoreConjunct {
			// Masked out by an earlier pass (pattern, disjunction
			// branch, or "!"/"?" field constraint).
			t.logf(" ignoring conjunct")
		} else {
			nodeMetas = append(nodeMetas, nm)
			if t.equallySpecific(v, c) {
				winners = append(winners, nm)
				t.logf(" equally specific: %p::%T", field, field)
			} else {
				t.logf(" redundant here: %p::%T", field, field)
			}
		}

		if disj, ok := expr.(*adt.DisjunctionExpr); ok && disj.HasDefaults {
			// Test whether *every* default branch of this disjunction
			// is as specific as the vertex.
			defaultCount := 0
			matchingDefaultCount := 0
			for _, branch := range disj.Values {
				if !branch.Default {
					continue
				}
				defaultCount++
				c := adt.MakeConjunct(c.Env, branch.Val, c.CloseInfo)
				if t.equallySpecific(v, c) {
					matchingDefaultCount++
				}
			}
			if defaultCount > 0 && defaultCount == matchingDefaultCount {
				t.logf(" found %d matching defaults in disjunction",
					matchingDefaultCount)
				disjDefaultWinners = append(disjDefaultWinners, nm)
			}
		}

		if compr, ok := elem.(*adt.Comprehension); ok {
			// The clauses of a comprehension and what they resolve to
			// are mutually dependent, hence addInverse=true.
			t.logf("comprehension found")
			for _, clause := range compr.Clauses {
				var conj adt.Conjunct
				switch clause := clause.(type) {
				case *adt.IfClause:
					conj = adt.MakeConjunct(c.Env, clause.Condition, c.CloseInfo)
				case *adt.ForClause:
					conj = adt.MakeConjunct(c.Env, clause.Src, c.CloseInfo)
				case *adt.LetClause:
					conj = adt.MakeConjunct(c.Env, clause.Expr, c.CloseInfo)
				}
				t.linkResolvers(conj, true)
			}
		}

		t.linkResolvers(c, false)
		return true
	})

	if keepAll {
		t.logf("keeping all %d nodes", len(nodeMetas))
		for _, d := range nodeMetas {
			t.logf(" %p::%T %v", d.src, d.src, d.src.Pos())
			d.markRequired()
		}

	} else {
		if len(disjDefaultWinners) != 0 {
			// For all the conjuncts that were disjunctions and contained
			// defaults, and *every* default is equally specific as the
			// vertex as a whole, then we should be able to ignore all
			// other winning conjuncts.
			winners = disjDefaultWinners
		}
		switch len(winners) {
		case 0:
			// Outcome (a) from the package overview: keep everything.
			t.logf("no winners; keeping all %d nodes", len(nodeMetas))
			for _, d := range nodeMetas {
				t.logf(" %p::%T %v", d.src, d.src, d.src.Pos())
				d.markRequired()
			}

		case 1:
			// Outcome (b): a unique winner; keep just that conjunct.
			t.logf("1 winner")
			src := winners[0].src
			t.logf(" %p::%T %v", src, src, src.Pos())
			winners[0].markRequired()

		default:
			// Outcome (c): several winners. If none is already
			// required, defer the choice to solveUndecideds.
			t.logf("%d winners found", len(winners))
			foundRequired := false
			for _, d := range winners {
				if d.isRequired() {
					foundRequired = true
					break
				}
			}
			if !foundRequired {
				t.logf("no winner already required")
				t.undecided = append(t.undecided, winners)
			}
		}
	}

	for i, a := range v.Arcs {
		t.logf("arc %d %v", i, a.Label)
		t.findRedundancies(a, keepAll)
	}

	if v, ok := v.BaseValue.(*adt.Vertex); ok && v != nil {
		t.logf("exploring base value: start")
		t.findRedundancies(v, keepAll)
		t.logf("exploring base value: end")
	}
}

// If somewhere within a conjunct, there's a *[adt.FieldReference], or
// other type of [adt.Resolver], then we need to find that, and ensure
// that:
//
// 1. if the resolver part of this conjunct survives, then the target
// of the resolver must survive too (i.e. we don't create dangling
// pointers).
// This bit is done for free, because if a vertex
// contains a conjunct for some reference `r`, then whatever `r`
// resolved to will also appear in this vertex's conjuncts.
//
// 2. if the target of the resolver survives, then we must
// survive. This enforces the basic rule that if a conjunct
// survives then all the references to that conjunct must also
// survive.
func (t *trimmerV3) linkResolvers(c adt.Conjunct, addInverse bool) {
	var origNm *nodeMeta
	if src := c.Field().Source(); src != nil {
		origNm = t.getNodeMeta(src)
	}

	t.resolveElemAll(c, func(resolver adt.Resolver, resolvedTo *adt.Vertex) {
		resolvedTo.VisitLeafConjuncts(func(resolvedToC adt.Conjunct) bool {
			src := resolvedToC.Source()
			if src == nil {
				return true
			}
			resolvedToNm := t.getNodeMeta(src)
			resolverNm := t.getNodeMeta(resolver.Source())

			// If the resolvedToC conjunct survives, then the resolver
			// itself must survive too.
			resolverNm.addRequiredBy(resolvedToNm)
			t.logf(" (regular) %v reqBy %v",
				resolverNm.src.Pos(), resolvedToNm.src.Pos())
			if addInverse {
				t.logf(" (inverse) %v reqBy %v",
					resolvedToNm.src.Pos(), resolverNm.src.Pos())
				resolvedToNm.addRequiredBy(resolverNm)
			}

			// Don't break lexical scopes. Consider:
			//
			//	c: {
			//		x: int
			//		y: x
			//	}
			//	c: x: 5
			//
			// We must make sure that if `y: x` survives, then `x:
			// int` survives (or at least the field does - it could
			// be simplified to `x: _`) *even though* there is a
			// more specific value for c.x in the final line. Thus
			// the field which we have found by resolution, is
			// required by the original element.
			if origNm != nil &&
				resolvedToNm.parent.isAncestorOf(origNm) {
				t.logf(" (extra) %v reqBy %v",
					resolvedToNm.src.Pos(), origNm.src.Pos())
				resolvedToNm.addRequiredBy(origNm)
			}
			return true
		})
	})
}

// resolveElemAll walks the expression tree of c's element and invokes
// f for every resolver found within it, together with the vertex that
// the resolver resolves to. Resolvers that fail to resolve (or have
// no source) are skipped silently.
func (t *trimmerV3) resolveElemAll(c adt.Conjunct, f func(adt.Resolver, *adt.Vertex)) {
	worklist := []adt.Elem{c.Elem()}
	for len(worklist) != 0 {
		elem := worklist[0]
		worklist = worklist[1:]

		// Push all sub-expressions that could themselves contain
		// resolvers.
		switch elemT := elem.(type) {
		case *adt.UnaryExpr:
			worklist = append(worklist, elemT.X)
		case *adt.BinaryExpr:
			worklist = append(worklist, elemT.X, elemT.Y)
		case *adt.DisjunctionExpr:
			for _, disjunct := range elemT.Values {
				worklist = append(worklist, disjunct.Val)
			}
		case *adt.Disjunction:
			for _, disjunct := range elemT.Values {
				worklist = append(worklist, disjunct)
			}
		case *adt.Ellipsis:
			worklist = append(worklist, elemT.Value)
		case *adt.BoundExpr:
			worklist = append(worklist, elemT.Expr)
		case *adt.BoundValue:
			worklist = append(worklist, elemT.Value)
		case *adt.Interpolation:
			for _, part := range elemT.Parts {
				worklist = append(worklist, part)
			}
		case *adt.Conjunction:
			for _, val := range elemT.Values {
				worklist = append(worklist, val)
			}
		case *adt.CallExpr:
			worklist = append(worklist, elemT.Fun)
			for _, arg := range elemT.Args {
				worklist = append(worklist, arg)
			}
		case *adt.Comprehension:
			for _, y := range elemT.Clauses {
				switch y := y.(type) {
				case *adt.IfClause:
					worklist = append(worklist, y.Condition)
				case *adt.LetClause:
					worklist = append(worklist, y.Expr)
				case *adt.ForClause:
					worklist = append(worklist, y.Src)
				}
			}
		case *adt.LabelReference:
			elem = &adt.ValueReference{UpCount: elemT.UpCount, Src: elemT.Src}
			t.logf(" converting LabelReference to ValueReference")
		}

		if r, ok := elem.(adt.Resolver); ok && elem.Source() != nil {
			resolvedTo, bot := t.ctx.Resolve(c, r)
			if bot != nil {
				// Resolution failed; nothing to report for this elem.
				continue
			}
			t.logf(" resolved to %p", resolvedTo)
			f(r, resolvedTo)
		}
	}
}

// Are all the cs combined, (more or) equally as specific as v?
func (t *trimmerV3) equallySpecific(v *adt.Vertex, cs ...adt.Conjunct) bool {
	t.inc()
	// t.ctx.LogEval = 1
	// Build a fresh vertex containing only the given conjuncts, then
	// test whether v subsumes it.
	conjVertex := &adt.Vertex{
		Parent: v.Parent,
		Label:  v.Label,
	}
	for _, c := range cs {
		// For a reference, test what it resolves to rather than the
		// reference itself.
		if r, ok := c.Elem().(adt.Resolver); ok {
			v1, bot := t.ctx.Resolve(c, r)
			if bot == nil {
				v1.VisitLeafConjuncts(func(c adt.Conjunct) bool {
					conjVertex.InsertConjunct(c)
					return true
				})
				continue
			}
		}
		conjVertex.InsertConjunct(c)
	}
	conjVertex.Finalize(t.ctx)
	// err == nil means the combined conjuncts are at least as
	// specific as v.
	err := subsume.Value(t.ctx, v, conjVertex)
	if err != nil {
		t.logf(" not equallySpecific")
		if t.trace != nil && t.ctx.LogEval > 0 {
			errors.Print(t.trace, err, nil)
		}
	}
	// t.ctx.LogEval = 0
	t.dec()
	return err == nil
}

// NB this is not perfect. We do not attempt to track dependencies
// *between* different sets of "winning" nodes.
//
// We could have two sets, [a, b, c] and [c, d], and decide here to
// require a from the first set, and then c from the second set. This
// preserves more nodes than strictly necessary (preserving c on its
// own is sufficient to satisfy both sets). However, doing this
// perfectly is the “Hitting Set Problem”, and it is proven
// NP-complete. Thus for efficiency, we consider each set (more or
// less) in isolation.
func (t *trimmerV3) solveUndecideds() {
	if len(t.undecided) == 0 {
		return
	}
	undecided := t.undecided
	// Sort each set individually; any set that already contains a
	// required node needs no decision, so drop it (the nil entry
	// sorts to the end below because its seen-count sum and length
	// are both 0).
	for i, ds := range undecided {
		ds.sort()
		if ds.hasRequired() {
			undecided[i] = nil
		}
	}

	// Order the sets by sum of seen-counts (descending), then by
	// length (descending), then element-wise by source position.
	slices.SortFunc(undecided, func(as, bs nodeMetas) int {
		aSum, bSum := as.seenCountSum(), bs.seenCountSum()
		if aSum != bSum {
			return bSum - aSum
		}
		aLen, bLen := len(as), len(bs)
		if aLen != bLen {
			return bLen - aLen
		}
		for i, a := range as {
			b := bs[i]
			if posCmp := a.src.Pos().Compare(b.src.Pos()); posCmp != 0 {
				return posCmp
			}
		}
		return 0
	})

	for _, nms := range undecided {
		if len(nms) == 0 {
			// once we get to length of 0, everything that follows must
			// also be length of 0
			break
		}
		t.logf("choosing winner from %v", nms)
		if nms.hasRequired() {
			// A choice made for an earlier set may already have
			// required one of this set's nodes.
			t.logf(" already contains required node")
			continue
		}

		// The set is sorted, so its head is the preferred winner.
		nms[0].markRequired()
	}
}

// nodeMetas is one set of "winning" conjunct nodes, of which at least
// one must be kept.
type nodeMetas []*nodeMeta

// Sort a single set of nodeMetas. If a set contains x and y:
//
// - if x is required by y, then x will come first;
// - otherwise whichever node has a higher seenCount comes first;
// - otherwise sort x and y by their src position.
func (nms nodeMetas) sort() {
	slices.SortFunc(nms, func(a, b *nodeMeta) int {
		if a.isRequiredBy(b) {
			return -1
		}
		if b.isRequiredBy(a) {
			return 1
		}
		aSeen, bSeen := a.seenCount, b.seenCount
		if aSeen != bSeen {
			return bSeen - aSeen
		}
		return a.src.Pos().Compare(b.src.Pos())
	})
}

// seenCountSum returns the sum of seenCounts over all nodes in the
// set.
func (nms nodeMetas) seenCountSum() (sum int) {
	for _, d := range nms {
		sum += d.seenCount
	}
	return sum
}

// hasRequired reports whether any node in the set is (transitively)
// required.
func (nms nodeMetas) hasRequired() bool {
	for _, d := range nms {
		if d.isRequired() {
			return true
		}
	}
	return false
}

// After all the analysis is complete, trim finally modifies the AST,
// removing (or simplifying) nodes which have not been found to be
// required.
func (t *trimmerV3) trim(files []*ast.File, dir string) error {
	t.inc()
	defer t.dec()

	for _, f := range files {
		// Only rewrite files within the build instance's directory.
		if !strings.HasPrefix(f.Filename, dir) {
			continue
		}
		t.logf("%s", f.Filename)
		t.inc()
		astutil.Apply(f, func(c astutil.Cursor) bool {
			n := c.Node()
			// NOTE(review): d is nil for any node that
			// findStaticDependencies never registered; these files
			// were all walked there, so in practice every visited
			// node should have an entry — confirm, as a nil d would
			// panic in isRequired below.
			d := t.nodes[n]

			// Only nodes actually seen during evaluation
			// (seenCount > 0) are candidates; nodes only ever seen
			// in the AST are left untouched.
			if !d.isRequired() && d.seenCount > 0 {
				// The astutils cursor only supports deleting nodes if the
				// node is a child of a structlit or a file. So in all
				// other cases, we must replace the child with top.
				var replacement ast.Node = ast.NewIdent("_")
				if d.parent != nil {
					switch parentN := d.parent.src.(type) {
					case *ast.File, *ast.StructLit:
						replacement = nil
					case *ast.Comprehension:
						// The value of a comprehension is replaced with
						// an empty struct rather than top.
						if n == parentN.Value {
							replacement = ast.NewStruct()
						}
					}
				}
				if replacement == nil {
					t.logf("deleting node %p::%T %v", n, n, n.Pos())
					c.Delete()
				} else {
					t.logf("replacing node %p::%T with %T %v",
						n, n, replacement, n.Pos())
					c.Replace(replacement)
				}
			}

			return true
		}, nil)
		// Sanitize repairs the AST after the removals/replacements
		// above.
		if err := astutil.Sanitize(f); err != nil {
			return err
		}
		t.dec()
	}
	return nil
}