cuelang.org/go@v0.13.0/internal/core/toposort/vertex.go (about)

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package toposort
    16  
    17  // Ultimately we need to build a graph of field names. Those field
    18  // names can come from different constructions, such as:
    19  //
    20  // 1. Within a struct
    21  //
    22  //	x: {z: _, y: _}
    23  //
    24  // When considering x, there should be an edge from z to y (written
    25  // from now on as (z -> y)).
    26  //
    27  // 2. Explicit unification
    28  //
    29  //	x: {z: _, y: _} & {x: _, w: _}
    30  //
    31  // When considering x, we want no edges between the arguments of the
    32  // explicit unification operator '&'.  There should only be edges (z
    33  // -> y) and (x -> w). Through explicit unifications, cycles of field
    34  // names can be introduced, e.g.:
    35  //
    36  //	x: {z: _, y: _} & {y: _, w: _, z: _}
    37  //
    38  // 3. Embeddings
    39  //
    40  //	b: {x: _, w: _}
    41  //	a: {z: _, y: _}
    42  //	c: { a, b }
    43  //
    44  // Here, a and b are embedded within c, and the order is important, so
    45  // at a minimum we want edges (z -> y), (x -> w), and (y -> x). Other
    46  // edges which don't introduce cycles are also acceptable (e.g. (z ->
    47  // x), (y -> w) etc).
    48  //
    49  // 4. Implicit unification
    50  //
    51  //	c: {z: _, y: _}
    52  //	c: {x: _, w: _}
    53  //
    54  // Here, like with embeddings, we choose that the source order is
    55  // important, and so we must have a minimum of (z -> y), (x -> w) and
    56  // (y -> x).
    57  //
    58  // Currently, the evaluator does not always provide enough information
    59  // for us to be able to reliably identify all implicit unifications,
    60  // especially where the ordering is enforced via some intermediate
    61  // node. For example:
    62  //
    63  //	a: {
    64  //		d: z: _
    65  //		d: t: _
    66  //		e: {x: _, w: _}
    67  //	}
    68  //	c: a.d & a.e
    69  //
    70  // Here, the information we get when sorting the fields of c (post
    71  // evaluation), is insufficient to be able to establish the edge (z ->
    72  // t), but it is sufficient to establish (x -> w). So in this case, we
    73  // end up only with the edge (x -> w), and so the other field names
    74  // fall back to lexicographical sorting.
    75  //
    76  // 5. Duplicates
    77  //
    78  //	a: {z: _, y: _, z: int}
    79  //
    80  //	b: c: _
    81  //	b: d: _
    82  //	b: c: int
    83  //
    84  // For a, we want to try to avoid adding an edge (y -> z), and for b
    85  // we want to try to avoid adding an edge (d -> c). So within a
    86  // regular struct, we do not add any additional edges when revisiting
    87  // a declaration previously visited within the same struct. Similarly,
    88  // for implicit unifications within the same file, we do not add any
    89  // additional edges when revisiting a declaration.
    90  //
    91  // In order to get as close as possible to the desired ordering, we
    92  // range over the Vertex's StructInfos, maintaining a list of Features
    93  // which must come before any new Features, i.e. a frontier. For this
    94  // to work, we need to sort the Vertex's StructInfos. Two approaches
    95  // are used:
    96  //
    97  // 1. A topological sorting of a Vertex's StructInfos. This is
    98  // effective for embeddings, and the relationship between embeddings
    99  // and regular fields. For example:
   100  //
   101  //	a: {y: _, x: _}
   102  //	b: {z: _, a}
   103  //
   104  // For b, a topological analysis will find that we can't enter the
   105  // StructInfo containing y and x, until after we've processed the
   106  // declaration of z.
   107  //
   108  // 2. However, even after a topological analysis, we'll often have
   109  // many root StructInfos. We order these by source position (not the
   110  // source position of the StructInfo's StructLit itself, but of the
   111  // references (if any) that resolved to the StructInfo's StructLit),
   112  // then group them. If several StructInfos share the same position,
   113  // then they are batched together and considered to be explicitly
   114  // unified. Then, consecutive batches of explicitly unified
   115  // StructInfos are grouped together.
   116  //
   117  // The result is that explicit unification is correctly
   118  // identified. E.g.:
   119  //
   120  //	a: {x: _}
   121  //	b: {z: int}
   122  //	c: {y: >10}
   123  //	o: a & b & c
   124  //
   125  // for o, the StructInfos corresponding to a, b and c will all be
   126  // grouped together in a single batch and considered to be explicitly
   127  // unified. Also, structInfos that correspond to the same position
   128  // (including no position) will be treated as explicitly unified, and
   129  // so no weight will be given to their relative position within the
   130  // Vertex's slice of StructInfos.
   131  //
   132  // TODO: Switch if possible to finding if a struct has been unified
   133  // with a definition and as much as possible taking order from the
   134  // definition. In other words, if a cycle is only created by edges
   135  // that come from non-definitions, then we ignore those edges, and
   136  // thus don't end up dealing with a cycle.
   137  
   138  import (
   139  	"fmt"
   140  	"maps"
   141  	"slices"
   142  
   143  	"cuelang.org/go/cue/token"
   144  	"cuelang.org/go/internal/core/adt"
   145  )
   146  
// structMeta pairs one of a Vertex's StructInfos with the data needed
// to order it relative to the Vertex's other StructInfos.
//
// structInfo is the StructInfo being described. pos is the position
// used for ordering: analyseStructs first sets it from the source of
// the StructInfo's StructLit, and may then overwrite it with the
// position of a field, or of a reference, through which the struct
// was reached.
type structMeta struct {
	structInfo *adt.StructInfo
	pos        token.Pos

	// Should this struct be considered to be part of an explicit
	// unification (e.g. x & y)?
	isExplicit bool
}
   155  
   156  func (sMeta *structMeta) String() string {
   157  	var sl *adt.StructLit
   158  	if sMeta.structInfo != nil {
   159  		sl = sMeta.structInfo.StructLit
   160  	}
   161  	return fmt.Sprintf("{%p sl:%p %v (explicit? %v)}",
   162  		sMeta, sl, sMeta.pos, sMeta.isExplicit)
   163  }
   164  
   165  func (sm *structMeta) hasDynamic(dynFieldsMap map[*adt.DynamicField][]adt.Feature) bool {
   166  	for _, decl := range sm.structInfo.Decls {
   167  		if dynField, ok := decl.(*adt.DynamicField); ok {
   168  			if _, found := dynFieldsMap[dynField]; found {
   169  				return true
   170  			}
   171  		}
   172  	}
   173  	return false
   174  }
   175  
   176  // We need to order a Vertex's StructInfos. To do that, we want a
   177  // filename+position for every StructInfo.
   178  //
   179  // We build a map from every StructInfo's StructLit and all its decls
   180  // to a *structMeta, using the structLit's position.
   181  //
   182  // The StructLit in a StructInfo may directly appear in the parent's
   183  // arc conjuncts. In this case, the StructLit's position is the
   184  // correct position to use. But the StructLit may have been reached
   185  // via a FieldReference, or SelectorExpr or something else. We want
   186  // the position of the reference, and not the StructLit itself. E.g.
   187  //
   188  //	a: {x: 5}
   189  //	b: {y: 7}
   190  //	c: b
   191  //	c: a
   192  //
   193  // If we're ordering the fields of c, we want the position of b and a
   194  // on lines 3 and 4, not the StructLits which declare a and b on lines
   195  // 1 and 2. To do this, we walk through the Vertex's Arc's
   196  // conjuncts. If a conjunct's Field has been reached via some
   197  // resolver, then the conjunct's Refs will record that, and will allow
   198  // us to update the Field's position (and hence the StructLit's
   199  // position) to that of the reference.
   200  //
   201  // Additionally, we need to discover whether each StructLit is
   202  // included as a result of explicit unification (c: a & b), implicit
   203  // unification:
   204  //
   205  //	c: b
   206  //	c: a
   207  //
   208  // or embedding:
   209  //
   210  //	c: {
   211  //	    b
   212  //	    a
   213  //	}
   214  //
   215  // Explicit unification needs treating specially so to avoid incorrect
   216  // edges between the fields of the lhs and rhs of the &. To do this,
   217  // we look at the vertex's conjuncts. If a conjunct is a binary
   218  // expression &, then we look up the structMeta for the arguments to
   219  // the binary expression, and mark them as explicit unification.
//
// The returned slice holds one structMeta per entry of v.Structs, in
// the same order. As a side effect, a node is ensured in builder for
// the label of every arc of v.
func analyseStructs(v *adt.Vertex, builder *GraphBuilder) []*structMeta {
	structInfos := v.Structs
	// Note that it's important that nodeToStructMetas avoids duplicate entries,
	// which cause significant slowness for some large configs.
	nodeToStructMetas := make(map[adt.Node]map[*structMeta]bool)
	// structMetaMap is helpful as we can't insert into a map unless we make it.
	// It returns the set for node, creating and registering it on first use.
	structMetaMap := func(node adt.Node) map[*structMeta]bool {
		if m := nodeToStructMetas[node]; m != nil {
			return m
		}
		m := make(map[*structMeta]bool)
		nodeToStructMetas[node] = m
		return m
	}
	structMetas := make([]*structMeta, 0, len(structInfos))

	// Create all the structMetas and map to them from a StructInfo's
	// StructLit, and all its internal Decls. Initial attempt at
	// recording a position, which will be correct only for direct use
	// of literal structs in the calculation of vertex v.
	for _, s := range structInfos {
		sl := s.StructLit
		sMeta := &structMeta{
			structInfo: s,
		}
		structMetas = append(structMetas, sMeta)

		// Without a source, pos keeps its zero value.
		if src := sl.Source(); src != nil {
			sMeta.pos = src.Pos()
		}
		structMetaMap(sl)[sMeta] = true
		for _, decl := range sl.Decls {
			structMetaMap(decl)[sMeta] = true
		}
	}

	// If an arc's conjunct's Field is a node we care about, and it has
	// been reached via resolution, then unwind those resolutions to
	// uncover the position of the earliest reference.
	for _, arc := range v.Arcs {
		builder.EnsureNode(arc.Label)
		arc.VisitLeafConjuncts(func(c adt.Conjunct) bool {
			field := c.Field()
			debug("self arc conjunct field %p :: %T, expr %p :: %T (%v)\n",
				field, field, c.Expr(), c.Expr(), c.Expr().Source())
			// Only fields registered above (a StructLit or one of its
			// decls) are of interest; skip everything else.
			sMetas, found := nodeToStructMetas[field]
			if !found {
				return true
			}
			// Overwrite the initial StructLit-based position with the
			// position of the field itself.
			if src := field.Source(); src != nil {
				for sMeta := range sMetas {
					sMeta.pos = src.Pos()
				}
			}
			refs := c.CloseInfo.CycleInfo.Refs
			if refs == nil {
				return true
			}
			// NOTE(review): Source() is dereferenced unconditionally here
			// and below, even when only used as a debug argument;
			// presumably every Ref carries a source — confirm.
			debug(" ref %p :: %T (%v)\n",
				refs.Ref, refs.Ref, refs.Ref.Source().Pos())
			// Walk to the end of the Refs chain; the final entry is the
			// one whose position is used.
			for refs.Next != nil {
				refs = refs.Next
				debug(" ref %p :: %T (%v)\n",
					refs.Ref, refs.Ref, refs.Ref.Source().Pos())
			}
			// Register the reference node against the same structMetas,
			// so that a later lookup by that node (such as the operands
			// of & below) finds them.
			maps.Insert(structMetaMap(refs.Ref), maps.All(sMetas))
			if pos := refs.Ref.Source().Pos(); pos != token.NoPos {
				// This updates every structMeta registered for the
				// reference, including any added by earlier conjuncts.
				for sMeta := range nodeToStructMetas[refs.Ref] {
					sMeta.pos = pos
				}
			}

			return true
		})
	}

	// Explore our own conjuncts, and the decls from our StructList, to
	// find explicit unifications, and mark structMetas accordingly.
	var worklist []adt.Expr
	v.VisitLeafConjuncts(func(c adt.Conjunct) bool {
		debug("self conjunct field %p :: %T, expr %p :: %T\n",
			c.Field(), c.Field(), c.Expr(), c.Expr())
		worklist = append(worklist, c.Expr())
		return true
	})
	for _, si := range structInfos {
		for _, decl := range si.StructLit.Decls {
			if expr, ok := decl.(adt.Expr); ok {
				worklist = append(worklist, expr)
			}
		}
	}

	// Process the worklist as a FIFO queue. Anything that is not an &
	// expression is dropped; the operands of an & are marked explicit
	// and queued in turn so that nested & expressions are reached.
	for len(worklist) != 0 {
		expr := worklist[0]
		worklist = worklist[1:]

		binExpr, ok := expr.(*adt.BinaryExpr)
		if !ok || binExpr.Op != adt.AndOp {
			continue
		}
		for _, expr := range []adt.Expr{binExpr.X, binExpr.Y} {
			for sMeta := range nodeToStructMetas[expr] {
				sMeta.isExplicit = true
				debug(" now explicit: %v\n", sMeta)
			}
		}
		worklist = append(worklist, binExpr.X, binExpr.Y)
	}

	return structMetas
}
   332  
   333  // Find all fields which have been created as a result of successful
   334  // evaluation of a dynamic field name.
   335  func dynamicFieldsFeatures(v *adt.Vertex) map[*adt.DynamicField][]adt.Feature {
   336  	var m map[*adt.DynamicField][]adt.Feature
   337  	for _, arc := range v.Arcs {
   338  		arc.VisitLeafConjuncts(func(c adt.Conjunct) bool {
   339  			if dynField, ok := c.Field().(*adt.DynamicField); ok {
   340  				if m == nil {
   341  					m = make(map[*adt.DynamicField][]adt.Feature)
   342  				}
   343  				m[dynField] = append(m[dynField], arc.Label)
   344  			}
   345  			return true
   346  		})
   347  	}
   348  	return m
   349  }
   350  
   351  type structMetaBatch []*structMeta
   352  
   353  func (batch structMetaBatch) isExplicit() bool {
   354  	return len(batch) > 1 || (len(batch) == 1 && batch[0].isExplicit)
   355  }
   356  
   357  type structMetaBatches []structMetaBatch
   358  
   359  func (batchesPtr *structMetaBatches) appendBatch(batch structMetaBatch) {
   360  	if len(batch) == 0 {
   361  		return
   362  	}
   363  	batches := *batchesPtr
   364  	if l := len(batches); l == 0 {
   365  		*batchesPtr = append(batches, batch)
   366  	} else if prevBatch := batches[l-1]; batch.isExplicit() &&
   367  		prevBatch.isExplicit() &&
   368  		batch[0].pos.Filename() == prevBatch[0].pos.Filename() {
   369  		batches[l-1] = append(batches[l-1], batch...)
   370  	} else {
   371  		*batchesPtr = append(batches, batch)
   372  	}
   373  }
   374  
// vertexFeatures carries the state shared by the steps of
// VertexFeatures: the graph under construction, and, for each dynamic
// field, the labels it produced that addEdges has not yet consumed.
type vertexFeatures struct {
	builder      *GraphBuilder
	dynFieldsMap map[*adt.DynamicField][]adt.Feature
}
   379  
   380  func (vf *vertexFeatures) compareStructMeta(a, b *structMeta) int {
   381  	if c := a.pos.Compare(b.pos); c != 0 {
   382  		return c
   383  	}
   384  	aHasDyn := a.hasDynamic(vf.dynFieldsMap)
   385  	bHasDyn := b.hasDynamic(vf.dynFieldsMap)
   386  	switch {
   387  	case aHasDyn == bHasDyn:
   388  		return 0
   389  	case aHasDyn:
   390  		return 1 // gather dynamic fields at the end
   391  	default:
   392  		return -1
   393  	}
   394  }
   395  
   396  func VertexFeatures(ctx *adt.OpContext, v *adt.Vertex) []adt.Feature {
   397  	debug("\n*** V (%s %v %p) ***\n", v.Label.SelectorString(ctx), v.Label, v)
   398  
   399  	builder := NewGraphBuilder(!ctx.Config.SortFields)
   400  	dynFieldsMap := dynamicFieldsFeatures(v)
   401  	roots := analyseStructs(v, builder)
   402  
   403  	vf := &vertexFeatures{
   404  		builder:      builder,
   405  		dynFieldsMap: dynFieldsMap,
   406  	}
   407  
   408  	slices.SortFunc(roots, vf.compareStructMeta)
   409  	debug("roots: %v\n", roots)
   410  
   411  	var batches structMetaBatches
   412  	var batch structMetaBatch
   413  	for _, root := range roots {
   414  		if len(batch) == 0 ||
   415  			(batch[0].pos == root.pos && !root.hasDynamic(dynFieldsMap)) {
   416  			batch = append(batch, root)
   417  		} else {
   418  			batches.appendBatch(batch)
   419  			batch = structMetaBatch{root}
   420  		}
   421  	}
   422  	batches.appendBatch(batch)
   423  	debug("batches: %v\n", batches)
   424  
   425  	var previous, next []adt.Feature
   426  	var previousBatch structMetaBatch
   427  	for _, batch := range batches {
   428  		explicit := batch.isExplicit()
   429  		if len(previousBatch) != 0 &&
   430  			previousBatch[0].pos.Filename() != batch[0].pos.Filename() {
   431  			previous = nil
   432  		}
   433  		for _, root := range batch {
   434  			root.isExplicit = explicit
   435  			debug("starting root. Explicit unification? %v\n", explicit)
   436  			next = append(next, vf.addEdges(previous, root)...)
   437  		}
   438  		previous = next
   439  		next = nil
   440  		previousBatch = batch
   441  	}
   442  
   443  	debug("edges: %v\n", builder.edgesSet)
   444  	return builder.Build().Sort(ctx)
   445  }
   446  
   447  func (vf *vertexFeatures) addEdges(previous []adt.Feature, sMeta *structMeta) []adt.Feature {
   448  	debug("--- S %p (sl: %p) (explicit? %v) ---\n",
   449  		sMeta, sMeta.structInfo.StructLit, sMeta.isExplicit)
   450  	debug(" previous: %v\n", previous)
   451  	var next []adt.Feature
   452  
   453  	filename := sMeta.pos.Filename()
   454  	debug(" filename: %s (%v)\n", filename, sMeta.pos)
   455  
   456  	for i, decl := range sMeta.structInfo.Decls {
   457  		debug(" %p / %d: d (%p :: %T)\n", sMeta, i, decl, decl)
   458  		if bin, ok := decl.(*adt.BinaryExpr); ok {
   459  			debug("  binary expr: %p :: %T %v %p :: %T\n",
   460  				bin.X, bin.X, bin.Op, bin.Y, bin.Y)
   461  		}
   462  
   463  		currentLabel := adt.InvalidLabel
   464  		switch decl := decl.(type) {
   465  		case *adt.Field:
   466  			currentLabel = decl.Label
   467  			debug(" value %p :: %T (%v)\n", decl.Value, decl.Value, decl.Value)
   468  			if src := decl.Value.Source(); src != nil {
   469  				debug(" field value source: %v\n", src.Pos())
   470  			}
   471  		case *adt.DynamicField:
   472  			// This struct contains a dynamic field. If that dynamic
   473  			// field was successfully evaluated into a field, then insert
   474  			// that field into this chain.
   475  			if labels := vf.dynFieldsMap[decl]; len(labels) > 0 {
   476  				currentLabel = labels[0]
   477  				vf.dynFieldsMap[decl] = labels[1:]
   478  			}
   479  		}
   480  		if currentLabel != adt.InvalidLabel {
   481  			debug("  label %v\n", currentLabel)
   482  
   483  			node, exists := vf.builder.nodesByFeature[currentLabel]
   484  			if exists && node.structMeta == sMeta {
   485  				// same field within the same structLit
   486  				debug("    skipping 1\n")
   487  
   488  			} else if exists && !sMeta.isExplicit && sMeta.pos != token.NoPos &&
   489  				node.structMeta != nil &&
   490  				node.structMeta.pos.Filename() == filename {
   491  				// same field within the same file during implicit unification
   492  				debug("    skipping 2\n")
   493  
   494  			} else {
   495  				debug("    %v %v\n", node, exists)
   496  				node = vf.builder.EnsureNode(currentLabel)
   497  				node.structMeta = sMeta
   498  				next = append(next, currentLabel)
   499  				for _, prevLabel := range previous {
   500  					vf.builder.AddEdge(prevLabel, currentLabel)
   501  				}
   502  				previous = next
   503  				next = nil
   504  			}
   505  		}
   506  	}
   507  
   508  	return previous
   509  }