cuelang.org/go@v0.13.0/internal/core/adt/fields.go (about)

     1  // Copyright 2023 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package adt
    16  
    17  // This file holds the logic for the insertion of fields and pattern
    18  // constraints, including tracking closedness.
    19  //
    20  //
    21  // DESIGN GOALS
    22  //
    23  // Key to performance is to fail early during evaluation. This is especially
    24  // true for disjunctions. In CUE evaluation, conjuncts may be evaluated in a
    25  // fairly arbitrary order. We want to retain this flexibility while also failing
    26  // on disallowed fields as soon as we have enough data to tell for certain.
    27  //
    28  // Keeping track of which fields are allowed means keeping provenance data on
    29  // whether certain conjuncts originate from embeddings or definitions, as well
    30  // as how they group together with other conjuncts. These data structures should
    31  // allow for a "mark and unwind" approach to allow for backtracking when
    32  // computing disjunctions.
    33  //
    34  // References to the same CUE value may be added as conjuncts through various
    35  // paths. For instance, a reference to a definition may be added directly, or
    36  // through embedding. How they are added affects which set of fields are
    37  // allowed. This can make the removal of duplicate conjuncts hard. A solution
    38  // should make it straightforward to deduplicate conjuncts if they have the same
    39  // impact on field inclusion.
    40  //
    41  // All conjuncts associated with field constraints, including optional fields
    42  // and pattern constraints, should be collated, deduplicated, and evaluated as
    43  // if they were regular fields. This allows comparisons between values to be
    44  // meaningful and helps to filter disjuncts.
    45  //
    46  // The provenance data generated by this algorithm should ideally be easily
    47  // usable in external APIs.
    48  //
    49  //
    50  // DATA STRUCTURES
    51  //
    52  // Conjuncts
    53  //
    54  // To keep track of conjunct provenance, each conjunct has a few flags that
     55  // indicate whether it originates from
    56  //   - an embedding
    57  //   - a definition
    58  //   - a reference (optional and unimplemented)
    59  //
    60  // Conjuncts with the same origin are represented as a single Conjunct in the
    61  // Vertex, where this conjunct is a list of these conjuncts. In other words, the
    62  // conjuncts of a Vertex are really a forest (group of trees) of conjuncts that,
    63  // recursively, reflect the provenance of the conjuncts contained within it.
    64  //
    65  // The current implementation uses a Vertex for listing conjuncts with the same
    66  // origin. This Vertex is marked as "Dynamic", as it does not have a CUE path
    67  // that leads to them.
    68  //
    69  //
    70  // Constraints
    71  //
    72  // Vertex values separately keep track of pattern constraints. These consist of
    73  // a list of patterns with associated conjuncts, and a CUE expression that
    74  // represents the set of allowed fields. This information is mostly for equality
    75  // checking: by the time this data is produced, conjuncts associated with
    76  // patterns are already inserted into the computed subfields.
    77  //
    78  // Note that this representation assumes that patterns are always accrued
    79  // cumulatively: a field that is allowed will accrue the conjuncts of any
    80  // matched pattern, even if it originates from an embedding that itself does not
    81  // allow this field.
    82  //
    83  //
    84  // ALGORITHM
    85  //
    86  // When processing the conjuncts of a Vertex, subfields are tracked per
    87  // "grouping" (the list of conjuncts of the same origin). Each grouping keeps a
    88  // counter of the number of unprocessed conjuncts and subgroups associated with
    89  // it. Field inclusion (closedness) can be computed as soon as all subconjuncts
    90  // and subgroups are processed.
    91  //
    92  // Conjuncts of subfields are inserted in such a way that they reflect the same
    93  // grouping as the parent Vertex, plus any grouping that may be added by the
    94  // subfield itself.
    95  //
    96  // It would be possible, though, to collapse certain (combinations of) groups
    97  // that contain only a single conjunct. This can limit the size of such conjunct
    98  // trees.
    99  //
   100  // As conjuncts are added within their grouping context, it is possible to
   101  // uniquely identify conjuncts only by Vertex and expression pointer,
   102  // disregarding the Environment.
   103  //
   104  //
   105  // EXAMPLE DATA STRUCTURE
   106  //
   107  //    a: #A
   108  //    #A: {
   109  //        #B
   110  //        x: r1
   111  //    }
   112  //    #B: y: r2
   113  //    r1: z: r3
   114  //    r2: 2
   115  //    r3: foo: 2
   116  //
   117  // gets evaluated into:
   118  //
   119  //    V_a: Arcs{
   120  //        x: V_x [ V_def(#A)[ r1 ] ]
   121  //        y: V_y [ V_def(#A)[ V_embed(#B)[ r2 ] ] ]
   122  //    }
   123  //
    124  // When evaluating V_x, its Arcs, in turn, become:
   125  //
   126  //    V_x: Arcs{
    127  //        z: V_z [ V_def(#A)[ V_ref(r1)[ r3 ] ] ]
   128  //    }
   129  //
   130  // The V_def(#A) is necessary here to ensure that closedness information can be
   131  // computed, if necessary. The V_ref's, however, are optional, and can be
   132  // omitted if provenance is less important:
   133  //
   134  //    V_x: Arcs{
   135  //        z: V_z [ V_def(#A)[ r3 ]]
   136  //    }
   137  //
   138  // Another possible optimization is to eliminate Vertices if there is only one
   139  // conjunct: the embedding and definition flags in the conjunct can be
   140  // sufficient in that case. The provenance data could potentially be derived
   141  // from the Environment in that case. If an embedding conjunct is itself the
   142  // only conjunct in a list, the embedding bit can be eliminated. So V_y in the
   143  // above example could be reduced to
   144  //
   145  //    V_y [ V_def(#A)[ r2 ] ]
   146  //
   147  
   148  // TODO(perf):
    149  // - the data structures could probably be collapsed with Conjunct, and the
   150  //   Vertex inserted into the Conjuncts could be a special ConjunctGroup.
   151  
   152  func (n *nodeContext) getArc(f Feature, mode ArcType) (arc *Vertex, isNew bool) {
   153  	// TODO(disjunct,perf): CopyOnRead
   154  	v := n.node
   155  	for _, a := range v.Arcs {
   156  		if a.Label == f {
   157  			if f.IsLet() {
   158  				a.MultiLet = true
   159  				// TODO: add return here?
   160  			}
   161  			a.updateArcType(mode)
   162  			return a, false
   163  		}
   164  	}
   165  
   166  	arc = &Vertex{
   167  		Parent:    v,
   168  		Label:     f,
   169  		ArcType:   mode,
   170  		nonRooted: v.IsDynamic || v.nonRooted,
   171  		anonymous: v.anonymous || v.Label.IsLet(),
   172  	}
   173  	if n.scheduler.frozen&fieldSetKnown != 0 {
   174  		b := n.ctx.NewErrf("adding field %v not allowed as field set was already referenced", f)
   175  		n.ctx.AddBottom(b)
   176  		// This may panic for list arithmetic. Safer to leave out for now.
   177  		arc.ArcType = ArcNotPresent
   178  	}
   179  	v.Arcs = append(v.Arcs, arc)
   180  	return arc, true
   181  }
   182  
   183  // allowedInClosed reports whether a field with label f is allowed in a closed
   184  // struct, even when it is not explicitly defined.
   185  //
   186  // TODO: see https://github.com/cue-lang/cue/issues/543
   187  // for whether to include f.IsDef.
   188  func allowedInClosed(f Feature) bool {
   189  	return f.IsHidden() || f.IsDef() || f.IsLet()
   190  }
   191  
   192  // insertConjunct inserts conjunct c into cc.
   193  func (v *Vertex) insertConjunct(ctx *OpContext, c Conjunct, id CloseInfo, mode ArcType, check, checkClosed bool) (pos int, added bool) {
   194  	n := v.getBareState(ctx)
   195  	if n == nil {
   196  		return 0, false
   197  	}
   198  
   199  	n.markNonCyclic(id)
   200  
   201  	v.updateArcType(mode)
   202  
   203  	var c2 Conjunct
   204  	pos = -1
   205  	if check {
   206  		pos, c2 = findConjunct(v.Conjuncts, c)
   207  
   208  	}
   209  	if pos == -1 {
   210  		pos = len(v.Conjuncts)
   211  		v.addConjunctUnchecked(c)
   212  		added = true
   213  	} else if srcRef := c2.CloseInfo.defID; srcRef != 0 {
   214  		// Most duplicates are deduped in insertVertexConjuncts by deduping the
   215  		// reference that brings in conjuncts in the first place. However, with
   216  		// API calls, and in some cases possibly with structure sharing, it may
   217  		// be possible that different Vertices refer to the same conjuncts. In
   218  		// this case, we need to ensure that the current defID also considers
   219  		// the ID associated with the original insertion in its set.
   220  		n.addReplacement(replaceID{from: id.defID, to: srcRef, add: true})
   221  	}
   222  
   223  	if v.isInProgress() {
   224  		n.scheduleConjunct(c, id)
   225  	}
   226  
   227  	for _, rec := range n.notify {
   228  		// TODO(evalv3): currently we get pending arcs here for some tests.
   229  		// That seems fine. But consider this again when most of evalv3 work
   230  		// is done. See test "pending.cue" in comprehensions/notify2.txtar
   231  		// It seems that only let arcs can be pending, though.
   232  
   233  		// TODO: we should probably only notify a conjunct once the root of the
   234  		// conjunct group is completed. This will make it easier to "stitch" the
   235  		// conjunct trees together, as its correctness will be guaranteed.
   236  		rec.v.state.scheduleConjunct(c, id)
   237  	}
   238  
   239  	return
   240  }
   241  
   242  func (n *nodeContext) insertArc(f Feature, mode ArcType, c Conjunct, id CloseInfo, check bool) *Vertex {
   243  	n.assertInitialized()
   244  
   245  	if n == nil {
   246  		panic("nil nodeContext")
   247  	}
   248  	if n.node == nil {
   249  		panic("nil node")
   250  	}
   251  
   252  	v, insertedArc := n.getArc(f, mode)
   253  
   254  	defer n.ctx.PopArc(n.ctx.PushArc(v))
   255  
   256  	// TODO: reporting the cycle error here results in better error paths.
   257  	// However, it causes the reference counting mechanism to be faulty.
   258  	// Reevaluate once the new evaluator is done.
   259  	// if v.ArcType == ArcNotPresent {
   260  	// 	// It was already determined before that this arc may not be present.
   261  	// 	// This case can only manifest itself if we have a cycle.
   262  	// 	n.node.reportFieldCycleError(n.ctx, pos(c.x), f)
   263  	// 	return v
   264  	// }
   265  
   266  	_, added := v.insertConjunct(n.ctx, c, id, mode, check, true)
   267  	if !added || !insertedArc {
   268  		return v
   269  	}
   270  
   271  	// Match and insert patterns.
   272  	if pcs := n.node.PatternConstraints; pcs != nil {
   273  		for _, pc := range pcs.Pairs {
   274  			if matchPattern(n.ctx, pc.Pattern, f) {
   275  				for _, c := range pc.Constraint.Conjuncts {
   276  					n.addConstraint(v, mode, c, check)
   277  				}
   278  			}
   279  		}
   280  	}
   281  
   282  	return v
   283  }
   284  
   285  // addConstraint adds a constraint to arc of n.
   286  //
   287  // In order to resolve LabelReferences, it is not always possible to walk up
   288  // the parent Vertex chain to determan the label, because a label reference
   289  // may point past a point of referral. For instance,
   290  //
   291  //	test: [ID=_]: name: ID
   292  //	test: A: {}
   293  //	B: test.A & {}  // B.name should be "A", not "B".
   294  //
   295  // The arc must be the node arc to which the conjunct is added.
   296  func (n *nodeContext) addConstraint(arc *Vertex, mode ArcType, c Conjunct, check bool) {
   297  	n.assertInitialized()
   298  
   299  	// TODO(perf): avoid cloning the Environment, if:
   300  	// - the pattern constraint has no LabelReference
   301  	//   (require compile-time support)
   302  	// - there are no references in the conjunct pointing to this node.
   303  	// - consider adding this value to the Conjunct struct
   304  	f := arc.Label
   305  	bulkEnv := *c.Env
   306  	bulkEnv.DynamicLabel = f
   307  	c.Env = &bulkEnv
   308  
   309  	// TODO: can go, but do in separate CL.
   310  	arc, _ = n.getArc(f, mode)
   311  
   312  	arc.insertConjunct(n.ctx, c, c.CloseInfo, mode, check, false)
   313  }
   314  
   315  func (n *nodeContext) insertPattern(pattern Value, c Conjunct) {
   316  	n.assertInitialized()
   317  
   318  	// Collect patterns in root vertex. This allows comparing disjuncts for
   319  	// equality as well as inserting new arcs down the line as they are
   320  	// inserted.
   321  	if n.insertConstraint(pattern, c) {
   322  		// Match against full set of arcs from root, but insert in current vertex.
   323  		// Hypothesis: this may not be necessary. Maybe for closedness.
   324  		// TODO: may need to replicate the closedContext for patterns.
   325  		// Also: Conjuncts for matching other arcs in this node may be different
   326  		// for matching arcs using v.foo?, if we need to ensure that conjuncts
   327  		// from arcs and patterns are grouped under the same vertex.
   328  		// TODO: verify. See test Pattern 1b
   329  		for _, a := range n.node.Arcs {
   330  			if matchPattern(n.ctx, pattern, a.Label) {
   331  				// TODO: is it necessary to check for uniqueness here?
   332  				n.addConstraint(a, a.ArcType, c, true)
   333  			}
   334  		}
   335  	}
   336  
   337  	if n.node.HasEllipsis {
   338  		return
   339  	}
   340  
   341  	// TODO: we could still try to accumulate patterns.
   342  }
   343  
   344  // isTotal reports whether pattern value p represents a full domain, that is,
   345  // whether it is of type BasicType or Top.
   346  func isTotal(p Value) bool {
   347  	switch p.(type) {
   348  	case *BasicType:
   349  		return true
   350  	case *Top:
   351  		return true
   352  	}
   353  	return false
   354  }
   355  
   356  func (ctx *OpContext) addPositions(c Conjunct) {
   357  	if x, ok := c.x.(*ConjunctGroup); ok {
   358  		for _, c := range *x {
   359  			ctx.addPositions(c)
   360  		}
   361  	}
   362  	if pos := c.Field(); pos != nil {
   363  		ctx.AddPosition(pos)
   364  	}
   365  }
   366  
   367  // notAllowedError reports a field not allowed error in n and sets the value
   368  // for arc f to that error.
   369  func (ctx *OpContext) notAllowedError(arc *Vertex) *Bottom {
   370  	defer ctx.PopArc(ctx.PushArc(arc))
   371  
   372  	defer ctx.ReleasePositions(ctx.MarkPositions())
   373  
   374  	for _, c := range arc.Conjuncts {
   375  		ctx.addPositions(c)
   376  	}
   377  	// TODO(0.7): Find another way to get this provenance information. Not
   378  	// currently stored in new evaluator.
   379  	// for _, s := range x.Structs {
   380  	//  s.AddPositions(ctx)
   381  	// }
   382  
   383  	// TODO: use the arcType from the closeContext.
   384  	if arc.ArcType == ArcPending {
   385  		// arc.ArcType = ArcNotPresent
   386  		// We do not know yet whether the arc will be present or not. Checking
   387  		// this will be deferred until this is known, after the comprehension
   388  		// has been evaluated.
   389  		return nil
   390  	}
   391  	ctx.Assertf(ctx.pos(), !allowedInClosed(arc.Label), "unexpected disallowed definition, let, or hidden field")
   392  	if ctx.HasErr() {
   393  		// The next error will override this error when not run in Strict mode.
   394  		return nil
   395  	}
   396  
   397  	// TODO: setting arc instead of n.node eliminates subfields. This may be
   398  	// desirable or not, but it differs, at least from <=v0.6 behavior.
   399  	err := ctx.NewErrf("field not allowed")
   400  	err.CloseCheck = true
   401  	arc.SetValue(ctx, err)
   402  	if arc.state != nil {
   403  		arc.state.kind = 0
   404  	}
   405  
   406  	// TODO: remove? We are now setting it on both fields, which seems to be
   407  	// necessary for now. But we should remove this as it often results in
   408  	// a duplicate error.
   409  	// v.SetValue(ctx, ctx.NewErrf("field not allowed"))
   410  
   411  	// TODO: create a special kind of error that gets the positions
   412  	// of the relevant locations upon request from the arc.
   413  	return err
   414  }
   415  
   416  // mergeConjunctions combines two values into one. It never modifies an
   417  // existing conjunction.
   418  //
   419  // TODO: this was used in the closeContext code. We can still use it to
   420  // construct pattern constraint conjunction in the future. This is currently
   421  // unimplemented.
   422  func mergeConjunctions(a, b Value) Value {
   423  	if a == nil {
   424  		return b
   425  	}
   426  	if b == nil {
   427  		return a
   428  	}
   429  	ca, _ := a.(*Conjunction)
   430  	cb, _ := b.(*Conjunction)
   431  	n := 2
   432  	if ca != nil {
   433  		n += len(ca.Values) - 1
   434  	}
   435  	if cb != nil {
   436  		n += len(cb.Values) - 1
   437  	}
   438  	vs := make([]Value, 0, n)
   439  	if ca != nil {
   440  		vs = append(vs, ca.Values...)
   441  	} else {
   442  		vs = append(vs, a)
   443  	}
   444  	if cb != nil {
   445  		vs = append(vs, cb.Values...)
   446  	} else {
   447  		vs = append(vs, b)
   448  	}
   449  	// TODO: potentially order conjuncts to make matching more likely.
   450  	return &Conjunction{Values: vs}
   451  }