cuelang.org/go@v0.13.0/encoding/jsonschema/decode.go (about)

     1  // Copyright 2019 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package jsonschema
    16  
    17  // TODO:
    18  // - replace converter from YAML to CUE to CUE (schema) to CUE.
// - define OpenAPI definitions as CUE.
    20  
    21  import (
    22  	"fmt"
    23  	"math"
    24  	"net/url"
    25  	"regexp"
    26  	"regexp/syntax"
    27  	"slices"
    28  	"strconv"
    29  	"strings"
    30  
    31  	"cuelang.org/go/cue"
    32  	"cuelang.org/go/cue/ast"
    33  	"cuelang.org/go/cue/ast/astutil"
    34  	"cuelang.org/go/cue/errors"
    35  	"cuelang.org/go/cue/token"
    36  	"cuelang.org/go/internal"
    37  )
    38  
const (
	// DefaultRootID is used as the absolute base URI for a schema
	// when no value is provided in [Config.ID].
	DefaultRootID = "https://" + DefaultRootIDHost
	// DefaultRootIDHost is the host part of [DefaultRootID].
	DefaultRootIDHost = "cue.jsonschema.invalid"
)
    45  
// rootDefs defines the top-level name of the map of definitions that do not
// have a valid identifier name.
//
// NOTE(review): the users of this constant are outside this part of the
// file; confirm how the name is combined with the definition names.
//
// TODO: find something more principled, like allowing #("a-b").
const rootDefs = "#"
    51  
// A decoder converts JSON schema to CUE.
type decoder struct {
	// cfg holds the configuration that controls the conversion.
	cfg *Config
	// errs accumulates all errors reported during decoding
	// (see addErr).
	errs errors.Error
	// NOTE(review): presumably records the URLs for which a mapping
	// error has already been reported; not used in this part of the
	// file, so confirm against its users.
	mapURLErrors map[string]bool

	// root is the value that dangling reference paths are
	// looked up in (see decode).
	root cue.Value
	// rootID is used as the id of the root state created by decode.
	rootID *url.URL

	// defForValue holds an entry for internal values
	// that are known to map to a defined schema.
	// A nil entry is stored for nodes that have been
	// referred to but we haven't yet seen when walking
	// the schemas.
	defForValue *valueMap[*definedSchema]

	// danglingRefs records the number of nil entries in defForValue,
	// representing the number of references into the internal
	// structure that have not yet been resolved.
	danglingRefs int

	// defs holds the set of named schemas, indexed by URI (both
	// canonical, and root-relative if known), including external
	// schemas that aren't known.
	defs map[string]*definedSchema

	// builder is used to build the final syntax tree as it becomes known.
	// It is reset at the start of each additional pass in decode.
	builder structBuilder

	// needAnotherPass is set to true when we know that
	// we need another pass through the schema extraction
	// process. This can happen because `MapRef` might choose
	// a different location depending on whether a reference is local
	// or external. We don't know that until we've traversed the
	// entire schema and the `$ref` might be seen before the
	// schema it's referring to. Still more passes might be required
	// if a $ref is found to be referring to a node that would not normally
	// be considered part of the schema data.
	needAnotherPass bool
}
    92  
// definedSchema records information for a schema or subschema.
type definedSchema struct {
	// importPath is empty for internal schemas.
	// Schemas with a non-empty importPath and a non-nil schema are
	// passed to Config.DefineSchema at the end of decoding.
	importPath string

	// path holds the location of the schema relative to importPath.
	path cue.Path

	// schema holds the actual syntax for the schema. This
	// is nil if the entry was created by a reference only.
	// It is reset to nil before each additional decoding pass.
	schema ast.Expr

	// comment holds any doc comment associated with the above schema.
	comment *ast.CommentGroup
}
   108  
   109  // addImport registers
   110  func (d *decoder) addImport(n cue.Value, pkg string) *ast.Ident {
   111  	spec := ast.NewImport(nil, pkg)
   112  	info, err := astutil.ParseImportSpec(spec)
   113  	if err != nil {
   114  		d.errf(cue.Value{}, "invalid import %q", pkg)
   115  	}
   116  	ident := ast.NewIdent(info.Ident)
   117  	ident.Node = spec
   118  	ast.SetPos(ident, n.Pos())
   119  
   120  	return ident
   121  }
   122  
// decode converts the JSON Schema document v to a CUE file.
//
// When Config.Root is empty, v itself is treated as the schema.
// Otherwise Config.Root selects a value inside v: with Config.SingleRoot
// that value is the schema, and without it the value must be a struct
// holding one definition per field.
//
// The schema is traversed repeatedly until no references remain
// unresolved and no further pass has been requested. On failure an
// error is recorded in d.errs and nil is returned.
func (d *decoder) decode(v cue.Value) *ast.File {
	var defsRoot cue.Value
	// docRoot represents the root of the actual data, by contrast
	// with the "root" value as specified in [Config.Root] which
	// represents the root of the schemas to be decoded.
	docRoot := v
	if d.cfg.Root != "" {
		rootPath, err := parseRootRef(d.cfg.Root)
		if err != nil {
			d.errf(cue.Value{}, "invalid Config.Root value %q: %v", d.cfg.Root, err)
			return nil
		}
		root := v.LookupPath(rootPath)
		if !root.Exists() && !d.cfg.AllowNonExistentRoot {
			d.errf(v, "root value at path %v does not exist", d.cfg.Root)
			return nil
		}
		if d.cfg.SingleRoot {
			// The root value is itself the schema to decode.
			v = root
		} else {
			if !root.Exists() {
				// A missing root is allowed here (AllowNonExistentRoot),
				// so treat it as an empty set of definitions.
				root = v.Context().CompileString("{}")
			}
			if root.Kind() != cue.StructKind {
				d.errf(root, "value at path %v must be struct containing definitions but is actually %v", d.cfg.Root, root)
				return nil
			}
			defsRoot = root
		}
	}

	var rootInfo schemaInfo
	// extraSchemas records any nodes that are referred to
	// but not part of the regular schema traversal.
	var extraSchemas []cue.Value
	// basePass records the last time that any new schemas were
	// added for inspection. This can be set whenever new schemas
	// not part of the regular traversal are found.
	basePass := 0

	for pass := 0; ; pass++ {
		if pass > 10 {
			// Should never happen: the most we should ever see in practice
			// should be 2, but some pathological cases could end up with more.
			d.errf(v, "internal error: too many passes without resolution")
			return nil
		}
		// A fresh root state is created for every pass.
		root := &state{
			decoder: d,
			schemaInfo: schemaInfo{
				schemaVersion: d.cfg.DefaultVersion,
				id:            d.rootID,
			},
			isRoot: true,
			pos:    docRoot,
		}

		if defsRoot.Exists() {
			// When d.cfg.Root is non-empty, it points to a struct
			// containing a field for each definition.
			constraintAddDefinitions("schemas", defsRoot, root)
		} else {
			expr, state := root.schemaState(v, allTypes, func(s *state) {
				// We want the top level state to be treated as root even
				// though it's some levels below the actual document top level.
				s.isRoot = true
			})
			if state.allowedTypes == 0 {
				root.errf(v, "constraints are not possible to satisfy")
				return nil
			}
			if !d.builder.put(cue.Path{}, expr, state.comment()) {
				root.errf(v, "duplicate definition at root") // TODO better error message
				return nil
			}
			rootInfo = state
		}
		if d.danglingRefs > 0 && pass == basePass+1 {
			// There are still dangling references but we've been through the
			// schema twice, so we know that there's a reference
			// to a non-schema node. Technically this is not necessarily valid,
			// but we do see this in the wild. This should be rare,
			// so efficiency (re-parsing paths) shouldn't be a great issue.
			//
			// NOTE(review): if byPath is a Go map, the order in which
			// extraSchemas is populated here is unspecified; confirm that
			// this cannot influence the generated output.
			for path, def := range d.defForValue.byPath {
				if def != nil {
					continue
				}
				n := d.root.LookupPath(cue.ParsePath(path))
				if !n.Exists() {
					panic("failed to find entry for dangling reference")
				}
				extraSchemas = append(extraSchemas, n)
				basePass = pass
			}
		}
		for _, n := range extraSchemas {
			// As the ID namespace isn't well-defined we treat all such
			// schemas as if they were directly under the root.
			// See https://json-schema.org/draft/2020-12/json-schema-core#section-9.4.2
			root.schema(n)
		}
		if !d.needAnotherPass && d.danglingRefs == 0 {
			break
		}

		// Another pass is needed: discard the syntax built so far and
		// forget the schema syntax recorded for each definition, while
		// keeping the definition entries themselves.
		d.builder = structBuilder{}
		for _, def := range d.defs {
			def.schema = nil
		}
		d.needAnotherPass = false
	}
	if d.cfg.DefineSchema != nil {
		// Let the caller know about any internal schemas that
		// have been mapped to an external location.
		for _, def := range d.defs {
			if def.schema != nil && def.importPath != "" {
				d.cfg.DefineSchema(def.importPath, def.path, def.schema, def.comment)
			}
		}
	}
	f, err := d.builder.syntax()
	if err != nil {
		d.errf(v, "cannot build final syntax: %v", err)
		return nil
	}
	// The preamble holds declarations that are placed before
	// everything produced by the builder.
	var preamble []ast.Decl
	if d.cfg.PkgName != "" {
		preamble = append(preamble, &ast.Package{Name: ast.NewIdent(d.cfg.PkgName)})
	}
	if rootInfo.schemaVersionPresent {
		// TODO use cue/literal.String
		// TODO is this actually useful information: why is knowing the schema
		// version of the input useful?
		preamble = append(preamble, &ast.Attribute{
			Text: fmt.Sprintf("@jsonschema(schema=%q)", rootInfo.schemaVersion),
		})
	}
	if rootInfo.deprecated {
		preamble = append(preamble, &ast.Attribute{Text: "@deprecated()"})
	}
	if len(preamble) > 0 {
		f.Decls = append(preamble, f.Decls...)
	}
	return f
}
   268  
   269  func (d *decoder) errf(n cue.Value, format string, args ...interface{}) ast.Expr {
   270  	d.warnf(n.Pos(), format, args...)
   271  	return &ast.BadExpr{From: n.Pos()}
   272  }
   273  
   274  func (d *decoder) warnf(p token.Pos, format string, args ...interface{}) {
   275  	d.addErr(errors.Newf(p, format, args...))
   276  }
   277  
   278  func (d *decoder) addErr(err errors.Error) {
   279  	d.errs = errors.Append(d.errs, err)
   280  }
   281  
   282  func (d *decoder) number(n cue.Value) ast.Expr {
   283  	return n.Syntax(cue.Final()).(ast.Expr)
   284  }
   285  
   286  func (d *decoder) uint(nv cue.Value) ast.Expr {
   287  	n, err := uint64Value(nv)
   288  	if err != nil {
   289  		d.errf(nv, "invalid uint")
   290  	}
   291  	return &ast.BasicLit{
   292  		ValuePos: nv.Pos(),
   293  		Kind:     token.FLOAT,
   294  		Value:    strconv.FormatUint(n, 10),
   295  	}
   296  }
   297  
   298  func (d *decoder) boolValue(n cue.Value) bool {
   299  	x, err := n.Bool()
   300  	if err != nil {
   301  		d.errf(n, "invalid bool")
   302  	}
   303  	return x
   304  }
   305  
   306  func (d *decoder) string(n cue.Value) ast.Expr {
   307  	return n.Syntax(cue.Final()).(ast.Expr)
   308  }
   309  
   310  func (d *decoder) strValue(n cue.Value) (s string, ok bool) {
   311  	s, err := n.String()
   312  	if err != nil {
   313  		d.errf(n, "invalid string")
   314  		return "", false
   315  	}
   316  	return s, true
   317  }
   318  
   319  func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) {
   320  	s, ok := d.strValue(n)
   321  	if !ok {
   322  		return nil, false
   323  	}
   324  	if !d.checkRegexp(n, s) {
   325  		return nil, false
   326  	}
   327  	return d.string(n), true
   328  }
   329  
   330  func (d *decoder) checkRegexp(n cue.Value, s string) bool {
   331  	_, err := syntax.Parse(s, syntax.Perl)
   332  	if err == nil {
   333  		return true
   334  	}
   335  	var regErr *syntax.Error
   336  	if errors.As(err, &regErr) {
   337  		switch regErr.Code {
   338  		case syntax.ErrInvalidPerlOp:
   339  			// It's Perl syntax that we'll never support because the CUE evaluation
   340  			// engine uses Go's regexp implementation and because the missing
   341  			// features are usually not there for good reason (e.g. exponential
   342  			// runtime). In other words, this is a missing feature but not an invalid
   343  			// regular expression as such.
   344  			if d.cfg.StrictFeatures {
   345  				// TODO: could fall back to  https://github.com/dlclark/regexp2 instead
   346  				d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err)
   347  			}
   348  			return false
   349  		case syntax.ErrInvalidCharRange:
   350  			// There are many more character class ranges than Go supports currently
   351  			// (see https://go.dev/issue/14509) so treat an unknown character class
   352  			// range as a feature error rather than a bad regexp.
   353  			// TODO translate names to Go-supported class names when possible.
   354  			if d.cfg.StrictFeatures {
   355  				d.errf(n, "unsupported regexp character class in %q: %v", s, err)
   356  			}
   357  			return false
   358  		}
   359  	}
   360  	d.errf(n, "invalid regexp %q: %v", s, err)
   361  	return false
   362  }
   363  
   364  // ensureDefinition ensures that node n will
   365  // be a defined schema.
   366  func (d *decoder) ensureDefinition(n cue.Value) {
   367  	if _, ok := d.defForValue.lookup(n); !ok {
   368  		d.defForValue.set(n, nil)
   369  		d.danglingRefs++
   370  	}
   371  }
   372  
   373  // const draftCutoff = 5
   374  
// coreType identifies one of the core JSON Schema value types.
// Its values are used as indexes into the per-type tables
// coreToCUE, coreTypeName and state.types.
type coreType int

const (
	nullType coreType = iota
	boolType
	numType
	stringType
	arrayType
	objectType

	// numCoreTypes is the number of core types.
	// It is used as the length of state.types.
	numCoreTypes
)
   387  
// coreToCUE maps each core type to the corresponding CUE kind.
var coreToCUE = []cue.Kind{
	nullType:   cue.NullKind,
	boolType:   cue.BoolKind,
	numType:    cue.NumberKind, // Note: both int and float.
	stringType: cue.StringKind,
	arrayType:  cue.ListKind,
	objectType: cue.StructKind,
}
   396  
   397  func kindToAST(k cue.Kind, explicitOpen bool) ast.Expr {
   398  	switch k {
   399  	case cue.NullKind:
   400  		// TODO: handle OpenAPI restrictions.
   401  		return ast.NewNull()
   402  	case cue.BoolKind:
   403  		return ast.NewIdent("bool")
   404  	case cue.NumberKind:
   405  		return ast.NewIdent("number")
   406  	case cue.IntKind:
   407  		return ast.NewIdent("int")
   408  	case cue.FloatKind:
   409  		return ast.NewIdent("float")
   410  	case cue.StringKind:
   411  		return ast.NewIdent("string")
   412  	case cue.ListKind:
   413  		return ast.NewList(&ast.Ellipsis{})
   414  	case cue.StructKind:
   415  		if explicitOpen {
   416  			return ast.NewStruct()
   417  		}
   418  		return ast.NewStruct(&ast.Ellipsis{})
   419  	}
   420  	panic(fmt.Errorf("unexpected kind %v", k))
   421  }
   422  
// coreTypeName maps each core type to the name used for it
// in error messages (see state.finalize).
var coreTypeName = []string{
	nullType:   "null",
	boolType:   "bool",
	numType:    "number",
	stringType: "string",
	arrayType:  "array",
	objectType: "object",
}
   431  
// constraintInfo holds the constraints collected for a single
// core type, or for all types when used as state.all.
type constraintInfo struct {
	// typ is an identifier for the root type, if present.
	// This can be omitted if there are constraints.
	typ ast.Expr
	// constraints holds the collected constraint expressions.
	// Expressions that are top are never added (see add).
	constraints []ast.Expr
}
   438  
   439  func (c *constraintInfo) setTypeUsed(n cue.Value, t coreType, explicitOpen bool) {
   440  	c.typ = kindToAST(coreToCUE[t], explicitOpen)
   441  	setPos(c.typ, n)
   442  	ast.SetRelPos(c.typ, token.NoRelPos)
   443  }
   444  
   445  func (c *constraintInfo) add(n cue.Value, x ast.Expr) {
   446  	if !isTop(x) {
   447  		setPos(x, n)
   448  		ast.SetRelPos(x, token.NoRelPos)
   449  		c.constraints = append(c.constraints, x)
   450  	}
   451  }
   452  
   453  func (s *state) add(n cue.Value, t coreType, x ast.Expr) {
   454  	s.types[t].add(n, x)
   455  }
   456  
   457  func (s *state) setTypeUsed(n cue.Value, t coreType) {
   458  	if int(t) >= len(s.types) {
   459  		panic(fmt.Errorf("type out of range %v/%v", int(t), len(s.types)))
   460  	}
   461  	s.types[t].setTypeUsed(n, t, s.cfg.OpenOnlyWhenExplicit)
   462  }
   463  
// state holds the information collected while translating a single
// JSON Schema node. A new state is derived for each node by
// schemaState.
type state struct {
	*decoder
	schemaInfo

	// up is the state that this state was derived from.
	up *state

	// pos is the JSON Schema node that this state was created for.
	pos cue.Value

	// The constraints in types represent disjunctions per type.
	types    [numCoreTypes]constraintInfo
	all      constraintInfo // values and oneOf etc.
	nullable *ast.BasicLit  // nullable

	exclusiveMin bool // For OpenAPI and legacy support.
	exclusiveMax bool // For OpenAPI and legacy support.

	// isRoot holds whether this state is at the root
	// of the schema.
	isRoot bool

	// NOTE(review): presumably the values of the "minContains" and
	// "maxContains" keywords; they are set outside this part of the file.
	minContains *uint64
	maxContains *uint64

	// NOTE(review): presumably the values of the "if", "then" and
	// "else" keywords, consumed by constraintIfThenElse — confirm.
	ifConstraint   cue.Value
	thenConstraint cue.Value
	elseConstraint cue.Value

	// definitions holds declarations that finalize adds to the
	// struct it produces for this schema.
	definitions []ast.Decl

	// Used for inserting definitions, properties, etc.
	obj  *ast.StructLit
	objN cue.Value // used for adding obj to constraints

	// patterns counts towards hasConstraints when non-empty.
	// NOTE(review): presumably holds pattern constraints; confirm
	// against the code that appends to it.
	patterns []ast.Expr

	// NOTE(review): presumably the list literal built for array
	// keywords; it is not used in this part of the file.
	list *ast.ListLit

	// listItemsIsArray keeps track of whether the
	// value of the "items" keyword is an array.
	// Without this, we can't distinguish between
	//
	//	"items": true
	//
	// and
	//
	//	"items": []
	listItemsIsArray bool

	// The following fields are used when the version is
	// [VersionKubernetesCRD] to check that "properties" and
	// "additionalProperties" may not be specified together.
	hasProperties           bool
	hasAdditionalProperties bool

	// Keep track of whether "items" and "type": "array" have been specified, because
	// in OpenAPI it's mandatory when "type" is "array".
	hasItems bool
	isArray  bool

	// Keep track of whether a $ref keyword is present,
	// because pre-2019-09 schemas ignore sibling keywords
	// to $ref.
	hasRefKeyword bool

	// Keep track of whether we're preserving existing fields,
	// which is preserved recursively by default, and is
	// reset within properties or additionalProperties.
	preserveUnknownFields bool

	// k8sResourceKind and k8sAPIVersion record values from the
	// x-kubernetes-group-version-kind keyword
	// for the kind and apiVersion properties respectively.
	k8sResourceKind string
	k8sAPIVersion   string

	// Keep track of whether the object has been explicitly
	// closed or opened (see [Config.OpenOnlyWhenExplicit]).
	openness openness
}
   543  
// openness describes whether, and how, the struct generated for an
// object schema allows fields other than those mentioned explicitly.
// It determines how finalizeObject wraps or extends the struct.
type openness int

const (
	implicitlyOpen   openness = iota
	explicitlyOpen            // explicitly opened, e.g. additionalProperties: true
	explicitlyClosed          // explicitly closed, e.g. additionalProperties: false
	allFieldsCovered          // complete pattern present, e.g. additionalProperties: type: string
)
   552  
// schemaInfo holds information about a schema
// after it has been created.
type schemaInfo struct {
	// allowedTypes holds the set of types that
	// this node is allowed to be.
	allowedTypes cue.Kind

	// knownTypes holds the set of types that this node
	// is known to be one of by virtue of the constraints inside
	// all. This is used to avoid adding redundant elements
	// to the disjunction created by [state.finalize].
	knownTypes cue.Kind

	// title and description are combined to form the
	// doc comment for the schema (see comment).
	title       string
	description string

	// id holds the absolute URI of the schema if it has a $id field.
	// It's the base URI for $ref or nested $id fields.
	id *url.URL
	// deprecated causes decode to add a @deprecated() attribute
	// to the file when set on the root schema.
	deprecated bool

	// schemaVersion holds the JSON Schema version in effect.
	// schemaVersionPresent causes decode to record the version in a
	// @jsonschema attribute when set on the root schema.
	schemaVersion        Version
	schemaVersionPresent bool

	// NOTE(review): not set in this part of the file; presumably a
	// snapshot of state.hasConstraints — confirm.
	hasConstraints bool
}
   579  
   580  func (s *state) idTag() *ast.Attribute {
   581  	return &ast.Attribute{Text: fmt.Sprintf("@jsonschema(id=%q)", s.id)}
   582  }
   583  
   584  func (s *state) object(n cue.Value) *ast.StructLit {
   585  	if s.obj == nil {
   586  		s.obj = &ast.StructLit{}
   587  		s.objN = n
   588  	}
   589  	return s.obj
   590  }
   591  
   592  func (s *state) finalizeObject() {
   593  	if s.obj == nil && s.schemaVersion == VersionKubernetesCRD && (s.allowedTypes&cue.StructKind) != 0 && s.preserveUnknownFields {
   594  		// When x-kubernetes-preserve-unknown-fields is set, we need
   595  		// an explicit ellipsis even though kindToAST won't have added
   596  		// one, so make sure there's an object.
   597  		_ = s.object(s.pos)
   598  	}
   599  	if s.obj == nil {
   600  		return
   601  	}
   602  	if s.preserveUnknownFields {
   603  		s.openness = explicitlyOpen
   604  	}
   605  	var e ast.Expr = s.obj
   606  	if s.cfg.OpenOnlyWhenExplicit && s.openness == implicitlyOpen {
   607  		// Nothing to do: the struct is implicitly open but
   608  		// we've been directed to leave it like that.
   609  	} else if s.openness == allFieldsCovered {
   610  		// Nothing to do: there is a pattern constraint that covers all
   611  		// possible fields.
   612  	} else if s.openness == explicitlyClosed {
   613  		e = ast.NewCall(ast.NewIdent("close"), s.obj)
   614  	} else {
   615  		s.obj.Elts = append(s.obj.Elts, &ast.Ellipsis{})
   616  	}
   617  	s.add(s.objN, objectType, e)
   618  }
   619  
   620  func (s *state) hasConstraints() bool {
   621  	if len(s.all.constraints) > 0 {
   622  		return true
   623  	}
   624  	for _, t := range s.types {
   625  		if len(t.constraints) > 0 {
   626  			return true
   627  		}
   628  	}
   629  	return len(s.patterns) > 0 ||
   630  		s.title != "" ||
   631  		s.description != "" ||
   632  		s.obj != nil ||
   633  		s.id != nil
   634  }
   635  
// allTypes is the set of all kinds that a schema value may have.
// It is the set of allowed types for a schema that has not been
// restricted by its parent (see schema).
const allTypes = cue.BoolKind |
	cue.ListKind |
	cue.NullKind |
	cue.NumberKind |
	cue.IntKind |
	cue.StringKind |
	cue.StructKind
   643  
// finalize constructs CUE syntax from the collected constraints.
//
// The result is the conjunction of the constraints in s.all and, when
// needed, a disjunction with one element per allowed core type. If the
// schema is nullable, the nullable literal is added as an alternative;
// any definitions and the "$id" attribute are then placed in an
// enclosing struct. If no types are allowed at all, bottom is returned.
//
// As a side effect, s.knownTypes is set to s.allowedTypes.
func (s *state) finalize() (e ast.Expr) {
	if s.allowedTypes == 0 {
		// Nothing is possible. This isn't necessarily a problem, as
		// we might be inside an allOf or oneOf with other valid constraints.
		return bottom()
	}

	// This must happen first, because it can add
	// to the object constraints used below.
	s.finalizeObject()

	conjuncts := []ast.Expr{}
	disjuncts := []ast.Expr{}

	// Sort literal structs and list last for nicer formatting.
	// Use a stable sort so that the relative order of constraints
	// is otherwise kept as-is, for the sake of deterministic output.
	slices.SortStableFunc(s.types[arrayType].constraints, func(a, b ast.Expr) int {
		_, aList := a.(*ast.ListLit)
		_, bList := b.(*ast.ListLit)
		return cmpBool(aList, bList)
	})
	slices.SortStableFunc(s.types[objectType].constraints, func(a, b ast.Expr) int {
		_, aStruct := a.(*ast.StructLit)
		_, bStruct := b.(*ast.StructLit)
		return cmpBool(aStruct, bStruct)
	})

	// excludeInfo records a constraint that applies
	// to a type that is not allowed.
	type excludeInfo struct {
		pos      token.Pos
		typIndex int
	}
	var excluded []excludeInfo

	// The per-type disjunction is needed when the allowed types are
	// not already implied by s.all, or when an allowed type has
	// constraints of its own.
	needsTypeDisjunction := s.allowedTypes != s.knownTypes
	if !needsTypeDisjunction {
		for i, t := range s.types {
			k := coreToCUE[i]
			if len(t.constraints) > 0 && s.allowedTypes&k != 0 {
				// We need to include at least one type-specific
				// constraint in the disjunction.
				needsTypeDisjunction = true
				break
			}
		}
	}

	if needsTypeDisjunction {
		// npossible counts the types that are allowed or have
		// constraints; nexcluded counts the types that have
		// constraints but are not allowed.
		npossible := 0
		nexcluded := 0
		for i, t := range s.types {
			k := coreToCUE[i]
			allowed := s.allowedTypes&k != 0
			switch {
			case len(t.constraints) > 0:
				npossible++
				if !allowed {
					nexcluded++
					for _, c := range t.constraints {
						excluded = append(excluded, excludeInfo{c.Pos(), i})
					}
					continue
				}
				x := ast.NewBinExpr(token.AND, t.constraints...)
				disjuncts = append(disjuncts, x)
			case allowed:
				npossible++
				if s.knownTypes&k != 0 {
					disjuncts = append(disjuncts, kindToAST(k, s.cfg.OpenOnlyWhenExplicit))
				}
			}
		}
		if nexcluded == npossible {
			// All possibilities have been excluded: this is an impossible
			// schema.
			for _, e := range excluded {
				s.addErr(errors.Newf(e.pos,
					"constraint not allowed because type %s is excluded",
					coreTypeName[e.typIndex],
				))
			}
		}
	}
	conjuncts = append(conjuncts, s.all.constraints...)

	if len(disjuncts) > 0 {
		conjuncts = append(conjuncts, ast.NewBinExpr(token.OR, disjuncts...))
	}

	if len(conjuncts) == 0 {
		// There are no conjuncts, which can only happen when there
		// are no disjuncts, which can only happen when the entire
		// set of disjuncts is redundant with respect to the types
		// already implied by s.all. As we've already checked that
		// s.allowedTypes is non-zero (so we know that
		// it's not bottom) and we need _some_ expression
		// to be part of the subsequent syntax, we use top.
		e = top()
	} else {
		e = ast.NewBinExpr(token.AND, conjuncts...)
	}

	// When the schema is nullable, the nullable
	// literal comes first in the disjunction.
	a := []ast.Expr{e}
	if s.nullable != nil {
		a = []ast.Expr{s.nullable, e}
	}

	e = ast.NewBinExpr(token.OR, a...)

	if len(s.definitions) > 0 {
		if st, ok := e.(*ast.StructLit); ok {
			st.Elts = append(st.Elts, s.definitions...)
		} else {
			// Embed the expression in a new struct so that
			// there's somewhere to put the definitions.
			st = ast.NewStruct()
			st.Elts = append(st.Elts, &ast.EmbedDecl{Expr: e})
			st.Elts = append(st.Elts, s.definitions...)
			e = st
		}
	}

	// If an "$id" exists, make sure it's present in the output.
	if s.id != nil {
		if st, ok := e.(*ast.StructLit); ok {
			st.Elts = append([]ast.Decl{s.idTag()}, st.Elts...)
		} else {
			e = &ast.StructLit{Elts: []ast.Decl{s.idTag(), &ast.EmbedDecl{Expr: e}}}
		}
	}

	// Now that we've expressed the schema as actual syntax,
	// all the allowed types are actually explicit and will not
	// need to be mentioned again.
	s.knownTypes = s.allowedTypes
	return e
}
   778  
   779  // cmpBool returns
   780  //
   781  //	-1 if x is less than y,
   782  //	 0 if x equals y,
   783  //	+1 if x is greater than y,
   784  //
   785  // where false is ordered before true.
   786  func cmpBool(x, y bool) int {
   787  	switch {
   788  	case !x && y:
   789  		return -1
   790  	case x && !y:
   791  		return +1
   792  	default:
   793  		return 0
   794  	}
   795  }
   796  
   797  func (s schemaInfo) comment() *ast.CommentGroup {
   798  	// Create documentation.
   799  	doc := strings.TrimSpace(s.title)
   800  	if s.description != "" {
   801  		if doc != "" {
   802  			doc += "\n\n"
   803  		}
   804  		doc += s.description
   805  		doc = strings.TrimSpace(doc)
   806  	}
   807  	// TODO: add examples as well?
   808  	if doc == "" {
   809  		return nil
   810  	}
   811  	return internal.NewComment(true, doc)
   812  }
   813  
   814  func (s *state) schema(n cue.Value) ast.Expr {
   815  	expr, _ := s.schemaState(n, allTypes, nil)
   816  	return expr
   817  }
   818  
   819  // schemaState returns a new state value derived from s.
   820  // n holds the JSONSchema node to translate to a schema.
   821  // types holds the set of possible types that the value can hold.
   822  //
   823  // If init is not nil, it is called on the newly created state value
   824  // before doing anything else.
   825  func (s0 *state) schemaState(n cue.Value, types cue.Kind, init func(*state)) (expr ast.Expr, info schemaInfo) {
   826  	s := &state{
   827  		up: s0,
   828  		schemaInfo: schemaInfo{
   829  			schemaVersion: s0.schemaVersion,
   830  			allowedTypes:  types,
   831  			knownTypes:    allTypes,
   832  		},
   833  		decoder:               s0.decoder,
   834  		pos:                   n,
   835  		isRoot:                s0.isRoot && n == s0.pos,
   836  		preserveUnknownFields: s0.preserveUnknownFields,
   837  	}
   838  	if init != nil {
   839  		init(s)
   840  	}
   841  	defer func() {
   842  		// Perhaps replace the schema expression with a reference.
   843  		expr = s.maybeDefine(expr, info)
   844  	}()
   845  	if n.Kind() == cue.BoolKind {
   846  		if s.schemaVersion.is(vfrom(VersionDraft6)) {
   847  			// From draft6 onwards, boolean values signify a schema that always passes or fails.
   848  			// TODO if false, set s.allowedTypes and s.knownTypes to zero?
   849  			return boolSchema(s.boolValue(n)), s.schemaInfo
   850  		}
   851  		return s.errf(n, "boolean schemas not supported in %v", s.schemaVersion), s.schemaInfo
   852  	}
   853  	if n.Kind() != cue.StructKind {
   854  		return s.errf(n, "schema expects mapping node, found %s", n.Kind()), s.schemaInfo
   855  	}
   856  
   857  	// do multiple passes over the constraints to ensure they are done in order.
   858  	for pass := 0; pass < numPhases; pass++ {
   859  		s.processMap(n, func(key string, value cue.Value) {
   860  			if pass == 0 && key == "$ref" {
   861  				// Before 2019-19, keywords alongside $ref are ignored so keep
   862  				// track of whether we've seen any non-$ref keywords so we can
   863  				// ignore those keywords. This could apply even when the schema
   864  				// is >=2019-19 because $schema could be used to change the version.
   865  				s.hasRefKeyword = true
   866  			}
   867  			// Convert each constraint into a either a value or a functor.
   868  			c := constraintMap[key]
   869  			if c == nil {
   870  				if strings.HasPrefix(key, "x-") {
   871  					// A keyword starting with a leading x- is clearly
   872  					// not intended to be a valid keyword, and is explicitly
   873  					// allowed by OpenAPI. It seems reasonable that
   874  					// this is not an error even with StrictKeywords enabled.
   875  					return
   876  				}
   877  				if pass == 0 && s.cfg.StrictKeywords {
   878  					// TODO: value is not the correct position, albeit close. Fix this.
   879  					s.warnUnrecognizedKeyword(key, value, "unknown keyword %q", key)
   880  				}
   881  				return
   882  			}
   883  			if c.phase != pass {
   884  				return
   885  			}
   886  			if !s.schemaVersion.is(c.versions) {
   887  				s.warnUnrecognizedKeyword(key, value, "keyword %q is not supported in JSON schema version %v", key, s.schemaVersion)
   888  				return
   889  			}
   890  			if pass > 0 && !s.schemaVersion.is(vfrom(VersionDraft2019_09)) && s.hasRefKeyword && key != "$ref" {
   891  				// We're using a schema version that ignores keywords alongside $ref.
   892  				//
   893  				// Note that we specifically exclude pass 0 (the pass in which $schema is checked)
   894  				// from this check, because hasRefKeyword is only set in pass 0 and we
   895  				// can get into a self-contradictory situation ($schema says we should
   896  				// ignore keywords alongside $ref, but $ref says we should ignore the $schema
   897  				// keyword itself). We could make that situation an explicit error, but other
   898  				// implementations don't, and it would require an entire extra pass just to do so.
   899  				s.warnUnrecognizedKeyword(key, value, "ignoring keyword %q alongside $ref", key)
   900  				return
   901  			}
   902  			c.fn(key, value, s)
   903  		})
   904  		if s.schemaVersion == VersionKubernetesCRD && s.isRoot {
   905  			// The root of a CRD is always a resource, so treat it as if it contained
   906  			// the x-kubernetes-embedded-resource keyword
   907  			c := constraintMap["x-kubernetes-embedded-resource"]
   908  			if c.phase != pass {
   909  				continue
   910  			}
   911  			// Note: there is no field value for the embedded-resource keyword,
   912  			// but it's not actually used except for its position so passing
   913  			// the parent object should work fine.
   914  			c.fn("x-kubernetes-embedded-resource", n, s)
   915  		}
   916  	}
   917  	if s.id != nil {
   918  		// If there's an ID, it can be referred to.
   919  		s.ensureDefinition(s.pos)
   920  	}
   921  	constraintIfThenElse(s)
   922  	if s.schemaVersion == VersionKubernetesCRD {
   923  		if s.hasProperties && s.hasAdditionalProperties {
   924  			s.errf(n, "additionalProperties may not be combined with properties in %v", s.schemaVersion)
   925  		}
   926  	}
   927  	if s.schemaVersion.is(openAPILike) {
   928  		if s.isArray && !s.hasItems {
   929  			// From https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/3.0.0.md#schema-object
   930  			// "`items` MUST be present if the `type` is `array`."
   931  			s.errf(n, `"items" must be present when the "type" is "array" in %v`, s.schemaVersion)
   932  		}
   933  	}
   934  
   935  	schemaExpr := s.finalize()
   936  	s.schemaInfo.hasConstraints = s.hasConstraints()
   937  	return schemaExpr, s.schemaInfo
   938  }
   939  
   940  func (s *state) warnUnrecognizedKeyword(key string, n cue.Value, msg string, args ...any) {
   941  	if !s.cfg.StrictKeywords {
   942  		return
   943  	}
   944  	if s.schemaVersion.is(openAPILike) && strings.HasPrefix(key, "x-") {
   945  		// Unimplemented x- keywords are allowed even with strict keywords
   946  		// under OpenAPI-like versions, because those versions enable
   947  		// strict keywords by default.
   948  		return
   949  	}
   950  	s.errf(n, msg, args...)
   951  }
   952  
   953  // maybeDefine checks whether we might need a definition
   954  // for n given its actual schema syntax expression. If
   955  // it does, it creates the definition as appropriate and returns
   956  // an expression that refers to that definition; if not,
   957  // it just returns expr itself.
   958  // TODO also report whether the schema has been defined at a place
   959  // where it can be unified with something else?
   960  func (s *state) maybeDefine(expr ast.Expr, info schemaInfo) ast.Expr {
   961  	def := s.definedSchemaForNode(s.pos)
   962  	if def == nil || len(def.path.Selectors()) == 0 {
   963  		return expr
   964  	}
   965  	def.schema = expr
   966  	def.comment = info.comment()
   967  	if def.importPath == "" {
   968  		// It's a local definition that's not at the root.
   969  		if !s.builder.put(def.path, expr, s.comment()) {
   970  			s.errf(s.pos, "redefinition of schema CUE path %v", def.path)
   971  			return expr
   972  		}
   973  	}
   974  	return s.refExpr(s.pos, def.importPath, def.path)
   975  }
   976  
   977  // definedSchemaForNode returns the definedSchema value
   978  // for the given node in the JSON schema, or nil
   979  // if the node does not need a definition.
   980  func (s *state) definedSchemaForNode(n cue.Value) *definedSchema {
   981  	def, ok := s.defForValue.lookup(n)
   982  	if !ok {
   983  		return nil
   984  	}
   985  	if def != nil {
   986  		// We've either made a definition in a previous pass
   987  		// or it's a redefinition.
   988  		// TODO if it's a redefinition, error.
   989  		return def
   990  	}
   991  	// This node has been referred to but not actually defined. We'll
   992  	// need another pass to sort out the reference even though the
   993  	// reference is no longer dangling.
   994  	s.needAnotherPass = true
   995  
   996  	def = s.addDefinition(n)
   997  	if def == nil {
   998  		return nil
   999  	}
  1000  	s.defForValue.set(n, def)
  1001  	s.danglingRefs--
  1002  	return def
  1003  }
  1004  
  1005  func (s *state) addDefinition(n cue.Value) *definedSchema {
  1006  	var loc SchemaLoc
  1007  	schemaRoot := s.schemaRoot()
  1008  	loc.ID = ref(*schemaRoot.id)
  1009  	loc.ID.Fragment = cuePathToJSONPointer(relPath(n, schemaRoot.pos))
  1010  	idStr := loc.ID.String()
  1011  	def, ok := s.defs[idStr]
  1012  	if ok {
  1013  		// We've already got a definition for this ID.
  1014  		// TODO if it's been defined in the same pass, then it's a redefinition
  1015  		// s.errf(n, "redefinition of schema %s at %v", idStr, n.Path())
  1016  		return def
  1017  	}
  1018  	loc.IsLocal = true
  1019  	loc.Path = relPath(n, s.root)
  1020  	importPath, path, err := s.cfg.MapRef(loc)
  1021  	if err != nil {
  1022  		s.errf(n, "cannot get reference for %v: %v", loc, err)
  1023  		return nil
  1024  	}
  1025  	def = &definedSchema{
  1026  		importPath: importPath,
  1027  		path:       path,
  1028  	}
  1029  	s.defs[idStr] = def
  1030  	return def
  1031  }
  1032  
  1033  // refExpr returns a CUE expression to refer to the given path within the given
  1034  // imported CUE package. If importPath is empty, it returns a reference
  1035  // relative to the root of the schema being generated.
  1036  func (s *state) refExpr(n cue.Value, importPath string, path cue.Path) ast.Expr {
  1037  	if importPath == "" {
  1038  		// Internal reference
  1039  		expr, err := s.builder.getRef(path)
  1040  		if err != nil {
  1041  			s.errf(n, "cannot generate reference: %v", err)
  1042  			return nil
  1043  		}
  1044  		return expr
  1045  	}
  1046  	// External reference
  1047  	ip := ast.ParseImportPath(importPath)
  1048  	if ip.Qualifier == "" {
  1049  		// TODO choose an arbitrary name here.
  1050  		s.errf(n, "cannot determine package name from import path %q", importPath)
  1051  		return nil
  1052  	}
  1053  	ident := ast.NewIdent(ip.Qualifier)
  1054  	ident.Node = &ast.ImportSpec{Path: ast.NewString(importPath)}
  1055  	expr, err := pathRefSyntax(path, ident)
  1056  	if err != nil {
  1057  		s.errf(n, "cannot determine CUE path: %v", err)
  1058  		return nil
  1059  	}
  1060  	return expr
  1061  }
  1062  
  1063  func (s *state) constValue(n cue.Value) ast.Expr {
  1064  	k := n.Kind()
  1065  	switch k {
  1066  	case cue.ListKind:
  1067  		a := []ast.Expr{}
  1068  		for i, _ := n.List(); i.Next(); {
  1069  			a = append(a, s.constValue(i.Value()))
  1070  		}
  1071  		return setPos(ast.NewList(a...), n)
  1072  
  1073  	case cue.StructKind:
  1074  		a := []ast.Decl{}
  1075  		s.processMap(n, func(key string, n cue.Value) {
  1076  			a = append(a, &ast.Field{
  1077  				Label:      ast.NewString(key),
  1078  				Value:      s.constValue(n),
  1079  				Constraint: token.NOT,
  1080  			})
  1081  		})
  1082  		return setPos(ast.NewCall(ast.NewIdent("close"), &ast.StructLit{Elts: a}), n)
  1083  	default:
  1084  		if !n.IsConcrete() {
  1085  			s.errf(n, "invalid non-concrete value")
  1086  		}
  1087  		return n.Syntax(cue.Final()).(ast.Expr)
  1088  	}
  1089  }
  1090  
  1091  func (s *state) value(n cue.Value) ast.Expr {
  1092  	k := n.Kind()
  1093  	switch k {
  1094  	case cue.ListKind:
  1095  		a := []ast.Expr{}
  1096  		for i, _ := n.List(); i.Next(); {
  1097  			a = append(a, s.value(i.Value()))
  1098  		}
  1099  		return setPos(ast.NewList(a...), n)
  1100  
  1101  	case cue.StructKind:
  1102  		a := []ast.Decl{}
  1103  		s.processMap(n, func(key string, n cue.Value) {
  1104  			a = append(a, &ast.Field{
  1105  				Label: ast.NewString(key),
  1106  				Value: s.value(n),
  1107  			})
  1108  		})
  1109  		return setPos(&ast.StructLit{Elts: a}, n)
  1110  
  1111  	default:
  1112  		if !n.IsConcrete() {
  1113  			s.errf(n, "invalid non-concrete value")
  1114  		}
  1115  		return n.Syntax(cue.Final()).(ast.Expr)
  1116  	}
  1117  }
  1118  
  1119  // processMap processes a yaml node, expanding merges.
  1120  //
  1121  // TODO: in some cases we can translate merges into CUE embeddings.
  1122  // This may also prevent exponential blow-up (as may happen when
  1123  // converting YAML to JSON).
  1124  func (s *state) processMap(n cue.Value, f func(key string, n cue.Value)) {
  1125  	// TODO: intercept references to allow for optimized performance.
  1126  	for i, _ := n.Fields(); i.Next(); {
  1127  		f(i.Selector().Unquoted(), i.Value())
  1128  	}
  1129  }
  1130  
  1131  func (s *state) listItems(name string, n cue.Value, allowEmpty bool) (a []cue.Value) {
  1132  	if n.Kind() != cue.ListKind {
  1133  		s.errf(n, `value of %q must be an array, found %v`, name, n.Kind())
  1134  	}
  1135  	for i, _ := n.List(); i.Next(); {
  1136  		a = append(a, i.Value())
  1137  	}
  1138  	if !allowEmpty && len(a) == 0 {
  1139  		s.errf(n, `array for %q must be non-empty`, name)
  1140  	}
  1141  	return a
  1142  }
  1143  
  1144  // excludeFields returns either an empty slice (if decls is empty)
  1145  // or a slice containing a CUE expression that can be used to exclude the
  1146  // fields of the given declaration in a label expression. For instance, for
  1147  //
  1148  //	{ foo: 1, bar: int }
  1149  //
  1150  // it creates a slice holding the expression
  1151  //
  1152  //	!~ "^(foo|bar)$"
  1153  //
  1154  // which can be used in a label expression to define types for all fields but
  1155  // those existing:
  1156  //
  1157  //	[!~"^(foo|bar)$"]: string
  1158  func excludeFields(decls []ast.Decl) []ast.Expr {
  1159  	if len(decls) == 0 {
  1160  		return nil
  1161  	}
  1162  	var buf strings.Builder
  1163  	first := true
  1164  	buf.WriteString("^(")
  1165  	for _, d := range decls {
  1166  		f, ok := d.(*ast.Field)
  1167  		if !ok {
  1168  			continue
  1169  		}
  1170  		str, _, _ := ast.LabelName(f.Label)
  1171  		if str != "" {
  1172  			if !first {
  1173  				buf.WriteByte('|')
  1174  			}
  1175  			buf.WriteString(regexp.QuoteMeta(str))
  1176  			first = false
  1177  		}
  1178  	}
  1179  	buf.WriteString(")$")
  1180  	return []ast.Expr{
  1181  		&ast.UnaryExpr{Op: token.NMAT, X: ast.NewString(buf.String())},
  1182  	}
  1183  }
  1184  
  1185  func bottom() ast.Expr {
  1186  	return &ast.BottomLit{}
  1187  }
  1188  
  1189  func top() ast.Expr {
  1190  	return ast.NewIdent("_")
  1191  }
  1192  
  1193  func boolSchema(ok bool) ast.Expr {
  1194  	if ok {
  1195  		return top()
  1196  	}
  1197  	return bottom()
  1198  }
  1199  
  1200  func isTop(s ast.Expr) bool {
  1201  	i, ok := s.(*ast.Ident)
  1202  	return ok && i.Name == "_"
  1203  }
  1204  
  1205  func isBottom(e ast.Expr) bool {
  1206  	_, ok := e.(*ast.BottomLit)
  1207  	return ok
  1208  }
  1209  
  1210  func addTag(field ast.Label, tag, value string) *ast.Field {
  1211  	return &ast.Field{
  1212  		Label: field,
  1213  		Value: top(),
  1214  		Attrs: []*ast.Attribute{
  1215  			{Text: fmt.Sprintf("@%s(%s)", tag, value)},
  1216  		},
  1217  	}
  1218  }
  1219  
  1220  func setPos(e ast.Expr, v cue.Value) ast.Expr {
  1221  	ast.SetPos(e, v.Pos())
  1222  	return e
  1223  }
  1224  
  1225  // uint64Value is like v.Uint64 except that it
  1226  // also allows floating point constants, as long
  1227  // as they have no fractional part.
  1228  func uint64Value(v cue.Value) (uint64, error) {
  1229  	n, err := v.Uint64()
  1230  	if err == nil {
  1231  		return n, nil
  1232  	}
  1233  	f, err := v.Float64()
  1234  	if err != nil {
  1235  		return 0, err
  1236  	}
  1237  	intPart, fracPart := math.Modf(f)
  1238  	if fracPart != 0 {
  1239  		return 0, errors.Newf(v.Pos(), "%v is not a whole number", v)
  1240  	}
  1241  	if intPart < 0 || intPart > math.MaxUint64 {
  1242  		return 0, errors.Newf(v.Pos(), "%v is out of bounds", v)
  1243  	}
  1244  	return uint64(intPart), nil
  1245  }