cuelang.org/go@v0.13.0/encoding/jsonschema/decode.go (about) 1 // Copyright 2019 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package jsonschema 16 17 // TODO: 18 // - replace converter from YAML to CUE to CUE (schema) to CUE. 19 // - define OpenAPI definitions als CUE. 20 21 import ( 22 "fmt" 23 "math" 24 "net/url" 25 "regexp" 26 "regexp/syntax" 27 "slices" 28 "strconv" 29 "strings" 30 31 "cuelang.org/go/cue" 32 "cuelang.org/go/cue/ast" 33 "cuelang.org/go/cue/ast/astutil" 34 "cuelang.org/go/cue/errors" 35 "cuelang.org/go/cue/token" 36 "cuelang.org/go/internal" 37 ) 38 39 const ( 40 // DefaultRootID is used as the absolute base URI for a schema 41 // when no value is provided in [Config.ID]. 42 DefaultRootID = "https://" + DefaultRootIDHost 43 DefaultRootIDHost = "cue.jsonschema.invalid" 44 ) 45 46 // rootDefs defines the top-level name of the map of definitions that do not 47 // have a valid identifier name. 48 // 49 // TODO: find something more principled, like allowing #("a-b"). 50 const rootDefs = "#" 51 52 // A decoder converts JSON schema to CUE. 53 type decoder struct { 54 cfg *Config 55 errs errors.Error 56 mapURLErrors map[string]bool 57 58 root cue.Value 59 rootID *url.URL 60 61 // defForValue holds an entry for internal values 62 // that are known to map to a defined schema. 63 // A nil entry is stored for nodes that have been 64 // referred to but we haven't yet seen when walking 65 // the schemas. 66 defForValue *valueMap[*definedSchema] 67 68 // danglingRefs records the number of nil entries in defForValue, 69 // representing the number of references into the internal 70 // structure that have not yet been resolved. 71 danglingRefs int 72 73 // defs holds the set of named schemas, indexed by URI (both 74 // canonical, and root-relative if known), including external 75 // schemas that aren't known. 76 defs map[string]*definedSchema 77 78 // builder is used to build the final syntax tree as it becomes known. 79 builder structBuilder 80 81 // needAnotherPass is set to true when we know that 82 // we need another pass through the schema extraction 83 // process. This can happen because `MapRef` might choose 84 // a different location depending on whether a reference is local 85 // or external. We don't know that until we've traversed the 86 // entire schema and the `$ref` might be seen before the 87 // schema it's referring to. Still more passes might be required 88 // if a $ref is found to be referring to a node that would not normally 89 // be considered part of the schema data. 90 needAnotherPass bool 91 } 92 93 // definedSchema records information for a schema or subschema. 94 type definedSchema struct { 95 // importPath is empty for internal schemas. 96 importPath string 97 98 // path holds the location of the schema relative to importPath. 99 path cue.Path 100 101 // schema holds the actual syntax for the schema. This 102 // is nil if the entry was created by a reference only. 103 schema ast.Expr 104 105 // comment holds any doc comment associated with the above schema. 106 comment *ast.CommentGroup 107 } 108 109 // addImport registers 110 func (d *decoder) addImport(n cue.Value, pkg string) *ast.Ident { 111 spec := ast.NewImport(nil, pkg) 112 info, err := astutil.ParseImportSpec(spec) 113 if err != nil { 114 d.errf(cue.Value{}, "invalid import %q", pkg) 115 } 116 ident := ast.NewIdent(info.Ident) 117 ident.Node = spec 118 ast.SetPos(ident, n.Pos()) 119 120 return ident 121 } 122 123 func (d *decoder) decode(v cue.Value) *ast.File { 124 var defsRoot cue.Value 125 // docRoot represents the root of the actual data, by contrast 126 // with the "root" value as specified in [Config.Root] which 127 // represents the root of the schemas to be decoded. 128 docRoot := v 129 if d.cfg.Root != "" { 130 rootPath, err := parseRootRef(d.cfg.Root) 131 if err != nil { 132 d.errf(cue.Value{}, "invalid Config.Root value %q: %v", d.cfg.Root, err) 133 return nil 134 } 135 root := v.LookupPath(rootPath) 136 if !root.Exists() && !d.cfg.AllowNonExistentRoot { 137 d.errf(v, "root value at path %v does not exist", d.cfg.Root) 138 return nil 139 } 140 if d.cfg.SingleRoot { 141 v = root 142 } else { 143 if !root.Exists() { 144 root = v.Context().CompileString("{}") 145 } 146 if root.Kind() != cue.StructKind { 147 d.errf(root, "value at path %v must be struct containing definitions but is actually %v", d.cfg.Root, root) 148 return nil 149 } 150 defsRoot = root 151 } 152 } 153 154 var rootInfo schemaInfo 155 // extraSchemas records any nodes that are referred to 156 // but not part of the regular schema traversal. 157 var extraSchemas []cue.Value 158 // basePass records the last time that any new schemas were 159 // added for inspection. This can be set whenever new schemas 160 // not part of the regular traversal are found. 161 basePass := 0 162 163 for pass := 0; ; pass++ { 164 if pass > 10 { 165 // Should never happen: the most we should ever see in practice 166 // should be 2, but some pathological cases could end up with more. 167 d.errf(v, "internal error: too many passes without resolution") 168 return nil 169 } 170 root := &state{ 171 decoder: d, 172 schemaInfo: schemaInfo{ 173 schemaVersion: d.cfg.DefaultVersion, 174 id: d.rootID, 175 }, 176 isRoot: true, 177 pos: docRoot, 178 } 179 180 if defsRoot.Exists() { 181 // When d.cfg.Root is non-empty, it points to a struct 182 // containing a field for each definition. 183 constraintAddDefinitions("schemas", defsRoot, root) 184 } else { 185 expr, state := root.schemaState(v, allTypes, func(s *state) { 186 // We want the top level state to be treated as root even 187 // though it's some levels below the actual document top level. 188 s.isRoot = true 189 }) 190 if state.allowedTypes == 0 { 191 root.errf(v, "constraints are not possible to satisfy") 192 return nil 193 } 194 if !d.builder.put(cue.Path{}, expr, state.comment()) { 195 root.errf(v, "duplicate definition at root") // TODO better error message 196 return nil 197 } 198 rootInfo = state 199 } 200 if d.danglingRefs > 0 && pass == basePass+1 { 201 // There are still dangling references but we've been through the 202 // schema twice, so we know that there's a reference 203 // to a non-schema node. Technically this is not necessarily valid, 204 // but we do see this in the wild. This should be rare, 205 // so efficiency (re-parsing paths) shouldn't be a great issue. 206 for path, def := range d.defForValue.byPath { 207 if def != nil { 208 continue 209 } 210 n := d.root.LookupPath(cue.ParsePath(path)) 211 if !n.Exists() { 212 panic("failed to find entry for dangling reference") 213 } 214 extraSchemas = append(extraSchemas, n) 215 basePass = pass 216 } 217 } 218 for _, n := range extraSchemas { 219 // As the ID namespace isn't well-defined we treat all such 220 // schemas as if they were directly under the root. 221 // See https://json-schema.org/draft/2020-12/json-schema-core#section-9.4.2 222 root.schema(n) 223 } 224 if !d.needAnotherPass && d.danglingRefs == 0 { 225 break 226 } 227 228 d.builder = structBuilder{} 229 for _, def := range d.defs { 230 def.schema = nil 231 } 232 d.needAnotherPass = false 233 } 234 if d.cfg.DefineSchema != nil { 235 // Let the caller know about any internal schemas that 236 // have been mapped to an external location. 237 for _, def := range d.defs { 238 if def.schema != nil && def.importPath != "" { 239 d.cfg.DefineSchema(def.importPath, def.path, def.schema, def.comment) 240 } 241 } 242 } 243 f, err := d.builder.syntax() 244 if err != nil { 245 d.errf(v, "cannot build final syntax: %v", err) 246 return nil 247 } 248 var preamble []ast.Decl 249 if d.cfg.PkgName != "" { 250 preamble = append(preamble, &ast.Package{Name: ast.NewIdent(d.cfg.PkgName)}) 251 } 252 if rootInfo.schemaVersionPresent { 253 // TODO use cue/literal.String 254 // TODO is this actually useful information: why is knowing the schema 255 // version of the input useful? 256 preamble = append(preamble, &ast.Attribute{ 257 Text: fmt.Sprintf("@jsonschema(schema=%q)", rootInfo.schemaVersion), 258 }) 259 } 260 if rootInfo.deprecated { 261 preamble = append(preamble, &ast.Attribute{Text: "@deprecated()"}) 262 } 263 if len(preamble) > 0 { 264 f.Decls = append(preamble, f.Decls...) 265 } 266 return f 267 } 268 269 func (d *decoder) errf(n cue.Value, format string, args ...interface{}) ast.Expr { 270 d.warnf(n.Pos(), format, args...) 271 return &ast.BadExpr{From: n.Pos()} 272 } 273 274 func (d *decoder) warnf(p token.Pos, format string, args ...interface{}) { 275 d.addErr(errors.Newf(p, format, args...)) 276 } 277 278 func (d *decoder) addErr(err errors.Error) { 279 d.errs = errors.Append(d.errs, err) 280 } 281 282 func (d *decoder) number(n cue.Value) ast.Expr { 283 return n.Syntax(cue.Final()).(ast.Expr) 284 } 285 286 func (d *decoder) uint(nv cue.Value) ast.Expr { 287 n, err := uint64Value(nv) 288 if err != nil { 289 d.errf(nv, "invalid uint") 290 } 291 return &ast.BasicLit{ 292 ValuePos: nv.Pos(), 293 Kind: token.FLOAT, 294 Value: strconv.FormatUint(n, 10), 295 } 296 } 297 298 func (d *decoder) boolValue(n cue.Value) bool { 299 x, err := n.Bool() 300 if err != nil { 301 d.errf(n, "invalid bool") 302 } 303 return x 304 } 305 306 func (d *decoder) string(n cue.Value) ast.Expr { 307 return n.Syntax(cue.Final()).(ast.Expr) 308 } 309 310 func (d *decoder) strValue(n cue.Value) (s string, ok bool) { 311 s, err := n.String() 312 if err != nil { 313 d.errf(n, "invalid string") 314 return "", false 315 } 316 return s, true 317 } 318 319 func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) { 320 s, ok := d.strValue(n) 321 if !ok { 322 return nil, false 323 } 324 if !d.checkRegexp(n, s) { 325 return nil, false 326 } 327 return d.string(n), true 328 } 329 330 func (d *decoder) checkRegexp(n cue.Value, s string) bool { 331 _, err := syntax.Parse(s, syntax.Perl) 332 if err == nil { 333 return true 334 } 335 var regErr *syntax.Error 336 if errors.As(err, ®Err) { 337 switch regErr.Code { 338 case syntax.ErrInvalidPerlOp: 339 // It's Perl syntax that we'll never support because the CUE evaluation 340 // engine uses Go's regexp implementation and because the missing 341 // features are usually not there for good reason (e.g. exponential 342 // runtime). In other words, this is a missing feature but not an invalid 343 // regular expression as such. 344 if d.cfg.StrictFeatures { 345 // TODO: could fall back to https://github.com/dlclark/regexp2 instead 346 d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err) 347 } 348 return false 349 case syntax.ErrInvalidCharRange: 350 // There are many more character class ranges than Go supports currently 351 // (see https://go.dev/issue/14509) so treat an unknown character class 352 // range as a feature error rather than a bad regexp. 353 // TODO translate names to Go-supported class names when possible. 354 if d.cfg.StrictFeatures { 355 d.errf(n, "unsupported regexp character class in %q: %v", s, err) 356 } 357 return false 358 } 359 } 360 d.errf(n, "invalid regexp %q: %v", s, err) 361 return false 362 } 363 364 // ensureDefinition ensures that node n will 365 // be a defined schema. 366 func (d *decoder) ensureDefinition(n cue.Value) { 367 if _, ok := d.defForValue.lookup(n); !ok { 368 d.defForValue.set(n, nil) 369 d.danglingRefs++ 370 } 371 } 372 373 // const draftCutoff = 5 374 375 type coreType int 376 377 const ( 378 nullType coreType = iota 379 boolType 380 numType 381 stringType 382 arrayType 383 objectType 384 385 numCoreTypes 386 ) 387 388 var coreToCUE = []cue.Kind{ 389 nullType: cue.NullKind, 390 boolType: cue.BoolKind, 391 numType: cue.NumberKind, // Note: both int and float. 392 stringType: cue.StringKind, 393 arrayType: cue.ListKind, 394 objectType: cue.StructKind, 395 } 396 397 func kindToAST(k cue.Kind, explicitOpen bool) ast.Expr { 398 switch k { 399 case cue.NullKind: 400 // TODO: handle OpenAPI restrictions. 401 return ast.NewNull() 402 case cue.BoolKind: 403 return ast.NewIdent("bool") 404 case cue.NumberKind: 405 return ast.NewIdent("number") 406 case cue.IntKind: 407 return ast.NewIdent("int") 408 case cue.FloatKind: 409 return ast.NewIdent("float") 410 case cue.StringKind: 411 return ast.NewIdent("string") 412 case cue.ListKind: 413 return ast.NewList(&ast.Ellipsis{}) 414 case cue.StructKind: 415 if explicitOpen { 416 return ast.NewStruct() 417 } 418 return ast.NewStruct(&ast.Ellipsis{}) 419 } 420 panic(fmt.Errorf("unexpected kind %v", k)) 421 } 422 423 var coreTypeName = []string{ 424 nullType: "null", 425 boolType: "bool", 426 numType: "number", 427 stringType: "string", 428 arrayType: "array", 429 objectType: "object", 430 } 431 432 type constraintInfo struct { 433 // typ is an identifier for the root type, if present. 434 // This can be omitted if there are constraints. 435 typ ast.Expr 436 constraints []ast.Expr 437 } 438 439 func (c *constraintInfo) setTypeUsed(n cue.Value, t coreType, explicitOpen bool) { 440 c.typ = kindToAST(coreToCUE[t], explicitOpen) 441 setPos(c.typ, n) 442 ast.SetRelPos(c.typ, token.NoRelPos) 443 } 444 445 func (c *constraintInfo) add(n cue.Value, x ast.Expr) { 446 if !isTop(x) { 447 setPos(x, n) 448 ast.SetRelPos(x, token.NoRelPos) 449 c.constraints = append(c.constraints, x) 450 } 451 } 452 453 func (s *state) add(n cue.Value, t coreType, x ast.Expr) { 454 s.types[t].add(n, x) 455 } 456 457 func (s *state) setTypeUsed(n cue.Value, t coreType) { 458 if int(t) >= len(s.types) { 459 panic(fmt.Errorf("type out of range %v/%v", int(t), len(s.types))) 460 } 461 s.types[t].setTypeUsed(n, t, s.cfg.OpenOnlyWhenExplicit) 462 } 463 464 type state struct { 465 *decoder 466 schemaInfo 467 468 up *state 469 470 pos cue.Value 471 472 // The constraints in types represent disjunctions per type. 473 types [numCoreTypes]constraintInfo 474 all constraintInfo // values and oneOf etc. 475 nullable *ast.BasicLit // nullable 476 477 exclusiveMin bool // For OpenAPI and legacy support. 478 exclusiveMax bool // For OpenAPI and legacy support. 479 480 // isRoot holds whether this state is at the root 481 // of the schema. 482 isRoot bool 483 484 minContains *uint64 485 maxContains *uint64 486 487 ifConstraint cue.Value 488 thenConstraint cue.Value 489 elseConstraint cue.Value 490 491 definitions []ast.Decl 492 493 // Used for inserting definitions, properties, etc. 494 obj *ast.StructLit 495 objN cue.Value // used for adding obj to constraints 496 497 patterns []ast.Expr 498 499 list *ast.ListLit 500 501 // listItemsIsArray keeps track of whether the 502 // value of the "items" keyword is an array. 503 // Without this, we can't distinguish between 504 // 505 // "items": true 506 // 507 // and 508 // 509 // "items": [] 510 listItemsIsArray bool 511 512 // The following fields are used when the version is 513 // [VersionKubernetesCRD] to check that "properties" and 514 // "additionalProperties" may not be specified together. 515 hasProperties bool 516 hasAdditionalProperties bool 517 518 // Keep track of whether "items" and "type": "array" have been specified, because 519 // in OpenAPI it's mandatory when "type" is "array". 520 hasItems bool 521 isArray bool 522 523 // Keep track of whether a $ref keyword is present, 524 // because pre-2019-09 schemas ignore sibling keywords 525 // to $ref. 526 hasRefKeyword bool 527 528 // Keep track of whether we're preserving existing fields, 529 // which is preserved recursively by default, and is 530 // reset within properties or additionalProperties. 531 preserveUnknownFields bool 532 533 // k8sResourceKind and k8sAPIVersion record values from the 534 // x-kubernetes-group-version-kind keyword 535 // for the kind and apiVersion properties respectively. 536 k8sResourceKind string 537 k8sAPIVersion string 538 539 // Keep track of whether the object has been explicitly 540 // closed or opened (see [Config.OpenOnlyWhenExplicit]). 541 openness openness 542 } 543 544 type openness int 545 546 const ( 547 implicitlyOpen openness = iota 548 explicitlyOpen // explicitly opened, e.g. additionalProperties: true 549 explicitlyClosed // explicitly closed, e.g. additionalProperties: false 550 allFieldsCovered // complete pattern present, e.g. additionalProperties: type: string 551 ) 552 553 // schemaInfo holds information about a schema 554 // after it has been created. 555 type schemaInfo struct { 556 // allowedTypes holds the set of types that 557 // this node is allowed to be. 558 allowedTypes cue.Kind 559 560 // knownTypes holds the set of types that this node 561 // is known to be one of by virtue of the constraints inside 562 // all. This is used to avoid adding redundant elements 563 // to the disjunction created by [state.finalize]. 564 knownTypes cue.Kind 565 566 title string 567 description string 568 569 // id holds the absolute URI of the schema if has a $id field . 570 // It's the base URI for $ref or nested $id fields. 571 id *url.URL 572 deprecated bool 573 574 schemaVersion Version 575 schemaVersionPresent bool 576 577 hasConstraints bool 578 } 579 580 func (s *state) idTag() *ast.Attribute { 581 return &ast.Attribute{Text: fmt.Sprintf("@jsonschema(id=%q)", s.id)} 582 } 583 584 func (s *state) object(n cue.Value) *ast.StructLit { 585 if s.obj == nil { 586 s.obj = &ast.StructLit{} 587 s.objN = n 588 } 589 return s.obj 590 } 591 592 func (s *state) finalizeObject() { 593 if s.obj == nil && s.schemaVersion == VersionKubernetesCRD && (s.allowedTypes&cue.StructKind) != 0 && s.preserveUnknownFields { 594 // When x-kubernetes-preserve-unknown-fields is set, we need 595 // an explicit ellipsis even though kindToAST won't have added 596 // one, so make sure there's an object. 597 _ = s.object(s.pos) 598 } 599 if s.obj == nil { 600 return 601 } 602 if s.preserveUnknownFields { 603 s.openness = explicitlyOpen 604 } 605 var e ast.Expr = s.obj 606 if s.cfg.OpenOnlyWhenExplicit && s.openness == implicitlyOpen { 607 // Nothing to do: the struct is implicitly open but 608 // we've been directed to leave it like that. 609 } else if s.openness == allFieldsCovered { 610 // Nothing to do: there is a pattern constraint that covers all 611 // possible fields. 612 } else if s.openness == explicitlyClosed { 613 e = ast.NewCall(ast.NewIdent("close"), s.obj) 614 } else { 615 s.obj.Elts = append(s.obj.Elts, &ast.Ellipsis{}) 616 } 617 s.add(s.objN, objectType, e) 618 } 619 620 func (s *state) hasConstraints() bool { 621 if len(s.all.constraints) > 0 { 622 return true 623 } 624 for _, t := range s.types { 625 if len(t.constraints) > 0 { 626 return true 627 } 628 } 629 return len(s.patterns) > 0 || 630 s.title != "" || 631 s.description != "" || 632 s.obj != nil || 633 s.id != nil 634 } 635 636 const allTypes = cue.BoolKind | 637 cue.ListKind | 638 cue.NullKind | 639 cue.NumberKind | 640 cue.IntKind | 641 cue.StringKind | 642 cue.StructKind 643 644 // finalize constructs CUE syntax from the collected constraints. 645 func (s *state) finalize() (e ast.Expr) { 646 if s.allowedTypes == 0 { 647 // Nothing is possible. This isn't a necessarily a problem, as 648 // we might be inside an allOf or oneOf with other valid constraints. 649 return bottom() 650 } 651 652 s.finalizeObject() 653 654 conjuncts := []ast.Expr{} 655 disjuncts := []ast.Expr{} 656 657 // Sort literal structs and list last for nicer formatting. 658 // Use a stable sort so that the relative order of constraints 659 // is otherwise kept as-is, for the sake of deterministic output. 660 slices.SortStableFunc(s.types[arrayType].constraints, func(a, b ast.Expr) int { 661 _, aList := a.(*ast.ListLit) 662 _, bList := b.(*ast.ListLit) 663 return cmpBool(aList, bList) 664 }) 665 slices.SortStableFunc(s.types[objectType].constraints, func(a, b ast.Expr) int { 666 _, aStruct := a.(*ast.StructLit) 667 _, bStruct := b.(*ast.StructLit) 668 return cmpBool(aStruct, bStruct) 669 }) 670 671 type excludeInfo struct { 672 pos token.Pos 673 typIndex int 674 } 675 var excluded []excludeInfo 676 677 needsTypeDisjunction := s.allowedTypes != s.knownTypes 678 if !needsTypeDisjunction { 679 for i, t := range s.types { 680 k := coreToCUE[i] 681 if len(t.constraints) > 0 && s.allowedTypes&k != 0 { 682 // We need to include at least one type-specific 683 // constraint in the disjunction. 684 needsTypeDisjunction = true 685 break 686 } 687 } 688 } 689 690 if needsTypeDisjunction { 691 npossible := 0 692 nexcluded := 0 693 for i, t := range s.types { 694 k := coreToCUE[i] 695 allowed := s.allowedTypes&k != 0 696 switch { 697 case len(t.constraints) > 0: 698 npossible++ 699 if !allowed { 700 nexcluded++ 701 for _, c := range t.constraints { 702 excluded = append(excluded, excludeInfo{c.Pos(), i}) 703 } 704 continue 705 } 706 x := ast.NewBinExpr(token.AND, t.constraints...) 707 disjuncts = append(disjuncts, x) 708 case allowed: 709 npossible++ 710 if s.knownTypes&k != 0 { 711 disjuncts = append(disjuncts, kindToAST(k, s.cfg.OpenOnlyWhenExplicit)) 712 } 713 } 714 } 715 if nexcluded == npossible { 716 // All possibilities have been excluded: this is an impossible 717 // schema. 718 for _, e := range excluded { 719 s.addErr(errors.Newf(e.pos, 720 "constraint not allowed because type %s is excluded", 721 coreTypeName[e.typIndex], 722 )) 723 } 724 } 725 } 726 conjuncts = append(conjuncts, s.all.constraints...) 727 728 if len(disjuncts) > 0 { 729 conjuncts = append(conjuncts, ast.NewBinExpr(token.OR, disjuncts...)) 730 } 731 732 if len(conjuncts) == 0 { 733 // There are no conjuncts, which can only happen when there 734 // are no disjuncts, which can only happen when the entire 735 // set of disjuncts is redundant with respect to the types 736 // already implied by s.all. As we've already checked that 737 // s.allowedTypes is non-zero (so we know that 738 // it's not bottom) and we need _some_ expression 739 // to be part of the subequent syntax, we use top. 740 e = top() 741 } else { 742 e = ast.NewBinExpr(token.AND, conjuncts...) 743 } 744 745 a := []ast.Expr{e} 746 if s.nullable != nil { 747 a = []ast.Expr{s.nullable, e} 748 } 749 750 e = ast.NewBinExpr(token.OR, a...) 751 752 if len(s.definitions) > 0 { 753 if st, ok := e.(*ast.StructLit); ok { 754 st.Elts = append(st.Elts, s.definitions...) 755 } else { 756 st = ast.NewStruct() 757 st.Elts = append(st.Elts, &ast.EmbedDecl{Expr: e}) 758 st.Elts = append(st.Elts, s.definitions...) 759 e = st 760 } 761 } 762 763 // If an "$id" exists, make sure it's present in the output. 764 if s.id != nil { 765 if st, ok := e.(*ast.StructLit); ok { 766 st.Elts = append([]ast.Decl{s.idTag()}, st.Elts...) 767 } else { 768 e = &ast.StructLit{Elts: []ast.Decl{s.idTag(), &ast.EmbedDecl{Expr: e}}} 769 } 770 } 771 772 // Now that we've expressed the schema as actual syntax, 773 // all the allowed types are actually explicit and will not 774 // need to be mentioned again. 775 s.knownTypes = s.allowedTypes 776 return e 777 } 778 779 // cmpBool returns 780 // 781 // -1 if x is less than y, 782 // 0 if x equals y, 783 // +1 if x is greater than y, 784 // 785 // where false is ordered before true. 786 func cmpBool(x, y bool) int { 787 switch { 788 case !x && y: 789 return -1 790 case x && !y: 791 return +1 792 default: 793 return 0 794 } 795 } 796 797 func (s schemaInfo) comment() *ast.CommentGroup { 798 // Create documentation. 799 doc := strings.TrimSpace(s.title) 800 if s.description != "" { 801 if doc != "" { 802 doc += "\n\n" 803 } 804 doc += s.description 805 doc = strings.TrimSpace(doc) 806 } 807 // TODO: add examples as well? 808 if doc == "" { 809 return nil 810 } 811 return internal.NewComment(true, doc) 812 } 813 814 func (s *state) schema(n cue.Value) ast.Expr { 815 expr, _ := s.schemaState(n, allTypes, nil) 816 return expr 817 } 818 819 // schemaState returns a new state value derived from s. 820 // n holds the JSONSchema node to translate to a schema. 821 // types holds the set of possible types that the value can hold. 822 // 823 // If init is not nil, it is called on the newly created state value 824 // before doing anything else. 825 func (s0 *state) schemaState(n cue.Value, types cue.Kind, init func(*state)) (expr ast.Expr, info schemaInfo) { 826 s := &state{ 827 up: s0, 828 schemaInfo: schemaInfo{ 829 schemaVersion: s0.schemaVersion, 830 allowedTypes: types, 831 knownTypes: allTypes, 832 }, 833 decoder: s0.decoder, 834 pos: n, 835 isRoot: s0.isRoot && n == s0.pos, 836 preserveUnknownFields: s0.preserveUnknownFields, 837 } 838 if init != nil { 839 init(s) 840 } 841 defer func() { 842 // Perhaps replace the schema expression with a reference. 843 expr = s.maybeDefine(expr, info) 844 }() 845 if n.Kind() == cue.BoolKind { 846 if s.schemaVersion.is(vfrom(VersionDraft6)) { 847 // From draft6 onwards, boolean values signify a schema that always passes or fails. 848 // TODO if false, set s.allowedTypes and s.knownTypes to zero? 849 return boolSchema(s.boolValue(n)), s.schemaInfo 850 } 851 return s.errf(n, "boolean schemas not supported in %v", s.schemaVersion), s.schemaInfo 852 } 853 if n.Kind() != cue.StructKind { 854 return s.errf(n, "schema expects mapping node, found %s", n.Kind()), s.schemaInfo 855 } 856 857 // do multiple passes over the constraints to ensure they are done in order. 858 for pass := 0; pass < numPhases; pass++ { 859 s.processMap(n, func(key string, value cue.Value) { 860 if pass == 0 && key == "$ref" { 861 // Before 2019-19, keywords alongside $ref are ignored so keep 862 // track of whether we've seen any non-$ref keywords so we can 863 // ignore those keywords. This could apply even when the schema 864 // is >=2019-19 because $schema could be used to change the version. 865 s.hasRefKeyword = true 866 } 867 // Convert each constraint into a either a value or a functor. 868 c := constraintMap[key] 869 if c == nil { 870 if strings.HasPrefix(key, "x-") { 871 // A keyword starting with a leading x- is clearly 872 // not intended to be a valid keyword, and is explicitly 873 // allowed by OpenAPI. It seems reasonable that 874 // this is not an error even with StrictKeywords enabled. 875 return 876 } 877 if pass == 0 && s.cfg.StrictKeywords { 878 // TODO: value is not the correct position, albeit close. Fix this. 879 s.warnUnrecognizedKeyword(key, value, "unknown keyword %q", key) 880 } 881 return 882 } 883 if c.phase != pass { 884 return 885 } 886 if !s.schemaVersion.is(c.versions) { 887 s.warnUnrecognizedKeyword(key, value, "keyword %q is not supported in JSON schema version %v", key, s.schemaVersion) 888 return 889 } 890 if pass > 0 && !s.schemaVersion.is(vfrom(VersionDraft2019_09)) && s.hasRefKeyword && key != "$ref" { 891 // We're using a schema version that ignores keywords alongside $ref. 892 // 893 // Note that we specifically exclude pass 0 (the pass in which $schema is checked) 894 // from this check, because hasRefKeyword is only set in pass 0 and we 895 // can get into a self-contradictory situation ($schema says we should 896 // ignore keywords alongside $ref, but $ref says we should ignore the $schema 897 // keyword itself). We could make that situation an explicit error, but other 898 // implementations don't, and it would require an entire extra pass just to do so. 899 s.warnUnrecognizedKeyword(key, value, "ignoring keyword %q alongside $ref", key) 900 return 901 } 902 c.fn(key, value, s) 903 }) 904 if s.schemaVersion == VersionKubernetesCRD && s.isRoot { 905 // The root of a CRD is always a resource, so treat it as if it contained 906 // the x-kubernetes-embedded-resource keyword 907 c := constraintMap["x-kubernetes-embedded-resource"] 908 if c.phase != pass { 909 continue 910 } 911 // Note: there is no field value for the embedded-resource keyword, 912 // but it's not actually used except for its position so passing 913 // the parent object should work fine. 914 c.fn("x-kubernetes-embedded-resource", n, s) 915 } 916 } 917 if s.id != nil { 918 // If there's an ID, it can be referred to. 919 s.ensureDefinition(s.pos) 920 } 921 constraintIfThenElse(s) 922 if s.schemaVersion == VersionKubernetesCRD { 923 if s.hasProperties && s.hasAdditionalProperties { 924 s.errf(n, "additionalProperties may not be combined with properties in %v", s.schemaVersion) 925 } 926 } 927 if s.schemaVersion.is(openAPILike) { 928 if s.isArray && !s.hasItems { 929 // From https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/3.0.0.md#schema-object 930 // "`items` MUST be present if the `type` is `array`." 931 s.errf(n, `"items" must be present when the "type" is "array" in %v`, s.schemaVersion) 932 } 933 } 934 935 schemaExpr := s.finalize() 936 s.schemaInfo.hasConstraints = s.hasConstraints() 937 return schemaExpr, s.schemaInfo 938 } 939 940 func (s *state) warnUnrecognizedKeyword(key string, n cue.Value, msg string, args ...any) { 941 if !s.cfg.StrictKeywords { 942 return 943 } 944 if s.schemaVersion.is(openAPILike) && strings.HasPrefix(key, "x-") { 945 // Unimplemented x- keywords are allowed even with strict keywords 946 // under OpenAPI-like versions, because those versions enable 947 // strict keywords by default. 948 return 949 } 950 s.errf(n, msg, args...) 951 } 952 953 // maybeDefine checks whether we might need a definition 954 // for n given its actual schema syntax expression. If 955 // it does, it creates the definition as appropriate and returns 956 // an expression that refers to that definition; if not, 957 // it just returns expr itself. 958 // TODO also report whether the schema has been defined at a place 959 // where it can be unified with something else? 960 func (s *state) maybeDefine(expr ast.Expr, info schemaInfo) ast.Expr { 961 def := s.definedSchemaForNode(s.pos) 962 if def == nil || len(def.path.Selectors()) == 0 { 963 return expr 964 } 965 def.schema = expr 966 def.comment = info.comment() 967 if def.importPath == "" { 968 // It's a local definition that's not at the root. 969 if !s.builder.put(def.path, expr, s.comment()) { 970 s.errf(s.pos, "redefinition of schema CUE path %v", def.path) 971 return expr 972 } 973 } 974 return s.refExpr(s.pos, def.importPath, def.path) 975 } 976 977 // definedSchemaForNode returns the definedSchema value 978 // for the given node in the JSON schema, or nil 979 // if the node does not need a definition. 980 func (s *state) definedSchemaForNode(n cue.Value) *definedSchema { 981 def, ok := s.defForValue.lookup(n) 982 if !ok { 983 return nil 984 } 985 if def != nil { 986 // We've either made a definition in a previous pass 987 // or it's a redefinition. 988 // TODO if it's a redefinition, error. 989 return def 990 } 991 // This node has been referred to but not actually defined. We'll 992 // need another pass to sort out the reference even though the 993 // reference is no longer dangling. 994 s.needAnotherPass = true 995 996 def = s.addDefinition(n) 997 if def == nil { 998 return nil 999 } 1000 s.defForValue.set(n, def) 1001 s.danglingRefs-- 1002 return def 1003 } 1004 1005 func (s *state) addDefinition(n cue.Value) *definedSchema { 1006 var loc SchemaLoc 1007 schemaRoot := s.schemaRoot() 1008 loc.ID = ref(*schemaRoot.id) 1009 loc.ID.Fragment = cuePathToJSONPointer(relPath(n, schemaRoot.pos)) 1010 idStr := loc.ID.String() 1011 def, ok := s.defs[idStr] 1012 if ok { 1013 // We've already got a definition for this ID. 1014 // TODO if it's been defined in the same pass, then it's a redefinition 1015 // s.errf(n, "redefinition of schema %s at %v", idStr, n.Path()) 1016 return def 1017 } 1018 loc.IsLocal = true 1019 loc.Path = relPath(n, s.root) 1020 importPath, path, err := s.cfg.MapRef(loc) 1021 if err != nil { 1022 s.errf(n, "cannot get reference for %v: %v", loc, err) 1023 return nil 1024 } 1025 def = &definedSchema{ 1026 importPath: importPath, 1027 path: path, 1028 } 1029 s.defs[idStr] = def 1030 return def 1031 } 1032 1033 // refExpr returns a CUE expression to refer to the given path within the given 1034 // imported CUE package. If importPath is empty, it returns a reference 1035 // relative to the root of the schema being generated. 1036 func (s *state) refExpr(n cue.Value, importPath string, path cue.Path) ast.Expr { 1037 if importPath == "" { 1038 // Internal reference 1039 expr, err := s.builder.getRef(path) 1040 if err != nil { 1041 s.errf(n, "cannot generate reference: %v", err) 1042 return nil 1043 } 1044 return expr 1045 } 1046 // External reference 1047 ip := ast.ParseImportPath(importPath) 1048 if ip.Qualifier == "" { 1049 // TODO choose an arbitrary name here. 1050 s.errf(n, "cannot determine package name from import path %q", importPath) 1051 return nil 1052 } 1053 ident := ast.NewIdent(ip.Qualifier) 1054 ident.Node = &ast.ImportSpec{Path: ast.NewString(importPath)} 1055 expr, err := pathRefSyntax(path, ident) 1056 if err != nil { 1057 s.errf(n, "cannot determine CUE path: %v", err) 1058 return nil 1059 } 1060 return expr 1061 } 1062 1063 func (s *state) constValue(n cue.Value) ast.Expr { 1064 k := n.Kind() 1065 switch k { 1066 case cue.ListKind: 1067 a := []ast.Expr{} 1068 for i, _ := n.List(); i.Next(); { 1069 a = append(a, s.constValue(i.Value())) 1070 } 1071 return setPos(ast.NewList(a...), n) 1072 1073 case cue.StructKind: 1074 a := []ast.Decl{} 1075 s.processMap(n, func(key string, n cue.Value) { 1076 a = append(a, &ast.Field{ 1077 Label: ast.NewString(key), 1078 Value: s.constValue(n), 1079 Constraint: token.NOT, 1080 }) 1081 }) 1082 return setPos(ast.NewCall(ast.NewIdent("close"), &ast.StructLit{Elts: a}), n) 1083 default: 1084 if !n.IsConcrete() { 1085 s.errf(n, "invalid non-concrete value") 1086 } 1087 return n.Syntax(cue.Final()).(ast.Expr) 1088 } 1089 } 1090 1091 func (s *state) value(n cue.Value) ast.Expr { 1092 k := n.Kind() 1093 switch k { 1094 case cue.ListKind: 1095 a := []ast.Expr{} 1096 for i, _ := n.List(); i.Next(); { 1097 a = append(a, s.value(i.Value())) 1098 } 1099 return setPos(ast.NewList(a...), n) 1100 1101 case cue.StructKind: 1102 a := []ast.Decl{} 1103 s.processMap(n, func(key string, n cue.Value) { 1104 a = append(a, &ast.Field{ 1105 Label: ast.NewString(key), 1106 Value: s.value(n), 1107 }) 1108 }) 1109 return setPos(&ast.StructLit{Elts: a}, n) 1110 1111 default: 1112 if !n.IsConcrete() { 1113 s.errf(n, "invalid non-concrete value") 1114 } 1115 return n.Syntax(cue.Final()).(ast.Expr) 1116 } 1117 } 1118 1119 // processMap processes a yaml node, expanding merges. 1120 // 1121 // TODO: in some cases we can translate merges into CUE embeddings. 1122 // This may also prevent exponential blow-up (as may happen when 1123 // converting YAML to JSON). 1124 func (s *state) processMap(n cue.Value, f func(key string, n cue.Value)) { 1125 // TODO: intercept references to allow for optimized performance. 1126 for i, _ := n.Fields(); i.Next(); { 1127 f(i.Selector().Unquoted(), i.Value()) 1128 } 1129 } 1130 1131 func (s *state) listItems(name string, n cue.Value, allowEmpty bool) (a []cue.Value) { 1132 if n.Kind() != cue.ListKind { 1133 s.errf(n, `value of %q must be an array, found %v`, name, n.Kind()) 1134 } 1135 for i, _ := n.List(); i.Next(); { 1136 a = append(a, i.Value()) 1137 } 1138 if !allowEmpty && len(a) == 0 { 1139 s.errf(n, `array for %q must be non-empty`, name) 1140 } 1141 return a 1142 } 1143 1144 // excludeFields returns either an empty slice (if decls is empty) 1145 // or a slice containing a CUE expression that can be used to exclude the 1146 // fields of the given declaration in a label expression. For instance, for 1147 // 1148 // { foo: 1, bar: int } 1149 // 1150 // it creates a slice holding the expression 1151 // 1152 // !~ "^(foo|bar)$" 1153 // 1154 // which can be used in a label expression to define types for all fields but 1155 // those existing: 1156 // 1157 // [!~"^(foo|bar)$"]: string 1158 func excludeFields(decls []ast.Decl) []ast.Expr { 1159 if len(decls) == 0 { 1160 return nil 1161 } 1162 var buf strings.Builder 1163 first := true 1164 buf.WriteString("^(") 1165 for _, d := range decls { 1166 f, ok := d.(*ast.Field) 1167 if !ok { 1168 continue 1169 } 1170 str, _, _ := ast.LabelName(f.Label) 1171 if str != "" { 1172 if !first { 1173 buf.WriteByte('|') 1174 } 1175 buf.WriteString(regexp.QuoteMeta(str)) 1176 first = false 1177 } 1178 } 1179 buf.WriteString(")$") 1180 return []ast.Expr{ 1181 &ast.UnaryExpr{Op: token.NMAT, X: ast.NewString(buf.String())}, 1182 } 1183 } 1184 1185 func bottom() ast.Expr { 1186 return &ast.BottomLit{} 1187 } 1188 1189 func top() ast.Expr { 1190 return ast.NewIdent("_") 1191 } 1192 1193 func boolSchema(ok bool) ast.Expr { 1194 if ok { 1195 return top() 1196 } 1197 return bottom() 1198 } 1199 1200 func isTop(s ast.Expr) bool { 1201 i, ok := s.(*ast.Ident) 1202 return ok && i.Name == "_" 1203 } 1204 1205 func isBottom(e ast.Expr) bool { 1206 _, ok := e.(*ast.BottomLit) 1207 return ok 1208 } 1209 1210 func addTag(field ast.Label, tag, value string) *ast.Field { 1211 return &ast.Field{ 1212 Label: field, 1213 Value: top(), 1214 Attrs: []*ast.Attribute{ 1215 {Text: fmt.Sprintf("@%s(%s)", tag, value)}, 1216 }, 1217 } 1218 } 1219 1220 func setPos(e ast.Expr, v cue.Value) ast.Expr { 1221 ast.SetPos(e, v.Pos()) 1222 return e 1223 } 1224 1225 // uint64Value is like v.Uint64 except that it 1226 // also allows floating point constants, as long 1227 // as they have no fractional part. 1228 func uint64Value(v cue.Value) (uint64, error) { 1229 n, err := v.Uint64() 1230 if err == nil { 1231 return n, nil 1232 } 1233 f, err := v.Float64() 1234 if err != nil { 1235 return 0, err 1236 } 1237 intPart, fracPart := math.Modf(f) 1238 if fracPart != 0 { 1239 return 0, errors.Newf(v.Pos(), "%v is not a whole number", v) 1240 } 1241 if intPart < 0 || intPart > math.MaxUint64 { 1242 return 0, errors.Newf(v.Pos(), "%v is out of bounds", v) 1243 } 1244 return uint64(intPart), nil 1245 }