cuelang.org/go@v0.13.0/internal/encoding/encoding.go (about) 1 // Copyright 2020 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // TODO: make this package public in cuelang.org/go/encoding 16 // once stabilized. 17 18 package encoding 19 20 import ( 21 "fmt" 22 "io" 23 "maps" 24 25 "cuelang.org/go/cue" 26 "cuelang.org/go/cue/ast" 27 "cuelang.org/go/cue/build" 28 "cuelang.org/go/cue/errors" 29 "cuelang.org/go/cue/format" 30 "cuelang.org/go/cue/literal" 31 "cuelang.org/go/cue/parser" 32 "cuelang.org/go/cue/token" 33 "cuelang.org/go/encoding/json" 34 "cuelang.org/go/encoding/jsonschema" 35 "cuelang.org/go/encoding/openapi" 36 "cuelang.org/go/encoding/protobuf" 37 "cuelang.org/go/encoding/protobuf/jsonpb" 38 "cuelang.org/go/encoding/protobuf/textproto" 39 "cuelang.org/go/encoding/toml" 40 "cuelang.org/go/encoding/xml/koala" 41 "cuelang.org/go/internal" 42 "cuelang.org/go/internal/encoding/yaml" 43 "cuelang.org/go/internal/filetypes" 44 "cuelang.org/go/internal/source" 45 "golang.org/x/text/encoding/unicode" 46 "golang.org/x/text/transform" 47 ) 48 49 type Decoder struct { 50 ctx *cue.Context 51 cfg *Config 52 closer io.Closer 53 next func() (ast.Expr, error) 54 rewriteFunc rewriteFunc 55 interpretFunc interpretFunc 56 interpretation build.Interpretation 57 expr ast.Expr 58 file *ast.File 59 filename string // may change on iteration for some formats 60 index int 61 err error 62 } 63 64 type interpretFunc func(cue.Value) (file *ast.File, err error) 65 type rewriteFunc func(*ast.File) (file *ast.File, err error) 66 67 func (i *Decoder) Filename() string { return i.filename } 68 69 // Interpretation returns the current interpretation detected by Detect. 70 func (i *Decoder) Interpretation() build.Interpretation { 71 return i.interpretation 72 } 73 func (i *Decoder) Index() int { return i.index } 74 func (i *Decoder) Done() bool { return i.err != nil } 75 76 func (i *Decoder) Next() { 77 if i.err != nil { 78 return 79 } 80 // Decoder level 81 i.file = nil 82 i.expr, i.err = i.next() 83 i.index++ 84 if i.err != nil { 85 return 86 } 87 i.doInterpret() 88 } 89 90 func (i *Decoder) doInterpret() { 91 if i.rewriteFunc != nil { 92 i.file = i.File() 93 var err error 94 i.file, err = i.rewriteFunc(i.file) 95 if err != nil { 96 i.err = err 97 return 98 } 99 } 100 if i.interpretFunc != nil { 101 i.file = i.File() 102 v := i.ctx.BuildFile(i.file) 103 if err := v.Err(); err != nil { 104 i.err = err 105 return 106 } 107 i.file, i.err = i.interpretFunc(v) 108 } 109 } 110 111 func (i *Decoder) File() *ast.File { 112 if i.file != nil { 113 return i.file 114 } 115 return internal.ToFile(i.expr) 116 } 117 118 func (i *Decoder) Err() error { 119 if i.err == io.EOF { 120 return nil 121 } 122 return i.err 123 } 124 125 func (i *Decoder) Close() { 126 if i.closer != nil { 127 i.closer.Close() 128 } 129 } 130 131 type Config struct { 132 Mode filetypes.Mode 133 134 // Out specifies an overwrite destination. 135 Out io.Writer 136 Stdin io.Reader 137 Stdout io.Writer 138 139 PkgName string // package name for files to generate 140 141 Force bool // overwrite existing files 142 Strict bool // strict mode for jsonschema (deprecated) 143 Stream bool // potentially write more than one document per file 144 AllErrors bool 145 146 Schema cue.Value // used for schema-based decoding 147 148 EscapeHTML bool 149 InlineImports bool // expand references to non-core imports 150 ProtoPath []string 151 Format []format.Option 152 ParseFile func(name string, src interface{}) (*ast.File, error) 153 } 154 155 // NewDecoder returns a stream of non-rooted data expressions. The encoding 156 // type of f must be a data type, but does not have to be an encoding that 157 // can stream. stdin is used in case the file is "-". 158 // 159 // This may change the contents of f. 160 func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder { 161 if cfg == nil { 162 cfg = &Config{} 163 } 164 i := &Decoder{filename: f.Filename, ctx: ctx, cfg: cfg} 165 i.next = func() (ast.Expr, error) { 166 if i.err != nil { 167 return nil, i.err 168 } 169 return nil, io.EOF 170 } 171 172 if file, ok := f.Source.(*ast.File); ok { 173 i.file = file 174 i.validate(file, f) 175 return i 176 } 177 178 var r io.Reader 179 if f.Source == nil && f.Filename == "-" { 180 // TODO: should we allow this? 181 r = cfg.Stdin 182 } else { 183 rc, err := source.Open(f.Filename, f.Source) 184 i.closer = rc 185 i.err = err 186 if i.err != nil { 187 return i 188 } 189 r = rc 190 } 191 192 switch f.Interpretation { 193 case "": 194 case build.Auto: 195 openAPI := openAPIFunc(cfg, f) 196 jsonSchema := jsonSchemaFunc(cfg, f) 197 i.interpretFunc = func(v cue.Value) (file *ast.File, err error) { 198 199 switch i.interpretation = Detect(v); i.interpretation { 200 case build.JSONSchema: 201 return jsonSchema(v) 202 case build.OpenAPI: 203 return openAPI(v) 204 } 205 return i.file, i.err 206 } 207 case build.OpenAPI: 208 i.interpretation = build.OpenAPI 209 i.interpretFunc = openAPIFunc(cfg, f) 210 case build.JSONSchema: 211 i.interpretation = build.JSONSchema 212 i.interpretFunc = jsonSchemaFunc(cfg, f) 213 case build.ProtobufJSON: 214 i.interpretation = build.ProtobufJSON 215 i.rewriteFunc = protobufJSONFunc(cfg, f) 216 default: 217 i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation) 218 } 219 220 // Binary encodings should not be treated as UTF-8, so read directly from the file. 221 // Other encodings are interepted as UTF-8 with an optional BOM prefix. 222 // 223 // TODO: perhaps each encoding could have a "binary" boolean attribute 224 // so that we can use that here rather than hard-coding which encodings are binary. 225 // In the near future, others like [build.BinaryProto] should also be treated as binary. 226 if f.Encoding != build.Binary { 227 // TODO: this code also allows UTF16, which is too permissive for some 228 // encodings. Switch to unicode.UTF8Sig once available. 229 t := unicode.BOMOverride(unicode.UTF8.NewDecoder()) 230 r = transform.NewReader(r, t) 231 } 232 233 path := f.Filename 234 switch f.Encoding { 235 case build.CUE: 236 if cfg.ParseFile == nil { 237 i.file, i.err = parser.ParseFile(path, r, parser.ParseComments) 238 } else { 239 i.file, i.err = cfg.ParseFile(path, r) 240 } 241 i.validate(i.file, f) 242 if i.err == nil { 243 i.doInterpret() 244 } 245 case build.JSON: 246 b, err := io.ReadAll(r) 247 if err != nil { 248 i.err = err 249 break 250 } 251 i.expr, i.err = json.Extract(path, b) 252 if i.err == nil { 253 i.doInterpret() 254 } 255 case build.JSONL: 256 i.next = json.NewDecoder(nil, path, r).Extract 257 i.Next() 258 case build.YAML: 259 b, err := io.ReadAll(r) 260 i.err = err 261 i.next = yaml.NewDecoder(path, b).Decode 262 i.Next() 263 case build.TOML: 264 i.next = toml.NewDecoder(path, r).Decode 265 i.Next() 266 case build.XML: 267 switch { 268 case f.BoolTags["koala"]: 269 i.next = koala.NewDecoder(path, r).Decode 270 i.Next() 271 default: 272 i.err = fmt.Errorf("xml requires a variant, such as: xml+koala") 273 } 274 case build.Text: 275 b, err := io.ReadAll(r) 276 i.err = err 277 i.expr = ast.NewString(string(b)) 278 case build.Binary: 279 b, err := io.ReadAll(r) 280 i.err = err 281 s := literal.Bytes.WithTabIndent(1).Quote(string(b)) 282 i.expr = ast.NewLit(token.STRING, s) 283 case build.Protobuf: 284 paths := &protobuf.Config{ 285 Paths: cfg.ProtoPath, 286 PkgName: cfg.PkgName, 287 } 288 i.file, i.err = protobuf.Extract(path, r, paths) 289 case build.TextProto: 290 b, err := io.ReadAll(r) 291 i.err = err 292 if err == nil { 293 d := textproto.NewDecoder() 294 i.expr, i.err = d.Parse(cfg.Schema, path, b) 295 } 296 default: 297 i.err = fmt.Errorf("unsupported encoding %q", f.Encoding) 298 } 299 300 return i 301 } 302 303 func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc { 304 return func(v cue.Value) (file *ast.File, err error) { 305 tags := boolTagsForFile(f, build.JSONSchema) 306 cfg := &jsonschema.Config{ 307 PkgName: cfg.PkgName, 308 309 // Note: we don't populate Strict because then we'd 310 // be ignoring the values of the other tags when it's true, 311 // and there's (deliberately) nothing that Strict does that 312 // cannot be described by the other two keywords. 313 // The strictKeywords and strictFeatures tags are 314 // set by internal/filetypes from the strict tag when appropriate. 315 316 StrictKeywords: cfg.Strict || tags["strictKeywords"], 317 StrictFeatures: cfg.Strict || tags["strictFeatures"], 318 } 319 file, err = jsonschema.Extract(v, cfg) 320 // TODO: simplify currently erases file line info. Reintroduce after fix. 321 // file, err = simplify(file, err) 322 return file, err 323 } 324 } 325 326 func openAPIFunc(c *Config, f *build.File) interpretFunc { 327 return func(v cue.Value) (file *ast.File, err error) { 328 tags := boolTagsForFile(f, build.JSONSchema) 329 file, err = openapi.Extract(v, &openapi.Config{ 330 PkgName: c.PkgName, 331 332 // Note: don't populate Strict (see more detailed 333 // comment in jsonSchemaFunc) 334 335 StrictKeywords: c.Strict || tags["strictKeywords"], 336 StrictFeatures: c.Strict || tags["strictFeatures"], 337 }) 338 // TODO: simplify currently erases file line info. Reintroduce after fix. 339 // file, err = simplify(file, err) 340 return file, err 341 } 342 } 343 344 func protobufJSONFunc(cfg *Config, file *build.File) rewriteFunc { 345 return func(f *ast.File) (*ast.File, error) { 346 if !cfg.Schema.Exists() { 347 return f, errors.Newf(token.NoPos, 348 "no schema specified for protobuf interpretation.") 349 } 350 return f, jsonpb.NewDecoder(cfg.Schema).RewriteFile(f) 351 } 352 } 353 354 func boolTagsForFile(f *build.File, interp build.Interpretation) map[string]bool { 355 if f.Interpretation != build.Auto { 356 return f.BoolTags 357 } 358 defaultTags := filetypes.DefaultTagsForInterpretation(interp, filetypes.Input) 359 if len(defaultTags) == 0 { 360 return f.BoolTags 361 } 362 // We _could_ probably mutate f.Tags directly, but that doesn't 363 // seem quite right as it's been passed in from outside of internal/encoding. 364 // So go the extra mile and make a new map. 365 366 // Set values for tags that have a default value but aren't 367 // present in f.Tags. 368 var tags map[string]bool 369 for tag, val := range defaultTags { 370 if _, ok := f.BoolTags[tag]; ok { 371 continue 372 } 373 if tags == nil { 374 tags = make(map[string]bool) 375 } 376 tags[tag] = val 377 } 378 if tags == nil { 379 return f.BoolTags 380 } 381 maps.Copy(tags, f.BoolTags) 382 return tags 383 } 384 385 func shouldValidate(i *filetypes.FileInfo) bool { 386 // TODO: We ignore attributes for now. They should be enabled by default. 387 return false || 388 !i.Definitions || 389 !i.Data || 390 !i.Optional || 391 !i.Constraints || 392 !i.References || 393 !i.Cycles || 394 !i.KeepDefaults || 395 !i.Incomplete || 396 !i.Imports || 397 !i.Docs 398 } 399 400 type validator struct { 401 allErrors bool 402 count int 403 errs errors.Error 404 fileinfo *filetypes.FileInfo 405 } 406 407 func (d *Decoder) validate(f *ast.File, b *build.File) { 408 if d.err != nil { 409 return 410 } 411 fi, err := filetypes.FromFile(b, filetypes.Input) 412 if err != nil { 413 d.err = err 414 return 415 } 416 if !shouldValidate(fi) { 417 return 418 } 419 420 v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors} 421 ast.Walk(f, v.validate, nil) 422 d.err = v.errs 423 } 424 425 func (v *validator) validate(n ast.Node) bool { 426 if v.count > 10 { 427 return false 428 } 429 430 i := v.fileinfo 431 432 // TODO: Cycles 433 434 ok := true 435 check := func(n ast.Node, option bool, s string, cond bool) { 436 if !option && cond { 437 v.errs = errors.Append(v.errs, errors.Newf(n.Pos(), 438 "%s not allowed in %s mode", s, v.fileinfo.Form)) 439 v.count++ 440 ok = false 441 } 442 } 443 444 // For now we don't make any distinction between these modes. 445 446 constraints := i.Constraints && i.Incomplete && i.Optional && i.References 447 448 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0) 449 450 switch x := n.(type) { 451 case *ast.CommentGroup: 452 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0) 453 return false 454 455 case *ast.ImportDecl, *ast.ImportSpec: 456 check(n, i.Imports, "imports", true) 457 458 case *ast.Field: 459 check(n, i.Definitions, "definitions", internal.IsDefinition(x.Label)) 460 check(n, i.Data, "regular fields", internal.IsRegularField(x)) 461 check(n, constraints, "optional fields", x.Optional != token.NoPos) 462 463 _, _, err := ast.LabelName(x.Label) 464 check(n, constraints, "optional fields", err != nil) 465 466 check(n, i.Attributes, "attributes", len(x.Attrs) > 0) 467 ast.Walk(x.Value, v.validate, nil) 468 return false 469 470 case *ast.UnaryExpr: 471 switch x.Op { 472 case token.MUL: 473 check(n, i.KeepDefaults, "default values", true) 474 case token.SUB, token.ADD: 475 // The parser represents negative numbers as an unary expression. 476 // Allow one `-` or `+`. 477 _, ok := x.X.(*ast.BasicLit) 478 check(n, constraints, "expressions", !ok) 479 case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR, 480 token.NEQ, token.NMAT, token.MAT: 481 check(n, constraints, "constraints", true) 482 default: 483 check(n, constraints, "expressions", true) 484 } 485 486 case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr, 487 *ast.CallExpr, *ast.Comprehension, *ast.Interpolation: 488 check(n, constraints, "expressions", true) 489 490 case *ast.Ellipsis: 491 check(n, constraints, "ellipsis", true) 492 493 case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause: 494 check(n, i.References, "references", true) 495 496 default: 497 // Other types are either always okay or handled elsewhere. 498 } 499 return ok 500 }