cuelang.org/go@v0.13.0/internal/encoding/encoding.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // TODO: make this package public in cuelang.org/go/encoding
    16  // once stabilized.
    17  
    18  package encoding
    19  
    20  import (
    21  	"fmt"
    22  	"io"
    23  	"maps"
    24  
    25  	"cuelang.org/go/cue"
    26  	"cuelang.org/go/cue/ast"
    27  	"cuelang.org/go/cue/build"
    28  	"cuelang.org/go/cue/errors"
    29  	"cuelang.org/go/cue/format"
    30  	"cuelang.org/go/cue/literal"
    31  	"cuelang.org/go/cue/parser"
    32  	"cuelang.org/go/cue/token"
    33  	"cuelang.org/go/encoding/json"
    34  	"cuelang.org/go/encoding/jsonschema"
    35  	"cuelang.org/go/encoding/openapi"
    36  	"cuelang.org/go/encoding/protobuf"
    37  	"cuelang.org/go/encoding/protobuf/jsonpb"
    38  	"cuelang.org/go/encoding/protobuf/textproto"
    39  	"cuelang.org/go/encoding/toml"
    40  	"cuelang.org/go/encoding/xml/koala"
    41  	"cuelang.org/go/internal"
    42  	"cuelang.org/go/internal/encoding/yaml"
    43  	"cuelang.org/go/internal/filetypes"
    44  	"cuelang.org/go/internal/source"
    45  	"golang.org/x/text/encoding/unicode"
    46  	"golang.org/x/text/transform"
    47  )
    48  
    49  type Decoder struct {
    50  	ctx            *cue.Context
    51  	cfg            *Config
    52  	closer         io.Closer
    53  	next           func() (ast.Expr, error)
    54  	rewriteFunc    rewriteFunc
    55  	interpretFunc  interpretFunc
    56  	interpretation build.Interpretation
    57  	expr           ast.Expr
    58  	file           *ast.File
    59  	filename       string // may change on iteration for some formats
    60  	index          int
    61  	err            error
    62  }
    63  
    64  type interpretFunc func(cue.Value) (file *ast.File, err error)
    65  type rewriteFunc func(*ast.File) (file *ast.File, err error)
    66  
    67  func (i *Decoder) Filename() string { return i.filename }
    68  
    69  // Interpretation returns the current interpretation detected by Detect.
    70  func (i *Decoder) Interpretation() build.Interpretation {
    71  	return i.interpretation
    72  }
    73  func (i *Decoder) Index() int { return i.index }
    74  func (i *Decoder) Done() bool { return i.err != nil }
    75  
    76  func (i *Decoder) Next() {
    77  	if i.err != nil {
    78  		return
    79  	}
    80  	// Decoder level
    81  	i.file = nil
    82  	i.expr, i.err = i.next()
    83  	i.index++
    84  	if i.err != nil {
    85  		return
    86  	}
    87  	i.doInterpret()
    88  }
    89  
    90  func (i *Decoder) doInterpret() {
    91  	if i.rewriteFunc != nil {
    92  		i.file = i.File()
    93  		var err error
    94  		i.file, err = i.rewriteFunc(i.file)
    95  		if err != nil {
    96  			i.err = err
    97  			return
    98  		}
    99  	}
   100  	if i.interpretFunc != nil {
   101  		i.file = i.File()
   102  		v := i.ctx.BuildFile(i.file)
   103  		if err := v.Err(); err != nil {
   104  			i.err = err
   105  			return
   106  		}
   107  		i.file, i.err = i.interpretFunc(v)
   108  	}
   109  }
   110  
   111  func (i *Decoder) File() *ast.File {
   112  	if i.file != nil {
   113  		return i.file
   114  	}
   115  	return internal.ToFile(i.expr)
   116  }
   117  
   118  func (i *Decoder) Err() error {
   119  	if i.err == io.EOF {
   120  		return nil
   121  	}
   122  	return i.err
   123  }
   124  
   125  func (i *Decoder) Close() {
   126  	if i.closer != nil {
   127  		i.closer.Close()
   128  	}
   129  }
   130  
   131  type Config struct {
   132  	Mode filetypes.Mode
   133  
   134  	// Out specifies an overwrite destination.
   135  	Out    io.Writer
   136  	Stdin  io.Reader
   137  	Stdout io.Writer
   138  
   139  	PkgName string // package name for files to generate
   140  
   141  	Force     bool // overwrite existing files
   142  	Strict    bool // strict mode for jsonschema (deprecated)
   143  	Stream    bool // potentially write more than one document per file
   144  	AllErrors bool
   145  
   146  	Schema cue.Value // used for schema-based decoding
   147  
   148  	EscapeHTML    bool
   149  	InlineImports bool // expand references to non-core imports
   150  	ProtoPath     []string
   151  	Format        []format.Option
   152  	ParseFile     func(name string, src interface{}) (*ast.File, error)
   153  }
   154  
   155  // NewDecoder returns a stream of non-rooted data expressions. The encoding
   156  // type of f must be a data type, but does not have to be an encoding that
   157  // can stream. stdin is used in case the file is "-".
   158  //
   159  // This may change the contents of f.
   160  func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder {
   161  	if cfg == nil {
   162  		cfg = &Config{}
   163  	}
   164  	i := &Decoder{filename: f.Filename, ctx: ctx, cfg: cfg}
   165  	i.next = func() (ast.Expr, error) {
   166  		if i.err != nil {
   167  			return nil, i.err
   168  		}
   169  		return nil, io.EOF
   170  	}
   171  
   172  	if file, ok := f.Source.(*ast.File); ok {
   173  		i.file = file
   174  		i.validate(file, f)
   175  		return i
   176  	}
   177  
   178  	var r io.Reader
   179  	if f.Source == nil && f.Filename == "-" {
   180  		// TODO: should we allow this?
   181  		r = cfg.Stdin
   182  	} else {
   183  		rc, err := source.Open(f.Filename, f.Source)
   184  		i.closer = rc
   185  		i.err = err
   186  		if i.err != nil {
   187  			return i
   188  		}
   189  		r = rc
   190  	}
   191  
   192  	switch f.Interpretation {
   193  	case "":
   194  	case build.Auto:
   195  		openAPI := openAPIFunc(cfg, f)
   196  		jsonSchema := jsonSchemaFunc(cfg, f)
   197  		i.interpretFunc = func(v cue.Value) (file *ast.File, err error) {
   198  
   199  			switch i.interpretation = Detect(v); i.interpretation {
   200  			case build.JSONSchema:
   201  				return jsonSchema(v)
   202  			case build.OpenAPI:
   203  				return openAPI(v)
   204  			}
   205  			return i.file, i.err
   206  		}
   207  	case build.OpenAPI:
   208  		i.interpretation = build.OpenAPI
   209  		i.interpretFunc = openAPIFunc(cfg, f)
   210  	case build.JSONSchema:
   211  		i.interpretation = build.JSONSchema
   212  		i.interpretFunc = jsonSchemaFunc(cfg, f)
   213  	case build.ProtobufJSON:
   214  		i.interpretation = build.ProtobufJSON
   215  		i.rewriteFunc = protobufJSONFunc(cfg, f)
   216  	default:
   217  		i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation)
   218  	}
   219  
   220  	// Binary encodings should not be treated as UTF-8, so read directly from the file.
   221  	// Other encodings are interepted as UTF-8 with an optional BOM prefix.
   222  	//
   223  	// TODO: perhaps each encoding could have a "binary" boolean attribute
   224  	// so that we can use that here rather than hard-coding which encodings are binary.
   225  	// In the near future, others like [build.BinaryProto] should also be treated as binary.
   226  	if f.Encoding != build.Binary {
   227  		// TODO: this code also allows UTF16, which is too permissive for some
   228  		// encodings. Switch to unicode.UTF8Sig once available.
   229  		t := unicode.BOMOverride(unicode.UTF8.NewDecoder())
   230  		r = transform.NewReader(r, t)
   231  	}
   232  
   233  	path := f.Filename
   234  	switch f.Encoding {
   235  	case build.CUE:
   236  		if cfg.ParseFile == nil {
   237  			i.file, i.err = parser.ParseFile(path, r, parser.ParseComments)
   238  		} else {
   239  			i.file, i.err = cfg.ParseFile(path, r)
   240  		}
   241  		i.validate(i.file, f)
   242  		if i.err == nil {
   243  			i.doInterpret()
   244  		}
   245  	case build.JSON:
   246  		b, err := io.ReadAll(r)
   247  		if err != nil {
   248  			i.err = err
   249  			break
   250  		}
   251  		i.expr, i.err = json.Extract(path, b)
   252  		if i.err == nil {
   253  			i.doInterpret()
   254  		}
   255  	case build.JSONL:
   256  		i.next = json.NewDecoder(nil, path, r).Extract
   257  		i.Next()
   258  	case build.YAML:
   259  		b, err := io.ReadAll(r)
   260  		i.err = err
   261  		i.next = yaml.NewDecoder(path, b).Decode
   262  		i.Next()
   263  	case build.TOML:
   264  		i.next = toml.NewDecoder(path, r).Decode
   265  		i.Next()
   266  	case build.XML:
   267  		switch {
   268  		case f.BoolTags["koala"]:
   269  			i.next = koala.NewDecoder(path, r).Decode
   270  			i.Next()
   271  		default:
   272  			i.err = fmt.Errorf("xml requires a variant, such as: xml+koala")
   273  		}
   274  	case build.Text:
   275  		b, err := io.ReadAll(r)
   276  		i.err = err
   277  		i.expr = ast.NewString(string(b))
   278  	case build.Binary:
   279  		b, err := io.ReadAll(r)
   280  		i.err = err
   281  		s := literal.Bytes.WithTabIndent(1).Quote(string(b))
   282  		i.expr = ast.NewLit(token.STRING, s)
   283  	case build.Protobuf:
   284  		paths := &protobuf.Config{
   285  			Paths:   cfg.ProtoPath,
   286  			PkgName: cfg.PkgName,
   287  		}
   288  		i.file, i.err = protobuf.Extract(path, r, paths)
   289  	case build.TextProto:
   290  		b, err := io.ReadAll(r)
   291  		i.err = err
   292  		if err == nil {
   293  			d := textproto.NewDecoder()
   294  			i.expr, i.err = d.Parse(cfg.Schema, path, b)
   295  		}
   296  	default:
   297  		i.err = fmt.Errorf("unsupported encoding %q", f.Encoding)
   298  	}
   299  
   300  	return i
   301  }
   302  
   303  func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc {
   304  	return func(v cue.Value) (file *ast.File, err error) {
   305  		tags := boolTagsForFile(f, build.JSONSchema)
   306  		cfg := &jsonschema.Config{
   307  			PkgName: cfg.PkgName,
   308  
   309  			// Note: we don't populate Strict because then we'd
   310  			// be ignoring the values of the other tags when it's true,
   311  			// and there's (deliberately) nothing that Strict does that
   312  			// cannot be described by the other two keywords.
   313  			// The strictKeywords and strictFeatures tags are
   314  			// set by internal/filetypes from the strict tag when appropriate.
   315  
   316  			StrictKeywords: cfg.Strict || tags["strictKeywords"],
   317  			StrictFeatures: cfg.Strict || tags["strictFeatures"],
   318  		}
   319  		file, err = jsonschema.Extract(v, cfg)
   320  		// TODO: simplify currently erases file line info. Reintroduce after fix.
   321  		// file, err = simplify(file, err)
   322  		return file, err
   323  	}
   324  }
   325  
   326  func openAPIFunc(c *Config, f *build.File) interpretFunc {
   327  	return func(v cue.Value) (file *ast.File, err error) {
   328  		tags := boolTagsForFile(f, build.JSONSchema)
   329  		file, err = openapi.Extract(v, &openapi.Config{
   330  			PkgName: c.PkgName,
   331  
   332  			// Note: don't populate Strict (see more detailed
   333  			// comment in jsonSchemaFunc)
   334  
   335  			StrictKeywords: c.Strict || tags["strictKeywords"],
   336  			StrictFeatures: c.Strict || tags["strictFeatures"],
   337  		})
   338  		// TODO: simplify currently erases file line info. Reintroduce after fix.
   339  		// file, err = simplify(file, err)
   340  		return file, err
   341  	}
   342  }
   343  
   344  func protobufJSONFunc(cfg *Config, file *build.File) rewriteFunc {
   345  	return func(f *ast.File) (*ast.File, error) {
   346  		if !cfg.Schema.Exists() {
   347  			return f, errors.Newf(token.NoPos,
   348  				"no schema specified for protobuf interpretation.")
   349  		}
   350  		return f, jsonpb.NewDecoder(cfg.Schema).RewriteFile(f)
   351  	}
   352  }
   353  
   354  func boolTagsForFile(f *build.File, interp build.Interpretation) map[string]bool {
   355  	if f.Interpretation != build.Auto {
   356  		return f.BoolTags
   357  	}
   358  	defaultTags := filetypes.DefaultTagsForInterpretation(interp, filetypes.Input)
   359  	if len(defaultTags) == 0 {
   360  		return f.BoolTags
   361  	}
   362  	// We _could_ probably mutate f.Tags directly, but that doesn't
   363  	// seem quite right as it's been passed in from outside of internal/encoding.
   364  	// So go the extra mile and make a new map.
   365  
   366  	// Set values for tags that have a default value but aren't
   367  	// present in f.Tags.
   368  	var tags map[string]bool
   369  	for tag, val := range defaultTags {
   370  		if _, ok := f.BoolTags[tag]; ok {
   371  			continue
   372  		}
   373  		if tags == nil {
   374  			tags = make(map[string]bool)
   375  		}
   376  		tags[tag] = val
   377  	}
   378  	if tags == nil {
   379  		return f.BoolTags
   380  	}
   381  	maps.Copy(tags, f.BoolTags)
   382  	return tags
   383  }
   384  
   385  func shouldValidate(i *filetypes.FileInfo) bool {
   386  	// TODO: We ignore attributes for now. They should be enabled by default.
   387  	return false ||
   388  		!i.Definitions ||
   389  		!i.Data ||
   390  		!i.Optional ||
   391  		!i.Constraints ||
   392  		!i.References ||
   393  		!i.Cycles ||
   394  		!i.KeepDefaults ||
   395  		!i.Incomplete ||
   396  		!i.Imports ||
   397  		!i.Docs
   398  }
   399  
   400  type validator struct {
   401  	allErrors bool
   402  	count     int
   403  	errs      errors.Error
   404  	fileinfo  *filetypes.FileInfo
   405  }
   406  
   407  func (d *Decoder) validate(f *ast.File, b *build.File) {
   408  	if d.err != nil {
   409  		return
   410  	}
   411  	fi, err := filetypes.FromFile(b, filetypes.Input)
   412  	if err != nil {
   413  		d.err = err
   414  		return
   415  	}
   416  	if !shouldValidate(fi) {
   417  		return
   418  	}
   419  
   420  	v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors}
   421  	ast.Walk(f, v.validate, nil)
   422  	d.err = v.errs
   423  }
   424  
   425  func (v *validator) validate(n ast.Node) bool {
   426  	if v.count > 10 {
   427  		return false
   428  	}
   429  
   430  	i := v.fileinfo
   431  
   432  	// TODO: Cycles
   433  
   434  	ok := true
   435  	check := func(n ast.Node, option bool, s string, cond bool) {
   436  		if !option && cond {
   437  			v.errs = errors.Append(v.errs, errors.Newf(n.Pos(),
   438  				"%s not allowed in %s mode", s, v.fileinfo.Form))
   439  			v.count++
   440  			ok = false
   441  		}
   442  	}
   443  
   444  	// For now we don't make any distinction between these modes.
   445  
   446  	constraints := i.Constraints && i.Incomplete && i.Optional && i.References
   447  
   448  	check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
   449  
   450  	switch x := n.(type) {
   451  	case *ast.CommentGroup:
   452  		check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
   453  		return false
   454  
   455  	case *ast.ImportDecl, *ast.ImportSpec:
   456  		check(n, i.Imports, "imports", true)
   457  
   458  	case *ast.Field:
   459  		check(n, i.Definitions, "definitions", internal.IsDefinition(x.Label))
   460  		check(n, i.Data, "regular fields", internal.IsRegularField(x))
   461  		check(n, constraints, "optional fields", x.Optional != token.NoPos)
   462  
   463  		_, _, err := ast.LabelName(x.Label)
   464  		check(n, constraints, "optional fields", err != nil)
   465  
   466  		check(n, i.Attributes, "attributes", len(x.Attrs) > 0)
   467  		ast.Walk(x.Value, v.validate, nil)
   468  		return false
   469  
   470  	case *ast.UnaryExpr:
   471  		switch x.Op {
   472  		case token.MUL:
   473  			check(n, i.KeepDefaults, "default values", true)
   474  		case token.SUB, token.ADD:
   475  			// The parser represents negative numbers as an unary expression.
   476  			// Allow one `-` or `+`.
   477  			_, ok := x.X.(*ast.BasicLit)
   478  			check(n, constraints, "expressions", !ok)
   479  		case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR,
   480  			token.NEQ, token.NMAT, token.MAT:
   481  			check(n, constraints, "constraints", true)
   482  		default:
   483  			check(n, constraints, "expressions", true)
   484  		}
   485  
   486  	case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr,
   487  		*ast.CallExpr, *ast.Comprehension, *ast.Interpolation:
   488  		check(n, constraints, "expressions", true)
   489  
   490  	case *ast.Ellipsis:
   491  		check(n, constraints, "ellipsis", true)
   492  
   493  	case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause:
   494  		check(n, i.References, "references", true)
   495  
   496  	default:
   497  		// Other types are either always okay or handled elsewhere.
   498  	}
   499  	return ok
   500  }