cuelang.org/go@v0.10.1/internal/encoding/encoding.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // TODO: make this package public in cuelang.org/go/encoding
    16  // once stabilized.
    17  
    18  package encoding
    19  
    20  import (
    21  	"fmt"
    22  	"io"
    23  	"net/url"
    24  	"strings"
    25  
    26  	"cuelang.org/go/cue"
    27  	"cuelang.org/go/cue/ast"
    28  	"cuelang.org/go/cue/build"
    29  	"cuelang.org/go/cue/errors"
    30  	"cuelang.org/go/cue/format"
    31  	"cuelang.org/go/cue/literal"
    32  	"cuelang.org/go/cue/parser"
    33  	"cuelang.org/go/cue/token"
    34  	"cuelang.org/go/encoding/json"
    35  	"cuelang.org/go/encoding/jsonschema"
    36  	"cuelang.org/go/encoding/openapi"
    37  	"cuelang.org/go/encoding/protobuf"
    38  	"cuelang.org/go/encoding/protobuf/jsonpb"
    39  	"cuelang.org/go/encoding/protobuf/textproto"
    40  	"cuelang.org/go/encoding/toml"
    41  	"cuelang.org/go/internal"
    42  	"cuelang.org/go/internal/encoding/yaml"
    43  	"cuelang.org/go/internal/filetypes"
    44  	"cuelang.org/go/internal/source"
    45  	"golang.org/x/text/encoding/unicode"
    46  	"golang.org/x/text/transform"
    47  )
    48  
// Decoder iterates over the documents decoded from a single build.File.
//
// It keeps the most recently decoded expression (expr) or file (file), the
// error that stopped decoding (err), and the optional hooks (rewriteFunc,
// interpretFunc) that doInterpret applies to each decoded document. The next
// function supplies further documents for encodings that can hold more than
// one; closer, when non-nil, is closed by Close.
type Decoder struct {
	ctx            *cue.Context
	cfg            *Config
	closer         io.Closer
	next           func() (ast.Expr, error)
	rewriteFunc    rewriteFunc
	interpretFunc  interpretFunc
	interpretation build.Interpretation
	expr           ast.Expr
	file           *ast.File
	filename       string // may change on iteration for some formats
	id             string
	index          int
	err            error
}
    64  
    65  type interpretFunc func(cue.Value) (file *ast.File, id string, err error)
    66  type rewriteFunc func(*ast.File) (file *ast.File, err error)
    67  
    68  // ID returns a canonical identifier for the decoded object or "" if no such
    69  // identifier could be found.
    70  func (i *Decoder) ID() string {
    71  	return i.id
    72  }
    73  func (i *Decoder) Filename() string { return i.filename }
    74  
    75  // Interpretation returns the current interpretation detected by Detect.
    76  func (i *Decoder) Interpretation() build.Interpretation {
    77  	return i.interpretation
    78  }
    79  func (i *Decoder) Index() int { return i.index }
    80  func (i *Decoder) Done() bool { return i.err != nil }
    81  
    82  func (i *Decoder) Next() {
    83  	if i.err != nil {
    84  		return
    85  	}
    86  	// Decoder level
    87  	i.file = nil
    88  	i.expr, i.err = i.next()
    89  	i.index++
    90  	if i.err != nil {
    91  		return
    92  	}
    93  	i.doInterpret()
    94  }
    95  
    96  func (i *Decoder) doInterpret() {
    97  	if i.rewriteFunc != nil {
    98  		i.file = i.File()
    99  		var err error
   100  		i.file, err = i.rewriteFunc(i.file)
   101  		if err != nil {
   102  			i.err = err
   103  			return
   104  		}
   105  	}
   106  	if i.interpretFunc != nil {
   107  		i.file = i.File()
   108  		v := i.ctx.BuildFile(i.file)
   109  		if err := v.Err(); err != nil {
   110  			i.err = err
   111  			return
   112  		}
   113  		i.file, i.id, i.err = i.interpretFunc(v)
   114  	}
   115  }
   116  
   117  func (i *Decoder) File() *ast.File {
   118  	if i.file != nil {
   119  		return i.file
   120  	}
   121  	return internal.ToFile(i.expr)
   122  }
   123  
   124  func (i *Decoder) Err() error {
   125  	if i.err == io.EOF {
   126  		return nil
   127  	}
   128  	return i.err
   129  }
   130  
   131  func (i *Decoder) Close() {
   132  	if i.closer != nil {
   133  		i.closer.Close()
   134  	}
   135  }
   136  
// Config holds the options shared by the encoders and decoders of this
// package.
//
// Within NewDecoder and its helpers the following fields are consulted:
// Stdin (source for the file "-"), ParseFile (custom CUE parser), PkgName,
// ProtoPath, Schema, Strict and AllErrors. The remaining fields are not read
// by the decoding code visible in this file.
type Config struct {
	Mode filetypes.Mode

	// Out specifies an overwrite destination.
	Out    io.Writer
	Stdin  io.Reader
	Stdout io.Writer

	PkgName string // package name for files to generate

	Force     bool // overwrite existing files
	Strict    bool
	Stream    bool // potentially write more than one document per file
	AllErrors bool

	Schema cue.Value // used for schema-based decoding

	EscapeHTML    bool
	InlineImports bool // expand references to non-core imports
	ProtoPath     []string
	Format        []format.Option
	ParseFile     func(name string, src interface{}) (*ast.File, error)
}
   160  
   161  // NewDecoder returns a stream of non-rooted data expressions. The encoding
   162  // type of f must be a data type, but does not have to be an encoding that
   163  // can stream. stdin is used in case the file is "-".
   164  func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder {
   165  	if cfg == nil {
   166  		cfg = &Config{}
   167  	}
   168  	i := &Decoder{filename: f.Filename, ctx: ctx, cfg: cfg}
   169  	i.next = func() (ast.Expr, error) {
   170  		if i.err != nil {
   171  			return nil, i.err
   172  		}
   173  		return nil, io.EOF
   174  	}
   175  
   176  	if file, ok := f.Source.(*ast.File); ok {
   177  		i.file = file
   178  		i.validate(file, f)
   179  		return i
   180  	}
   181  
   182  	var srcr io.Reader
   183  	if f.Source == nil && f.Filename == "-" {
   184  		// TODO: should we allow this?
   185  		srcr = cfg.Stdin
   186  	} else {
   187  		rc, err := source.Open(f.Filename, f.Source)
   188  		i.closer = rc
   189  		i.err = err
   190  		if i.err != nil {
   191  			return i
   192  		}
   193  		srcr = rc
   194  	}
   195  
   196  	// For now we assume that all encodings require UTF-8. This will not be the
   197  	// case for some binary protocols. We need to exempt those explicitly here
   198  	// once we introduce them.
   199  	// TODO: this code also allows UTF16, which is too permissive for some
   200  	// encodings. Switch to unicode.UTF8Sig once available.
   201  	t := unicode.BOMOverride(unicode.UTF8.NewDecoder())
   202  	r := transform.NewReader(srcr, t)
   203  
   204  	switch f.Interpretation {
   205  	case "":
   206  	case build.Auto:
   207  		openAPI := openAPIFunc(cfg, f)
   208  		jsonSchema := jsonSchemaFunc(cfg, f)
   209  		i.interpretFunc = func(v cue.Value) (file *ast.File, id string, err error) {
   210  			switch i.interpretation = Detect(v); i.interpretation {
   211  			case build.JSONSchema:
   212  				return jsonSchema(v)
   213  			case build.OpenAPI:
   214  				return openAPI(v)
   215  			}
   216  			return i.file, "", i.err
   217  		}
   218  	case build.OpenAPI:
   219  		i.interpretation = build.OpenAPI
   220  		i.interpretFunc = openAPIFunc(cfg, f)
   221  	case build.JSONSchema:
   222  		i.interpretation = build.JSONSchema
   223  		i.interpretFunc = jsonSchemaFunc(cfg, f)
   224  	case build.ProtobufJSON:
   225  		i.interpretation = build.ProtobufJSON
   226  		i.rewriteFunc = protobufJSONFunc(cfg, f)
   227  	default:
   228  		i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation)
   229  	}
   230  
   231  	path := f.Filename
   232  	switch f.Encoding {
   233  	case build.CUE:
   234  		if cfg.ParseFile == nil {
   235  			i.file, i.err = parser.ParseFile(path, r, parser.ParseComments)
   236  		} else {
   237  			i.file, i.err = cfg.ParseFile(path, r)
   238  		}
   239  		i.validate(i.file, f)
   240  		if i.err == nil {
   241  			i.doInterpret()
   242  		}
   243  	case build.JSON:
   244  		b, err := io.ReadAll(r)
   245  		if err != nil {
   246  			i.err = err
   247  			break
   248  		}
   249  		i.expr, i.err = json.Extract(path, b)
   250  		if i.err == nil {
   251  			i.doInterpret()
   252  		}
   253  	case build.JSONL:
   254  		i.next = json.NewDecoder(nil, path, r).Extract
   255  		i.Next()
   256  	case build.YAML:
   257  		b, err := io.ReadAll(r)
   258  		i.err = err
   259  		i.next = yaml.NewDecoder(path, b).Decode
   260  		i.Next()
   261  	case build.TOML:
   262  		i.next = toml.NewDecoder(path, r).Decode
   263  		i.Next()
   264  	case build.Text:
   265  		b, err := io.ReadAll(r)
   266  		i.err = err
   267  		i.expr = ast.NewString(string(b))
   268  	case build.Binary:
   269  		b, err := io.ReadAll(r)
   270  		i.err = err
   271  		s := literal.Bytes.WithTabIndent(1).Quote(string(b))
   272  		i.expr = ast.NewLit(token.STRING, s)
   273  	case build.Protobuf:
   274  		paths := &protobuf.Config{
   275  			Paths:   cfg.ProtoPath,
   276  			PkgName: cfg.PkgName,
   277  		}
   278  		i.file, i.err = protobuf.Extract(path, r, paths)
   279  	case build.TextProto:
   280  		b, err := io.ReadAll(r)
   281  		i.err = err
   282  		if err == nil {
   283  			d := textproto.NewDecoder()
   284  			i.expr, i.err = d.Parse(cfg.Schema, path, b)
   285  		}
   286  	default:
   287  		i.err = fmt.Errorf("unsupported encoding %q", f.Encoding)
   288  	}
   289  
   290  	return i
   291  }
   292  
   293  func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc {
   294  	return func(v cue.Value) (file *ast.File, id string, err error) {
   295  		id = f.Tags["id"]
   296  		if id == "" {
   297  			id, _ = v.LookupPath(cue.MakePath(cue.Str("$id"))).String()
   298  		}
   299  		if id != "" {
   300  			u, err := url.Parse(id)
   301  			if err != nil {
   302  				return nil, "", errors.Wrapf(err, token.NoPos, "invalid id")
   303  			}
   304  			u.Scheme = ""
   305  			id = strings.TrimPrefix(u.String(), "//")
   306  		}
   307  		cfg := &jsonschema.Config{
   308  			ID:      id,
   309  			PkgName: cfg.PkgName,
   310  
   311  			Strict: cfg.Strict,
   312  		}
   313  		file, err = jsonschema.Extract(v, cfg)
   314  		// TODO: simplify currently erases file line info. Reintroduce after fix.
   315  		// file, err = simplify(file, err)
   316  		return file, id, err
   317  	}
   318  }
   319  
   320  func openAPIFunc(c *Config, f *build.File) interpretFunc {
   321  	cfg := &openapi.Config{PkgName: c.PkgName}
   322  	return func(v cue.Value) (file *ast.File, id string, err error) {
   323  		file, err = openapi.Extract(v, cfg)
   324  		// TODO: simplify currently erases file line info. Reintroduce after fix.
   325  		// file, err = simplify(file, err)
   326  		return file, "", err
   327  	}
   328  }
   329  
   330  func protobufJSONFunc(cfg *Config, file *build.File) rewriteFunc {
   331  	return func(f *ast.File) (*ast.File, error) {
   332  		if !cfg.Schema.Exists() {
   333  			return f, errors.Newf(token.NoPos,
   334  				"no schema specified for protobuf interpretation.")
   335  		}
   336  		return f, jsonpb.NewDecoder(cfg.Schema).RewriteFile(f)
   337  	}
   338  }
   339  
   340  func shouldValidate(i *filetypes.FileInfo) bool {
   341  	// TODO: We ignore attributes for now. They should be enabled by default.
   342  	return false ||
   343  		!i.Definitions ||
   344  		!i.Data ||
   345  		!i.Optional ||
   346  		!i.Constraints ||
   347  		!i.References ||
   348  		!i.Cycles ||
   349  		!i.KeepDefaults ||
   350  		!i.Incomplete ||
   351  		!i.Imports ||
   352  		!i.Docs
   353  }
   354  
// validator accumulates the errors found while walking a file and checking
// it against the features permitted by fileinfo. count tracks the number of
// errors recorded so far and errs holds the errors themselves; allErrors is
// copied from Config.AllErrors when the validator is created.
type validator struct {
	allErrors bool
	count     int
	errs      errors.Error
	fileinfo  *filetypes.FileInfo
}
   361  
   362  func (d *Decoder) validate(f *ast.File, b *build.File) {
   363  	if d.err != nil {
   364  		return
   365  	}
   366  	fi, err := filetypes.FromFile(b, filetypes.Input)
   367  	if err != nil {
   368  		d.err = err
   369  		return
   370  	}
   371  	if !shouldValidate(fi) {
   372  		return
   373  	}
   374  
   375  	v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors}
   376  	ast.Walk(f, v.validate, nil)
   377  	d.err = v.errs
   378  }
   379  
   380  func (v *validator) validate(n ast.Node) bool {
   381  	if v.count > 10 {
   382  		return false
   383  	}
   384  
   385  	i := v.fileinfo
   386  
   387  	// TODO: Cycles
   388  
   389  	ok := true
   390  	check := func(n ast.Node, option bool, s string, cond bool) {
   391  		if !option && cond {
   392  			v.errs = errors.Append(v.errs, errors.Newf(n.Pos(),
   393  				"%s not allowed in %s mode", s, v.fileinfo.Form))
   394  			v.count++
   395  			ok = false
   396  		}
   397  	}
   398  
   399  	// For now we don't make any distinction between these modes.
   400  
   401  	constraints := i.Constraints && i.Incomplete && i.Optional && i.References
   402  
   403  	check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
   404  
   405  	switch x := n.(type) {
   406  	case *ast.CommentGroup:
   407  		check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
   408  		return false
   409  
   410  	case *ast.ImportDecl, *ast.ImportSpec:
   411  		check(n, i.Imports, "imports", true)
   412  
   413  	case *ast.Field:
   414  		check(n, i.Definitions, "definitions", internal.IsDefinition(x.Label))
   415  		check(n, i.Data, "regular fields", internal.IsRegularField(x))
   416  		check(n, constraints, "optional fields", x.Optional != token.NoPos)
   417  
   418  		_, _, err := ast.LabelName(x.Label)
   419  		check(n, constraints, "optional fields", err != nil)
   420  
   421  		check(n, i.Attributes, "attributes", len(x.Attrs) > 0)
   422  		ast.Walk(x.Value, v.validate, nil)
   423  		return false
   424  
   425  	case *ast.UnaryExpr:
   426  		switch x.Op {
   427  		case token.MUL:
   428  			check(n, i.KeepDefaults, "default values", true)
   429  		case token.SUB, token.ADD:
   430  			// The parser represents negative numbers as an unary expression.
   431  			// Allow one `-` or `+`.
   432  			_, ok := x.X.(*ast.BasicLit)
   433  			check(n, constraints, "expressions", !ok)
   434  		case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR,
   435  			token.NEQ, token.NMAT, token.MAT:
   436  			check(n, constraints, "constraints", true)
   437  		default:
   438  			check(n, constraints, "expressions", true)
   439  		}
   440  
   441  	case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr,
   442  		*ast.CallExpr, *ast.Comprehension, *ast.Interpolation:
   443  		check(n, constraints, "expressions", true)
   444  
   445  	case *ast.Ellipsis:
   446  		check(n, constraints, "ellipsis", true)
   447  
   448  	case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause:
   449  		check(n, i.References, "references", true)
   450  
   451  	default:
   452  		// Other types are either always okay or handled elsewhere.
   453  	}
   454  	return ok
   455  }