cuelang.org/go@v0.10.1/cue/interpreter/embed/embed.go (about)

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package embed provides capabilities to CUE to embed any file that resides
    16  // within a CUE module into CUE either verbatim or decoded.
    17  //
    18  // This package is EXPERIMENTAL and subject to change.
    19  //
    20  // # Overview
    21  //
    22  // To enable file embedding, a file must include the file-level @extern(embed)
    23  // attribute. This makes it possible to see at a glance whether a file embeds
    24  // any files at all. Once enabled, the @embed attribute can be used to load a
    25  // file within a CUE module into a field.
    26  //
    27  // References to files are always relative to the directory in which the referring
    28  // file resides. Only files that exist within the CUE module are accessible.
    29  //
    30  // # The @embed attribute
    31  //
    32  // There are two main ways to embed files which are distinguished by the file
    33  // and glob arguments. The @embed attribute supports the following arguments:
    34  //
    35  // file=$filename
    36  //
    37  // The use of the file argument tells embed to load a single file into the
    38  // field. This argument may not be used in conjunction with the glob argument.
    39  //
    40  // glob=$pattern
    41  //
    42  // The use of the glob argument tells embed to load multiple files into the
    43  // field as a map of file paths to the decoded values. The paths are normalized
    44  // to use forward slashes. This argument may not be used in conjunction with the
    45  // file argument.
    46  //
    47  // type=$type
    48  //
    49  // By default, the file type is interpreted based on the file extension. This
    50  // behavior can be overridden by the type argument. See cue help filetypes for
    51  // the list of supported types. This field is required if a file extension is
    52  // unknown, or if a wildcard is used for the file extension in the glob pattern.
    53  //
    54  // # Limitations
    55  //
    56  // The embed interpreter currently does not support:
    57  // - stream values, such as .ndjson or YAML streams.
    58  // - schema-based decoding, such as needed for textproto
    59  //
    60  // # Example
    61  //
    62  //	@extern(embed)
    63  //
    64  //	package foo
    65  //
    66  //	// interpreted as JSON
    67  //	a: _ @embed(file="file1.json") // the quotes are optional here
    68  //
    69  //	// interpret the same file as JSON Schema
    70  //	#A: _ @embed(file=file1.json, type=jsonschema)
    71  //
    72  //	// interpret a proprietary extension as OpenAPI represented as YAML
    73  //	b: _ @embed(file="file2.crd", type=openapi+yaml)
    74  //
    75  //	// include all YAML files in the x directory interpreted as YAML
    76  //	// The result is a map of file paths to the decoded YAML values.
    77  //	files: _ @embed(glob=x/*.yaml)
    78  //
    79  //	// include all files in the y directory as a map of file paths to binary
    80  //	// data. The entries are unified into the same map as above.
    81  //	files: _ @embed(glob=y/*.*, type=binary)
    82  package embed
    83  
    84  import (
    85  	"io/fs"
    86  	"os"
    87  	"path"
    88  	"path/filepath"
    89  	"strings"
    90  
    91  	"cuelang.org/go/cue"
    92  	"cuelang.org/go/cue/build"
    93  	"cuelang.org/go/cue/cuecontext"
    94  	"cuelang.org/go/cue/errors"
    95  	"cuelang.org/go/cue/token"
    96  	"cuelang.org/go/internal"
    97  	"cuelang.org/go/internal/core/adt"
    98  	"cuelang.org/go/internal/core/runtime"
    99  	"cuelang.org/go/internal/cueexperiment"
   100  	"cuelang.org/go/internal/encoding"
   101  	"cuelang.org/go/internal/filetypes"
   102  	"cuelang.org/go/internal/value"
   103  	pkgpath "cuelang.org/go/pkg/path"
   104  )
   105  
   106  // TODO: obtain a fs.FS from load or something similar
   107  // TODO: disallow files from submodules
   108  // TODO: record files in build.Instance
   109  // TODO: support stream values
   110  // TODO: support schema-based decoding
   111  // TODO: maybe: option to include hidden files?
   112  
   113  // interpreter is a [cuecontext.ExternInterpreter] for embedded files.
   114  type interpreter struct{}
   115  
   116  // New returns a new interpreter for embedded files as a
   117  // [cuecontext.ExternInterpreter] suitable for passing to [cuecontext.New].
   118  func New() cuecontext.ExternInterpreter {
   119  	return &interpreter{}
   120  }
   121  
   122  func (i *interpreter) Kind() string {
   123  	return "embed"
   124  }
   125  
   126  // NewCompiler returns a compiler that can decode and embed files that exist
   127  // within a CUE module.
   128  func (i *interpreter) NewCompiler(b *build.Instance, r *runtime.Runtime) (runtime.Compiler, errors.Error) {
   129  	return &compiler{
   130  		b:       b,
   131  		runtime: (*cue.Context)(r),
   132  	}, nil
   133  }
   134  
   135  // A compiler is a [runtime.Compiler] that allows embedding files into CUE
   136  // values.
   137  type compiler struct {
   138  	b       *build.Instance
   139  	runtime *cue.Context
   140  	opCtx   *adt.OpContext
   141  
   142  	// file system cache
   143  	dir string
   144  	fs  fs.StatFS
   145  	pos token.Pos
   146  }
   147  
   148  // Compile interprets an embed attribute to either load a file
   149  // (@embed(file=...)) or a glob of files (@embed(glob=...)).
   150  // and decodes the given files.
   151  func (c *compiler) Compile(funcName string, scope adt.Value, a *internal.Attr) (adt.Expr, errors.Error) {
   152  	// This is a really weird spot to disable embedding, but I could not get
   153  	// the wasm tests to pass without doing it like this.
   154  	if !cueexperiment.Flags.Embed {
   155  		return &adt.Top{}, nil
   156  	}
   157  
   158  	file, _, err := a.Lookup(0, "file")
   159  	if err != nil {
   160  		return nil, errors.Promote(err, "invalid attribute")
   161  	}
   162  
   163  	glob, _, err := a.Lookup(0, "glob")
   164  	if err != nil {
   165  		return nil, errors.Promote(err, "invalid attribute")
   166  	}
   167  
   168  	typ, _, err := a.Lookup(0, "type")
   169  	if err != nil {
   170  		return nil, errors.Promote(err, "invalid type argument")
   171  	}
   172  
   173  	c.opCtx = adt.NewContext((*runtime.Runtime)(c.runtime), nil)
   174  
   175  	pos := a.Pos
   176  	c.pos = pos
   177  
   178  	// Jump through some hoops to get file operations to behave the same for
   179  	// Windows and Unix.
   180  	// TODO: obtain a fs.FS from load or something similar.
   181  	dir := filepath.Dir(pos.File().Name())
   182  	if c.dir != dir {
   183  		c.fs = os.DirFS(dir).(fs.StatFS) // Documented as implementing fs.StatFS
   184  		c.dir = dir
   185  	}
   186  
   187  	switch {
   188  	case file == "" && glob == "":
   189  		return nil, errors.Newf(a.Pos, "attribute must have file or glob field")
   190  
   191  	case file != "" && glob != "":
   192  		return nil, errors.Newf(a.Pos, "attribute cannot have both file and glob field")
   193  
   194  	case file != "":
   195  		return c.processFile(file, typ, scope)
   196  
   197  	default: // glob != "":
   198  		return c.processGlob(glob, typ, scope)
   199  	}
   200  }
   201  
   202  func (c *compiler) processFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   203  	file, err := c.clean(file)
   204  	if err != nil {
   205  		return nil, err
   206  	}
   207  	for dir := path.Dir(file); dir != "."; dir = path.Dir(dir) {
   208  		if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil {
   209  			return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", file)
   210  		}
   211  	}
   212  
   213  	return c.decodeFile(file, scope, schema)
   214  }
   215  
   216  func (c *compiler) processGlob(glob, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   217  	glob, ce := c.clean(glob)
   218  	if ce != nil {
   219  		return nil, ce
   220  	}
   221  
   222  	// Validate that the glob pattern is valid per [pkgpath.Match].
   223  	// Note that we use Unix match semantics because all embed paths are Unix-like.
   224  	if _, err := pkgpath.Match(glob, "", pkgpath.Unix); err != nil {
   225  		return nil, errors.Wrapf(err, c.pos, "invalid glob pattern %q", glob)
   226  	}
   227  
   228  	// If we do not have a type, ensure the extension of the base is fully
   229  	// specified, i.e. does not contain any meta characters as specified by
   230  	// path.Match.
   231  	if scope == "" {
   232  		ext := path.Ext(path.Base(glob))
   233  		if ext == "" || strings.ContainsAny(ext, "*?[\\") {
   234  			return nil, errors.Newf(c.pos, "extension not fully specified; type argument required")
   235  		}
   236  	}
   237  
   238  	m := &adt.StructLit{}
   239  
   240  	matches, err := fs.Glob(c.fs, glob)
   241  	if err != nil {
   242  		return nil, errors.Promote(err, "failed to match glob")
   243  	}
   244  
   245  	dirs := make(map[string]string)
   246  	for _, f := range matches {
   247  		if c.isHidden(f) {
   248  			// TODO: allow option for including hidden files?
   249  			continue
   250  		}
   251  		// TODO: lots of stat calls happening in this MVP so another won't hurt.
   252  		// We don't support '**' initially, and '*' only matches files, so skip
   253  		// any directories.
   254  		if fi, err := c.fs.Stat(f); err != nil {
   255  			return nil, errors.Newf(c.pos, "failed to stat %s: %v", f, err)
   256  		} else if fi.IsDir() {
   257  			continue
   258  		}
   259  		// Add all parents of the embedded file that
   260  		// aren't the current directory (if there's a cue.mod
   261  		// in the current directory, that's the current module
   262  		// not nested).
   263  		for dir := path.Dir(f); dir != "."; dir = path.Dir(dir) {
   264  			dirs[dir] = f
   265  		}
   266  
   267  		expr, err := c.decodeFile(f, scope, schema)
   268  		if err != nil {
   269  			return nil, err
   270  		}
   271  
   272  		m.Decls = append(m.Decls, &adt.Field{
   273  			Label: c.opCtx.StringLabel(f),
   274  			Value: expr,
   275  		})
   276  	}
   277  	// Check that none of the matches were in a nested module
   278  	// directory.
   279  	for dir, f := range dirs {
   280  		if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil {
   281  			return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", f)
   282  		}
   283  	}
   284  	return m, nil
   285  }
   286  
   287  func (c *compiler) clean(s string) (string, errors.Error) {
   288  	file := path.Clean(s)
   289  	if file != s {
   290  		return file, errors.Newf(c.pos, "path not normalized, use %q instead", file)
   291  	}
   292  	if path.IsAbs(file) {
   293  		return "", errors.Newf(c.pos, "only relative files are allowed")
   294  	}
   295  	if file == ".." || strings.HasPrefix(file, "../") {
   296  		return "", errors.Newf(c.pos, "cannot refer to parent directory")
   297  	}
   298  	return file, nil
   299  }
   300  
   301  // isHidden checks if a file is hidden on Windows. We do not return an error
   302  // if the file does not exist and will check that elsewhere.
   303  func (c *compiler) isHidden(file string) bool {
   304  	return strings.HasPrefix(file, ".") || strings.Contains(file, "/.")
   305  }
   306  
   307  func (c *compiler) decodeFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   308  	// Do not use the most obvious filetypes.Input in order to disable "auto"
   309  	// mode.
   310  	f, err := filetypes.ParseFileAndType(file, scope, filetypes.Def)
   311  	if err != nil {
   312  		return nil, errors.Promote(err, "invalid file type")
   313  	}
   314  
   315  	// Open and pre-load the file system using fs.FS, instead of relying
   316  	r, err := c.fs.Open(file)
   317  	if err != nil {
   318  		return nil, errors.Newf(c.pos, "open %v: no such file or directory", file)
   319  	}
   320  	defer r.Close()
   321  
   322  	info, err := r.Stat()
   323  	if err != nil {
   324  		return nil, errors.Promote(err, "failed to decode file")
   325  	}
   326  	if info.IsDir() {
   327  		return nil, errors.Newf(c.pos, "cannot embed directories")
   328  	}
   329  	f.Source = r
   330  
   331  	// TODO: this really should be done at the start of the build process.
   332  	// c.b.ExternFiles = append(c.b.ExternFiles, f)
   333  
   334  	config := &encoding.Config{
   335  		// TODO: schema is currently the wrong schema, which is a bug in
   336  		// internal/core/runtime. There is also an outstanding design choice:
   337  		// do we imply the schema from the schema of the current field, or do
   338  		// we explicitly enable schema-based encoding with a "schema" argument.
   339  		// In the case of YAML it seems to be better to be explicit. In the case
   340  		// of textproto it seems to be more convenient to do it implicitly.
   341  		// Schema: value.Make(c.opCtx, schema),
   342  	}
   343  
   344  	d := encoding.NewDecoder(c.runtime, f, config)
   345  	if err := d.Err(); err != nil {
   346  		return nil, errors.Promote(err, "failed to decode file")
   347  	}
   348  
   349  	defer d.Close()
   350  
   351  	n := d.File()
   352  
   353  	if d.Next(); !d.Done() {
   354  		return nil, errors.Newf(c.pos, "streaming not implemented: found more than one value in file")
   355  	}
   356  
   357  	switch f.Encoding {
   358  	case build.CUE:
   359  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported", f.Encoding)
   360  	case build.JSONL:
   361  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for streaming", f.Encoding)
   362  	case build.BinaryProto, build.TextProto:
   363  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for schema-guided decoding", f.Encoding)
   364  	}
   365  
   366  	val := c.runtime.BuildFile(n)
   367  	if err := val.Err(); err != nil {
   368  		return nil, errors.Promote(err, "failed to build file")
   369  	}
   370  
   371  	_, v := value.ToInternal(val)
   372  	return v, nil
   373  }