cuelang.org/go@v0.13.0/cue/interpreter/embed/embed.go (about)

     1  // Copyright 2024 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package embed provides capabilities to CUE to embed any file that resides
    16  // within a CUE module into CUE either verbatim or decoded.
    17  //
    18  // This package is EXPERIMENTAL and subject to change.
    19  //
    20  // # Overview
    21  //
    22  // To enable file embedding, a file must include the file-level @extern(embed)
    23  // attribute. This allows a quick glance to see if a file embeds any files at
    24  // all. This allows the @embed attribute to be used to load a file within a CUE
    25  // module into a field.
    26  //
     27  // References to files are always relative to the directory in which the referring
    28  // file resides. Only files that exist within the CUE module are accessible.
    29  //
    30  // # The @embed attribute
    31  //
    32  // There are two main ways to embed files which are distinguished by the file
    33  // and glob arguments. The @embed attribute supports the following arguments:
    34  //
    35  // file=$filename
    36  //
    37  // The use of the file argument tells embed to load a single file into the
     38  // field. This argument may not be used in conjunction with the glob argument.
    39  //
    40  // glob=$pattern
    41  //
    42  // The use of the glob argument tells embed to load multiple files into the
    43  // field as a map of file paths to the decoded values. The paths are normalized
    44  // to use forward slashes. This argument may not be used in conjunction with the
    45  // file argument.
    46  //
    47  // type=$type
    48  //
    49  // By default, the file type is interpreted based on the file extension. This
    50  // behavior can be overridden by the type argument. See cue help filetypes for
    51  // the list of supported types. This field is required if a file extension is
    52  // unknown, or if a wildcard is used for the file extension in the glob pattern.
    53  //
    54  // # Limitations
    55  //
    56  // The embed interpreter currently does not support:
    57  // - stream values, such as .ndjson or YAML streams.
    58  // - schema-based decoding, such as needed for textproto
    59  //
    60  // # Example
    61  //
    62  //	@extern(embed)
    63  //
    64  //	package foo
    65  //
    66  //	// interpreted as JSON
    67  //	a: _ @embed(file="file1.json") // the quotes are optional here
    68  //
     69  //	// interpret the same file as JSON Schema
    70  //	#A: _ @embed(file=file1.json, type=jsonschema)
    71  //
    72  //	// interpret a proprietary extension as OpenAPI represented as YAML
    73  //	b: _ @embed(file="file2.crd", type=openapi+yaml)
    74  //
    75  //	// include all YAML files in the x directory interpreted as YAML
    76  //	// The result is a map of file paths to the decoded YAML values.
    77  //	files: _ @embed(glob=x/*.yaml)
    78  //
    79  //	// include all files in the y directory as a map of file paths to binary
    80  //	// data. The entries are unified into the same map as above.
    81  //	files: _ @embed(glob=y/*.*, type=binary)
    82  package embed
    83  
    84  import (
    85  	"io/fs"
    86  	"os"
    87  	"path"
    88  	"path/filepath"
    89  	"strings"
    90  
    91  	"cuelang.org/go/cue"
    92  	"cuelang.org/go/cue/build"
    93  	"cuelang.org/go/cue/cuecontext"
    94  	"cuelang.org/go/cue/errors"
    95  	"cuelang.org/go/cue/token"
    96  	"cuelang.org/go/internal"
    97  	"cuelang.org/go/internal/core/adt"
    98  	"cuelang.org/go/internal/core/runtime"
    99  	"cuelang.org/go/internal/encoding"
   100  	"cuelang.org/go/internal/filetypes"
   101  	"cuelang.org/go/internal/value"
   102  	pkgpath "cuelang.org/go/pkg/path"
   103  )
   104  
   105  // TODO: record files in build.Instance
   106  
// interpreter is a [cuecontext.ExternInterpreter] for embedded files.
//
// The type has no fields of its own: all state needed to resolve an @embed
// attribute lives in the compiler value returned by NewCompiler.
type interpreter struct{}
   109  
   110  // New returns a new interpreter for embedded files as a
   111  // [cuecontext.ExternInterpreter] suitable for passing to [cuecontext.New].
   112  func New() cuecontext.ExternInterpreter {
   113  	return &interpreter{}
   114  }
   115  
   116  func (i *interpreter) Kind() string {
   117  	return "embed"
   118  }
   119  
   120  // NewCompiler returns a compiler that can decode and embed files that exist
   121  // within a CUE module.
   122  func (i *interpreter) NewCompiler(b *build.Instance, r *runtime.Runtime) (runtime.Compiler, errors.Error) {
   123  	if b.Module == "" {
   124  		return nil, errors.Newf(token.Pos{}, "cannot embed files when not in a module")
   125  	}
   126  	if b.Root == "" {
   127  		return nil, errors.Newf(token.Pos{}, "cannot embed files: no module root found")
   128  	}
   129  	return &compiler{
   130  		b:       b,
   131  		runtime: (*cue.Context)(r),
   132  	}, nil
   133  }
   134  
// A compiler is a [runtime.Compiler] that allows embedding files into CUE
// values.
type compiler struct {
	b       *build.Instance // build instance this compiler was created for
	runtime *cue.Context    // context used to decode and build embedded files
	opCtx   *adt.OpContext  // replaced with a fresh context on every Compile call

	// file system cache
	dir string    // directory that fs is rooted at
	fs  fs.StatFS // file system for dir; rebuilt by Compile when dir changes
	pos token.Pos // position of the attribute being compiled, used in errors
}
   147  
   148  // Compile interprets an embed attribute to either load a file
   149  // (@embed(file=...)) or a glob of files (@embed(glob=...)).
   150  // and decodes the given files.
   151  func (c *compiler) Compile(funcName string, scope adt.Value, a *internal.Attr) (adt.Expr, errors.Error) {
   152  
   153  	file, _, err := a.Lookup(0, "file")
   154  	if err != nil {
   155  		return nil, errors.Promote(err, "invalid attribute")
   156  	}
   157  
   158  	glob, _, err := a.Lookup(0, "glob")
   159  	if err != nil {
   160  		return nil, errors.Promote(err, "invalid attribute")
   161  	}
   162  
   163  	typ, _, err := a.Lookup(0, "type")
   164  	if err != nil {
   165  		return nil, errors.Promote(err, "invalid type argument")
   166  	}
   167  
   168  	c.opCtx = adt.NewContext((*runtime.Runtime)(c.runtime), nil)
   169  
   170  	pos := a.Pos
   171  	c.pos = pos
   172  
   173  	// Jump through some hoops to get file operations to behave the same for
   174  	// Windows and Unix.
   175  	// TODO: obtain a fs.FS from load or something similar.
   176  	dir := filepath.Dir(pos.File().Name())
   177  	if c.dir != dir {
   178  		c.fs = os.DirFS(dir).(fs.StatFS) // Documented as implementing fs.StatFS
   179  		c.dir = dir
   180  	}
   181  
   182  	switch {
   183  	case file == "" && glob == "":
   184  		return nil, errors.Newf(a.Pos, "attribute must have file or glob field")
   185  
   186  	case file != "" && glob != "":
   187  		return nil, errors.Newf(a.Pos, "attribute cannot have both file and glob field")
   188  
   189  	case file != "":
   190  		return c.processFile(file, typ, scope)
   191  
   192  	default: // glob != "":
   193  		return c.processGlob(glob, typ, scope)
   194  	}
   195  }
   196  
   197  func (c *compiler) processFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   198  	file, err := c.clean(file)
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	for dir := path.Dir(file); dir != "."; dir = path.Dir(dir) {
   203  		if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil {
   204  			return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", file)
   205  		}
   206  	}
   207  
   208  	return c.decodeFile(file, scope, schema)
   209  }
   210  
   211  func (c *compiler) processGlob(glob, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   212  	glob, ce := c.clean(glob)
   213  	if ce != nil {
   214  		return nil, ce
   215  	}
   216  
   217  	// Validate that the glob pattern is valid per [pkgpath.Match].
   218  	// Note that we use Unix match semantics because all embed paths are Unix-like.
   219  	if _, err := pkgpath.Match(glob, "", pkgpath.Unix); err != nil {
   220  		return nil, errors.Wrapf(err, c.pos, "invalid glob pattern %q", glob)
   221  	}
   222  
   223  	// If we do not have a type, ensure the extension of the base is fully
   224  	// specified, i.e. does not contain any meta characters as specified by
   225  	// path.Match.
   226  	if scope == "" {
   227  		ext := path.Ext(path.Base(glob))
   228  		if ext == "" || strings.ContainsAny(ext, "*?[\\") {
   229  			return nil, errors.Newf(c.pos, "extension not fully specified; type argument required")
   230  		}
   231  	}
   232  
   233  	m := &adt.StructLit{}
   234  
   235  	matches, err := fsGlob(c.fs, glob)
   236  	if err != nil {
   237  		return nil, errors.Promote(err, "failed to match glob")
   238  	}
   239  	if len(matches) == 0 {
   240  		return nil, errors.Newf(c.pos, "no matches for glob pattern %q", glob)
   241  	}
   242  
   243  	dirs := make(map[string]string)
   244  	for _, f := range matches {
   245  		// TODO: lots of stat calls happening in this MVP so another won't hurt.
   246  		// We don't support '**' initially, and '*' only matches files, so skip
   247  		// any directories.
   248  		if fi, err := c.fs.Stat(f); err != nil {
   249  			return nil, errors.Newf(c.pos, "failed to stat %s: %v", f, err)
   250  		} else if fi.IsDir() {
   251  			continue
   252  		}
   253  		// Add all parents of the embedded file that
   254  		// aren't the current directory (if there's a cue.mod
   255  		// in the current directory, that's the current module
   256  		// not nested).
   257  		for dir := path.Dir(f); dir != "."; dir = path.Dir(dir) {
   258  			dirs[dir] = f
   259  		}
   260  
   261  		expr, err := c.decodeFile(f, scope, schema)
   262  		if err != nil {
   263  			return nil, err
   264  		}
   265  
   266  		m.Decls = append(m.Decls, &adt.Field{
   267  			Label: c.opCtx.StringLabel(f),
   268  			Value: expr,
   269  		})
   270  	}
   271  	// Check that none of the matches were in a nested module
   272  	// directory.
   273  	for dir, f := range dirs {
   274  		if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil {
   275  			return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", f)
   276  		}
   277  	}
   278  	return m, nil
   279  }
   280  
   281  func (c *compiler) clean(s string) (string, errors.Error) {
   282  	file := path.Clean(s)
   283  	if file != s {
   284  		return file, errors.Newf(c.pos, "path not normalized, use %q instead", file)
   285  	}
   286  	if path.IsAbs(file) {
   287  		return "", errors.Newf(c.pos, "only relative files are allowed")
   288  	}
   289  	if file == ".." || strings.HasPrefix(file, "../") {
   290  		return "", errors.Newf(c.pos, "cannot refer to parent directory")
   291  	}
   292  	return file, nil
   293  }
   294  
   295  // fsGlob is like [fs.Glob] but only includes dot-prefixed files
   296  // when the dot is explictly present in an element.
   297  // TODO: add option for including dot files?
   298  func fsGlob(fsys fs.FS, pattern string) ([]string, error) {
   299  	pattern = path.Clean(pattern)
   300  	matches, err := fs.Glob(fsys, pattern)
   301  	if err != nil {
   302  		return nil, err
   303  	}
   304  	patElems := strings.Split(pattern, "/")
   305  	included := func(m string) bool {
   306  		for i, elem := range strings.Split(m, "/") {
   307  			// Technically there should never be more elements in m than
   308  			// there are in patElems, but be defensive and check bounds just in case.
   309  			if strings.HasPrefix(elem, ".") && (i >= len(patElems) || !strings.HasPrefix(patElems[i], ".")) {
   310  				return false
   311  			}
   312  		}
   313  		return true
   314  	}
   315  
   316  	i := 0
   317  	for _, m := range matches {
   318  		if included(m) {
   319  			matches[i] = m
   320  			i++
   321  		}
   322  	}
   323  	return matches[:i], nil
   324  }
   325  
   326  func (c *compiler) decodeFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) {
   327  	// Do not use the most obvious filetypes.Input in order to disable "auto"
   328  	// mode.
   329  	f, err := filetypes.ParseFileAndType(file, scope, filetypes.Def)
   330  	if err != nil {
   331  		return nil, errors.Promote(err, "invalid file type")
   332  	}
   333  
   334  	// Open and pre-load the file system using fs.FS, instead of relying
   335  	r, err := c.fs.Open(file)
   336  	if err != nil {
   337  		return nil, errors.Newf(c.pos, "open %v: no such file or directory", file)
   338  	}
   339  	defer r.Close()
   340  
   341  	info, err := r.Stat()
   342  	if err != nil {
   343  		return nil, errors.Promote(err, "failed to decode file")
   344  	}
   345  	if info.IsDir() {
   346  		return nil, errors.Newf(c.pos, "cannot embed directories")
   347  	}
   348  	f.Source = r
   349  
   350  	// TODO: this really should be done at the start of the build process.
   351  	// c.b.ExternFiles = append(c.b.ExternFiles, f)
   352  
   353  	config := &encoding.Config{
   354  		// TODO: schema is currently the wrong schema, which is a bug in
   355  		// internal/core/runtime. There is also an outstanding design choice:
   356  		// do we imply the schema from the schema of the current field, or do
   357  		// we explicitly enable schema-based encoding with a "schema" argument.
   358  		// In the case of YAML it seems to be better to be explicit. In the case
   359  		// of textproto it seems to be more convenient to do it implicitly.
   360  		// Schema: value.Make(c.opCtx, schema),
   361  	}
   362  
   363  	d := encoding.NewDecoder(c.runtime, f, config)
   364  	if err := d.Err(); err != nil {
   365  		return nil, errors.Promote(err, "failed to decode file")
   366  	}
   367  
   368  	defer d.Close()
   369  
   370  	n := d.File()
   371  
   372  	if d.Next(); !d.Done() {
   373  		// TODO: support streaming values
   374  		return nil, errors.Newf(c.pos, "streaming not implemented: found more than one value in file")
   375  	}
   376  
   377  	// TODO: each of these encodings should probably be supported in the future
   378  	switch f.Encoding {
   379  	case build.CUE:
   380  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported", f.Encoding)
   381  	case build.JSONL:
   382  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for streaming", f.Encoding)
   383  	case build.BinaryProto, build.TextProto:
   384  		return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for schema-guided decoding", f.Encoding)
   385  	}
   386  
   387  	val := c.runtime.BuildFile(n)
   388  	if err := val.Err(); err != nil {
   389  		return nil, errors.Promote(err, "failed to build file")
   390  	}
   391  
   392  	_, v := value.ToInternal(val)
   393  	return v, nil
   394  }