// cuelang.org/go@v0.10.1/cue/interpreter/embed/embed.go

// Copyright 2024 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package embed provides capabilities to CUE to embed any file that resides
// within a CUE module into CUE either verbatim or decoded.
//
// This package is EXPERIMENTAL and subject to change.
//
// # Overview
//
// To enable file embedding, a file must include the file-level @extern(embed)
// attribute. This allows a quick glance to see if a file embeds any files at
// all. With that attribute in place, the @embed attribute can be used to load
// a file within a CUE module into a field.
//
// References to files are always relative to the directory in which the
// referring file resides. Only files that exist within the CUE module are
// accessible.
//
// # The @embed attribute
//
// There are two main ways to embed files which are distinguished by the file
// and glob arguments. The @embed attribute supports the following arguments:
//
// file=$filename
//
// The use of the file argument tells embed to load a single file into the
// field. This argument may not be used in conjunction with the glob argument.
//
// glob=$pattern
//
// The use of the glob argument tells embed to load multiple files into the
// field as a map of file paths to the decoded values. The paths are normalized
// to use forward slashes. This argument may not be used in conjunction with the
// file argument.
//
// type=$type
//
// By default, the file type is interpreted based on the file extension. This
// behavior can be overridden by the type argument. See cue help filetypes for
// the list of supported types. This argument is required if a file extension
// is unknown, or if a wildcard is used for the file extension in the glob
// pattern.
//
// # Limitations
//
// The embed interpreter currently does not support:
//   - stream values, such as .ndjson or YAML streams.
//   - schema-based decoding, such as needed for textproto.
//
// # Example
//
//	@extern(embed)
//
//	package foo
//
//	// interpreted as JSON
//	a: _ @embed(file="file1.json") // the quotes are optional here
//
//	// interpret the same file as JSON schema
//	#A: _ @embed(file=file1.json, type=jsonschema)
//
//	// interpret a proprietary extension as OpenAPI represented as YAML
//	b: _ @embed(file="file2.crd", type=openapi+yaml)
//
//	// include all YAML files in the x directory interpreted as YAML
//	// The result is a map of file paths to the decoded YAML values.
//	files: _ @embed(glob=x/*.yaml)
//
//	// include all files in the y directory as a map of file paths to binary
//	// data. The entries are unified into the same map as above.
81 // files: _ @embed(glob=y/*.*, type=binary) 82 package embed 83 84 import ( 85 "io/fs" 86 "os" 87 "path" 88 "path/filepath" 89 "strings" 90 91 "cuelang.org/go/cue" 92 "cuelang.org/go/cue/build" 93 "cuelang.org/go/cue/cuecontext" 94 "cuelang.org/go/cue/errors" 95 "cuelang.org/go/cue/token" 96 "cuelang.org/go/internal" 97 "cuelang.org/go/internal/core/adt" 98 "cuelang.org/go/internal/core/runtime" 99 "cuelang.org/go/internal/cueexperiment" 100 "cuelang.org/go/internal/encoding" 101 "cuelang.org/go/internal/filetypes" 102 "cuelang.org/go/internal/value" 103 pkgpath "cuelang.org/go/pkg/path" 104 ) 105 106 // TODO: obtain a fs.FS from load or something similar 107 // TODO: disallow files from submodules 108 // TODO: record files in build.Instance 109 // TODO: support stream values 110 // TODO: support schema-based decoding 111 // TODO: maybe: option to include hidden files? 112 113 // interpreter is a [cuecontext.ExternInterpreter] for embedded files. 114 type interpreter struct{} 115 116 // New returns a new interpreter for embedded files as a 117 // [cuecontext.ExternInterpreter] suitable for passing to [cuecontext.New]. 118 func New() cuecontext.ExternInterpreter { 119 return &interpreter{} 120 } 121 122 func (i *interpreter) Kind() string { 123 return "embed" 124 } 125 126 // NewCompiler returns a compiler that can decode and embed files that exist 127 // within a CUE module. 128 func (i *interpreter) NewCompiler(b *build.Instance, r *runtime.Runtime) (runtime.Compiler, errors.Error) { 129 return &compiler{ 130 b: b, 131 runtime: (*cue.Context)(r), 132 }, nil 133 } 134 135 // A compiler is a [runtime.Compiler] that allows embedding files into CUE 136 // values. 
137 type compiler struct { 138 b *build.Instance 139 runtime *cue.Context 140 opCtx *adt.OpContext 141 142 // file system cache 143 dir string 144 fs fs.StatFS 145 pos token.Pos 146 } 147 148 // Compile interprets an embed attribute to either load a file 149 // (@embed(file=...)) or a glob of files (@embed(glob=...)). 150 // and decodes the given files. 151 func (c *compiler) Compile(funcName string, scope adt.Value, a *internal.Attr) (adt.Expr, errors.Error) { 152 // This is a really weird spot to disable embedding, but I could not get 153 // the wasm tests to pass without doing it like this. 154 if !cueexperiment.Flags.Embed { 155 return &adt.Top{}, nil 156 } 157 158 file, _, err := a.Lookup(0, "file") 159 if err != nil { 160 return nil, errors.Promote(err, "invalid attribute") 161 } 162 163 glob, _, err := a.Lookup(0, "glob") 164 if err != nil { 165 return nil, errors.Promote(err, "invalid attribute") 166 } 167 168 typ, _, err := a.Lookup(0, "type") 169 if err != nil { 170 return nil, errors.Promote(err, "invalid type argument") 171 } 172 173 c.opCtx = adt.NewContext((*runtime.Runtime)(c.runtime), nil) 174 175 pos := a.Pos 176 c.pos = pos 177 178 // Jump through some hoops to get file operations to behave the same for 179 // Windows and Unix. 180 // TODO: obtain a fs.FS from load or something similar. 
181 dir := filepath.Dir(pos.File().Name()) 182 if c.dir != dir { 183 c.fs = os.DirFS(dir).(fs.StatFS) // Documented as implementing fs.StatFS 184 c.dir = dir 185 } 186 187 switch { 188 case file == "" && glob == "": 189 return nil, errors.Newf(a.Pos, "attribute must have file or glob field") 190 191 case file != "" && glob != "": 192 return nil, errors.Newf(a.Pos, "attribute cannot have both file and glob field") 193 194 case file != "": 195 return c.processFile(file, typ, scope) 196 197 default: // glob != "": 198 return c.processGlob(glob, typ, scope) 199 } 200 } 201 202 func (c *compiler) processFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) { 203 file, err := c.clean(file) 204 if err != nil { 205 return nil, err 206 } 207 for dir := path.Dir(file); dir != "."; dir = path.Dir(dir) { 208 if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil { 209 return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", file) 210 } 211 } 212 213 return c.decodeFile(file, scope, schema) 214 } 215 216 func (c *compiler) processGlob(glob, scope string, schema adt.Value) (adt.Expr, errors.Error) { 217 glob, ce := c.clean(glob) 218 if ce != nil { 219 return nil, ce 220 } 221 222 // Validate that the glob pattern is valid per [pkgpath.Match]. 223 // Note that we use Unix match semantics because all embed paths are Unix-like. 224 if _, err := pkgpath.Match(glob, "", pkgpath.Unix); err != nil { 225 return nil, errors.Wrapf(err, c.pos, "invalid glob pattern %q", glob) 226 } 227 228 // If we do not have a type, ensure the extension of the base is fully 229 // specified, i.e. does not contain any meta characters as specified by 230 // path.Match. 
231 if scope == "" { 232 ext := path.Ext(path.Base(glob)) 233 if ext == "" || strings.ContainsAny(ext, "*?[\\") { 234 return nil, errors.Newf(c.pos, "extension not fully specified; type argument required") 235 } 236 } 237 238 m := &adt.StructLit{} 239 240 matches, err := fs.Glob(c.fs, glob) 241 if err != nil { 242 return nil, errors.Promote(err, "failed to match glob") 243 } 244 245 dirs := make(map[string]string) 246 for _, f := range matches { 247 if c.isHidden(f) { 248 // TODO: allow option for including hidden files? 249 continue 250 } 251 // TODO: lots of stat calls happening in this MVP so another won't hurt. 252 // We don't support '**' initially, and '*' only matches files, so skip 253 // any directories. 254 if fi, err := c.fs.Stat(f); err != nil { 255 return nil, errors.Newf(c.pos, "failed to stat %s: %v", f, err) 256 } else if fi.IsDir() { 257 continue 258 } 259 // Add all parents of the embedded file that 260 // aren't the current directory (if there's a cue.mod 261 // in the current directory, that's the current module 262 // not nested). 263 for dir := path.Dir(f); dir != "."; dir = path.Dir(dir) { 264 dirs[dir] = f 265 } 266 267 expr, err := c.decodeFile(f, scope, schema) 268 if err != nil { 269 return nil, err 270 } 271 272 m.Decls = append(m.Decls, &adt.Field{ 273 Label: c.opCtx.StringLabel(f), 274 Value: expr, 275 }) 276 } 277 // Check that none of the matches were in a nested module 278 // directory. 279 for dir, f := range dirs { 280 if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil { 281 return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", f) 282 } 283 } 284 return m, nil 285 } 286 287 func (c *compiler) clean(s string) (string, errors.Error) { 288 file := path.Clean(s) 289 if file != s { 290 return file, errors.Newf(c.pos, "path not normalized, use %q instead", file) 291 } 292 if path.IsAbs(file) { 293 return "", errors.Newf(c.pos, "only relative files are allowed") 294 } 295 if file == ".." 
|| strings.HasPrefix(file, "../") { 296 return "", errors.Newf(c.pos, "cannot refer to parent directory") 297 } 298 return file, nil 299 } 300 301 // isHidden checks if a file is hidden on Windows. We do not return an error 302 // if the file does not exist and will check that elsewhere. 303 func (c *compiler) isHidden(file string) bool { 304 return strings.HasPrefix(file, ".") || strings.Contains(file, "/.") 305 } 306 307 func (c *compiler) decodeFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) { 308 // Do not use the most obvious filetypes.Input in order to disable "auto" 309 // mode. 310 f, err := filetypes.ParseFileAndType(file, scope, filetypes.Def) 311 if err != nil { 312 return nil, errors.Promote(err, "invalid file type") 313 } 314 315 // Open and pre-load the file system using fs.FS, instead of relying 316 r, err := c.fs.Open(file) 317 if err != nil { 318 return nil, errors.Newf(c.pos, "open %v: no such file or directory", file) 319 } 320 defer r.Close() 321 322 info, err := r.Stat() 323 if err != nil { 324 return nil, errors.Promote(err, "failed to decode file") 325 } 326 if info.IsDir() { 327 return nil, errors.Newf(c.pos, "cannot embed directories") 328 } 329 f.Source = r 330 331 // TODO: this really should be done at the start of the build process. 332 // c.b.ExternFiles = append(c.b.ExternFiles, f) 333 334 config := &encoding.Config{ 335 // TODO: schema is currently the wrong schema, which is a bug in 336 // internal/core/runtime. There is also an outstanding design choice: 337 // do we imply the schema from the schema of the current field, or do 338 // we explicitly enable schema-based encoding with a "schema" argument. 339 // In the case of YAML it seems to be better to be explicit. In the case 340 // of textproto it seems to be more convenient to do it implicitly. 
341 // Schema: value.Make(c.opCtx, schema), 342 } 343 344 d := encoding.NewDecoder(c.runtime, f, config) 345 if err := d.Err(); err != nil { 346 return nil, errors.Promote(err, "failed to decode file") 347 } 348 349 defer d.Close() 350 351 n := d.File() 352 353 if d.Next(); !d.Done() { 354 return nil, errors.Newf(c.pos, "streaming not implemented: found more than one value in file") 355 } 356 357 switch f.Encoding { 358 case build.CUE: 359 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported", f.Encoding) 360 case build.JSONL: 361 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for streaming", f.Encoding) 362 case build.BinaryProto, build.TextProto: 363 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for schema-guided decoding", f.Encoding) 364 } 365 366 val := c.runtime.BuildFile(n) 367 if err := val.Err(); err != nil { 368 return nil, errors.Promote(err, "failed to build file") 369 } 370 371 _, v := value.ToInternal(val) 372 return v, nil 373 }