// Copyright 2024 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package embed provides capabilities to CUE to embed any file that resides
// within a CUE module into CUE either verbatim or decoded.
//
// This package is EXPERIMENTAL and subject to change.
//
// # Overview
//
// To enable file embedding, a file must include the file-level @extern(embed)
// attribute. This allows a quick glance to see if a file embeds any files at
// all. This allows the @embed attribute to be used to load a file within a CUE
// module into a field.
//
// References to files are always relative to the directory in which the
// referring file resides. Only files that exist within the CUE module are
// accessible.
//
// # The @embed attribute
//
// There are two main ways to embed files which are distinguished by the file
// and glob arguments. The @embed attribute supports the following arguments:
//
//	file=$filename
//
// The use of the file argument tells embed to load a single file into the
// field. This argument may not be used in conjunction with the glob argument.
//
//	glob=$pattern
//
// The use of the glob argument tells embed to load multiple files into the
// field as a map of file paths to the decoded values. The paths are normalized
// to use forward slashes. This argument may not be used in conjunction with the
// file argument.
//
//	type=$type
//
// By default, the file type is interpreted based on the file extension. This
// behavior can be overridden by the type argument. See cue help filetypes for
// the list of supported types. This field is required if a file extension is
// unknown, or if a wildcard is used for the file extension in the glob pattern.
//
// # Limitations
//
// The embed interpreter currently does not support:
//   - stream values, such as .ndjson or YAML streams.
//   - schema-based decoding, such as needed for textproto
//
// # Example
//
//	@extern(embed)
//
//	package foo
//
//	// interpreted as JSON
//	a: _ @embed(file="file1.json") // the quotes are optional here
//
//	// interpret the same file as JSON schema
//	#A: _ @embed(file=file1.json, type=jsonschema)
//
//	// interpret a proprietary extension as OpenAPI represented as YAML
//	b: _ @embed(file="file2.crd", type=openapi+yaml)
//
//	// include all YAML files in the x directory interpreted as YAML
//	// The result is a map of file paths to the decoded YAML values.
//	files: _ @embed(glob=x/*.yaml)
//
//	// include all files in the y directory as a map of file paths to binary
//	// data. The entries are unified into the same map as above.
81 // files: _ @embed(glob=y/*.*, type=binary) 82 package embed 83 84 import ( 85 "io/fs" 86 "os" 87 "path" 88 "path/filepath" 89 "strings" 90 91 "cuelang.org/go/cue" 92 "cuelang.org/go/cue/build" 93 "cuelang.org/go/cue/cuecontext" 94 "cuelang.org/go/cue/errors" 95 "cuelang.org/go/cue/token" 96 "cuelang.org/go/internal" 97 "cuelang.org/go/internal/core/adt" 98 "cuelang.org/go/internal/core/runtime" 99 "cuelang.org/go/internal/encoding" 100 "cuelang.org/go/internal/filetypes" 101 "cuelang.org/go/internal/value" 102 pkgpath "cuelang.org/go/pkg/path" 103 ) 104 105 // TODO: record files in build.Instance 106 107 // interpreter is a [cuecontext.ExternInterpreter] for embedded files. 108 type interpreter struct{} 109 110 // New returns a new interpreter for embedded files as a 111 // [cuecontext.ExternInterpreter] suitable for passing to [cuecontext.New]. 112 func New() cuecontext.ExternInterpreter { 113 return &interpreter{} 114 } 115 116 func (i *interpreter) Kind() string { 117 return "embed" 118 } 119 120 // NewCompiler returns a compiler that can decode and embed files that exist 121 // within a CUE module. 122 func (i *interpreter) NewCompiler(b *build.Instance, r *runtime.Runtime) (runtime.Compiler, errors.Error) { 123 if b.Module == "" { 124 return nil, errors.Newf(token.Pos{}, "cannot embed files when not in a module") 125 } 126 if b.Root == "" { 127 return nil, errors.Newf(token.Pos{}, "cannot embed files: no module root found") 128 } 129 return &compiler{ 130 b: b, 131 runtime: (*cue.Context)(r), 132 }, nil 133 } 134 135 // A compiler is a [runtime.Compiler] that allows embedding files into CUE 136 // values. 137 type compiler struct { 138 b *build.Instance 139 runtime *cue.Context 140 opCtx *adt.OpContext 141 142 // file system cache 143 dir string 144 fs fs.StatFS 145 pos token.Pos 146 } 147 148 // Compile interprets an embed attribute to either load a file 149 // (@embed(file=...)) or a glob of files (@embed(glob=...)). 150 // and decodes the given files. 
151 func (c *compiler) Compile(funcName string, scope adt.Value, a *internal.Attr) (adt.Expr, errors.Error) { 152 153 file, _, err := a.Lookup(0, "file") 154 if err != nil { 155 return nil, errors.Promote(err, "invalid attribute") 156 } 157 158 glob, _, err := a.Lookup(0, "glob") 159 if err != nil { 160 return nil, errors.Promote(err, "invalid attribute") 161 } 162 163 typ, _, err := a.Lookup(0, "type") 164 if err != nil { 165 return nil, errors.Promote(err, "invalid type argument") 166 } 167 168 c.opCtx = adt.NewContext((*runtime.Runtime)(c.runtime), nil) 169 170 pos := a.Pos 171 c.pos = pos 172 173 // Jump through some hoops to get file operations to behave the same for 174 // Windows and Unix. 175 // TODO: obtain a fs.FS from load or something similar. 176 dir := filepath.Dir(pos.File().Name()) 177 if c.dir != dir { 178 c.fs = os.DirFS(dir).(fs.StatFS) // Documented as implementing fs.StatFS 179 c.dir = dir 180 } 181 182 switch { 183 case file == "" && glob == "": 184 return nil, errors.Newf(a.Pos, "attribute must have file or glob field") 185 186 case file != "" && glob != "": 187 return nil, errors.Newf(a.Pos, "attribute cannot have both file and glob field") 188 189 case file != "": 190 return c.processFile(file, typ, scope) 191 192 default: // glob != "": 193 return c.processGlob(glob, typ, scope) 194 } 195 } 196 197 func (c *compiler) processFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) { 198 file, err := c.clean(file) 199 if err != nil { 200 return nil, err 201 } 202 for dir := path.Dir(file); dir != "."; dir = path.Dir(dir) { 203 if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil { 204 return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", file) 205 } 206 } 207 208 return c.decodeFile(file, scope, schema) 209 } 210 211 func (c *compiler) processGlob(glob, scope string, schema adt.Value) (adt.Expr, errors.Error) { 212 glob, ce := c.clean(glob) 213 if ce != nil { 214 return nil, ce 215 } 216 217 // 
Validate that the glob pattern is valid per [pkgpath.Match]. 218 // Note that we use Unix match semantics because all embed paths are Unix-like. 219 if _, err := pkgpath.Match(glob, "", pkgpath.Unix); err != nil { 220 return nil, errors.Wrapf(err, c.pos, "invalid glob pattern %q", glob) 221 } 222 223 // If we do not have a type, ensure the extension of the base is fully 224 // specified, i.e. does not contain any meta characters as specified by 225 // path.Match. 226 if scope == "" { 227 ext := path.Ext(path.Base(glob)) 228 if ext == "" || strings.ContainsAny(ext, "*?[\\") { 229 return nil, errors.Newf(c.pos, "extension not fully specified; type argument required") 230 } 231 } 232 233 m := &adt.StructLit{} 234 235 matches, err := fsGlob(c.fs, glob) 236 if err != nil { 237 return nil, errors.Promote(err, "failed to match glob") 238 } 239 if len(matches) == 0 { 240 return nil, errors.Newf(c.pos, "no matches for glob pattern %q", glob) 241 } 242 243 dirs := make(map[string]string) 244 for _, f := range matches { 245 // TODO: lots of stat calls happening in this MVP so another won't hurt. 246 // We don't support '**' initially, and '*' only matches files, so skip 247 // any directories. 248 if fi, err := c.fs.Stat(f); err != nil { 249 return nil, errors.Newf(c.pos, "failed to stat %s: %v", f, err) 250 } else if fi.IsDir() { 251 continue 252 } 253 // Add all parents of the embedded file that 254 // aren't the current directory (if there's a cue.mod 255 // in the current directory, that's the current module 256 // not nested). 257 for dir := path.Dir(f); dir != "."; dir = path.Dir(dir) { 258 dirs[dir] = f 259 } 260 261 expr, err := c.decodeFile(f, scope, schema) 262 if err != nil { 263 return nil, err 264 } 265 266 m.Decls = append(m.Decls, &adt.Field{ 267 Label: c.opCtx.StringLabel(f), 268 Value: expr, 269 }) 270 } 271 // Check that none of the matches were in a nested module 272 // directory. 
273 for dir, f := range dirs { 274 if _, err := c.fs.Stat(path.Join(dir, "cue.mod")); err == nil { 275 return nil, errors.Newf(c.pos, "cannot embed file %q: in different module", f) 276 } 277 } 278 return m, nil 279 } 280 281 func (c *compiler) clean(s string) (string, errors.Error) { 282 file := path.Clean(s) 283 if file != s { 284 return file, errors.Newf(c.pos, "path not normalized, use %q instead", file) 285 } 286 if path.IsAbs(file) { 287 return "", errors.Newf(c.pos, "only relative files are allowed") 288 } 289 if file == ".." || strings.HasPrefix(file, "../") { 290 return "", errors.Newf(c.pos, "cannot refer to parent directory") 291 } 292 return file, nil 293 } 294 295 // fsGlob is like [fs.Glob] but only includes dot-prefixed files 296 // when the dot is explictly present in an element. 297 // TODO: add option for including dot files? 298 func fsGlob(fsys fs.FS, pattern string) ([]string, error) { 299 pattern = path.Clean(pattern) 300 matches, err := fs.Glob(fsys, pattern) 301 if err != nil { 302 return nil, err 303 } 304 patElems := strings.Split(pattern, "/") 305 included := func(m string) bool { 306 for i, elem := range strings.Split(m, "/") { 307 // Technically there should never be more elements in m than 308 // there are in patElems, but be defensive and check bounds just in case. 309 if strings.HasPrefix(elem, ".") && (i >= len(patElems) || !strings.HasPrefix(patElems[i], ".")) { 310 return false 311 } 312 } 313 return true 314 } 315 316 i := 0 317 for _, m := range matches { 318 if included(m) { 319 matches[i] = m 320 i++ 321 } 322 } 323 return matches[:i], nil 324 } 325 326 func (c *compiler) decodeFile(file, scope string, schema adt.Value) (adt.Expr, errors.Error) { 327 // Do not use the most obvious filetypes.Input in order to disable "auto" 328 // mode. 
329 f, err := filetypes.ParseFileAndType(file, scope, filetypes.Def) 330 if err != nil { 331 return nil, errors.Promote(err, "invalid file type") 332 } 333 334 // Open and pre-load the file system using fs.FS, instead of relying 335 r, err := c.fs.Open(file) 336 if err != nil { 337 return nil, errors.Newf(c.pos, "open %v: no such file or directory", file) 338 } 339 defer r.Close() 340 341 info, err := r.Stat() 342 if err != nil { 343 return nil, errors.Promote(err, "failed to decode file") 344 } 345 if info.IsDir() { 346 return nil, errors.Newf(c.pos, "cannot embed directories") 347 } 348 f.Source = r 349 350 // TODO: this really should be done at the start of the build process. 351 // c.b.ExternFiles = append(c.b.ExternFiles, f) 352 353 config := &encoding.Config{ 354 // TODO: schema is currently the wrong schema, which is a bug in 355 // internal/core/runtime. There is also an outstanding design choice: 356 // do we imply the schema from the schema of the current field, or do 357 // we explicitly enable schema-based encoding with a "schema" argument. 358 // In the case of YAML it seems to be better to be explicit. In the case 359 // of textproto it seems to be more convenient to do it implicitly. 
360 // Schema: value.Make(c.opCtx, schema), 361 } 362 363 d := encoding.NewDecoder(c.runtime, f, config) 364 if err := d.Err(); err != nil { 365 return nil, errors.Promote(err, "failed to decode file") 366 } 367 368 defer d.Close() 369 370 n := d.File() 371 372 if d.Next(); !d.Done() { 373 // TODO: support streaming values 374 return nil, errors.Newf(c.pos, "streaming not implemented: found more than one value in file") 375 } 376 377 // TODO: each of these encodings should probably be supported in the future 378 switch f.Encoding { 379 case build.CUE: 380 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported", f.Encoding) 381 case build.JSONL: 382 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for streaming", f.Encoding) 383 case build.BinaryProto, build.TextProto: 384 return nil, errors.Newf(c.pos, "encoding %q not (yet) supported: requires support for schema-guided decoding", f.Encoding) 385 } 386 387 val := c.runtime.BuildFile(n) 388 if err := val.Err(); err != nil { 389 return nil, errors.Promote(err, "failed to build file") 390 } 391 392 _, v := value.ToInternal(val) 393 return v, nil 394 }