cuelang.org/go@v0.13.0/mod/modcache/fetch.go (about)

     1  package modcache
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"io/fs"
     9  	"log"
    10  	"math/rand"
    11  	"os"
    12  	"path/filepath"
    13  	"slices"
    14  	"strconv"
    15  	"strings"
    16  
    17  	"github.com/rogpeppe/go-internal/robustio"
    18  
    19  	"cuelang.org/go/internal/par"
    20  	"cuelang.org/go/mod/modfile"
    21  	"cuelang.org/go/mod/modregistry"
    22  	"cuelang.org/go/mod/module"
    23  	"cuelang.org/go/mod/modzip"
    24  )
    25  
// logging controls whether logf emits its messages through the standard
// log package; while it is false, logf calls print nothing.
const logging = false // TODO hook this up to CUE_DEBUG
    27  
    28  // New returns r wrapped inside a caching layer that
    29  // stores persistent cached content inside the given
    30  // OS directory, typically ${CUE_CACHE_DIR}.
    31  //
    32  // The `module.SourceLoc.FS` fields in the locations
    33  // returned by the registry implement the `OSRootFS` interface,
    34  // allowing a caller to find the native OS filepath where modules
    35  // are stored.
    36  //
    37  // The returned type implements [modconfig.Registry]
    38  // and [modconfig.CachedRegistry].
    39  func New(registry *modregistry.Client, dir string) (*Cache, error) {
    40  	info, err := os.Stat(dir)
    41  	if err == nil && !info.IsDir() {
    42  		return nil, fmt.Errorf("%q is not a directory", dir)
    43  	}
    44  	return &Cache{
    45  		dir: filepath.Join(dir, "mod"),
    46  		reg: registry,
    47  	}, nil
    48  }
    49  
// Cache is the caching layer returned by [New]. It combines a registry
// client (reg) with an on-disk cache directory (dir).
//
// downloadZipCache holds the results of downloadZip (the zip file path
// for a module version) and modFileCache holds the results of
// downloadModFile (module file contents keyed by the version's string
// form); see par.ErrCache for the exact caching semantics.
type Cache struct {
	dir              string // typically ${CUE_CACHE_DIR}/mod
	reg              *modregistry.Client
	downloadZipCache par.ErrCache[module.Version, string]
	modFileCache     par.ErrCache[string, []byte]
}
    56  
    57  func (c *Cache) Requirements(ctx context.Context, mv module.Version) ([]module.Version, error) {
    58  	data, err := c.downloadModFile(ctx, mv)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	mf, err := modfile.Parse(data, mv.String())
    63  	if err != nil {
    64  		return nil, fmt.Errorf("cannot parse module file from %v: %v", mv, err)
    65  	}
    66  	return mf.DepVersions(), nil
    67  }
    68  
    69  // FetchFromCache implements [cuelang.org/go/mod/modconfig.CachedRegistry].
    70  func (c *Cache) FetchFromCache(mv module.Version) (module.SourceLoc, error) {
    71  	dir, err := c.downloadDir(mv)
    72  	if err != nil {
    73  		if errors.Is(err, fs.ErrNotExist) {
    74  			return module.SourceLoc{}, modregistry.ErrNotFound
    75  		}
    76  		return module.SourceLoc{}, err
    77  	}
    78  	return c.dirToLocation(dir), nil
    79  }
    80  
    81  // Fetch returns the location of the contents for the given module
    82  // version, downloading it if necessary.
    83  func (c *Cache) Fetch(ctx context.Context, mv module.Version) (module.SourceLoc, error) {
    84  	dir, err := c.downloadDir(mv)
    85  	if err == nil {
    86  		// The directory has already been completely extracted (no .partial file exists).
    87  		return c.dirToLocation(dir), nil
    88  	}
    89  	if dir == "" || !errors.Is(err, fs.ErrNotExist) {
    90  		return module.SourceLoc{}, err
    91  	}
    92  
    93  	// To avoid cluttering the cache with extraneous files,
    94  	// DownloadZip uses the same lockfile as Download.
    95  	// Invoke DownloadZip before locking the file.
    96  	zipfile, err := c.downloadZip(ctx, mv)
    97  	if err != nil {
    98  		return module.SourceLoc{}, err
    99  	}
   100  
   101  	unlock, err := c.lockVersion(mv)
   102  	if err != nil {
   103  		return module.SourceLoc{}, err
   104  	}
   105  	defer unlock()
   106  
   107  	// Check whether the directory was populated while we were waiting on the lock.
   108  	_, dirErr := c.downloadDir(mv)
   109  	if dirErr == nil {
   110  		return c.dirToLocation(dir), nil
   111  	}
   112  	_, dirExists := dirErr.(*downloadDirPartialError)
   113  
   114  	// Clean up any partially extracted directories (indicated by
   115  	// DownloadDirPartialError, usually because of a .partial file). This is only
   116  	// safe to do because the lock file ensures that their writers are no longer
   117  	// active.
   118  	parentDir := filepath.Dir(dir)
   119  	tmpPrefix := filepath.Base(dir) + ".tmp-"
   120  
   121  	entries, _ := os.ReadDir(parentDir)
   122  	for _, entry := range entries {
   123  		if strings.HasPrefix(entry.Name(), tmpPrefix) {
   124  			RemoveAll(filepath.Join(parentDir, entry.Name())) // best effort
   125  		}
   126  	}
   127  	if dirExists {
   128  		if err := RemoveAll(dir); err != nil {
   129  			return module.SourceLoc{}, err
   130  		}
   131  	}
   132  
   133  	partialPath, err := c.cachePath(mv, "partial")
   134  	if err != nil {
   135  		return module.SourceLoc{}, err
   136  	}
   137  
   138  	// Extract the module zip directory at its final location.
   139  	//
   140  	// To prevent other processes from reading the directory if we crash,
   141  	// create a .partial file before extracting the directory, and delete
   142  	// the .partial file afterward (all while holding the lock).
   143  	//
   144  	// A technique used previously was to extract to a temporary directory with a random name
   145  	// then rename it into place with os.Rename. On Windows, this can fail with
   146  	// ERROR_ACCESS_DENIED when another process (usually an anti-virus scanner)
   147  	// opened files in the temporary directory.
   148  	if err := os.MkdirAll(parentDir, 0777); err != nil {
   149  		return module.SourceLoc{}, err
   150  	}
   151  	if err := os.WriteFile(partialPath, nil, 0666); err != nil {
   152  		return module.SourceLoc{}, err
   153  	}
   154  	if err := modzip.Unzip(dir, mv, zipfile); err != nil {
   155  		if rmErr := RemoveAll(dir); rmErr == nil {
   156  			os.Remove(partialPath)
   157  		}
   158  		return module.SourceLoc{}, err
   159  	}
   160  	if err := os.Remove(partialPath); err != nil {
   161  		return module.SourceLoc{}, err
   162  	}
   163  	makeDirsReadOnly(dir)
   164  	return c.dirToLocation(dir), nil
   165  }
   166  
   167  // ModuleVersions implements [modload.Registry.ModuleVersions].
   168  func (c *Cache) ModuleVersions(ctx context.Context, mpath string) ([]string, error) {
   169  	// TODO should this do any kind of short-term caching?
   170  	return c.reg.ModuleVersions(ctx, mpath)
   171  }
   172  
   173  func (c *Cache) downloadZip(ctx context.Context, mv module.Version) (zipfile string, err error) {
   174  	return c.downloadZipCache.Do(mv, func() (string, error) {
   175  		zipfile, err := c.cachePath(mv, "zip")
   176  		if err != nil {
   177  			return "", err
   178  		}
   179  
   180  		// Return without locking if the zip file exists.
   181  		if _, err := os.Stat(zipfile); err == nil {
   182  			return zipfile, nil
   183  		}
   184  		logf("cue: downloading %s", mv)
   185  		unlock, err := c.lockVersion(mv)
   186  		if err != nil {
   187  			return "", err
   188  		}
   189  		defer unlock()
   190  
   191  		if err := c.downloadZip1(ctx, mv, zipfile); err != nil {
   192  			return "", err
   193  		}
   194  		return zipfile, nil
   195  	})
   196  }
   197  
   198  func (c *Cache) downloadZip1(ctx context.Context, mod module.Version, zipfile string) (err error) {
   199  	// Double-check that the zipfile was not created while we were waiting for
   200  	// the lock in downloadZip.
   201  	if _, err := os.Stat(zipfile); err == nil {
   202  		return nil
   203  	}
   204  
   205  	// Create parent directories.
   206  	if err := os.MkdirAll(filepath.Dir(zipfile), 0777); err != nil {
   207  		return err
   208  	}
   209  
   210  	// Clean up any remaining tempfiles from previous runs.
   211  	// This is only safe to do because the lock file ensures that their
   212  	// writers are no longer active.
   213  	tmpPattern := filepath.Base(zipfile) + "*.tmp"
   214  	if old, err := filepath.Glob(filepath.Join(quoteGlob(filepath.Dir(zipfile)), tmpPattern)); err == nil {
   215  		for _, path := range old {
   216  			os.Remove(path) // best effort
   217  		}
   218  	}
   219  
   220  	// From here to the os.Rename call below is functionally almost equivalent to
   221  	// renameio.WriteToFile. We avoid using that so that we have control over the
   222  	// names of the temporary files (see the cleanup above) and to avoid adding
   223  	// renameio as an extra dependency.
   224  	f, err := tempFile(ctx, filepath.Dir(zipfile), filepath.Base(zipfile), 0666)
   225  	if err != nil {
   226  		return err
   227  	}
   228  	defer func() {
   229  		if err != nil {
   230  			f.Close()
   231  			os.Remove(f.Name())
   232  		}
   233  	}()
   234  
   235  	// TODO cache the result of GetModule so we don't have to do
   236  	// an extra round trip when we've already fetched the module file.
   237  	m, err := c.reg.GetModule(ctx, mod)
   238  	if err != nil {
   239  		return err
   240  	}
   241  	r, err := m.GetZip(ctx)
   242  	if err != nil {
   243  		return err
   244  	}
   245  	defer r.Close()
   246  	if _, err := io.Copy(f, r); err != nil {
   247  		return fmt.Errorf("failed to get module zip contents: %v", err)
   248  	}
   249  	if err := f.Close(); err != nil {
   250  		return err
   251  	}
   252  	if err := os.Rename(f.Name(), zipfile); err != nil {
   253  		return err
   254  	}
   255  	// TODO should we check the zip file for well-formedness?
   256  	// TODO: Should we make the .zip file read-only to discourage tampering?
   257  	return nil
   258  }
   259  
   260  func (c *Cache) downloadModFile(ctx context.Context, mod module.Version) ([]byte, error) {
   261  	return c.modFileCache.Do(mod.String(), func() ([]byte, error) {
   262  		modfile, data, err := c.readDiskModFile(mod)
   263  		if err == nil {
   264  			return data, nil
   265  		}
   266  		logf("cue: downloading %s", mod)
   267  		unlock, err := c.lockVersion(mod)
   268  		if err != nil {
   269  			return nil, err
   270  		}
   271  		defer unlock()
   272  		// Double-check that the file hasn't been created while we were
   273  		// acquiring the lock.
   274  		_, data, err = c.readDiskModFile(mod)
   275  		if err == nil {
   276  			return data, nil
   277  		}
   278  		return c.downloadModFile1(ctx, mod, modfile)
   279  	})
   280  }
   281  
   282  func (c *Cache) downloadModFile1(ctx context.Context, mod module.Version, modfile string) ([]byte, error) {
   283  	m, err := c.reg.GetModule(ctx, mod)
   284  	if err != nil {
   285  		return nil, err
   286  	}
   287  	data, err := m.ModuleFile(ctx)
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  	if err := c.writeDiskModFile(ctx, modfile, data); err != nil {
   292  		return nil, err
   293  	}
   294  	return data, nil
   295  }
   296  
   297  func (c *Cache) dirToLocation(fpath string) module.SourceLoc {
   298  	return module.SourceLoc{
   299  		FS:  module.OSDirFS(fpath),
   300  		Dir: ".",
   301  	}
   302  }
   303  
   304  // makeDirsReadOnly makes a best-effort attempt to remove write permissions for dir
   305  // and its transitive contents.
   306  func makeDirsReadOnly(dir string) {
   307  	type pathMode struct {
   308  		path string
   309  		mode fs.FileMode
   310  	}
   311  	var dirs []pathMode // in lexical order
   312  	filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
   313  		if err == nil && d.IsDir() {
   314  			info, err := d.Info()
   315  			if err == nil && info.Mode()&0222 != 0 {
   316  				dirs = append(dirs, pathMode{path, info.Mode()})
   317  			}
   318  		}
   319  		return nil
   320  	})
   321  
   322  	// Run over list backward to chmod children before parents.
   323  	for _, dir := range slices.Backward(dirs) {
   324  		os.Chmod(dir.path, dir.mode&^0222)
   325  	}
   326  }
   327  
   328  // RemoveAll removes a directory written by the cache, first applying
   329  // any permission changes needed to do so.
   330  func RemoveAll(dir string) error {
   331  	// Module cache has 0555 directories; make them writable in order to remove content.
   332  	filepath.WalkDir(dir, func(path string, info fs.DirEntry, err error) error {
   333  		if err != nil {
   334  			return nil // ignore errors walking in file system
   335  		}
   336  		if info.IsDir() {
   337  			os.Chmod(path, 0777)
   338  		}
   339  		return nil
   340  	})
   341  	return robustio.RemoveAll(dir)
   342  }
   343  
   344  // quoteGlob returns s with all Glob metacharacters quoted.
   345  // We don't try to handle backslash here, as that can appear in a
   346  // file path on Windows.
   347  func quoteGlob(s string) string {
   348  	if !strings.ContainsAny(s, `*?[]`) {
   349  		return s
   350  	}
   351  	var sb strings.Builder
   352  	for _, c := range s {
   353  		switch c {
   354  		case '*', '?', '[', ']':
   355  			sb.WriteByte('\\')
   356  		}
   357  		sb.WriteRune(c)
   358  	}
   359  	return sb.String()
   360  }
   361  
   362  // tempFile creates a new temporary file with given permission bits.
   363  func tempFile(ctx context.Context, dir, prefix string, perm fs.FileMode) (f *os.File, err error) {
   364  	for i := 0; i < 10000; i++ {
   365  		name := filepath.Join(dir, prefix+strconv.Itoa(rand.Intn(1000000000))+".tmp")
   366  		f, err = os.OpenFile(name, os.O_RDWR|os.O_CREATE|os.O_EXCL, perm)
   367  		if os.IsExist(err) {
   368  			if ctx.Err() != nil {
   369  				return nil, ctx.Err()
   370  			}
   371  			continue
   372  		}
   373  		break
   374  	}
   375  	return
   376  }
   377  
   378  func logf(f string, a ...any) {
   379  	if logging {
   380  		log.Printf(f, a...)
   381  	}
   382  }