cuelang.org/go@v0.10.1/mod/modzip/zip.go (about)

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package modzip provides functions for creating and extracting module zip files.
     6  //
     7  // WARNING: THIS PACKAGE IS EXPERIMENTAL.
     8  // ITS API MAY CHANGE AT ANY TIME.
     9  //
    10  // Module zip files have several restrictions listed below. These are necessary
    11  // to ensure that module zip files can be extracted consistently on supported
    12  // platforms and file systems.
    13  //
    14  // • All file paths within a zip file must be valid (see cuelang.org/go/mod/module.CheckFilePath).
    15  //
    16  // • No two file paths may be equal under Unicode case-folding (see
    17  // strings.EqualFold).
    18  //
    19  // • A cue.mod/module.cue file must appear in the top-level directory. If present,
    20  // it must be named exactly that, not any other case. Directories or files named "cue.mod"
    21  // are not allowed in any other directory.
    22  //
    23  // • The total size in bytes of a module zip file may be at most MaxZipFile
    24  // bytes (500 MiB). The total uncompressed size of the files within the
    25  // zip may also be at most MaxZipFile bytes.
    26  //
    27  // • Each file's uncompressed size must match its declared 64-bit uncompressed
    28  // size in the zip file header.
    29  //
    30  // • If the zip contains files named "cue.mod/module.cue" or
    31  // "LICENSE", their sizes in bytes may be at most
    32  // MaxCUEMod or MaxLICENSE, respectively (both are 16 MiB).
    33  //
    34  // • Empty directories are ignored. File permissions and timestamps are also
    35  // ignored.
    36  //
    37  // • Symbolic links and other irregular files are not allowed.
    38  //
    39  // Note that this package does not provide hashing functionality. See
    40  // golang.org/x/mod/sumdb/dirhash.
    41  package modzip
    42  
    43  import (
    44  	"archive/zip"
    45  	"bytes"
    46  	"cmp"
    47  	"errors"
    48  	"fmt"
    49  	"io"
    50  	"io/fs"
    51  	"os"
    52  	"path"
    53  	"path/filepath"
    54  	"slices"
    55  	"strings"
    56  	"unicode"
    57  	"unicode/utf8"
    58  
    59  	"cuelang.org/go/mod/module"
    60  )
    61  
// Size limits enforced on module zip files and on selected files inside them.
const (
	// MaxZipFile is the maximum size in bytes of a module zip file (500 MiB).
	// The cue command will report an error if either the zip file or its
	// extracted content is larger than this.
	MaxZipFile = 500 << 20

	// MaxCUEMod is the maximum size in bytes of a cue.mod/module.cue file
	// within a module zip file (16 MiB).
	MaxCUEMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes of a LICENSE file within a
	// module zip file (16 MiB).
	MaxLICENSE = 16 << 20
)
    76  
// FileIO provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file - it knows how to open files represented
// as a particular type without being a file itself.
//
// The type parameter F is the caller's own representation of a file; every
// method receives a value of that type.
//
// Deprecated: this will be removed in a future API iteration that reduces
// dependence on zip archives.
type FileIO[F any] interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file.
	Path(f F) string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	Lstat(f F) (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	Open(f F) (io.ReadCloser, error)
}
    96  
// CheckedFiles reports whether a set of files satisfy the name and size
// constraints required by module zip files. The constraints are listed in the
// package documentation.
//
// Functions that produce this report may include slightly different sets of
// files. See documentation for CheckFiles, CheckDir, and CheckZip for details.
//
// Use the Err method to collapse the report into a single error value.
type CheckedFiles struct {
	// Valid is a list of file paths that should be included in a zip file.
	Valid []string

	// Omitted is a list of files that are ignored when creating a module zip
	// file, along with the reason each file is ignored.
	Omitted []FileError

	// Invalid is a list of files that should not be included in a module zip
	// file, along with the reason each file is invalid.
	Invalid []FileError

	// SizeError is non-nil if the total uncompressed size of the valid files
	// exceeds the module zip size limit or if the zip file itself exceeds the
	// limit.
	SizeError error

	// NoModError is non-nil if there was no cue.mod/module.cue file present.
	NoModError error
}
   123  
   124  // Err returns an error if CheckedFiles does not describe a valid module zip
   125  // file. SizeError is returned if that field is set. A FileErrorList is returned
   126  // if there are one or more invalid files. Other errors may be returned in the
   127  // future.
   128  func (cf CheckedFiles) Err() error {
   129  	if cf.SizeError != nil {
   130  		return cf.SizeError
   131  	}
   132  	if len(cf.Invalid) > 0 {
   133  		return FileErrorList(cf.Invalid)
   134  	}
   135  	if cf.NoModError != nil {
   136  		return cf.NoModError
   137  	}
   138  	return nil
   139  }
   140  
   141  type FileErrorList []FileError
   142  
   143  func (el FileErrorList) Error() string {
   144  	buf := &strings.Builder{}
   145  	sep := ""
   146  	for _, e := range el {
   147  		buf.WriteString(sep)
   148  		buf.WriteString(e.Error())
   149  		sep = "\n"
   150  	}
   151  	return buf.String()
   152  }
   153  
   154  type FileError struct {
   155  	Path string
   156  	Err  error
   157  }
   158  
   159  func (e FileError) Error() string {
   160  	return fmt.Sprintf("%s: %s", e.Path, e.Err)
   161  }
   162  
   163  func (e FileError) Unwrap() error {
   164  	return e.Err
   165  }
   166  
// Sentinel errors used as the Err field of FileError values and as the
// NoModError field of CheckedFiles.
var (
	// Predefined error messages for invalid files. Not exhaustive.
	// These are recorded in CheckedFiles.Invalid.
	errPathNotClean    = errors.New("file path is not clean")
	errPathNotRelative = errors.New("file path is not relative")
	errCUEModCase      = errors.New("cue.mod directories must have lowercase names")
	errCUEModuleCase   = errors.New("cue.mod/module.cue files must have lowercase names")
	errCUEModSize      = fmt.Errorf("cue.mod/module.cue file too large (max size is %d bytes)", MaxCUEMod)
	errLICENSESize     = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)

	// Predefined error messages for omitted files. Not exhaustive.
	// These are recorded in CheckedFiles.Omitted.
	errVCS           = errors.New("directory is a version control repository")
	errVendored      = errors.New("file is in vendor directory")
	errSubmoduleFile = errors.New("file is in another module")
	errSubmoduleDir  = errors.New("directory is in another module")
	errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
	errSymlink       = errors.New("file is a symbolic link")
	errNotRegular    = errors.New("not a regular file")

	// Other errors
	// errNoMod is stored in CheckedFiles.NoModError when no
	// cue.mod/module.cue file was seen.
	errNoMod = fmt.Errorf("no cue.mod/module.cue file found")
)
   188  
   189  // CheckFiles reports whether a list of files satisfy the name and size
   190  // constraints listed in the package documentation. The returned CheckedFiles
   191  // record contains lists of valid, invalid, and omitted files. Every file in
   192  // the given list will be included in exactly one of those lists.
   193  //
   194  // CheckFiles returns an error if the returned CheckedFiles does not describe
   195  // a valid module zip file (according to CheckedFiles.Err). The returned
   196  // CheckedFiles is still populated when an error is returned.
   197  //
   198  // Note that CheckFiles will not open any files, so Create may still fail when
   199  // CheckFiles is successful due to I/O errors, reported size differences
   200  // or an invalid module.cue file.
   201  //
   202  // Deprecated: this will be removed in a future API iteration that reduces
   203  // dependence on zip archives.
   204  func CheckFiles[F any](files []F, fio FileIO[F]) (CheckedFiles, error) {
   205  	cf, _, _ := checkFiles(files, fio)
   206  	return cf, cf.Err()
   207  }
   208  
   209  // checkFiles implements CheckFiles and also returns lists of valid files and
   210  // their sizes, corresponding to cf.Valid. It omits files in submodules, files
   211  // in vendored packages, symlinked files, and various other unwanted files.
   212  //
   213  // The lists returned are used in Create to avoid repeated calls to File.Lstat.
   214  func checkFiles[F any](files []F, fio FileIO[F]) (cf CheckedFiles, validFiles []F, validSizes []int64) {
   215  	errPaths := make(map[string]struct{})
   216  	addError := func(path string, omitted bool, err error) {
   217  		if _, ok := errPaths[path]; ok {
   218  			return
   219  		}
   220  		errPaths[path] = struct{}{}
   221  		fe := FileError{Path: path, Err: err}
   222  		if omitted {
   223  			cf.Omitted = append(cf.Omitted, fe)
   224  		} else {
   225  			cf.Invalid = append(cf.Invalid, fe)
   226  		}
   227  	}
   228  
   229  	// Find directories containing cue.mod files or directories (other than the root).
   230  	// Files in these directories will be omitted.
   231  	// These directories will not be included in the output zip.
   232  	haveCUEMod := make(map[string]bool)
   233  	for _, f := range files {
   234  		if dir, rest := splitCUEMod(fio.Path(f)); rest != "" {
   235  			haveCUEMod[dir] = true
   236  		}
   237  	}
   238  
   239  	inSubmodule := func(p string) bool {
   240  		for {
   241  			dir, _ := path.Split(p)
   242  			if dir == "" {
   243  				return false
   244  			}
   245  			if haveCUEMod[dir] {
   246  				return true
   247  			}
   248  			p = dir[:len(dir)-1]
   249  		}
   250  	}
   251  
   252  	collisions := make(collisionChecker)
   253  	maxSize := int64(MaxZipFile)
   254  	foundModuleCUE := false
   255  	for _, f := range files {
   256  		p := fio.Path(f)
   257  		info, err := fio.Lstat(f)
   258  		if err != nil {
   259  			addError(p, false, err)
   260  			continue
   261  		}
   262  		if info.IsDir() {
   263  			continue
   264  		}
   265  		if p != path.Clean(p) {
   266  			addError(p, false, errPathNotClean)
   267  			continue
   268  		}
   269  		if path.IsAbs(p) {
   270  			addError(p, false, errPathNotRelative)
   271  			continue
   272  		}
   273  		if isVendoredPackage(p) {
   274  			// Skip files in vendored packages.
   275  			// Note: although CUE doesn't actually include the concept of
   276  			// vendoring yet, this check acts as future-proofing so we can
   277  			// use the vendor directory for that at some future date.
   278  			addError(p, true, errVendored)
   279  			continue
   280  		}
   281  		if inSubmodule(p) {
   282  			// Skip submodule files.
   283  			addError(p, true, errSubmoduleFile)
   284  			continue
   285  		}
   286  		if p == ".hg_archival.txt" {
   287  			// Inserted by hg archive.
   288  			// Drop this regardless of the VCS being used.
   289  			addError(p, true, errHgArchivalTxt)
   290  			continue
   291  		}
   292  		// TODO check for CUE-specific module paths.
   293  		if err := module.CheckFilePath(p); err != nil {
   294  			addError(p, false, err)
   295  			continue
   296  		}
   297  
   298  		if topDir, rest, _ := strings.Cut(p, "/"); strings.EqualFold(topDir, "cue.mod") {
   299  			if topDir != "cue.mod" {
   300  				addError(p, false, errCUEModCase)
   301  				continue
   302  			}
   303  			if strings.EqualFold(rest, "module.cue") && rest != "module.cue" {
   304  				addError(p, false, errCUEModuleCase)
   305  				continue
   306  			}
   307  			switch topDir, _, _ := strings.Cut(topDir, "/"); topDir {
   308  			case "pkg", "usr", "gen":
   309  				// TODO(rogpeppe) link to explanation.
   310  				addError(p, false, fmt.Errorf("cue.mod/pkg, cue.mod/usr, or cue.mod/gen directories are not allowed in a module because they conflict with module dependencies"))
   311  				continue
   312  			}
   313  		}
   314  		if err := collisions.check(p, info.IsDir()); err != nil {
   315  			addError(p, false, err)
   316  			continue
   317  		}
   318  		if info.Mode()&os.ModeType == os.ModeSymlink {
   319  			// Skip symbolic links (golang.org/issue/27093).
   320  			addError(p, true, errSymlink)
   321  			continue
   322  		}
   323  		if !info.Mode().IsRegular() {
   324  			addError(p, true, errNotRegular)
   325  			continue
   326  		}
   327  		size := info.Size()
   328  		if size >= 0 && size <= maxSize {
   329  			maxSize -= size
   330  		} else if cf.SizeError == nil {
   331  			cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
   332  		}
   333  		if p == "cue.mod/module.cue" {
   334  			if size > MaxCUEMod {
   335  				addError(p, false, errCUEModSize)
   336  				continue
   337  			}
   338  			foundModuleCUE = true
   339  
   340  		}
   341  		if p == "LICENSE" && size > MaxLICENSE {
   342  			addError(p, false, errLICENSESize)
   343  			continue
   344  		}
   345  
   346  		cf.Valid = append(cf.Valid, p)
   347  		validFiles = append(validFiles, f)
   348  		validSizes = append(validSizes, info.Size())
   349  	}
   350  	if !foundModuleCUE {
   351  		cf.NoModError = errNoMod
   352  	}
   353  	return cf, validFiles, validSizes
   354  }
   355  
   356  // CheckDir reports whether the files in dir satisfy the name and size
   357  // constraints listed in the package documentation. The returned CheckedFiles
   358  // record contains lists of valid, invalid, and omitted files. If a directory is
   359  // omitted (for example, a nested module or vendor directory), it will appear in
   360  // the omitted list, but its files won't be listed.
   361  //
   362  // CheckDir returns an error if it encounters an I/O error or if the returned
   363  // CheckedFiles does not describe a valid module zip file (according to
   364  // CheckedFiles.Err). The returned CheckedFiles is still populated when such
   365  // an error is returned.
   366  //
   367  // Note that CheckDir will not open any files, so CreateFromDir may still fail
   368  // when CheckDir is successful due to I/O errors.
   369  //
   370  // Deprecated: this will be removed in a future API iteration that reduces
   371  // dependence on zip archives.
   372  func CheckDir(dir string) (CheckedFiles, error) {
   373  	// List files (as CreateFromDir would) and check which ones are omitted
   374  	// or invalid.
   375  	files, omitted, err := listFilesInDir(dir)
   376  	if err != nil {
   377  		return CheckedFiles{}, err
   378  	}
   379  	cf, cfErr := CheckFiles(files, dirFileIO{})
   380  	_ = cfErr // ignore this error; we'll generate our own after rewriting paths.
   381  
   382  	// Replace all paths with file system paths.
   383  	// Paths returned by CheckFiles will be slash-separated paths relative to dir.
   384  	// That's probably not appropriate for error messages.
   385  	for i := range cf.Valid {
   386  		cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
   387  	}
   388  	cf.Omitted = append(cf.Omitted, omitted...)
   389  	for i := range cf.Omitted {
   390  		cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
   391  	}
   392  	for i := range cf.Invalid {
   393  		cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
   394  	}
   395  	return cf, cf.Err()
   396  }
   397  
   398  // CheckZipFile calls CheckZip with the given zip file.
   399  func CheckZipFile(m module.Version, zipFile string) (CheckedFiles, error) {
   400  	f, err := os.Open(zipFile)
   401  	if err != nil {
   402  		return CheckedFiles{}, err
   403  	}
   404  	defer f.Close()
   405  	info, err := f.Stat()
   406  	if err != nil {
   407  		return CheckedFiles{}, err
   408  	}
   409  	_, _, cf, err := CheckZip(m, f, info.Size())
   410  	return cf, err
   411  }
   412  
   413  // CheckZip reports whether the files contained in a zip file satisfy the name
   414  // and size constraints listed in the package documentation.
   415  //
   416  // CheckZip returns an error if the returned CheckedFiles does not describe
   417  // a valid module zip file (according to CheckedFiles.Err). The returned
   418  // CheckedFiles is still populated when an error is returned. CheckZip will
   419  // also return an error if the module path or version is malformed or if it
   420  // encounters an error reading the zip file.
   421  //
   422  // It also returns the file entry for the module.cue file.
   423  //
   424  // Note that checkZip does not read individual files, so zip.Unzip may still fail
   425  // when checkZip is successful due to I/O errors.
   426  func CheckZip(m module.Version, r io.ReaderAt, zipSize int64) (*zip.Reader, *zip.File, CheckedFiles, error) {
   427  	if zipSize > MaxZipFile {
   428  		cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
   429  		return nil, nil, cf, cf.Err()
   430  	}
   431  
   432  	// Check for valid file names, collisions.
   433  	var cf CheckedFiles
   434  	addError := func(zf *zip.File, err error) {
   435  		cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
   436  	}
   437  	z, err := zip.NewReader(r, zipSize)
   438  	if err != nil {
   439  		return nil, nil, CheckedFiles{}, err
   440  	}
   441  	collisions := make(collisionChecker)
   442  	var size int64
   443  	var modFile *zip.File
   444  	for _, zf := range z.File {
   445  		name := zf.Name
   446  		isDir := strings.HasSuffix(name, "/")
   447  		if isDir {
   448  			name = name[:len(name)-1]
   449  		}
   450  		if path.Clean(name) != name {
   451  			addError(zf, errPathNotClean)
   452  			continue
   453  		}
   454  		if err := module.CheckFilePath(name); err != nil {
   455  			addError(zf, err)
   456  			continue
   457  		}
   458  		if err := collisions.check(name, isDir); err != nil {
   459  			addError(zf, err)
   460  			continue
   461  		}
   462  		prefix, rest := splitCUEMod(name)
   463  		if rest != "" {
   464  			if prefix != "" {
   465  				// cue.mod directories or files aren't allowed to exist anywhere except in the root.
   466  				addError(zf, fmt.Errorf("cue.mod not in module root directory"))
   467  				continue
   468  			}
   469  			if !strings.Contains(rest, "/") {
   470  				addError(zf, fmt.Errorf("cue.mod is not a directory"))
   471  				continue
   472  			}
   473  			if !strings.HasPrefix(rest, "cue.mod/") {
   474  				addError(zf, errCUEModCase)
   475  				continue
   476  			}
   477  			if strings.EqualFold(rest, "cue.mod/module.cue") {
   478  				if rest != "cue.mod/module.cue" {
   479  					addError(zf, errCUEModuleCase)
   480  					continue
   481  				}
   482  				modFile = zf
   483  			}
   484  		}
   485  		if isDir {
   486  			continue
   487  		}
   488  		// TODO check for case-equivalent names too
   489  		sz := int64(zf.UncompressedSize64)
   490  		if sz >= 0 && MaxZipFile-size >= sz {
   491  			size += sz
   492  		} else if cf.SizeError == nil {
   493  			cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
   494  		}
   495  		if name == "cue.mod/module.cue" && sz > MaxCUEMod {
   496  			addError(zf, fmt.Errorf("cue.mod/module.cue file too large (max size is %d bytes)", MaxCUEMod))
   497  			continue
   498  		}
   499  		if name == "LICENSE" && sz > MaxLICENSE {
   500  			addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
   501  			continue
   502  		}
   503  		cf.Valid = append(cf.Valid, zf.Name)
   504  	}
   505  	if modFile == nil {
   506  		cf.NoModError = errNoMod
   507  	}
   508  
   509  	return z, modFile, cf, cf.Err()
   510  }
   511  
   512  // Create builds a zip archive for module m from an abstract list of files
   513  // and writes it to w, after first sorting the slice of files in a path-aware
   514  // lexical fashion (files first, then directories, both sorted lexically).
   515  //
   516  // Note that m.Version is checked for validity but only the major version
   517  // is used for checking correctness of the cue.mod/module.cue file.
   518  //
   519  // Create verifies the restrictions described in the package documentation
   520  // and should not produce an archive that Unzip cannot extract. Create does not
   521  // include files in the output archive if they don't belong in the module zip.
   522  // In particular, Create will not include files in modules found in
   523  // subdirectories, most files in vendor directories, or irregular files (such
   524  // as symbolic links) in the output archive.
   525  //
   526  // Deprecated: this will be removed in a future API iteration that reduces
   527  // dependence on zip archives.
   528  func Create[F any](w io.Writer, m module.Version, files []F, fio FileIO[F]) (err error) {
   529  	defer func() {
   530  		if err != nil {
   531  			err = &zipError{verb: "create zip", err: err}
   532  		}
   533  	}()
   534  
   535  	files = slices.Clone(files)
   536  	slices.SortFunc(files, func(a, b F) int {
   537  		ap := fio.Path(a)
   538  		bp := fio.Path(b)
   539  		ca := strings.Count(ap, string(filepath.Separator))
   540  		cb := strings.Count(ap, string(filepath.Separator))
   541  		if c := cmp.Compare(ca, cb); c != 0 {
   542  			return c
   543  		}
   544  		return cmp.Compare(ap, bp)
   545  	})
   546  
   547  	// Check whether files are valid, not valid, or should be omitted.
   548  	// Also check that the valid files don't exceed the maximum size.
   549  	cf, validFiles, validSizes := checkFiles(files, fio)
   550  	if err := cf.Err(); err != nil {
   551  		return err
   552  	}
   553  
   554  	// Create the module zip file.
   555  	zw := zip.NewWriter(w)
   556  
   557  	addFile := func(f F, path string, size int64) error {
   558  		rc, err := fio.Open(f)
   559  		if err != nil {
   560  			return err
   561  		}
   562  		defer rc.Close()
   563  		w, err := zw.Create(path)
   564  		if err != nil {
   565  			return err
   566  		}
   567  		lr := &io.LimitedReader{R: rc, N: size + 1}
   568  		if _, err := io.Copy(w, lr); err != nil {
   569  			return err
   570  		}
   571  		if lr.N <= 0 {
   572  			return fmt.Errorf("file %q is larger than declared size", path)
   573  		}
   574  		return nil
   575  	}
   576  
   577  	for i, f := range validFiles {
   578  		p := fio.Path(f)
   579  		size := validSizes[i]
   580  		if err := addFile(f, p, size); err != nil {
   581  			return err
   582  		}
   583  	}
   584  
   585  	return zw.Close()
   586  }
   587  
   588  // CreateFromDir creates a module zip file for module m from the contents of
   589  // a directory, dir. The zip content is written to w.
   590  //
   591  // CreateFromDir verifies the restrictions described in the package
   592  // documentation and should not produce an archive that Unzip cannot extract.
   593  // CreateFromDir does not include files in the output archive if they don't
   594  // belong in the module zip. In particular, CreateFromDir will not include
   595  // files in modules found in subdirectories, most files in vendor directories,
   596  // or irregular files (such as symbolic links) in the output archive.
   597  // Additionally, unlike Create, CreateFromDir will not include directories
   598  // named ".bzr", ".git", ".hg", or ".svn".
   599  func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
   600  	defer func() {
   601  		if zerr, ok := err.(*zipError); ok {
   602  			zerr.path = dir
   603  		} else if err != nil {
   604  			err = &zipError{verb: "create zip from directory", path: dir, err: err}
   605  		}
   606  	}()
   607  
   608  	files, _, err := listFilesInDir(dir)
   609  	if err != nil {
   610  		return err
   611  	}
   612  
   613  	return Create(w, m, files, dirFileIO{})
   614  }
   615  
   616  type dirFile struct {
   617  	filePath, slashPath string
   618  	entry               fs.DirEntry
   619  }
   620  
   621  type dirFileIO struct{}
   622  
   623  func (dirFileIO) Path(f dirFile) string                 { return f.slashPath }
   624  func (dirFileIO) Lstat(f dirFile) (os.FileInfo, error)  { return f.entry.Info() }
   625  func (dirFileIO) Open(f dirFile) (io.ReadCloser, error) { return os.Open(f.filePath) }
   626  
   627  // isVendoredPackage reports whether the given filename is inside
   628  // the cue.mod/vendor directory.
   629  func isVendoredPackage(name string) bool {
   630  	// TODO we have to decide what the vendor directory will actually be
   631  	// called. Maybe cue.mod/pkg is the one.
   632  	return strings.HasPrefix(name, "cue.mod/vendor/")
   633  }
   634  
   635  // Unzip extracts the contents of a module zip file to a directory.
   636  //
   637  // Unzip checks all restrictions listed in the package documentation and returns
   638  // an error if the zip archive is not valid. In some cases, files may be written
   639  // to dir before an error is returned (for example, if a file's uncompressed
   640  // size does not match its declared size).
   641  //
   642  // dir may or may not exist: Unzip will create it and any missing parent
   643  // directories if it doesn't exist. If dir exists, it must be empty.
   644  func Unzip(dir string, m module.Version, zipFile string) (err error) {
   645  	defer func() {
   646  		if err != nil {
   647  			err = &zipError{verb: "unzip", path: zipFile, err: err}
   648  		}
   649  	}()
   650  
   651  	// Check that the directory is empty. Don't create it yet in case there's
   652  	// an error reading the zip.
   653  	if files, _ := os.ReadDir(dir); len(files) > 0 {
   654  		return fmt.Errorf("target directory %v exists and is not empty (contents: %q)", dir, files)
   655  	}
   656  
   657  	// Open the zip and check that it satisfies all restrictions.
   658  	f, err := os.Open(zipFile)
   659  	if err != nil {
   660  		return err
   661  	}
   662  	defer f.Close()
   663  	info, err := f.Stat()
   664  	if err != nil {
   665  		return err
   666  	}
   667  	z, _, cf, err := CheckZip(m, f, info.Size())
   668  	if err != nil {
   669  		return err
   670  	}
   671  	if err := cf.Err(); err != nil {
   672  		return err
   673  	}
   674  
   675  	// Unzip, enforcing sizes declared in the zip file.
   676  	if err := os.MkdirAll(dir, 0777); err != nil {
   677  		return err
   678  	}
   679  	for _, zf := range z.File {
   680  		name := zf.Name
   681  		if name == "" || strings.HasSuffix(name, "/") {
   682  			continue
   683  		}
   684  		dst := filepath.Join(dir, name)
   685  		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
   686  			return err
   687  		}
   688  		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
   689  		if err != nil {
   690  			return err
   691  		}
   692  		r, err := zf.Open()
   693  		if err != nil {
   694  			w.Close()
   695  			return err
   696  		}
   697  		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
   698  		_, err = io.Copy(w, lr)
   699  		r.Close()
   700  		if err != nil {
   701  			w.Close()
   702  			return err
   703  		}
   704  		if err := w.Close(); err != nil {
   705  			return err
   706  		}
   707  		if lr.N <= 0 {
   708  			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
   709  		}
   710  	}
   711  
   712  	return nil
   713  }
   714  
   715  // collisionChecker finds case-insensitive name collisions and paths that
   716  // are listed as both files and directories.
   717  //
   718  // The keys of this map are processed with strToFold. pathInfo has the original
   719  // path for each folded path.
   720  type collisionChecker map[string]pathInfo
   721  
   722  type pathInfo struct {
   723  	path  string
   724  	isDir bool
   725  }
   726  
   727  func (cc collisionChecker) check(p string, isDir bool) error {
   728  	fold := strToFold(p)
   729  	if other, ok := cc[fold]; ok {
   730  		if p != other.path {
   731  			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
   732  		}
   733  		if isDir != other.isDir {
   734  			return fmt.Errorf("entry %q is both a file and a directory", p)
   735  		}
   736  		if !isDir {
   737  			return fmt.Errorf("multiple entries for file %q", p)
   738  		}
   739  		// It's not an error if check is called with the same directory multiple
   740  		// times. check is called recursively on parent directories, so check
   741  		// may be called on the same directory many times.
   742  	} else {
   743  		cc[fold] = pathInfo{path: p, isDir: isDir}
   744  	}
   745  
   746  	if parent := path.Dir(p); parent != "." {
   747  		return cc.check(parent, true)
   748  	}
   749  	return nil
   750  }
   751  
   752  // listFilesInDir walks the directory tree rooted at dir and returns a list of
   753  // files, as well as a list of directories and files that were skipped (for
   754  // example, nested modules and symbolic links).
   755  func listFilesInDir(dir string) (files []dirFile, omitted []FileError, err error) {
   756  	err = filepath.WalkDir(dir, func(filePath string, entry fs.DirEntry, err error) error {
   757  		if err != nil {
   758  			return err
   759  		}
   760  		relPath, err := filepath.Rel(dir, filePath)
   761  		if err != nil {
   762  			return err
   763  		}
   764  		slashPath := filepath.ToSlash(relPath)
   765  
   766  		// We would like Create and CreateFromDir to produce the same result
   767  		// for a set of files, whether expressed as a directory tree or zip.
   768  		if isVendoredPackage(slashPath) {
   769  			omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
   770  			return nil
   771  		}
   772  
   773  		if entry.IsDir() {
   774  			if filePath == dir {
   775  				// Don't skip the top-level directory.
   776  				return nil
   777  			}
   778  
   779  			// Skip VCS directories.
   780  			// fossil repos are regular files with arbitrary names, so we don't try
   781  			// to exclude them.
   782  			switch filepath.Base(filePath) {
   783  			case ".bzr", ".git", ".hg", ".svn":
   784  				omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
   785  				return filepath.SkipDir
   786  			}
   787  
   788  			// Skip submodules (directories containing go.mod files).
   789  			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
   790  				omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
   791  				return filepath.SkipDir
   792  			}
   793  			return nil
   794  		}
   795  
   796  		// Skip irregular files and files in vendor directories.
   797  		// Irregular files are ignored. They're typically symbolic links.
   798  		if !entry.Type().IsRegular() {
   799  			omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
   800  			return nil
   801  		}
   802  
   803  		files = append(files, dirFile{
   804  			filePath:  filePath,
   805  			slashPath: slashPath,
   806  			entry:     entry,
   807  		})
   808  		return nil
   809  	})
   810  	if err != nil {
   811  		return nil, nil, err
   812  	}
   813  	return files, omitted, nil
   814  }
   815  
   816  type zipError struct {
   817  	verb, path string
   818  	err        error
   819  }
   820  
   821  func (e *zipError) Error() string {
   822  	if e.path == "" {
   823  		return fmt.Sprintf("%s: %v", e.verb, e.err)
   824  	} else {
   825  		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
   826  	}
   827  }
   828  
   829  func (e *zipError) Unwrap() error {
   830  	return e.err
   831  }
   832  
   833  // strToFold returns a string with the property that
   834  //
   835  //	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
   836  //
   837  // This lets us test a large set of strings for fold-equivalent
   838  // duplicates without making a quadratic number of calls
   839  // to EqualFold. Note that strings.ToUpper and strings.ToLower
   840  // do not have the desired property in some corner cases.
   841  func strToFold(s string) string {
   842  	// Fast path: all ASCII, no upper case.
   843  	// Most paths look like this already.
   844  	for i := range len(s) {
   845  		c := s[i]
   846  		if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
   847  			goto Slow
   848  		}
   849  	}
   850  	return s
   851  
   852  Slow:
   853  	var buf bytes.Buffer
   854  	for _, r := range s {
   855  		// SimpleFold(x) cycles to the next equivalent rune > x
   856  		// or wraps around to smaller values. Iterate until it wraps,
   857  		// and we've found the minimum value.
   858  		for {
   859  			r0 := r
   860  			r = unicode.SimpleFold(r0)
   861  			if r <= r0 {
   862  				break
   863  			}
   864  		}
   865  		// Exception to allow fast path above: A-Z => a-z
   866  		if 'A' <= r && r <= 'Z' {
   867  			r += 'a' - 'A'
   868  		}
   869  		buf.WriteRune(r)
   870  	}
   871  	return buf.String()
   872  }
   873  
   874  // splitCUEMod splits the path p into two elements:
   875  // the first before any cue.mod directory, and the second after
   876  // including the cue.mod directory itself.
   877  //
   878  // For example splitCUEMod("foo/bar/cue.mod/baz") would
   879  // return "foo/bar/", "cue.mod/baz".
   880  func splitCUEMod(p string) (string, string) {
   881  	s := p
   882  	for {
   883  		dir, f := path.Split(s)
   884  		if strings.EqualFold(f, "cue.mod") {
   885  			return p[:len(dir)], p[len(dir):]
   886  		}
   887  		dir = strings.TrimRight(dir, "/")
   888  		if dir == "" {
   889  			return p, ""
   890  		}
   891  		s = dir
   892  	}
   893  }