github.phpd.cn/thought-machine/please@v12.2.0+incompatible/tools/jarcat/zip/writer.go (about)

     1  // Package zip implements functions for jarcat that manipulate .zip files.
     2  package zip
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"gopkg.in/op/go-logging.v1"
    18  
    19  	"fs"
    20  	"third_party/go/zip"
    21  )
    22  
// log is the package-level logger, registered under the name "zip".
var log = logging.MustGetLogger("zip")

// modTime is the fixed timestamp (midnight UTC, 1 January 2001) that this package
// stamps onto entries it writes and into stripped bytecode headers, in place of
// real modification times.
var modTime = time.Date(2001, time.January, 1, 0, 0, 0, 0, time.UTC)

// fileHeaderLen is the length of a file header in a zipfile.
// We need to know this to adjust alignment.
const fileHeaderLen = 30
    29  
// A File represents an output zipfile.
// Construct one with NewFile, set any of the exported options, add content,
// then call Close to finalise the archive.
type File struct {
	// f is the underlying output that NewFile creates and Close closes.
	f io.WriteCloser
	// w is the zip writer layered on top of f.
	w *zip.Writer
	// filename is the path of the output file; walk skips this path so the
	// archive is never added to itself.
	filename string
	// input is the root most recently passed to AddFiles. walk does not resolve
	// symlinks for this path itself.
	input string
	// Include and Exclude filter entry names when copying from other zip files.
	// Exclude entries are glob patterns matched against the full name and the base name.
	// Include entries are matched either as glob patterns or as plain name prefixes.
	Include, Exclude []string
	// Strict controls whether we deny duplicate files or not.
	// Zipfiles can readily contain duplicates, if this is true we reject them unless they are identical.
	// If false we allow duplicates and leave it to someone else to handle.
	Strict bool
	// RenameDirs is a map of directories to rename, from the old name to the new one.
	// It is applied by prefix match to the names of entries copied by AddZipFile.
	RenameDirs map[string]string
	// StripPrefix is a prefix that is stripped off the names of entries copied by AddZipFile.
	StripPrefix string
	// Suffix lists the filename suffixes that the file scan treats as zip files
	// whose contents are merged into the output.
	Suffix []string
	// ExcludeSuffix is a list of suffixes that are excluded from the file scan.
	ExcludeSuffix []string
	// StoreSuffix is a list of file suffixes that will be stored instead of deflated.
	StoreSuffix []string
	// IncludeOther will make the file scan include other files that are not part of a zip file.
	IncludeOther bool
	// AddInitPy will make the writer add __init__.py files to all directories that don't already have one on close.
	AddInitPy bool
	// DirEntries makes the writer add empty directory entries.
	DirEntries bool
	// Align aligns entries to a multiple of this many bytes.
	// Zero disables alignment; only stored (uncompressed) entries are padded.
	Align int
	// Prefix stores all files with this prefix.
	Prefix string
	// files tracks the files that we've written so far.
	files map[string]fileRecord
	// concatenatedFiles tracks the files that are built up as we go.
	// They are keyed by entry name and written out when the File is closed.
	concatenatedFiles map[string][]byte
}
    67  
// A fileRecord records some information about a file that we use to check if they're exact duplicates.
// The field order matters: addExistingFile builds records with a positional literal.
type fileRecord struct {
	// ZipFile is where the entry came from: the source zip path for copied entries,
	// or the entry's own name for entries written directly.
	ZipFile string
	// CompressedSize64 and UncompressedSize64 are the sizes taken from the source
	// entry; they are recorded as zero for entries written directly.
	CompressedSize64   uint64
	UncompressedSize64 uint64
	// CRC32 is the checksum taken from the source entry. Zero is used for entries
	// written directly and is treated as "unknown" by the duplicate check.
	CRC32 uint32
}
    75  
    76  // NewFile constructs and returns a new File.
    77  func NewFile(output string, strict bool) *File {
    78  	f, err := os.Create(output)
    79  	if err != nil {
    80  		log.Fatalf("Failed to open output file: %s", err)
    81  	}
    82  	return &File{
    83  		f:                 f,
    84  		w:                 zip.NewWriter(f),
    85  		filename:          output,
    86  		Strict:            strict,
    87  		files:             map[string]fileRecord{},
    88  		concatenatedFiles: map[string][]byte{},
    89  	}
    90  }
    91  
    92  // Close must be called before the File is destroyed.
    93  func (f *File) Close() {
    94  	f.handleConcatenatedFiles()
    95  	if f.AddInitPy {
    96  		if err := f.AddInitPyFiles(); err != nil {
    97  			log.Fatalf("%s", err)
    98  		}
    99  	}
   100  	if err := f.w.Close(); err != nil {
   101  		log.Fatalf("Failed to finalise zip file: %s", err)
   102  	}
   103  	if err := f.f.Close(); err != nil {
   104  		log.Fatalf("Failed to close file: %s", err)
   105  	}
   106  }
   107  
   108  // AddZipFile copies the contents of a zip file into the new zipfile.
   109  func (f *File) AddZipFile(filepath string) error {
   110  	r, err := zip.OpenReader(filepath)
   111  	if err != nil {
   112  		return err
   113  	}
   114  	defer r.Close()
   115  
   116  	// Reopen file to get a directly readable version without decompression.
   117  	r2, err := os.Open(filepath)
   118  	if err != nil {
   119  		return err
   120  	}
   121  	defer r2.Close()
   122  
   123  	for _, rf := range r.File {
   124  		log.Debug("Found file %s (from %s)", rf.Name, filepath)
   125  		if !f.shouldInclude(rf.Name) {
   126  			continue
   127  		}
   128  		// This directory is very awkward. We need to merge the contents by concatenating them,
   129  		// we can't replace them or leave them out.
   130  		if strings.HasPrefix(rf.Name, "META-INF/services/") ||
   131  			strings.HasPrefix(rf.Name, "META-INF/spring") ||
   132  			rf.Name == "META-INF/please_sourcemap" ||
   133  			// akka libs each have their own reference.conf. if you are using
   134  			// akka as a lib-only (e.g akka-remote), those need to be merged together
   135  			rf.Name == "reference.conf" {
   136  			if err := f.concatenateFile(rf); err != nil {
   137  				return err
   138  			}
   139  			continue
   140  		}
   141  		hasTrailingSlash := strings.HasSuffix(rf.Name, "/")
   142  		isDir := hasTrailingSlash || rf.FileInfo().IsDir()
   143  		if isDir && !hasTrailingSlash {
   144  			rf.Name = rf.Name + "/"
   145  		}
   146  		if existing, present := f.files[rf.Name]; present {
   147  			// Allow duplicates of directories. Seemingly the best way to identify them is that
   148  			// they end in a trailing slash.
   149  			if isDir {
   150  				continue
   151  			}
   152  			// Allow skipping existing files that are exactly the same as the added ones.
   153  			// It's unnecessarily awkward to insist on not ever doubling up on a dependency.
   154  			// TODO(pebers): Bit of a hack ignoring it when CRC is 0, would be better to add
   155  			//               the correct CRC when added through WriteFile.
   156  			if existing.CRC32 == rf.CRC32 || existing.CRC32 == 0 {
   157  				log.Info("Skipping %s / %s: already added (from %s)", filepath, rf.Name, existing.ZipFile)
   158  				continue
   159  			}
   160  			if f.Strict {
   161  				log.Error("Duplicate file %s (from %s, already added from %s); crc %d / %d", rf.Name, filepath, existing.ZipFile, rf.CRC32, existing.CRC32)
   162  				return fmt.Errorf("File %s already added to destination zip file (from %s)", rf.Name, existing.ZipFile)
   163  			}
   164  			continue
   165  		}
   166  		for before, after := range f.RenameDirs {
   167  			if strings.HasPrefix(rf.Name, before) {
   168  				rf.Name = path.Join(after, strings.TrimPrefix(rf.Name, before))
   169  				if isDir {
   170  					rf.Name = rf.Name + "/"
   171  				}
   172  				break
   173  			}
   174  		}
   175  		if f.StripPrefix != "" {
   176  			rf.Name = strings.TrimPrefix(rf.Name, f.StripPrefix)
   177  		}
   178  		if f.Prefix != "" {
   179  			rf.Name = path.Join(f.Prefix, rf.Name)
   180  		}
   181  		// Java tools don't seem to like writing a data descriptor for stored items.
   182  		// Unsure if this is a limitation of the format or a problem of those tools.
   183  		rf.Flags = 0
   184  		f.addExistingFile(rf.Name, filepath, rf.CompressedSize64, rf.UncompressedSize64, rf.CRC32)
   185  
   186  		start, err := rf.DataOffset()
   187  		if err != nil {
   188  			return err
   189  		}
   190  		if _, err := r2.Seek(start, 0); err != nil {
   191  			return err
   192  		}
   193  		if err := f.addFile(&rf.FileHeader, r2, rf.CRC32); err != nil {
   194  			return err
   195  		}
   196  	}
   197  	return nil
   198  }
   199  
   200  // walk is a callback to walk a file tree and add all files found in it.
   201  func (f *File) walk(path string, isDir bool, mode os.FileMode) error {
   202  	if path != f.input && (mode&os.ModeSymlink) != 0 {
   203  		if resolved, err := filepath.EvalSymlinks(path); err != nil {
   204  			return err
   205  		} else if isDir {
   206  			// TODO(peterebden): Is this case still needed?
   207  			return fs.WalkMode(resolved, f.walk)
   208  		}
   209  	}
   210  	if path == f.filename {
   211  		return nil
   212  	} else if !isDir {
   213  		if !f.matchesSuffix(path, f.ExcludeSuffix) {
   214  			if f.matchesSuffix(path, f.Suffix) {
   215  				log.Debug("Adding zip file %s", path)
   216  				if err := f.AddZipFile(path); err != nil {
   217  					return fmt.Errorf("Error adding %s to zipfile: %s", path, err)
   218  				}
   219  			} else if f.IncludeOther && !f.HasExistingFile(path) {
   220  				log.Debug("Including existing non-zip file %s", path)
   221  				if info, err := os.Lstat(path); err != nil {
   222  					return err
   223  				} else if b, err := ioutil.ReadFile(path); err != nil {
   224  					return fmt.Errorf("Error reading %s to zipfile: %s", path, err)
   225  				} else if err := f.StripBytecodeTimestamp(path, b); err != nil {
   226  					return err
   227  				} else if err := f.WriteFile(path, b, info.Mode()&os.ModePerm); err != nil {
   228  					return err
   229  				}
   230  			}
   231  		}
   232  	} else if (len(f.Suffix) == 0 || f.AddInitPy) && path != "." && f.DirEntries { // Only add directory entries in "dumb" mode.
   233  		log.Debug("Adding directory entry %s/", path)
   234  		if err := f.WriteDir(path); err != nil {
   235  			return err
   236  		}
   237  	}
   238  	return nil
   239  }
   240  
   241  // AddFiles walks the given directory and adds any zip files (determined by suffix) that it finds within.
   242  func (f *File) AddFiles(in string) error {
   243  	f.input = in
   244  	return fs.WalkMode(in, f.walk)
   245  }
   246  
   247  // shouldExcludeSuffix returns true if the given filename has a suffix that should be excluded.
   248  func (f *File) matchesSuffix(path string, suffixes []string) bool {
   249  	for _, suffix := range suffixes {
   250  		if suffix != "" && strings.HasSuffix(path, suffix) {
   251  			return true
   252  		}
   253  	}
   254  	return false
   255  }
   256  
   257  // shouldInclude returns true if the given filename should be included according to the include / exclude sets of this File.
   258  func (f *File) shouldInclude(name string) bool {
   259  	for _, excl := range f.Exclude {
   260  		if matched, _ := filepath.Match(excl, name); matched {
   261  			log.Debug("Skipping %s (excluded by %s)", name, excl)
   262  			return false
   263  		} else if matched, _ := filepath.Match(excl, filepath.Base(name)); matched {
   264  			log.Debug("Skipping %s (excluded by %s)", name, excl)
   265  			return false
   266  		}
   267  	}
   268  	if len(f.Include) == 0 {
   269  		return true
   270  	}
   271  	for _, incl := range f.Include {
   272  		if matched, _ := filepath.Match(incl, name); matched || strings.HasPrefix(name, incl) {
   273  			return true
   274  		}
   275  	}
   276  	log.Debug("Skipping %s (didn't match any includes)", name)
   277  	return false
   278  }
   279  
   280  // AddInitPyFiles adds an __init__.py file to every directory in the zip file that doesn't already have one.
   281  func (f *File) AddInitPyFiles() error {
   282  	s := make([]string, 0, len(f.files))
   283  	for p := range f.files {
   284  		s = append(s, p)
   285  	}
   286  	sort.Strings(s)
   287  	for _, p := range s {
   288  		for d := filepath.Dir(p); d != "."; d = filepath.Dir(d) {
   289  			if filepath.Base(d) == "__pycache__" {
   290  				break // Don't need to add an __init__.py here.
   291  			}
   292  			initPyPath := path.Join(d, "__init__.py")
   293  			// Don't write one at the root, it's not necessary.
   294  			if _, present := f.files[initPyPath]; present || initPyPath == "__init__.py" {
   295  				break
   296  			} else if _, present := f.files[initPyPath+"c"]; present {
   297  				// If we already have a pyc / pyo we don't need the __init__.py as well.
   298  				break
   299  			} else if _, present := f.files[initPyPath+"o"]; present {
   300  				break
   301  			}
   302  			log.Debug("Adding %s", initPyPath)
   303  			f.files[initPyPath] = fileRecord{}
   304  			if err := f.WriteFile(initPyPath, []byte{}, 0644); err != nil {
   305  				return err
   306  			}
   307  		}
   308  	}
   309  	return nil
   310  }
   311  
   312  // AddManifest adds a manifest to the given zip writer with a Main-Class entry (and a couple of others)
   313  func (f *File) AddManifest(mainClass string) error {
   314  	manifest := fmt.Sprintf("Manifest-Version: 1.0\nMain-Class: %s\n", mainClass)
   315  	return f.WriteFile("META-INF/MANIFEST.MF", []byte(manifest), 0644)
   316  }
   317  
   318  // HasExistingFile returns true if the writer has already written the given file.
   319  func (f *File) HasExistingFile(name string) bool {
   320  	_, present := f.files[name]
   321  	return present
   322  }
   323  
   324  // addExistingFile adds a record for an existing file, although doesn't write any contents.
   325  func (f *File) addExistingFile(name, file string, compressedSize, uncompressedSize uint64, crc uint32) {
   326  	f.files[name] = fileRecord{file, compressedSize, uncompressedSize, crc}
   327  }
   328  
   329  // concatenateFile adds a file to the zip which is concatenated with any existing content with the same name.
   330  // Writing is deferred since we obviously can't append to it later.
   331  func (f *File) concatenateFile(zf *zip.File) error {
   332  	r, err := zf.Open()
   333  	if err != nil {
   334  		return err
   335  	}
   336  	defer r.Close()
   337  	var buf bytes.Buffer
   338  	if _, err := io.Copy(&buf, r); err != nil {
   339  		return err
   340  	}
   341  	contents := buf.Bytes()
   342  	if !bytes.HasSuffix(contents, []byte{'\n'}) {
   343  		contents = append(contents, '\n')
   344  	}
   345  	f.concatenatedFiles[zf.Name] = append(f.concatenatedFiles[zf.Name], contents...)
   346  	return nil
   347  }
   348  
   349  // handleConcatenatedFiles appends concatenated files to the archive's directory for writing.
   350  func (f *File) handleConcatenatedFiles() error {
   351  	// Must do it in a deterministic order
   352  	files := make([]string, 0, len(f.concatenatedFiles))
   353  	for name := range f.concatenatedFiles {
   354  		files = append(files, name)
   355  	}
   356  	sort.Strings(files)
   357  	for _, name := range files {
   358  		if err := f.WriteFile(name, f.concatenatedFiles[name], 0644); err != nil {
   359  			return err
   360  		}
   361  	}
   362  	return nil
   363  }
   364  
   365  // addFile writes a file to the new writer.
   366  func (f *File) addFile(fh *zip.FileHeader, r io.Reader, crc uint32) error {
   367  	f.align(fh)
   368  	fh.Flags = 0 // we're not writing a data descriptor after the file
   369  	comp := func(w io.Writer) (io.WriteCloser, error) { return nopCloser{w}, nil }
   370  	fh.SetModTime(modTime)
   371  	fw, err := f.w.CreateHeaderWithCompressor(fh, comp, fixedCrc32{value: crc})
   372  	if err == nil {
   373  		_, err = io.CopyN(fw, r, int64(fh.CompressedSize64))
   374  	}
   375  	return err
   376  }
   377  
   378  // WriteFile writes a complete file to the writer.
   379  func (f *File) WriteFile(filename string, data []byte, mode os.FileMode) error {
   380  	filename = path.Join(f.Prefix, filename)
   381  	fh := zip.FileHeader{
   382  		Name:   filename,
   383  		Method: zip.Deflate,
   384  	}
   385  	fh.SetMode(mode)
   386  	fh.SetModTime(modTime)
   387  
   388  	for _, ext := range f.StoreSuffix {
   389  		if strings.HasSuffix(filename, ext) {
   390  			fh.Method = zip.Store
   391  			break
   392  		}
   393  	}
   394  
   395  	f.align(&fh)
   396  	if fw, err := f.w.CreateHeader(&fh); err != nil {
   397  		return err
   398  	} else if _, err := fw.Write(data); err != nil {
   399  		return err
   400  	}
   401  	f.addExistingFile(filename, filename, 0, 0, 0)
   402  	return nil
   403  }
   404  
   405  // align writes any necessary bytes to align the next file.
   406  func (f *File) align(h *zip.FileHeader) {
   407  	if f.Align != 0 && h.Method == zip.Store {
   408  		// We have to allow space for writing the header, so we predict what the offset will be after it.
   409  		fileStart := f.w.Offset() + fileHeaderLen + len(h.Name) + len(h.Extra)
   410  		if overlap := fileStart % f.Align; overlap != 0 {
   411  			if err := f.w.WriteRaw(bytes.Repeat([]byte{0}, f.Align-overlap)); err != nil {
   412  				log.Error("Failed to pad file: %s", err)
   413  			}
   414  		}
   415  	}
   416  }
   417  
   418  // WriteDir writes a directory entry to the writer.
   419  func (f *File) WriteDir(filename string) error {
   420  	filename = path.Join(f.Prefix, filename)
   421  	filename += "/" // Must have trailing slash to tell it it's a directory.
   422  	fh := zip.FileHeader{
   423  		Name:   filename,
   424  		Method: zip.Store,
   425  	}
   426  	fh.SetModTime(modTime)
   427  	if _, err := f.w.CreateHeader(&fh); err != nil {
   428  		return err
   429  	}
   430  	f.addExistingFile(filename, filename, 0, 0, 0)
   431  	return nil
   432  }
   433  
   434  // WritePreamble writes a preamble to the zipfile.
   435  func (f *File) WritePreamble(preamble []byte) error {
   436  	return f.w.WriteRaw(preamble)
   437  }
   438  
   439  // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file.
   440  // This is important so our output is deterministic.
   441  func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error {
   442  	if strings.HasSuffix(filename, ".pyc") || strings.HasSuffix(filename, ".pyo") {
   443  		if len(contents) < 8 {
   444  			log.Warning("Invalid bytecode file, will not strip timestamp")
   445  		} else {
   446  			// The .pyc format starts with a two-byte magic number, a \r\n, then a four-byte
   447  			// timestamp. It is that timestamp we are interested in; we overwrite it with
   448  			// the same mtime we use in the zipfile directory (it's important that it is
   449  			// deterministic, but also that it matches, otherwise zipimport complains).
   450  			var buf bytes.Buffer
   451  			binary.Write(&buf, binary.LittleEndian, modTime.Unix())
   452  			b := buf.Bytes()
   453  			contents[4] = b[0]
   454  			contents[5] = b[1]
   455  			contents[6] = b[2]
   456  			contents[7] = b[3]
   457  		}
   458  	}
   459  	return nil
   460  }
   461  
   462  type nopCloser struct {
   463  	io.Writer
   464  }
   465  
   466  func (w nopCloser) Close() error {
   467  	return nil
   468  }
   469  
   470  // fixedCrc32 implements a Hash32 interface that just writes out a predetermined value.
   471  // this is really cheating of course but serves our purposes here.
   472  type fixedCrc32 struct {
   473  	value uint32
   474  }
   475  
   476  func (crc fixedCrc32) Write(p []byte) (n int, err error) {
   477  	return len(p), nil
   478  }
   479  
   480  func (crc fixedCrc32) Sum(b []byte) []byte {
   481  	buf := make([]byte, 4)
   482  	binary.LittleEndian.PutUint32(buf, crc.value)
   483  	return b
   484  }
   485  
   486  func (crc fixedCrc32) Sum32() uint32 {
   487  	return crc.value
   488  }
   489  
   490  func (crc fixedCrc32) Reset() {
   491  }
   492  
   493  func (crc fixedCrc32) Size() int {
   494  	return 32
   495  }
   496  
   497  func (crc fixedCrc32) BlockSize() int {
   498  	return 32
   499  }