github.com/tiagovtristao/plz@v13.4.0+incompatible/tools/jarcat/zip/writer.go (about)

     1  // Package zip implements functions for jarcat that manipulate .zip files.
     2  package zip
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"io"
     9  	"io/ioutil"
    10  	"os"
    11  	"path"
    12  	"path/filepath"
    13  	"sort"
    14  	"strings"
    15  	"time"
    16  
    17  	"gopkg.in/op/go-logging.v1"
    18  
    19  	"github.com/thought-machine/please/src/fs"
    20  	"third_party/go/zip"
    21  )
    22  
// log is the package-level logger, registered under the name "zip".
var log = logging.MustGetLogger("zip")

// modTime is the fixed timestamp applied to every entry header this package writes
// and written into .pyc headers by zeroPycTimestamp, so that the output does not
// depend on when it was produced.
var modTime = time.Date(2001, time.January, 1, 0, 0, 0, 0, time.UTC)

// fileHeaderLen is the length of a file header in a zipfile.
// We need to know this to adjust alignment; align adds the lengths of the entry's
// name and extra field to it separately.
const fileHeaderLen = 30
    29  
// A File represents an output zipfile.
// It is created with NewFile; the exported fields are options that callers set before
// adding content, and Close must be called to finalise the archive.
type File struct {
	// f is the underlying output stream; NewFile obtains it from os.Create and Close closes it.
	f        io.WriteCloser
	// w is the zip writer layered on top of f.
	w        *zip.Writer
	// filename is the output path given to NewFile. The file walk compares against it
	// so that the output file is never added to itself.
	filename string
	// input is the root most recently passed to AddFiles. A symlink is only resolved
	// by the walk when its path differs from this root.
	input    string
	// Include and Exclude select which entries of an input zipfile are copied.
	// Exclude entries are filepath.Match patterns tested against both the full entry name
	// and its base name. Include entries are accepted either as a filepath.Match pattern
	// or as a plain prefix of the entry name; an empty Include list admits everything.
	Include, Exclude []string
	// Strict controls whether we deny duplicate files or not.
	// Zipfiles can readily contain duplicates, if this is true we reject them unless they are identical.
	// If false we allow duplicates and leave it to someone else to handle.
	Strict bool
	// RenameDirs is a map of directories to rename, from the old name to the new one.
	RenameDirs map[string]string
	// StripPrefix is a prefix that is stripped off the names of entries copied by AddZipFile.
	StripPrefix string
	// Suffix is the suffix of files that we include while scanning.
	// Files matching one of these are treated as zip files and merged via AddZipFile.
	Suffix []string
	// ExcludeSuffix is a list of suffixes that are excluded from the file scan.
	ExcludeSuffix []string
	// StoreSuffix is a list of file suffixes that will be stored instead of deflated.
	StoreSuffix []string
	// IncludeOther will make the file scan include other files that are not part of a zip file.
	IncludeOther bool
	// AddInitPy will make the writer add __init__.py files to all directories that don't already have one on close.
	AddInitPy bool
	// StripPy will strip .py files when there is a corresponding .pyc
	StripPy bool
	// DirEntries makes the writer add empty directory entries.
	DirEntries bool
	// Align aligns entries to a multiple of this many bytes.
	// Zero disables alignment, and only entries using the Store method are padded.
	Align int
	// Prefix stores all files with this prefix.
	Prefix string
	// files tracks the files that we've written so far, keyed by entry name.
	files map[string]fileRecord
	// concatenatedFiles tracks the files that are built up as we go.
	// Keys are entry names and values the accumulated contents; they are written out by Close.
	concatenatedFiles map[string][]byte
}
    69  
// A fileRecord records some information about a file that we use to check if they're exact duplicates.
// NOTE: the field order matters, addExistingFile constructs this with an unkeyed literal.
type fileRecord struct {
	// ZipFile is the path of the archive (or file) that the entry came from.
	ZipFile            string
	// CompressedSize64 and UncompressedSize64 are the sizes reported by the source entry.
	// Entries written through WriteFile / WriteDir record zero for both.
	CompressedSize64   uint64
	UncompressedSize64 uint64
	// CRC32 is the checksum reported by the source entry; zero for entries written
	// through WriteFile / WriteDir, which AddZipFile treats as "matches anything".
	CRC32              uint32
}
    77  
    78  // NewFile constructs and returns a new File.
    79  func NewFile(output string, strict bool) *File {
    80  	f, err := os.Create(output)
    81  	if err != nil {
    82  		log.Fatalf("Failed to open output file: %s", err)
    83  	}
    84  	return &File{
    85  		f:                 f,
    86  		w:                 zip.NewWriter(f),
    87  		filename:          output,
    88  		Strict:            strict,
    89  		files:             map[string]fileRecord{},
    90  		concatenatedFiles: map[string][]byte{},
    91  	}
    92  }
    93  
    94  // Close must be called before the File is destroyed.
    95  func (f *File) Close() {
    96  	f.handleConcatenatedFiles()
    97  	if f.AddInitPy {
    98  		if err := f.AddInitPyFiles(); err != nil {
    99  			log.Fatalf("%s", err)
   100  		}
   101  	}
   102  	if err := f.w.Close(); err != nil {
   103  		log.Fatalf("Failed to finalise zip file: %s", err)
   104  	}
   105  	if err := f.f.Close(); err != nil {
   106  		log.Fatalf("Failed to close file: %s", err)
   107  	}
   108  }
   109  
// AddZipFile copies the contents of a zip file into the new zipfile.
//
// Entries are filtered through shouldInclude, a handful of well-known names are merged
// by concatenation instead of being copied, duplicates are detected against the entries
// written so far, and the remaining entries are renamed (RenameDirs, StripPrefix, Prefix)
// and copied across in their already-compressed form.
//
// It returns any error from opening or reading the input, from writing the output, or,
// when Strict is set, an error for a duplicate entry whose CRC differs from the one
// already written.
//
// Note that the parameter is named filepath and therefore shadows the path/filepath
// package inside this method.
func (f *File) AddZipFile(filepath string) error {
	r, err := zip.OpenReader(filepath)
	if err != nil {
		return err
	}
	defer r.Close()

	// Reopen file to get a directly readable version without decompression.
	r2, err := os.Open(filepath)
	if err != nil {
		return err
	}
	defer r2.Close()

	// Need to know all the filenames upfront if we're stripping sources.
	// The set is keyed by the names as they appear in the input archive.
	filelist := map[string]struct{}{}
	if f.StripPy {
		for _, rf := range r.File {
			filelist[rf.Name] = struct{}{}
		}
	}

	for _, rf := range r.File {
		log.Debug("Found file %s (from %s)", rf.Name, filepath)
		if !f.shouldInclude(rf.Name) {
			continue
		}
		// These entries are very awkward. We need to merge the contents by concatenating them,
		// we can't replace them or leave them out.
		if strings.HasPrefix(rf.Name, "META-INF/services/") ||
			strings.HasPrefix(rf.Name, "META-INF/spring") ||
			rf.Name == "META-INF/please_sourcemap" ||
			// akka libs each have their own reference.conf. if you are using
			// akka as a lib-only (e.g akka-remote), those need to be merged together
			rf.Name == "reference.conf" {
			// Writing is deferred; the merged contents are emitted when the File is closed.
			if err := f.concatenateFile(rf); err != nil {
				return err
			}
			continue
		}
		// Normalise directory entries so they always carry a trailing slash.
		hasTrailingSlash := strings.HasSuffix(rf.Name, "/")
		isDir := hasTrailingSlash || rf.FileInfo().IsDir()
		if isDir && !hasTrailingSlash {
			rf.Name = rf.Name + "/"
		}
		// NOTE(review): this lookup uses the name as it appears in the input archive, whereas
		// the record stored further down uses the name after RenameDirs / StripPrefix / Prefix
		// have been applied. Confirm whether duplicates are meant to be detected post-rename.
		if existing, present := f.files[rf.Name]; present {
			// Allow duplicates of directories. Seemingly the best way to identify them is that
			// they end in a trailing slash.
			if isDir {
				continue
			}
			// Allow skipping existing files that are exactly the same as the added ones.
			// It's unnecessarily awkward to insist on not ever doubling up on a dependency.
			// TODO(pebers): Bit of a hack ignoring it when CRC is 0, would be better to add
			//               the correct CRC when added through WriteFile.
			if existing.CRC32 == rf.CRC32 || existing.CRC32 == 0 {
				log.Info("Skipping %s / %s: already added (from %s)", filepath, rf.Name, existing.ZipFile)
				continue
			}
			if f.Strict {
				log.Error("Duplicate file %s (from %s, already added from %s); crc %d / %d", rf.Name, filepath, existing.ZipFile, rf.CRC32, existing.CRC32)
				return fmt.Errorf("File %s already added to destination zip file (from %s)", rf.Name, existing.ZipFile)
			}
			// Not strict: the entry written first wins and the differing duplicate is dropped.
			continue
		}
		// Apply at most one directory rename. Map iteration order is unspecified, so if
		// several keys are prefixes of the same name, which one applies is not deterministic.
		for before, after := range f.RenameDirs {
			if strings.HasPrefix(rf.Name, before) {
				rf.Name = path.Join(after, strings.TrimPrefix(rf.Name, before))
				// path.Join cleans the result, which drops the trailing slash; restore it.
				if isDir {
					rf.Name = rf.Name + "/"
				}
				break
			}
		}
		if f.StripPrefix != "" {
			rf.Name = strings.TrimPrefix(rf.Name, f.StripPrefix)
		}
		if f.Prefix != "" {
			rf.Name = path.Join(f.Prefix, rf.Name)
		}
		if f.StripPy && strings.HasSuffix(rf.Name, ".py") {
			pyc := rf.Name + "c"
			if f.HasExistingFile(pyc) {
				log.Debug("Skipping %s since %s exists", rf.Name, pyc)
				continue
			} else if _, present := filelist[pyc]; present {
				// NOTE(review): pyc is built from the renamed entry name but filelist holds the
				// names from the input archive, so this only matches when no renaming applied.
				log.Debug("Skipping %s since %s exists in this archive", rf.Name, pyc)
				continue
			}
		}
		// Java tools don't seem to like writing a data descriptor for stored items.
		// Unsure if this is a limitation of the format or a problem of those tools.
		rf.Flags = 0
		f.addExistingFile(rf.Name, filepath, rf.CompressedSize64, rf.UncompressedSize64, rf.CRC32)

		// Position the raw reader at the start of this entry's (still compressed) data;
		// a whence of 0 means the offset is relative to the start of the file.
		start, err := rf.DataOffset()
		if err != nil {
			return err
		}
		if _, err := r2.Seek(start, 0); err != nil {
			return err
		}
		// Copy the compressed bytes verbatim, reusing the CRC recorded in the input archive.
		if err := f.addFile(&rf.FileHeader, r2, rf.CRC32); err != nil {
			return err
		}
	}
	return nil
}
   219  
   220  // walk is a callback to walk a file tree and add all files found in it.
   221  func (f *File) walk(path string, isDir bool, mode os.FileMode) error {
   222  	if path != f.input && (mode&os.ModeSymlink) != 0 {
   223  		if resolved, err := filepath.EvalSymlinks(path); err != nil {
   224  			return err
   225  		} else if isDir {
   226  			// TODO(peterebden): Is this case still needed?
   227  			return fs.WalkMode(resolved, f.walk)
   228  		}
   229  	}
   230  	if samePaths(path, f.filename) {
   231  		return nil
   232  	} else if !isDir {
   233  		if !f.matchesSuffix(path, f.ExcludeSuffix) {
   234  			if f.matchesSuffix(path, f.Suffix) {
   235  				log.Debug("Adding zip file %s", path)
   236  				if err := f.AddZipFile(path); err != nil {
   237  					return fmt.Errorf("Error adding %s to zipfile: %s", path, err)
   238  				}
   239  			} else if f.IncludeOther && !f.HasExistingFile(path) {
   240  				if f.StripPy && strings.HasSuffix(path, ".py") && f.HasExistingFile(path+"c") {
   241  					log.Debug("Skipping %s since %sc exists", path, path)
   242  					return nil
   243  				}
   244  				log.Debug("Including existing non-zip file %s", path)
   245  				if info, err := os.Lstat(path); err != nil {
   246  					return err
   247  				} else if b, err := ioutil.ReadFile(path); err != nil {
   248  					return fmt.Errorf("Error reading %s to zipfile: %s", path, err)
   249  				} else if err := f.StripBytecodeTimestamp(path, b); err != nil {
   250  					return err
   251  				} else if err := f.WriteFile(path, b, info.Mode()&os.ModePerm); err != nil {
   252  					return err
   253  				}
   254  			}
   255  		}
   256  	} else if (len(f.Suffix) == 0 || f.AddInitPy) && path != "." && f.DirEntries { // Only add directory entries in "dumb" mode.
   257  		log.Debug("Adding directory entry %s/", path)
   258  		if err := f.WriteDir(path); err != nil {
   259  			return err
   260  		}
   261  	}
   262  	return nil
   263  }
   264  
   265  // samePaths returns true if two paths are the same (taking relative/absolute paths into account).
   266  func samePaths(a, b string) bool {
   267  	if path.IsAbs(a) && path.IsAbs(b) {
   268  		return a == b
   269  	}
   270  	wd, _ := os.Getwd()
   271  	if !path.IsAbs(a) {
   272  		a = path.Join(wd, a)
   273  	}
   274  	if !path.IsAbs(b) {
   275  		b = path.Join(wd, b)
   276  	}
   277  	return a == b
   278  }
   279  
// AddFiles walks the given directory and adds any zip files (determined by suffix) that it finds within.
// The root is remembered in f.input so that the walk callback can recognise it, and the
// traversal itself is delegated to fs.WalkMode with f.walk as the per-entry callback.
// It returns whatever error the walk returns.
func (f *File) AddFiles(in string) error {
	f.input = in
	return fs.WalkMode(in, f.walk)
}
   285  
   286  // shouldExcludeSuffix returns true if the given filename has a suffix that should be excluded.
   287  func (f *File) matchesSuffix(path string, suffixes []string) bool {
   288  	for _, suffix := range suffixes {
   289  		if suffix != "" && strings.HasSuffix(path, suffix) {
   290  			return true
   291  		}
   292  	}
   293  	return false
   294  }
   295  
   296  // shouldInclude returns true if the given filename should be included according to the include / exclude sets of this File.
   297  func (f *File) shouldInclude(name string) bool {
   298  	for _, excl := range f.Exclude {
   299  		if matched, _ := filepath.Match(excl, name); matched {
   300  			log.Debug("Skipping %s (excluded by %s)", name, excl)
   301  			return false
   302  		} else if matched, _ := filepath.Match(excl, filepath.Base(name)); matched {
   303  			log.Debug("Skipping %s (excluded by %s)", name, excl)
   304  			return false
   305  		}
   306  	}
   307  	if len(f.Include) == 0 {
   308  		return true
   309  	}
   310  	for _, incl := range f.Include {
   311  		if matched, _ := filepath.Match(incl, name); matched || strings.HasPrefix(name, incl) {
   312  			return true
   313  		}
   314  	}
   315  	log.Debug("Skipping %s (didn't match any includes)", name)
   316  	return false
   317  }
   318  
   319  // AddInitPyFiles adds an __init__.py file to every directory in the zip file that doesn't already have one.
   320  func (f *File) AddInitPyFiles() error {
   321  	s := make([]string, 0, len(f.files))
   322  	sos := map[string]struct{}{}
   323  	for p := range f.files {
   324  		s = append(s, p)
   325  		// We use this to check that we don't shadow files that look importable.
   326  		if strings.HasSuffix(p, ".so") {
   327  			p = strings.TrimSuffix(p, ".so")
   328  			if idx := strings.LastIndex(p, ".cpython-"); idx != -1 {
   329  				p = p[:idx]
   330  			}
   331  			sos[p] = struct{}{}
   332  		}
   333  	}
   334  	sort.Strings(s)
   335  	for _, p := range s {
   336  		for d := filepath.Dir(p); d != "."; d = filepath.Dir(d) {
   337  			if filepath.Base(d) == "__pycache__" {
   338  				break // Don't need to add an __init__.py here.
   339  			}
   340  			initPyPath := path.Join(d, "__init__.py")
   341  			// Don't write one at the root, it's not necessary.
   342  			if _, present := f.files[initPyPath]; present || initPyPath == "__init__.py" {
   343  				break
   344  			} else if _, present := f.files[initPyPath+"c"]; present {
   345  				// If we already have a pyc / pyo we don't need the __init__.py as well.
   346  				break
   347  			} else if _, present := f.files[initPyPath+"o"]; present {
   348  				break
   349  			} else if _, present := f.files[d+".py"]; present {
   350  				break
   351  			} else if _, present := sos[d]; present {
   352  				break
   353  			}
   354  			log.Debug("Adding %s", initPyPath)
   355  			f.files[initPyPath] = fileRecord{}
   356  			if err := f.WriteFile(initPyPath, []byte{}, 0644); err != nil {
   357  				return err
   358  			}
   359  		}
   360  	}
   361  	return nil
   362  }
   363  
   364  // AddManifest adds a manifest to the given zip writer with a Main-Class entry (and a couple of others)
   365  func (f *File) AddManifest(mainClass string) error {
   366  	manifest := fmt.Sprintf("Manifest-Version: 1.0\nMain-Class: %s\n", mainClass)
   367  	return f.WriteFile("META-INF/MANIFEST.MF", []byte(manifest), 0644)
   368  }
   369  
   370  // HasExistingFile returns true if the writer has already written the given file.
   371  func (f *File) HasExistingFile(name string) bool {
   372  	_, present := f.files[name]
   373  	return present
   374  }
   375  
   376  // addExistingFile adds a record for an existing file, although doesn't write any contents.
   377  func (f *File) addExistingFile(name, file string, compressedSize, uncompressedSize uint64, crc uint32) {
   378  	f.files[name] = fileRecord{file, compressedSize, uncompressedSize, crc}
   379  }
   380  
   381  // concatenateFile adds a file to the zip which is concatenated with any existing content with the same name.
   382  // Writing is deferred since we obviously can't append to it later.
   383  func (f *File) concatenateFile(zf *zip.File) error {
   384  	r, err := zf.Open()
   385  	if err != nil {
   386  		return err
   387  	}
   388  	defer r.Close()
   389  	var buf bytes.Buffer
   390  	if _, err := io.Copy(&buf, r); err != nil {
   391  		return err
   392  	}
   393  	contents := buf.Bytes()
   394  	if !bytes.HasSuffix(contents, []byte{'\n'}) {
   395  		contents = append(contents, '\n')
   396  	}
   397  	f.concatenatedFiles[zf.Name] = append(f.concatenatedFiles[zf.Name], contents...)
   398  	return nil
   399  }
   400  
   401  // handleConcatenatedFiles appends concatenated files to the archive's directory for writing.
   402  func (f *File) handleConcatenatedFiles() error {
   403  	// Must do it in a deterministic order
   404  	files := make([]string, 0, len(f.concatenatedFiles))
   405  	for name := range f.concatenatedFiles {
   406  		files = append(files, name)
   407  	}
   408  	sort.Strings(files)
   409  	for _, name := range files {
   410  		if err := f.WriteFile(name, f.concatenatedFiles[name], 0644); err != nil {
   411  			return err
   412  		}
   413  	}
   414  	return nil
   415  }
   416  
   417  // addFile writes a file to the new writer.
   418  func (f *File) addFile(fh *zip.FileHeader, r io.Reader, crc uint32) error {
   419  	f.align(fh)
   420  	fh.Flags = 0 // we're not writing a data descriptor after the file
   421  	comp := func(w io.Writer) (io.WriteCloser, error) { return nopCloser{w}, nil }
   422  	fh.SetModTime(modTime)
   423  	fw, err := f.w.CreateHeaderWithCompressor(fh, comp, fixedCrc32{value: crc})
   424  	if err == nil {
   425  		_, err = io.CopyN(fw, r, int64(fh.CompressedSize64))
   426  	}
   427  	return err
   428  }
   429  
   430  // WriteFile writes a complete file to the writer.
   431  func (f *File) WriteFile(filename string, data []byte, mode os.FileMode) error {
   432  	filename = path.Join(f.Prefix, filename)
   433  	fh := zip.FileHeader{
   434  		Name:   filename,
   435  		Method: zip.Deflate,
   436  	}
   437  	fh.SetMode(mode)
   438  	fh.SetModTime(modTime)
   439  
   440  	for _, ext := range f.StoreSuffix {
   441  		if strings.HasSuffix(filename, ext) {
   442  			fh.Method = zip.Store
   443  			break
   444  		}
   445  	}
   446  
   447  	f.align(&fh)
   448  	if fw, err := f.w.CreateHeader(&fh); err != nil {
   449  		return err
   450  	} else if _, err := fw.Write(data); err != nil {
   451  		return err
   452  	}
   453  	f.addExistingFile(filename, filename, 0, 0, 0)
   454  	return nil
   455  }
   456  
   457  // align writes any necessary bytes to align the next file.
   458  func (f *File) align(h *zip.FileHeader) {
   459  	if f.Align != 0 && h.Method == zip.Store {
   460  		// We have to allow space for writing the header, so we predict what the offset will be after it.
   461  		fileStart := f.w.Offset() + fileHeaderLen + len(h.Name) + len(h.Extra)
   462  		if overlap := fileStart % f.Align; overlap != 0 {
   463  			if err := f.w.WriteRaw(bytes.Repeat([]byte{0}, f.Align-overlap)); err != nil {
   464  				log.Error("Failed to pad file: %s", err)
   465  			}
   466  		}
   467  	}
   468  }
   469  
   470  // WriteDir writes a directory entry to the writer.
   471  func (f *File) WriteDir(filename string) error {
   472  	filename = path.Join(f.Prefix, filename)
   473  	filename += "/" // Must have trailing slash to tell it it's a directory.
   474  	fh := zip.FileHeader{
   475  		Name:   filename,
   476  		Method: zip.Store,
   477  	}
   478  	fh.SetModTime(modTime)
   479  	if _, err := f.w.CreateHeader(&fh); err != nil {
   480  		return err
   481  	}
   482  	f.addExistingFile(filename, filename, 0, 0, 0)
   483  	return nil
   484  }
   485  
// WritePreamble writes a preamble to the zipfile.
// The bytes are passed unchanged to the zip writer's WriteRaw, i.e. they are emitted
// as-is rather than as an archive entry. It returns the error from WriteRaw.
func (f *File) WritePreamble(preamble []byte) error {
	return f.w.WriteRaw(preamble)
}
   490  
   491  // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file.
   492  // This is important so our output is deterministic.
   493  func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error {
   494  	if strings.HasSuffix(filename, ".pyc") || strings.HasSuffix(filename, ".pyo") {
   495  		if len(contents) < 12 {
   496  			log.Warning("Invalid bytecode file, will not strip timestamp")
   497  		} else if f.isPy37(contents) {
   498  			// Check whether this is hash verified. This is probably unlikely since we don't
   499  			// pass appropriate flags but at this point it doesn't hurt to check.
   500  			if (contents[4] & 1) != 0 {
   501  				// Is hash verified. It should never be checked though.
   502  				contents[4] &^= 2
   503  			} else {
   504  				// Timestamp verified, zero it out.
   505  				f.zeroPycTimestamp(contents, 8)
   506  			}
   507  		} else {
   508  			// The .pyc format starts with a two-byte magic number, a \r\n, then a four-byte
   509  			// timestamp. It is that timestamp we are interested in; we overwrite it with
   510  			// the same mtime we use in the zipfile directory (it's important that it is
   511  			// deterministic, but also that it matches, otherwise zipimport complains).
   512  			f.zeroPycTimestamp(contents, 4)
   513  		}
   514  	}
   515  	return nil
   516  }
   517  
   518  // isPy37 determines if the leading magic number in a .pyc corresponds to Python 3.7.
   519  // This is important to us because the structure changed (see PEP 552) and we have to handle that.
   520  func (f *File) isPy37(b []byte) bool {
   521  	i := (int(b[1]) << 8) + int(b[0])
   522  	// Python 2 versions use magic numbers in the 20-60,000 range. Ensure it's not one of them.
   523  	return i >= 3394 && i < 10000
   524  }
   525  
   526  // zeroPycTimestamp zeroes out a .pyc timestamp at a given offset.
   527  func (f *File) zeroPycTimestamp(contents []byte, offset int) {
   528  	var buf bytes.Buffer
   529  	binary.Write(&buf, binary.LittleEndian, modTime.Unix())
   530  	b := buf.Bytes()
   531  	contents[offset+0] = b[0]
   532  	contents[offset+1] = b[1]
   533  	contents[offset+2] = b[2]
   534  	contents[offset+3] = b[3]
   535  }
   536  
   537  type nopCloser struct {
   538  	io.Writer
   539  }
   540  
   541  func (w nopCloser) Close() error {
   542  	return nil
   543  }
   544  
   545  // fixedCrc32 implements a Hash32 interface that just writes out a predetermined value.
   546  // this is really cheating of course but serves our purposes here.
   547  type fixedCrc32 struct {
   548  	value uint32
   549  }
   550  
   551  func (crc fixedCrc32) Write(p []byte) (n int, err error) {
   552  	return len(p), nil
   553  }
   554  
   555  func (crc fixedCrc32) Sum(b []byte) []byte {
   556  	buf := make([]byte, 4)
   557  	binary.LittleEndian.PutUint32(buf, crc.value)
   558  	return b
   559  }
   560  
   561  func (crc fixedCrc32) Sum32() uint32 {
   562  	return crc.value
   563  }
   564  
   565  func (crc fixedCrc32) Reset() {
   566  }
   567  
   568  func (crc fixedCrc32) Size() int {
   569  	return 32
   570  }
   571  
   572  func (crc fixedCrc32) BlockSize() int {
   573  	return 32
   574  }