github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/internal/file/zip_read_closer.go (about)

     1  package file
     2  
     3  import (
     4  	"archive/zip"
     5  	"encoding/binary"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  )
    11  
    12  // directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
    13  // - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
    14  // - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
    15  // findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.
    16  
// Record lengths and signatures used while scanning the tail of a file for the zip end-of-central-directory
// structures.
const (
	directoryEndLen         = 22         // length of the fixed part of the directory end record (its last two bytes hold the comment length)
	directory64LocLen       = 20         // number of bytes read for the zip64 directory end locator
	directory64EndLen       = 56         // number of bytes read for the zip64 directory end record
	directory64LocSignature = 0x07064b50 // signature expected at the start of the zip64 locator
	directory64EndSignature = 0x06064b50 // signature expected at the start of the zip64 directory end record
)
    24  
// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
//
// Both fields are embedded, so the zip.Reader API and Close are available directly on the value.
type ZipReadCloser struct {
	*zip.Reader // reader over the archive; OpenZip builds it on a section of the file that starts at the archive offset
	io.Closer   // releases the underlying resource; OpenZip sets this to the opened file
}
    31  
    32  // OpenZip provides a ZipReadCloser for the given filepath.
    33  func OpenZip(filepath string) (*ZipReadCloser, error) {
    34  	f, err := os.Open(filepath)
    35  	if err != nil {
    36  		return nil, err
    37  	}
    38  	fi, err := f.Stat()
    39  	if err != nil {
    40  		f.Close()
    41  		return nil, err
    42  	}
    43  
    44  	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
    45  	// need to find the start of the archive and keep track of this offset.
    46  	offset, err := findArchiveStartOffset(f, fi.Size())
    47  	if err != nil {
    48  		return nil, fmt.Errorf("cannot find beginning of zip archive=%q : %w", filepath, err)
    49  	}
    50  
    51  	if _, err := f.Seek(0, io.SeekStart); err != nil {
    52  		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
    53  	}
    54  
    55  	size := fi.Size() - int64(offset)
    56  
    57  	r, err := zip.NewReader(io.NewSectionReader(f, int64(offset), size), size)
    58  	if err != nil {
    59  		return nil, fmt.Errorf("unable to open ZipReadCloser @ %q: %w", filepath, err)
    60  	}
    61  
    62  	return &ZipReadCloser{
    63  		Reader: r,
    64  		Closer: f,
    65  	}, nil
    66  }
    67  
    68  type readBuf []byte
    69  
    70  func (b *readBuf) uint16() uint16 {
    71  	v := binary.LittleEndian.Uint16(*b)
    72  	*b = (*b)[2:]
    73  	return v
    74  }
    75  
    76  func (b *readBuf) uint32() uint32 {
    77  	v := binary.LittleEndian.Uint32(*b)
    78  	*b = (*b)[4:]
    79  	return v
    80  }
    81  
    82  func (b *readBuf) uint64() uint64 {
    83  	v := binary.LittleEndian.Uint64(*b)
    84  	*b = (*b)[8:]
    85  	return v
    86  }
    87  
// directoryEnd holds the values decoded from a zip directory end record. findArchiveStartOffset fills it from the
// classic record and readDirectory64End overwrites every field with the zip64 values when that record is present.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64 // number of central directory entries; 0xffff makes findArchiveStartOffset look for zip64 data
	directorySize      uint64 // size of the central directory in bytes
	directoryOffset    uint64 // offset of the central directory as recorded in the archive
}
    96  
    97  // note: this is derived from readDirectoryEnd within the archive/zip package
    98  //
    99  //nolint:gocognit
   100  func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
   101  	// look for directoryEndSignature in the last 1k, then in the last 65k
   102  	var buf []byte
   103  	var directoryEndOffset int64
   104  	for i, bLen := range []int64{1024, 65 * 1024} {
   105  		if bLen > size {
   106  			bLen = size
   107  		}
   108  		buf = make([]byte, int(bLen))
   109  		if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
   110  			return 0, err
   111  		}
   112  		if p := findSignatureInBlock(buf); p >= 0 {
   113  			buf = buf[p:]
   114  			directoryEndOffset = size - bLen + int64(p)
   115  			break
   116  		}
   117  		if i == 1 || bLen == size {
   118  			return 0, zip.ErrFormat
   119  		}
   120  	}
   121  
   122  	if buf == nil {
   123  		// we were unable to find the directoryEndSignature block
   124  		return 0, zip.ErrFormat
   125  	}
   126  
   127  	// read header into struct
   128  	b := readBuf(buf[4:]) // skip signature
   129  	d := &directoryEnd{
   130  		diskNbr:            uint32(b.uint16()),
   131  		dirDiskNbr:         uint32(b.uint16()),
   132  		dirRecordsThisDisk: uint64(b.uint16()),
   133  		directoryRecords:   uint64(b.uint16()),
   134  		directorySize:      uint64(b.uint32()),
   135  		directoryOffset:    uint64(b.uint32()),
   136  	}
   137  	// Calculate where the zip data actually begins
   138  
   139  	// These values mean that the file can be a zip64 file
   140  	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
   141  		p, err := findDirectory64End(r, directoryEndOffset)
   142  		if err == nil && p >= 0 {
   143  			directoryEndOffset = p
   144  			err = readDirectory64End(r, p, d)
   145  		}
   146  		if err != nil {
   147  			return 0, err
   148  		}
   149  	}
   150  	startOfArchive = uint64(directoryEndOffset) - d.directorySize - d.directoryOffset
   151  
   152  	// Make sure directoryOffset points to somewhere in our file.
   153  	if o := int64(d.directoryOffset); o < 0 || o >= size {
   154  		return 0, zip.ErrFormat
   155  	}
   156  	return startOfArchive, nil
   157  }
   158  
   159  // findDirectory64End tries to read the zip64 locator just before the
   160  // directory end and returns the offset of the zip64 directory end if
   161  // found.
   162  func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
   163  	locOffset := directoryEndOffset - directory64LocLen
   164  	if locOffset < 0 {
   165  		return -1, nil // no need to look for a header outside the file
   166  	}
   167  	buf := make([]byte, directory64LocLen)
   168  	if _, err := r.ReadAt(buf, locOffset); err != nil {
   169  		return -1, err
   170  	}
   171  	b := readBuf(buf)
   172  	if sig := b.uint32(); sig != directory64LocSignature {
   173  		return -1, nil
   174  	}
   175  	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
   176  		return -1, nil // the file is not a valid zip64-file
   177  	}
   178  	p := b.uint64()      // relative offset of the zip64 end of central directory record
   179  	if b.uint32() != 1 { // total number of disks
   180  		return -1, nil // the file is not a valid zip64-file
   181  	}
   182  	return int64(p), nil
   183  }
   184  
   185  // readDirectory64End reads the zip64 directory end and updates the
   186  // directory end with the zip64 directory end values.
   187  func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
   188  	buf := make([]byte, directory64EndLen)
   189  	if _, err := r.ReadAt(buf, offset); err != nil {
   190  		return err
   191  	}
   192  
   193  	b := readBuf(buf)
   194  	if sig := b.uint32(); sig != directory64EndSignature {
   195  		return errors.New("could not read directory64End")
   196  	}
   197  
   198  	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
   199  	d.diskNbr = b.uint32()            // number of this disk
   200  	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
   201  	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
   202  	d.directoryRecords = b.uint64()   // total number of entries in the central directory
   203  	d.directorySize = b.uint64()      // size of the central directory
   204  	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number
   205  
   206  	return nil
   207  }
   208  
   209  func findSignatureInBlock(b []byte) int {
   210  	for i := len(b) - directoryEndLen; i >= 0; i-- {
   211  		// defined from directoryEndSignature
   212  		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
   213  			// n is length of comment
   214  			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
   215  			if n+directoryEndLen+i <= len(b) {
   216  				return i
   217  			}
   218  		}
   219  	}
   220  	return -1
   221  }