// Originally published as
// github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/internal/file/zip_read_closer.go
// (package file; imports archive/zip, encoding/binary, errors, fmt, io, os).

// directoryEndLen, readBuf, directoryEnd, and findSignatureInBlock were copied from the golang stdlib, specifically:
//   - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/struct.go
//   - https://github.com/golang/go/blob/go1.16.4/src/archive/zip/reader.go
//
// findArchiveStartOffset is derived from the same stdlib utils, specifically the readDirectoryEnd function.

const (
	directoryEndLen         = 22         // length in bytes of the end-of-central-directory record, excluding the comment
	directory64LocLen       = 20         // length in bytes of the zip64 end-of-central-directory locator
	directory64EndLen       = 56         // number of bytes read from the zip64 end-of-central-directory record
	directory64LocSignature = 0x07064b50 // signature expected at the start of the zip64 locator
	directory64EndSignature = 0x06064b50 // signature expected at the start of the zip64 directory end record
)

// ZipReadCloser is a drop-in replacement for zip.ReadCloser (from zip.OpenReader) that additionally considers zips
// that have bytes prefixed to the front of the archive (common with self-extracting jars).
type ZipReadCloser struct {
	*zip.Reader
	io.Closer
}

// OpenZip provides a ZipReadCloser for the given filepath.
//
// On success the returned ZipReadCloser owns the underlying file and the caller must Close it. On any failure the
// file is closed before the error is returned, so no handle is leaked.
func OpenZip(filepath string) (*ZipReadCloser, error) {
	f, err := os.Open(filepath)
	if err != nil {
		return nil, err
	}

	r, err := newZipReader(f, filepath)
	if err != nil {
		// The caller never receives the handle on failure, so release it here. The close error is dropped on
		// purpose: the error that caused the failure is the one worth reporting.
		_ = f.Close()
		return nil, err
	}

	return &ZipReadCloser{
		Reader: r,
		Closer: f,
	}, nil
}

// newZipReader locates the start of the zip archive inside f and returns a zip.Reader over that section only.
// It never closes f; filepath is used solely for error messages.
func newZipReader(f *os.File, filepath string) (*zip.Reader, error) {
	fi, err := f.Stat()
	if err != nil {
		return nil, err
	}

	// some archives may have bytes prepended to the front of the archive, such as with self executing JARs. We first
	// need to find the start of the archive and keep track of this offset.
	offset, err := findArchiveStartOffset(f, fi.Size())
	if err != nil {
		return nil, fmt.Errorf("cannot find beginning of zip archive=%q : %w", filepath, err)
	}

	if _, err := f.Seek(0, io.SeekStart); err != nil {
		return nil, fmt.Errorf("unable to seek to beginning of archive: %w", err)
	}

	size := fi.Size() - int64(offset)

	r, err := zip.NewReader(io.NewSectionReader(f, int64(offset), size), size)
	if err != nil {
		return nil, fmt.Errorf("unable to open ZipReadCloser @ %q: %w", filepath, err)
	}
	return r, nil
}

// readBuf is a byte slice that is consumed from the front as little-endian integers are decoded from it.
type readBuf []byte

// uint16 decodes a little-endian uint16 from the front of b and advances b by 2 bytes.
func (b *readBuf) uint16() uint16 {
	v := binary.LittleEndian.Uint16(*b)
	*b = (*b)[2:]
	return v
}

// uint32 decodes a little-endian uint32 from the front of b and advances b by 4 bytes.
func (b *readBuf) uint32() uint32 {
	v := binary.LittleEndian.Uint32(*b)
	*b = (*b)[4:]
	return v
}

// uint64 decodes a little-endian uint64 from the front of b and advances b by 8 bytes.
func (b *readBuf) uint64() uint64 {
	v := binary.LittleEndian.Uint64(*b)
	*b = (*b)[8:]
	return v
}

// directoryEnd holds the fields decoded from the (zip or zip64) end-of-central-directory record.
type directoryEnd struct {
	diskNbr            uint32 // unused
	dirDiskNbr         uint32 // unused
	dirRecordsThisDisk uint64 // unused
	directoryRecords   uint64
	directorySize      uint64
	directoryOffset    uint64 // relative to file
}

// findArchiveStartOffset returns the offset within r (which holds size bytes) at which the zip archive begins,
// i.e. the number of bytes that were prepended in front of the archive.
//
// The offset is computed as (position of the directory end record) - (central directory size) - (central
// directory offset). zip.ErrFormat is returned when no directory end record is found or when the recorded size
// and offset cannot both fit in front of the directory end record.
//
// note: this is derived from readDirectoryEnd within the archive/zip package
func findArchiveStartOffset(r io.ReaderAt, size int64) (startOfArchive uint64, err error) {
	buf, directoryEndOffset, err := findDirectoryEnd(r, size)
	if err != nil {
		return 0, err
	}

	// read header into struct
	b := readBuf(buf[4:]) // skip signature
	d := &directoryEnd{
		diskNbr:            uint32(b.uint16()),
		dirDiskNbr:         uint32(b.uint16()),
		dirRecordsThisDisk: uint64(b.uint16()),
		directoryRecords:   uint64(b.uint16()),
		directorySize:      uint64(b.uint32()),
		directoryOffset:    uint64(b.uint32()),
	}

	// These values mean that the file can be a zip64 file
	if d.directoryRecords == 0xffff || d.directorySize == 0xffff || d.directoryOffset == 0xffffffff {
		// NOTE(review): the locator offset is used as an offset into r as-is; confirm whether zip64 archives
		// with prepended bytes are expected to be supported here.
		p, err := findDirectory64End(r, directoryEndOffset)
		if err == nil && p >= 0 {
			directoryEndOffset = p
			err = readDirectory64End(r, p, d)
		}
		if err != nil {
			return 0, err
		}
	}

	// Make sure directoryOffset points to somewhere in our file.
	if o := int64(d.directoryOffset); o < 0 || o >= size {
		return 0, zip.ErrFormat
	}

	// The central directory must fit in front of the directory end record. Checking before subtracting keeps the
	// unsigned arithmetic from wrapping around and yielding a huge bogus offset for malformed input.
	end := uint64(directoryEndOffset)
	if d.directorySize > end || d.directoryOffset > end-d.directorySize {
		return 0, zip.ErrFormat
	}

	return end - d.directorySize - d.directoryOffset, nil
}

// findDirectoryEnd looks for the directory end signature in the last 1k, then in the last 65k of r. It returns
// the bytes starting at the signature together with the signature's offset within r, or zip.ErrFormat when no
// signature is found.
func findDirectoryEnd(r io.ReaderAt, size int64) ([]byte, int64, error) {
	for i, bLen := range []int64{1024, 65 * 1024} {
		if bLen > size {
			bLen = size
		}
		buf := make([]byte, int(bLen))
		if _, err := r.ReadAt(buf, size-bLen); err != nil && !errors.Is(err, io.EOF) {
			return nil, 0, err
		}
		if p := findSignatureInBlock(buf); p >= 0 {
			return buf[p:], size - bLen + int64(p), nil
		}
		if i == 1 || bLen == size {
			// either the widest window was searched or the whole file was already covered
			break
		}
	}
	return nil, 0, zip.ErrFormat
}

// findDirectory64End tries to read the zip64 locator just before the
// directory end and returns the offset of the zip64 directory end if
// found. It returns -1 with a nil error when no valid locator is present.
func findDirectory64End(r io.ReaderAt, directoryEndOffset int64) (int64, error) {
	locOffset := directoryEndOffset - directory64LocLen
	if locOffset < 0 {
		return -1, nil // no need to look for a header outside the file
	}
	buf := make([]byte, directory64LocLen)
	if _, err := r.ReadAt(buf, locOffset); err != nil {
		return -1, err
	}
	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64LocSignature {
		return -1, nil
	}
	if b.uint32() != 0 { // number of the disk with the start of the zip64 end of central directory
		return -1, nil // the file is not a valid zip64-file
	}
	p := b.uint64()      // relative offset of the zip64 end of central directory record
	if b.uint32() != 1 { // total number of disks
		return -1, nil // the file is not a valid zip64-file
	}
	return int64(p), nil
}

// readDirectory64End reads the zip64 directory end and updates the
// directory end with the zip64 directory end values.
func readDirectory64End(r io.ReaderAt, offset int64, d *directoryEnd) (err error) {
	buf := make([]byte, directory64EndLen)
	if _, err := r.ReadAt(buf, offset); err != nil {
		return err
	}

	b := readBuf(buf)
	if sig := b.uint32(); sig != directory64EndSignature {
		return errors.New("could not read directory64End")
	}

	b = b[12:]                        // skip dir size, version and version needed (uint64 + 2x uint16)
	d.diskNbr = b.uint32()            // number of this disk
	d.dirDiskNbr = b.uint32()         // number of the disk with the start of the central directory
	d.dirRecordsThisDisk = b.uint64() // total number of entries in the central directory on this disk
	d.directoryRecords = b.uint64()   // total number of entries in the central directory
	d.directorySize = b.uint64()      // size of the central directory
	d.directoryOffset = b.uint64()    // offset of start of central directory with respect to the starting disk number

	return nil
}

// findSignatureInBlock scans b backwards for the directory end signature ("PK\x05\x06") and returns the index of
// the last occurrence whose record, including its trailing comment, fits within b. It returns -1 if none does.
func findSignatureInBlock(b []byte) int {
	for i := len(b) - directoryEndLen; i >= 0; i-- {
		// defined from directoryEndSignature
		if b[i] == 'P' && b[i+1] == 'K' && b[i+2] == 0x05 && b[i+3] == 0x06 {
			// n is length of comment
			n := int(b[i+directoryEndLen-2]) | int(b[i+directoryEndLen-1])<<8
			if n+directoryEndLen+i <= len(b) {
				return i
			}
		}
	}
	return -1
}