github.com/bodgit/sevenzip@v1.5.1/reader.go (about)

     1  package sevenzip
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"hash/crc32"
    10  	"io"
    11  	"io/fs"
    12  	"os"
    13  	"path"
    14  	"path/filepath"
    15  	"sort"
    16  	"strings"
    17  	"sync"
    18  	"time"
    19  
    20  	"github.com/bodgit/plumbing"
    21  	"github.com/bodgit/sevenzip/internal/pool"
    22  	"github.com/bodgit/sevenzip/internal/util"
    23  	"github.com/hashicorp/go-multierror"
    24  	"go4.org/readerutil"
    25  )
    26  
// Sentinel errors returned by Reader.init while parsing an archive:
// errFormat when no 7-zip signature is found, errChecksum when a stored CRC
// does not match the bytes read, and errTooMuch when unparsed bytes are left
// over after the header.
var (
	errFormat   = errors.New("sevenzip: not a valid 7-zip file")
	errChecksum = errors.New("sevenzip: checksum error")
	errTooMuch  = errors.New("sevenzip: too much data")
)
    32  
// A Reader serves content from a 7-Zip archive.
type Reader struct {
	r     io.ReaderAt   // archive data source handed to init
	start int64         // offset in r where the streams data begins
	end   int64         // offset in r where the streams data ends and the header is read from
	si    *streamsInfo  // streams information taken from the parsed header
	p     string        // password passed along when building folder readers
	File  []*File       // one entry per file header, in header order
	pool  []pool.Pooler // one pool of reusable folder readers per folder

	fileListOnce sync.Once       // guards the one-time build of fileList
	fileList     []fileListEntry // sorted entries built by initFileList and searched by Open
}
    46  
// A ReadCloser is a Reader that must be closed when no longer needed.
type ReadCloser struct {
	f []*os.File // every volume opened for the archive; closed by Close
	Reader
}
    52  
// A File is a single file in a 7-Zip archive. The file information is in the
// embedded FileHeader. The file content can be accessed by calling Open.
type File struct {
	FileHeader
	zip    *Reader // archive this file belongs to
	folder int     // index of the folder holding this file's data
	offset int64   // where the file starts within that folder's output, in bytes
}
    61  
// fileReader is the handle returned by File.Open for a file that has content.
type fileReader struct {
	rc util.SizeReadSeekCloser // folder reader the content is read from; nil once closed
	f  *File                   // file being read
	n  int64                   // bytes of the file not yet returned by Read
}
    67  
    68  func (fr *fileReader) Stat() (fs.FileInfo, error) {
    69  	return headerFileInfo{&fr.f.FileHeader}, nil
    70  }
    71  
    72  func (fr *fileReader) Read(p []byte) (n int, err error) {
    73  	if len(p) == 0 {
    74  		return 0, nil
    75  	}
    76  
    77  	if fr.n <= 0 {
    78  		return 0, io.EOF
    79  	}
    80  
    81  	if int64(len(p)) > fr.n {
    82  		p = p[0:fr.n]
    83  	}
    84  
    85  	n, err = fr.rc.Read(p)
    86  	fr.n -= int64(n)
    87  
    88  	return
    89  }
    90  
    91  func (fr *fileReader) Close() error {
    92  	if fr.rc == nil {
    93  		return nil
    94  	}
    95  
    96  	offset, err := fr.rc.Seek(0, io.SeekCurrent)
    97  	if err != nil {
    98  		return err
    99  	}
   100  
   101  	if offset == fr.rc.Size() { // EOF reached
   102  		if err := fr.rc.Close(); err != nil {
   103  			return err
   104  		}
   105  	} else {
   106  		f := fr.f
   107  		if _, err := f.zip.pool[f.folder].Put(offset, fr.rc); err != nil {
   108  			return err
   109  		}
   110  	}
   111  
   112  	fr.rc = nil
   113  
   114  	return nil
   115  }
   116  
   117  // Open returns an io.ReadCloser that provides access to the File's contents.
   118  // Multiple files may be read concurrently.
   119  func (f *File) Open() (io.ReadCloser, error) {
   120  	if f.FileHeader.isEmptyStream || f.FileHeader.isEmptyFile {
   121  		// Return empty reader for directory or empty file
   122  		return io.NopCloser(bytes.NewReader(nil)), nil
   123  	}
   124  
   125  	var err error
   126  
   127  	rc, _ := f.zip.pool[f.folder].Get(f.offset)
   128  	if rc == nil {
   129  		rc, _, err = f.zip.folderReader(f.zip.si, f.folder)
   130  		if err != nil {
   131  			return nil, err
   132  		}
   133  	}
   134  
   135  	if _, err = rc.Seek(f.offset, io.SeekStart); err != nil {
   136  		return nil, err
   137  	}
   138  
   139  	return &fileReader{
   140  		rc: rc,
   141  		f:  f,
   142  		n:  int64(f.UncompressedSize),
   143  	}, nil
   144  }
   145  
   146  // OpenReaderWithPassword will open the 7-zip file specified by name using
   147  // password as the basis of the decryption key and return a ReadCloser. If
   148  // name has a ".001" suffix it is assumed there are multiple volumes and each
   149  // sequential volume will be opened.
   150  //
   151  //nolint:cyclop,funlen
   152  func OpenReaderWithPassword(name, password string) (*ReadCloser, error) {
   153  	f, err := os.Open(name)
   154  	if err != nil {
   155  		return nil, err
   156  	}
   157  
   158  	info, err := f.Stat()
   159  	if err != nil {
   160  		err = multierror.Append(err, f.Close())
   161  
   162  		return nil, err
   163  	}
   164  
   165  	var reader io.ReaderAt = f
   166  
   167  	size := info.Size()
   168  	files := []*os.File{f}
   169  
   170  	if ext := filepath.Ext(name); ext == ".001" {
   171  		sr := []readerutil.SizeReaderAt{io.NewSectionReader(f, 0, size)}
   172  
   173  		for i := 2; true; i++ {
   174  			f, err := os.Open(fmt.Sprintf("%s.%03d", strings.TrimSuffix(name, ext), i))
   175  			if err != nil {
   176  				if errors.Is(err, fs.ErrNotExist) {
   177  					break
   178  				}
   179  
   180  				for _, file := range files {
   181  					err = multierror.Append(err, file.Close())
   182  				}
   183  
   184  				return nil, err
   185  			}
   186  
   187  			files = append(files, f)
   188  
   189  			info, err = f.Stat()
   190  			if err != nil {
   191  				for _, file := range files {
   192  					err = multierror.Append(err, file.Close())
   193  				}
   194  
   195  				return nil, err
   196  			}
   197  
   198  			sr = append(sr, io.NewSectionReader(f, 0, info.Size()))
   199  		}
   200  
   201  		mr := readerutil.NewMultiReaderAt(sr...)
   202  		reader, size = mr, mr.Size()
   203  	}
   204  
   205  	r := new(ReadCloser)
   206  	r.p = password
   207  
   208  	if err := r.init(reader, size); err != nil {
   209  		for _, file := range files {
   210  			err = multierror.Append(err, file.Close())
   211  		}
   212  
   213  		return nil, err
   214  	}
   215  
   216  	r.f = files
   217  
   218  	return r, nil
   219  }
   220  
   221  // OpenReader will open the 7-zip file specified by name and return a
   222  // ReadCloser. If name has a ".001" suffix it is assumed there are multiple
   223  // volumes and each sequential volume will be opened.
   224  func OpenReader(name string) (*ReadCloser, error) {
   225  	return OpenReaderWithPassword(name, "")
   226  }
   227  
   228  // NewReaderWithPassword returns a new Reader reading from r using password as
   229  // the basis of the decryption key, which is assumed to have the given size in
   230  // bytes.
   231  func NewReaderWithPassword(r io.ReaderAt, size int64, password string) (*Reader, error) {
   232  	if size < 0 {
   233  		return nil, errors.New("sevenzip: size cannot be negative")
   234  	}
   235  
   236  	zr := new(Reader)
   237  	zr.p = password
   238  
   239  	if err := zr.init(r, size); err != nil {
   240  		return nil, err
   241  	}
   242  
   243  	return zr, nil
   244  }
   245  
   246  // NewReader returns a new Reader reading from r, which is assumed to have the
   247  // given size in bytes.
   248  func NewReader(r io.ReaderAt, size int64) (*Reader, error) {
   249  	return NewReaderWithPassword(r, size, "")
   250  }
   251  
   252  func (z *Reader) folderReader(si *streamsInfo, f int) (*folderReadCloser, uint32, error) {
   253  	// Create a SectionReader covering all of the streams data
   254  	return si.FolderReader(io.NewSectionReader(z.r, z.start, z.end-z.start), f, z.p)
   255  }
   256  
   257  const (
   258  	chunkSize   = 4096
   259  	searchLimit = 1 << 20 // 1 MiB
   260  )
   261  
   262  func findSignature(r io.ReaderAt, search []byte) ([]int64, error) {
   263  	chunk := make([]byte, chunkSize+len(search))
   264  	offsets := make([]int64, 0, 2)
   265  
   266  	for offset := int64(0); offset < searchLimit; offset += chunkSize {
   267  		n, err := r.ReadAt(chunk, offset)
   268  
   269  		for i := 0; ; {
   270  			idx := bytes.Index(chunk[i:n], search)
   271  			if idx == -1 {
   272  				break
   273  			}
   274  
   275  			offsets = append(offsets, offset+int64(i+idx))
   276  			if offsets[0] == 0 {
   277  				// If signature is at the beginning, return immediately, it's a regular archive
   278  				return offsets, nil
   279  			}
   280  
   281  			i += idx + 1
   282  		}
   283  
   284  		if err != nil {
   285  			if errors.Is(err, io.EOF) {
   286  				break
   287  			}
   288  
   289  			return nil, err
   290  		}
   291  	}
   292  
   293  	return offsets, nil
   294  }
   295  
// init locates and parses the archive header in r, which holds size bytes,
// then fills in z.File and creates one reader pool per folder.
//
// It returns errFormat when no signature is found, errChecksum when a stored
// CRC does not match the bytes read, errUnexpectedID when the header starts
// with an ID other than a plain or encoded header, and errTooMuch when bytes
// remain after the header has been parsed. Read and decode errors are passed
// through unchanged.
//
//nolint:cyclop,funlen,gocognit,gocyclo
func (z *Reader) init(r io.ReaderAt, size int64) error {
	// The CRC checks below read through tra and then inspect h, so tra is
	// presumably feeding every byte it reads into h.
	h := crc32.NewIEEE()
	tra := plumbing.TeeReaderAt(r, h)

	signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c}

	offsets, err := findSignature(r, signature)
	if err != nil {
		return err
	}

	if len(offsets) == 0 {
		return errFormat
	}

	var (
		sr    *io.SectionReader
		off   int64
		start startHeader
	)

	// Try each candidate signature in turn; the first one whose start header
	// passes its CRC check wins. off, sr and start keep the values from the
	// iteration that broke out of the loop.
	for _, off = range offsets {
		sr = io.NewSectionReader(tra, off, size-off) // Will only read first 32 bytes

		var sh signatureHeader
		if err = binary.Read(sr, binary.LittleEndian, &sh); err != nil {
			return err
		}

		z.r = r

		// Only the start header is covered by sh.CRC, so discard what was
		// hashed while reading the signature header.
		h.Reset()

		if err = binary.Read(sr, binary.LittleEndian, &start); err != nil {
			return err
		}

		// CRC of the start header should match
		if util.CRC32Equal(h.Sum(nil), sh.CRC) {
			break
		}

		// Remembered in case this was the last candidate.
		err = errChecksum
	}

	if err != nil {
		return err
	}

	// Work out where we are in the file (32, avoiding magic numbers)
	if z.start, err = sr.Seek(0, io.SeekCurrent); err != nil {
		return err
	}

	// Seek over the streams
	if z.end, err = sr.Seek(int64(start.Offset), io.SeekCurrent); err != nil {
		return err
	}

	// sr positions are relative to off; turn them into offsets within r.
	z.start += off
	z.end += off

	h.Reset()

	// Bound bufio.Reader otherwise it can read trailing garbage which screws up the CRC check
	br := bufio.NewReader(io.NewSectionReader(tra, z.end, int64(start.Size)))

	id, err := br.ReadByte()
	if err != nil {
		return err
	}

	var (
		header      *header
		streamsInfo *streamsInfo
	)

	// The first byte says whether the header follows directly or whether
	// what follows only describes how to decode it.
	switch id {
	case idHeader:
		if header, err = readHeader(br); err != nil {
			return err
		}
	case idEncodedHeader:
		if streamsInfo, err = readStreamsInfo(br); err != nil {
			return err
		}
	default:
		return errUnexpectedID
	}

	// If there's more data to read, we've not parsed this correctly. This
	// won't break with trailing data as the bufio.Reader was bounded
	if n, _ := io.CopyN(io.Discard, br, 1); n != 0 {
		return errTooMuch
	}

	// CRC should match the one from the start header
	if !util.CRC32Equal(h.Sum(nil), start.CRC) {
		return errChecksum
	}

	// If the header was encoded we should have sufficient information now
	// to decode it
	if streamsInfo != nil {
		if streamsInfo.Folders() != 1 {
			return errors.New("sevenzip: expected only one folder in header stream")
		}

		fr, crc, err := z.folderReader(streamsInfo, 0)
		if err != nil {
			return err
		}
		defer fr.Close()

		if header, err = readEncodedHeader(util.ByteReadCloser(fr)); err != nil {
			return err
		}

		// A zero crc is treated as "nothing to verify".
		if crc != 0 && !util.CRC32Equal(fr.Checksum(), crc) {
			return errChecksum
		}
	}

	z.si = header.streamsInfo

	// folder and offset track the folder of the previous file with content
	// and the running position within it; j counts files with content and is
	// the index handed to FileFolderAndSize.
	folder, offset := 0, int64(0)
	z.File = make([]*File, 0, len(header.filesInfo.file))
	j := 0

	filesPerStream := make(map[int]int, z.si.Folders())

	for _, fh := range header.filesInfo.file {
		f := new(File)
		f.zip = z
		f.FileHeader = fh

		// Directory names always end in a slash.
		if f.FileHeader.FileInfo().IsDir() && !strings.HasSuffix(f.FileHeader.Name, "/") {
			f.FileHeader.Name += "/"
		}

		if !fh.isEmptyStream && !fh.isEmptyFile {
			f.folder, _ = header.streamsInfo.FileFolderAndSize(j)

			// Make an exported copy of the folder index
			f.Stream = f.folder

			filesPerStream[f.folder]++

			// Offsets restart from zero whenever the folder changes.
			if f.folder != folder {
				offset = 0
			}

			f.offset = offset
			offset += int64(f.UncompressedSize)
			folder = f.folder
			j++
		}

		z.File = append(z.File, f)
	}

	// Folders holding more than one file get a pool created by pool.NewPool;
	// every other folder gets one created by pool.NewNoopPool.
	z.pool = make([]pool.Pooler, z.si.Folders())
	for i := range z.pool {
		var newPool pool.Constructor = pool.NewNoopPool

		if filesPerStream[i] > 1 {
			newPool = pool.NewPool
		}

		if z.pool[i], err = newPool(); err != nil {
			return err
		}
	}

	return nil
}
   477  
   478  // Volumes returns the list of volumes that have been opened as part of the current archive.
   479  func (rc *ReadCloser) Volumes() []string {
   480  	volumes := make([]string, len(rc.f))
   481  	for idx, f := range rc.f {
   482  		volumes[idx] = f.Name()
   483  	}
   484  
   485  	return volumes
   486  }
   487  
   488  // Close closes the 7-zip file or volumes, rendering them unusable for I/O.
   489  func (rc *ReadCloser) Close() error {
   490  	var err *multierror.Error
   491  	for _, f := range rc.f {
   492  		err = multierror.Append(err, f.Close())
   493  	}
   494  
   495  	return err.ErrorOrNil()
   496  }
   497  
// fileListEntry is one file or directory in the list built by initFileList.
type fileListEntry struct {
	name  string // cleaned, slash-separated path of the entry
	file  *File  // archive file behind the entry; nil for a directory only implied by a path
	isDir bool   // entry is a directory
	isDup bool   // the same name occurs more than once in the archive
}
   504  
// fileInfoDirEntry combines fs.FileInfo and fs.DirEntry, so that a single
// value can be returned from both Stat and ReadDir.
type fileInfoDirEntry interface {
	fs.FileInfo
	fs.DirEntry
}
   509  
   510  func (e *fileListEntry) stat() (fileInfoDirEntry, error) {
   511  	if e.isDup {
   512  		return nil, errors.New(e.name + ": duplicate entries in 7-zip file")
   513  	}
   514  
   515  	if !e.isDir {
   516  		return headerFileInfo{&e.file.FileHeader}, nil
   517  	}
   518  
   519  	return e, nil
   520  }
   521  
   522  func (e *fileListEntry) Name() string {
   523  	_, elem := split(e.name)
   524  
   525  	return elem
   526  }
   527  
   528  func (e *fileListEntry) Size() int64       { return 0 }
   529  func (e *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0o555 }
   530  func (e *fileListEntry) Type() fs.FileMode { return fs.ModeDir }
   531  func (e *fileListEntry) IsDir() bool       { return true }
   532  func (e *fileListEntry) Sys() interface{}  { return nil }
   533  
   534  func (e *fileListEntry) ModTime() time.Time {
   535  	if e.file == nil {
   536  		return time.Time{}
   537  	}
   538  
   539  	return e.file.FileHeader.Modified.UTC()
   540  }
   541  
   542  func (e *fileListEntry) Info() (fs.FileInfo, error) { return e, nil }
   543  
   544  func toValidName(name string) string {
   545  	name = strings.ReplaceAll(name, `\`, `/`)
   546  
   547  	p := strings.TrimPrefix(path.Clean(name), "/")
   548  
   549  	for strings.HasPrefix(p, "../") {
   550  		p = p[len("../"):]
   551  	}
   552  
   553  	return p
   554  }
   555  
   556  //nolint:cyclop,funlen
   557  func (z *Reader) initFileList() {
   558  	z.fileListOnce.Do(func() {
   559  		files := make(map[string]int)
   560  		knownDirs := make(map[string]int)
   561  
   562  		dirs := make(map[string]struct{})
   563  
   564  		for _, file := range z.File {
   565  			isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/'
   566  
   567  			name := toValidName(file.Name)
   568  			if name == "" {
   569  				continue
   570  			}
   571  
   572  			if idx, ok := files[name]; ok {
   573  				z.fileList[idx].isDup = true
   574  
   575  				continue
   576  			}
   577  
   578  			if idx, ok := knownDirs[name]; ok {
   579  				z.fileList[idx].isDup = true
   580  
   581  				continue
   582  			}
   583  
   584  			for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) {
   585  				dirs[dir] = struct{}{}
   586  			}
   587  
   588  			idx := len(z.fileList)
   589  			entry := fileListEntry{
   590  				name:  name,
   591  				file:  file,
   592  				isDir: isDir,
   593  			}
   594  			z.fileList = append(z.fileList, entry)
   595  
   596  			if isDir {
   597  				knownDirs[name] = idx
   598  			} else {
   599  				files[name] = idx
   600  			}
   601  		}
   602  
   603  		for dir := range dirs {
   604  			if _, ok := knownDirs[dir]; !ok {
   605  				if idx, ok := files[dir]; ok {
   606  					z.fileList[idx].isDup = true
   607  				} else {
   608  					entry := fileListEntry{
   609  						name:  dir,
   610  						file:  nil,
   611  						isDir: true,
   612  					}
   613  					z.fileList = append(z.fileList, entry)
   614  				}
   615  			}
   616  		}
   617  
   618  		sort.Slice(z.fileList, func(i, j int) bool { return fileEntryLess(z.fileList[i].name, z.fileList[j].name) })
   619  	})
   620  }
   621  
   622  func fileEntryLess(x, y string) bool {
   623  	xdir, xelem := split(x)
   624  	ydir, yelem := split(y)
   625  
   626  	return xdir < ydir || xdir == ydir && xelem < yelem
   627  }
   628  
   629  // Open opens the named file in the 7-zip archive, using the semantics of
   630  // fs.FS.Open: paths are always slash separated, with no leading / or ../
   631  // elements.
   632  func (z *Reader) Open(name string) (fs.File, error) {
   633  	z.initFileList()
   634  
   635  	if !fs.ValidPath(name) {
   636  		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
   637  	}
   638  
   639  	e := z.openLookup(name)
   640  	if e == nil {
   641  		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist}
   642  	}
   643  
   644  	if e.isDir {
   645  		return &openDir{e, z.openReadDir(name), 0}, nil
   646  	}
   647  
   648  	rc, err := e.file.Open()
   649  	if err != nil {
   650  		return nil, err
   651  	}
   652  
   653  	return rc.(fs.File), nil //nolint:forcetypeassert
   654  }
   655  
   656  func split(name string) (dir, elem string) {
   657  	if len(name) > 0 && name[len(name)-1] == '/' {
   658  		name = name[:len(name)-1]
   659  	}
   660  
   661  	i := len(name) - 1
   662  	for i >= 0 && name[i] != '/' {
   663  		i--
   664  	}
   665  
   666  	if i < 0 {
   667  		return ".", name
   668  	}
   669  
   670  	return name[:i], name[i+1:]
   671  }
   672  
// dotFile is the entry openLookup returns for the name ".", the root of the
// archive.
//
//nolint:gochecknoglobals
var dotFile = &fileListEntry{name: "./", isDir: true}
   675  
   676  func (z *Reader) openLookup(name string) *fileListEntry {
   677  	if name == "." {
   678  		return dotFile
   679  	}
   680  
   681  	dir, elem := split(name)
   682  
   683  	files := z.fileList
   684  	i := sort.Search(len(files), func(i int) bool {
   685  		idir, ielem := split(files[i].name)
   686  
   687  		return idir > dir || idir == dir && ielem >= elem
   688  	})
   689  
   690  	if i < len(files) {
   691  		fname := files[i].name
   692  		if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name {
   693  			return &files[i]
   694  		}
   695  	}
   696  
   697  	return nil
   698  }
   699  
   700  func (z *Reader) openReadDir(dir string) []fileListEntry {
   701  	files := z.fileList
   702  
   703  	i := sort.Search(len(files), func(i int) bool {
   704  		idir, _ := split(files[i].name)
   705  
   706  		return idir >= dir
   707  	})
   708  
   709  	j := sort.Search(len(files), func(j int) bool {
   710  		jdir, _ := split(files[j].name)
   711  
   712  		return jdir > dir
   713  	})
   714  
   715  	return files[i:j]
   716  }
   717  
// openDir is the handle Reader.Open returns for a directory.
type openDir struct {
	e      *fileListEntry  // entry for the directory itself
	files  []fileListEntry // entries whose directory part is this directory
	offset int             // number of entries already returned by ReadDir
}
   723  
   724  func (d *openDir) Close() error               { return nil }
   725  func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() }
   726  
   727  func (d *openDir) Read([]byte) (int, error) {
   728  	return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")}
   729  }
   730  
   731  func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) {
   732  	n := len(d.files) - d.offset
   733  	if count > 0 && n > count {
   734  		n = count
   735  	}
   736  
   737  	if n == 0 {
   738  		if count <= 0 {
   739  			return nil, nil
   740  		}
   741  
   742  		return nil, io.EOF
   743  	}
   744  
   745  	list := make([]fs.DirEntry, n)
   746  	for i := range list {
   747  		s, err := d.files[d.offset+i].stat()
   748  		if err != nil {
   749  			return nil, err
   750  		}
   751  
   752  		list[i] = s
   753  	}
   754  
   755  	d.offset += n
   756  
   757  	return list, nil
   758  }