github.com/bodgit/sevenzip@v1.5.1/reader.go (about) 1 package sevenzip 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "hash/crc32" 10 "io" 11 "io/fs" 12 "os" 13 "path" 14 "path/filepath" 15 "sort" 16 "strings" 17 "sync" 18 "time" 19 20 "github.com/bodgit/plumbing" 21 "github.com/bodgit/sevenzip/internal/pool" 22 "github.com/bodgit/sevenzip/internal/util" 23 "github.com/hashicorp/go-multierror" 24 "go4.org/readerutil" 25 ) 26 27 var ( 28 errFormat = errors.New("sevenzip: not a valid 7-zip file") 29 errChecksum = errors.New("sevenzip: checksum error") 30 errTooMuch = errors.New("sevenzip: too much data") 31 ) 32 33 // A Reader serves content from a 7-Zip archive. 34 type Reader struct { 35 r io.ReaderAt 36 start int64 37 end int64 38 si *streamsInfo 39 p string 40 File []*File 41 pool []pool.Pooler 42 43 fileListOnce sync.Once 44 fileList []fileListEntry 45 } 46 47 // A ReadCloser is a Reader that must be closed when no longer needed. 48 type ReadCloser struct { 49 f []*os.File 50 Reader 51 } 52 53 // A File is a single file in a 7-Zip archive. The file information is in the 54 // embedded FileHeader. The file content can be accessed by calling Open. 55 type File struct { 56 FileHeader 57 zip *Reader 58 folder int 59 offset int64 60 } 61 62 type fileReader struct { 63 rc util.SizeReadSeekCloser 64 f *File 65 n int64 66 } 67 68 func (fr *fileReader) Stat() (fs.FileInfo, error) { 69 return headerFileInfo{&fr.f.FileHeader}, nil 70 } 71 72 func (fr *fileReader) Read(p []byte) (n int, err error) { 73 if len(p) == 0 { 74 return 0, nil 75 } 76 77 if fr.n <= 0 { 78 return 0, io.EOF 79 } 80 81 if int64(len(p)) > fr.n { 82 p = p[0:fr.n] 83 } 84 85 n, err = fr.rc.Read(p) 86 fr.n -= int64(n) 87 88 return 89 } 90 91 func (fr *fileReader) Close() error { 92 if fr.rc == nil { 93 return nil 94 } 95 96 offset, err := fr.rc.Seek(0, io.SeekCurrent) 97 if err != nil { 98 return err 99 } 100 101 if offset == fr.rc.Size() { // EOF reached 102 if err := fr.rc.Close(); err != nil { 103 return err 104 } 105 } else { 106 f := fr.f 107 if _, err := f.zip.pool[f.folder].Put(offset, fr.rc); err != nil { 108 return err 109 } 110 } 111 112 fr.rc = nil 113 114 return nil 115 } 116 117 // Open returns an io.ReadCloser that provides access to the File's contents. 118 // Multiple files may be read concurrently. 119 func (f *File) Open() (io.ReadCloser, error) { 120 if f.FileHeader.isEmptyStream || f.FileHeader.isEmptyFile { 121 // Return empty reader for directory or empty file 122 return io.NopCloser(bytes.NewReader(nil)), nil 123 } 124 125 var err error 126 127 rc, _ := f.zip.pool[f.folder].Get(f.offset) 128 if rc == nil { 129 rc, _, err = f.zip.folderReader(f.zip.si, f.folder) 130 if err != nil { 131 return nil, err 132 } 133 } 134 135 if _, err = rc.Seek(f.offset, io.SeekStart); err != nil { 136 return nil, err 137 } 138 139 return &fileReader{ 140 rc: rc, 141 f: f, 142 n: int64(f.UncompressedSize), 143 }, nil 144 } 145 146 // OpenReaderWithPassword will open the 7-zip file specified by name using 147 // password as the basis of the decryption key and return a ReadCloser. If 148 // name has a ".001" suffix it is assumed there are multiple volumes and each 149 // sequential volume will be opened. 150 // 151 //nolint:cyclop,funlen 152 func OpenReaderWithPassword(name, password string) (*ReadCloser, error) { 153 f, err := os.Open(name) 154 if err != nil { 155 return nil, err 156 } 157 158 info, err := f.Stat() 159 if err != nil { 160 err = multierror.Append(err, f.Close()) 161 162 return nil, err 163 } 164 165 var reader io.ReaderAt = f 166 167 size := info.Size() 168 files := []*os.File{f} 169 170 if ext := filepath.Ext(name); ext == ".001" { 171 sr := []readerutil.SizeReaderAt{io.NewSectionReader(f, 0, size)} 172 173 for i := 2; true; i++ { 174 f, err := os.Open(fmt.Sprintf("%s.%03d", strings.TrimSuffix(name, ext), i)) 175 if err != nil { 176 if errors.Is(err, fs.ErrNotExist) { 177 break 178 } 179 180 for _, file := range files { 181 err = multierror.Append(err, file.Close()) 182 } 183 184 return nil, err 185 } 186 187 files = append(files, f) 188 189 info, err = f.Stat() 190 if err != nil { 191 for _, file := range files { 192 err = multierror.Append(err, file.Close()) 193 } 194 195 return nil, err 196 } 197 198 sr = append(sr, io.NewSectionReader(f, 0, info.Size())) 199 } 200 201 mr := readerutil.NewMultiReaderAt(sr...) 202 reader, size = mr, mr.Size() 203 } 204 205 r := new(ReadCloser) 206 r.p = password 207 208 if err := r.init(reader, size); err != nil { 209 for _, file := range files { 210 err = multierror.Append(err, file.Close()) 211 } 212 213 return nil, err 214 } 215 216 r.f = files 217 218 return r, nil 219 } 220 221 // OpenReader will open the 7-zip file specified by name and return a 222 // ReadCloser. If name has a ".001" suffix it is assumed there are multiple 223 // volumes and each sequential volume will be opened. 224 func OpenReader(name string) (*ReadCloser, error) { 225 return OpenReaderWithPassword(name, "") 226 } 227 228 // NewReaderWithPassword returns a new Reader reading from r using password as 229 // the basis of the decryption key, which is assumed to have the given size in 230 // bytes. 231 func NewReaderWithPassword(r io.ReaderAt, size int64, password string) (*Reader, error) { 232 if size < 0 { 233 return nil, errors.New("sevenzip: size cannot be negative") 234 } 235 236 zr := new(Reader) 237 zr.p = password 238 239 if err := zr.init(r, size); err != nil { 240 return nil, err 241 } 242 243 return zr, nil 244 } 245 246 // NewReader returns a new Reader reading from r, which is assumed to have the 247 // given size in bytes. 248 func NewReader(r io.ReaderAt, size int64) (*Reader, error) { 249 return NewReaderWithPassword(r, size, "") 250 } 251 252 func (z *Reader) folderReader(si *streamsInfo, f int) (*folderReadCloser, uint32, error) { 253 // Create a SectionReader covering all of the streams data 254 return si.FolderReader(io.NewSectionReader(z.r, z.start, z.end-z.start), f, z.p) 255 } 256 257 const ( 258 chunkSize = 4096 259 searchLimit = 1 << 20 // 1 MiB 260 ) 261 262 func findSignature(r io.ReaderAt, search []byte) ([]int64, error) { 263 chunk := make([]byte, chunkSize+len(search)) 264 offsets := make([]int64, 0, 2) 265 266 for offset := int64(0); offset < searchLimit; offset += chunkSize { 267 n, err := r.ReadAt(chunk, offset) 268 269 for i := 0; ; { 270 idx := bytes.Index(chunk[i:n], search) 271 if idx == -1 { 272 break 273 } 274 275 offsets = append(offsets, offset+int64(i+idx)) 276 if offsets[0] == 0 { 277 // If signature is at the beginning, return immediately, it's a regular archive 278 return offsets, nil 279 } 280 281 i += idx + 1 282 } 283 284 if err != nil { 285 if errors.Is(err, io.EOF) { 286 break 287 } 288 289 return nil, err 290 } 291 } 292 293 return offsets, nil 294 } 295 296 //nolint:cyclop,funlen,gocognit,gocyclo 297 func (z *Reader) init(r io.ReaderAt, size int64) error { 298 h := crc32.NewIEEE() 299 tra := plumbing.TeeReaderAt(r, h) 300 301 signature := []byte{'7', 'z', 0xbc, 0xaf, 0x27, 0x1c} 302 303 offsets, err := findSignature(r, signature) 304 if err != nil { 305 return err 306 } 307 308 if len(offsets) == 0 { 309 return errFormat 310 } 311 312 var ( 313 sr *io.SectionReader 314 off int64 315 start startHeader 316 ) 317 318 for _, off = range offsets { 319 sr = io.NewSectionReader(tra, off, size-off) // Will only read first 32 bytes 320 321 var sh signatureHeader 322 if err = binary.Read(sr, binary.LittleEndian, &sh); err != nil { 323 return err 324 } 325 326 z.r = r 327 328 h.Reset() 329 330 if err = binary.Read(sr, binary.LittleEndian, &start); err != nil { 331 return err 332 } 333 334 // CRC of the start header should match 335 if util.CRC32Equal(h.Sum(nil), sh.CRC) { 336 break 337 } 338 339 err = errChecksum 340 } 341 342 if err != nil { 343 return err 344 } 345 346 // Work out where we are in the file (32, avoiding magic numbers) 347 if z.start, err = sr.Seek(0, io.SeekCurrent); err != nil { 348 return err 349 } 350 351 // Seek over the streams 352 if z.end, err = sr.Seek(int64(start.Offset), io.SeekCurrent); err != nil { 353 return err 354 } 355 356 z.start += off 357 z.end += off 358 359 h.Reset() 360 361 // Bound bufio.Reader otherwise it can read trailing garbage which screws up the CRC check 362 br := bufio.NewReader(io.NewSectionReader(tra, z.end, int64(start.Size))) 363 364 id, err := br.ReadByte() 365 if err != nil { 366 return err 367 } 368 369 var ( 370 header *header 371 streamsInfo *streamsInfo 372 ) 373 374 switch id { 375 case idHeader: 376 if header, err = readHeader(br); err != nil { 377 return err 378 } 379 case idEncodedHeader: 380 if streamsInfo, err = readStreamsInfo(br); err != nil { 381 return err 382 } 383 default: 384 return errUnexpectedID 385 } 386 387 // If there's more data to read, we've not parsed this correctly. This 388 // won't break with trailing data as the bufio.Reader was bounded 389 if n, _ := io.CopyN(io.Discard, br, 1); n != 0 { 390 return errTooMuch 391 } 392 393 // CRC should match the one from the start header 394 if !util.CRC32Equal(h.Sum(nil), start.CRC) { 395 return errChecksum 396 } 397 398 // If the header was encoded we should have sufficient information now 399 // to decode it 400 if streamsInfo != nil { 401 if streamsInfo.Folders() != 1 { 402 return errors.New("sevenzip: expected only one folder in header stream") 403 } 404 405 fr, crc, err := z.folderReader(streamsInfo, 0) 406 if err != nil { 407 return err 408 } 409 defer fr.Close() 410 411 if header, err = readEncodedHeader(util.ByteReadCloser(fr)); err != nil { 412 return err 413 } 414 415 if crc != 0 && !util.CRC32Equal(fr.Checksum(), crc) { 416 return errChecksum 417 } 418 } 419 420 z.si = header.streamsInfo 421 422 // spew.Dump(header) 423 424 folder, offset := 0, int64(0) 425 z.File = make([]*File, 0, len(header.filesInfo.file)) 426 j := 0 427 428 filesPerStream := make(map[int]int, z.si.Folders()) 429 430 for _, fh := range header.filesInfo.file { 431 f := new(File) 432 f.zip = z 433 f.FileHeader = fh 434 435 if f.FileHeader.FileInfo().IsDir() && !strings.HasSuffix(f.FileHeader.Name, "/") { 436 f.FileHeader.Name += "/" 437 } 438 439 if !fh.isEmptyStream && !fh.isEmptyFile { 440 f.folder, _ = header.streamsInfo.FileFolderAndSize(j) 441 442 // Make an exported copy of the folder index 443 f.Stream = f.folder 444 445 filesPerStream[f.folder]++ 446 447 if f.folder != folder { 448 offset = 0 449 } 450 451 f.offset = offset 452 offset += int64(f.UncompressedSize) 453 folder = f.folder 454 j++ 455 } 456 457 z.File = append(z.File, f) 458 } 459 460 // spew.Dump(filesPerStream) 461 462 z.pool = make([]pool.Pooler, z.si.Folders()) 463 for i := range z.pool { 464 var newPool pool.Constructor = pool.NewNoopPool 465 466 if filesPerStream[i] > 1 { 467 newPool = pool.NewPool 468 } 469 470 if z.pool[i], err = newPool(); err != nil { 471 return err 472 } 473 } 474 475 return nil 476 } 477 478 // Volumes returns the list of volumes that have been opened as part of the current archive. 479 func (rc *ReadCloser) Volumes() []string { 480 volumes := make([]string, len(rc.f)) 481 for idx, f := range rc.f { 482 volumes[idx] = f.Name() 483 } 484 485 return volumes 486 } 487 488 // Close closes the 7-zip file or volumes, rendering them unusable for I/O. 489 func (rc *ReadCloser) Close() error { 490 var err *multierror.Error 491 for _, f := range rc.f { 492 err = multierror.Append(err, f.Close()) 493 } 494 495 return err.ErrorOrNil() 496 } 497 498 type fileListEntry struct { 499 name string 500 file *File 501 isDir bool 502 isDup bool 503 } 504 505 type fileInfoDirEntry interface { 506 fs.FileInfo 507 fs.DirEntry 508 } 509 510 func (e *fileListEntry) stat() (fileInfoDirEntry, error) { 511 if e.isDup { 512 return nil, errors.New(e.name + ": duplicate entries in 7-zip file") 513 } 514 515 if !e.isDir { 516 return headerFileInfo{&e.file.FileHeader}, nil 517 } 518 519 return e, nil 520 } 521 522 func (e *fileListEntry) Name() string { 523 _, elem := split(e.name) 524 525 return elem 526 } 527 528 func (e *fileListEntry) Size() int64 { return 0 } 529 func (e *fileListEntry) Mode() fs.FileMode { return fs.ModeDir | 0o555 } 530 func (e *fileListEntry) Type() fs.FileMode { return fs.ModeDir } 531 func (e *fileListEntry) IsDir() bool { return true } 532 func (e *fileListEntry) Sys() interface{} { return nil } 533 534 func (e *fileListEntry) ModTime() time.Time { 535 if e.file == nil { 536 return time.Time{} 537 } 538 539 return e.file.FileHeader.Modified.UTC() 540 } 541 542 func (e *fileListEntry) Info() (fs.FileInfo, error) { return e, nil } 543 544 func toValidName(name string) string { 545 name = strings.ReplaceAll(name, `\`, `/`) 546 547 p := strings.TrimPrefix(path.Clean(name), "/") 548 549 for strings.HasPrefix(p, "../") { 550 p = p[len("../"):] 551 } 552 553 return p 554 } 555 556 //nolint:cyclop,funlen 557 func (z *Reader) initFileList() { 558 z.fileListOnce.Do(func() { 559 files := make(map[string]int) 560 knownDirs := make(map[string]int) 561 562 dirs := make(map[string]struct{}) 563 564 for _, file := range z.File { 565 isDir := len(file.Name) > 0 && file.Name[len(file.Name)-1] == '/' 566 567 name := toValidName(file.Name) 568 if name == "" { 569 continue 570 } 571 572 if idx, ok := files[name]; ok { 573 z.fileList[idx].isDup = true 574 575 continue 576 } 577 578 if idx, ok := knownDirs[name]; ok { 579 z.fileList[idx].isDup = true 580 581 continue 582 } 583 584 for dir := path.Dir(name); dir != "."; dir = path.Dir(dir) { 585 dirs[dir] = struct{}{} 586 } 587 588 idx := len(z.fileList) 589 entry := fileListEntry{ 590 name: name, 591 file: file, 592 isDir: isDir, 593 } 594 z.fileList = append(z.fileList, entry) 595 596 if isDir { 597 knownDirs[name] = idx 598 } else { 599 files[name] = idx 600 } 601 } 602 603 for dir := range dirs { 604 if _, ok := knownDirs[dir]; !ok { 605 if idx, ok := files[dir]; ok { 606 z.fileList[idx].isDup = true 607 } else { 608 entry := fileListEntry{ 609 name: dir, 610 file: nil, 611 isDir: true, 612 } 613 z.fileList = append(z.fileList, entry) 614 } 615 } 616 } 617 618 sort.Slice(z.fileList, func(i, j int) bool { return fileEntryLess(z.fileList[i].name, z.fileList[j].name) }) 619 }) 620 } 621 622 func fileEntryLess(x, y string) bool { 623 xdir, xelem := split(x) 624 ydir, yelem := split(y) 625 626 return xdir < ydir || xdir == ydir && xelem < yelem 627 } 628 629 // Open opens the named file in the 7-zip archive, using the semantics of 630 // fs.FS.Open: paths are always slash separated, with no leading / or ../ 631 // elements. 632 func (z *Reader) Open(name string) (fs.File, error) { 633 z.initFileList() 634 635 if !fs.ValidPath(name) { 636 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} 637 } 638 639 e := z.openLookup(name) 640 if e == nil { 641 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrNotExist} 642 } 643 644 if e.isDir { 645 return &openDir{e, z.openReadDir(name), 0}, nil 646 } 647 648 rc, err := e.file.Open() 649 if err != nil { 650 return nil, err 651 } 652 653 return rc.(fs.File), nil //nolint:forcetypeassert 654 } 655 656 func split(name string) (dir, elem string) { 657 if len(name) > 0 && name[len(name)-1] == '/' { 658 name = name[:len(name)-1] 659 } 660 661 i := len(name) - 1 662 for i >= 0 && name[i] != '/' { 663 i-- 664 } 665 666 if i < 0 { 667 return ".", name 668 } 669 670 return name[:i], name[i+1:] 671 } 672 673 //nolint:gochecknoglobals 674 var dotFile = &fileListEntry{name: "./", isDir: true} 675 676 func (z *Reader) openLookup(name string) *fileListEntry { 677 if name == "." { 678 return dotFile 679 } 680 681 dir, elem := split(name) 682 683 files := z.fileList 684 i := sort.Search(len(files), func(i int) bool { 685 idir, ielem := split(files[i].name) 686 687 return idir > dir || idir == dir && ielem >= elem 688 }) 689 690 if i < len(files) { 691 fname := files[i].name 692 if fname == name || len(fname) == len(name)+1 && fname[len(name)] == '/' && fname[:len(name)] == name { 693 return &files[i] 694 } 695 } 696 697 return nil 698 } 699 700 func (z *Reader) openReadDir(dir string) []fileListEntry { 701 files := z.fileList 702 703 i := sort.Search(len(files), func(i int) bool { 704 idir, _ := split(files[i].name) 705 706 return idir >= dir 707 }) 708 709 j := sort.Search(len(files), func(j int) bool { 710 jdir, _ := split(files[j].name) 711 712 return jdir > dir 713 }) 714 715 return files[i:j] 716 } 717 718 type openDir struct { 719 e *fileListEntry 720 files []fileListEntry 721 offset int 722 } 723 724 func (d *openDir) Close() error { return nil } 725 func (d *openDir) Stat() (fs.FileInfo, error) { return d.e.stat() } 726 727 func (d *openDir) Read([]byte) (int, error) { 728 return 0, &fs.PathError{Op: "read", Path: d.e.name, Err: errors.New("is a directory")} 729 } 730 731 func (d *openDir) ReadDir(count int) ([]fs.DirEntry, error) { 732 n := len(d.files) - d.offset 733 if count > 0 && n > count { 734 n = count 735 } 736 737 if n == 0 { 738 if count <= 0 { 739 return nil, nil 740 } 741 742 return nil, io.EOF 743 } 744 745 list := make([]fs.DirEntry, n) 746 for i := range list { 747 s, err := d.files[d.offset+i].stat() 748 if err != nil { 749 return nil, err 750 } 751 752 list[i] = s 753 } 754 755 d.offset += n 756 757 return list, nil 758 }