github.com/bodgit/sevenzip@v1.5.1/types.go (about) 1 package sevenzip 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/binary" 7 "errors" 8 "fmt" 9 "io" 10 "math/bits" 11 "time" 12 13 "github.com/bodgit/sevenzip/internal/util" 14 "github.com/bodgit/windows" 15 "golang.org/x/text/encoding/unicode" 16 "golang.org/x/text/transform" 17 ) 18 19 const ( 20 idEnd = iota 21 idHeader 22 idArchiveProperties 23 idAdditionalStreamsInfo 24 idMainStreamsInfo 25 idFilesInfo 26 idPackInfo 27 idUnpackInfo 28 idSubStreamsInfo 29 idSize 30 idCRC 31 idFolder 32 idCodersUnpackSize 33 idNumUnpackStream 34 idEmptyStream 35 idEmptyFile 36 idAnti //nolint:deadcode,varcheck 37 idName 38 idCTime 39 idATime 40 idMTime 41 idWinAttributes 42 idComment //nolint:deadcode,varcheck 43 idEncodedHeader 44 idStartPos 45 idDummy 46 ) 47 48 var ( 49 errIncompleteRead = errors.New("sevenzip: incomplete read") 50 errUnexpectedID = errors.New("sevenzip: unexpected id") 51 errMissingUnpackInfo = errors.New("sevenzip: missing unpack info") 52 ) 53 54 func readUint64(r io.ByteReader) (uint64, error) { 55 b, err := r.ReadByte() 56 if err != nil { 57 return 0, fmt.Errorf("readUint64: ReadByte error: %w", err) 58 } 59 60 l := bits.LeadingZeros8(^b) 61 62 var v uint64 63 if l < 7 { 64 v |= uint64(b&((1<<(8-l))-1)) << (8 * l) 65 } 66 67 for i := 0; i < l; i++ { 68 b, err := r.ReadByte() 69 if err != nil { 70 return 0, fmt.Errorf("readUint64: ReadByte error: %w", err) 71 } 72 73 v |= uint64(b) << (8 * i) 74 } 75 76 return v, nil 77 } 78 79 func readBool(r io.ByteReader, count uint64) ([]bool, error) { 80 defined := make([]bool, count) 81 82 var b, mask byte 83 for i := range defined { 84 if mask == 0 { 85 var err error 86 87 b, err = r.ReadByte() 88 if err != nil { 89 return nil, fmt.Errorf("readBool: ReadByte error: %w", err) 90 } 91 92 mask = 0x80 93 } 94 95 defined[i] = (b & mask) != 0 96 mask >>= 1 97 } 98 99 return defined, nil 100 } 101 102 func readOptionalBool(r io.ByteReader, count uint64) ([]bool, error) { 103 all, err := r.ReadByte() 104 if err != nil { 105 return nil, fmt.Errorf("readOptionalBool: ReadByte error: %w", err) 106 } 107 108 if all == 0 { 109 return readBool(r, count) 110 } 111 112 defined := make([]bool, count) 113 for i := range defined { 114 defined[i] = true 115 } 116 117 return defined, nil 118 } 119 120 func readSizes(r io.ByteReader, count uint64) ([]uint64, error) { 121 sizes := make([]uint64, count) 122 123 for i := uint64(0); i < count; i++ { 124 size, err := readUint64(r) 125 if err != nil { 126 return nil, err 127 } 128 129 sizes[i] = size 130 } 131 132 return sizes, nil 133 } 134 135 func readCRC(r util.Reader, count uint64) ([]uint32, error) { 136 defined, err := readOptionalBool(r, count) 137 if err != nil { 138 return nil, err 139 } 140 141 crcs := make([]uint32, count) 142 143 for i := range defined { 144 if defined[i] { 145 if err := binary.Read(r, binary.LittleEndian, &crcs[i]); err != nil { 146 return nil, fmt.Errorf("readCRC: Read error: %w", err) 147 } 148 } 149 } 150 151 return crcs, nil 152 } 153 154 //nolint:cyclop 155 func readPackInfo(r util.Reader) (*packInfo, error) { 156 p := new(packInfo) 157 158 var err error 159 160 p.position, err = readUint64(r) 161 if err != nil { 162 return nil, err 163 } 164 165 p.streams, err = readUint64(r) 166 if err != nil { 167 return nil, err 168 } 169 170 id, err := r.ReadByte() 171 if err != nil { 172 return nil, fmt.Errorf("readPackInfo: ReadByte error: %w", err) 173 } 174 175 if id == idSize { 176 if p.size, err = readSizes(r, p.streams); err != nil { 177 return nil, err 178 } 179 180 id, err = r.ReadByte() 181 if err != nil { 182 return nil, fmt.Errorf("readPackInfo: ReadByte error: %w", err) 183 } 184 } 185 186 if id == idCRC { 187 if p.digest, err = readCRC(r, p.streams); err != nil { 188 return nil, err 189 } 190 191 id, err = r.ReadByte() 192 if err != nil { 193 return nil, fmt.Errorf("readPackInfo: ReadByte error: %w", err) 194 } 195 } 196 197 if id != idEnd { 198 return nil, errUnexpectedID 199 } 200 201 return p, nil 202 } 203 204 //nolint:cyclop 205 func readCoder(r util.Reader) (*coder, error) { 206 c := new(coder) 207 208 v, err := r.ReadByte() 209 if err != nil { 210 return nil, fmt.Errorf("readCoder: ReadByte error: %w", err) 211 } 212 213 c.id = make([]byte, v&0xf) 214 if n, err := r.Read(c.id); err != nil || n != int(v&0xf) { 215 if err != nil { 216 return nil, fmt.Errorf("readCoder: Read error: %w", err) 217 } 218 219 return nil, errIncompleteRead 220 } 221 222 if v&0x10 != 0 { 223 c.in, err = readUint64(r) 224 if err != nil { 225 return nil, err 226 } 227 228 c.out, err = readUint64(r) 229 if err != nil { 230 return nil, err 231 } 232 } else { 233 c.in, c.out = 1, 1 234 } 235 236 if v&0x20 != 0 { 237 size, err := readUint64(r) 238 if err != nil { 239 return nil, err 240 } 241 242 c.properties = make([]byte, size) 243 if n, err := r.Read(c.properties); err != nil || n != int(size) { 244 if err != nil { 245 return nil, fmt.Errorf("readCoder: Read error: %w", err) 246 } 247 248 return nil, errIncompleteRead 249 } 250 } 251 252 return c, nil 253 } 254 255 //nolint:cyclop 256 func readFolder(r util.Reader) (*folder, error) { 257 f := new(folder) 258 259 coders, err := readUint64(r) 260 if err != nil { 261 return nil, err 262 } 263 264 f.coder = make([]*coder, coders) 265 266 for i := uint64(0); i < coders; i++ { 267 if f.coder[i], err = readCoder(r); err != nil { 268 return nil, err 269 } 270 271 f.in += f.coder[i].in 272 f.out += f.coder[i].out 273 } 274 275 bindPairs := f.out - 1 276 277 f.bindPair = make([]*bindPair, bindPairs) 278 279 for i := uint64(0); i < bindPairs; i++ { 280 in, err := readUint64(r) 281 if err != nil { 282 return nil, err 283 } 284 285 out, err := readUint64(r) 286 if err != nil { 287 return nil, err 288 } 289 290 f.bindPair[i] = &bindPair{ 291 in: in, 292 out: out, 293 } 294 } 295 296 f.packedStreams = f.in - bindPairs 297 298 if f.packedStreams == 1 { 299 f.packed = []uint64{} 300 for i := uint64(0); i < f.in; i++ { 301 if f.findInBindPair(i) == nil { 302 f.packed = append(f.packed, i) 303 } 304 } 305 } else { 306 f.packed = make([]uint64, f.packedStreams) 307 for i := uint64(0); i < f.packedStreams; i++ { 308 if f.packed[i], err = readUint64(r); err != nil { 309 return nil, err 310 } 311 } 312 } 313 314 return f, nil 315 } 316 317 //nolint:cyclop,funlen,gocognit 318 func readUnpackInfo(r util.Reader) (*unpackInfo, error) { 319 u := new(unpackInfo) 320 321 if id, err := r.ReadByte(); err != nil || id != idFolder { 322 if err != nil { 323 return nil, fmt.Errorf("readUnpackInfo: ReadByte error: %w", err) 324 } 325 326 return nil, errUnexpectedID 327 } 328 329 folders, err := readUint64(r) 330 if err != nil { 331 return nil, err 332 } 333 334 external, err := r.ReadByte() 335 if err != nil { 336 return nil, fmt.Errorf("readUnpackInfo: ReadByte error: %w", err) 337 } 338 339 if external > 0 { 340 _, err := readUint64(r) 341 if err != nil { 342 return nil, err 343 } 344 // TODO Apparently we seek to this read offset and read the 345 // folder information from there. Not clear if the offset is 346 // absolute for the whole file, or relative to some known 347 // position in the file. Cowardly waiting for an example 348 return nil, errors.New("sevenzip: TODO readUnpackInfo external") //nolint:goerr113 349 } 350 351 u.folder = make([]*folder, folders) 352 353 for i := uint64(0); i < folders; i++ { 354 if u.folder[i], err = readFolder(r); err != nil { 355 return nil, err 356 } 357 } 358 359 if id, err := r.ReadByte(); err != nil || id != idCodersUnpackSize { 360 if err != nil { 361 return nil, fmt.Errorf("readUnpackInfo: ReadByte error: %w", err) 362 } 363 364 return nil, errUnexpectedID 365 } 366 367 for _, f := range u.folder { 368 total := uint64(0) 369 for _, c := range f.coder { 370 total += c.out 371 } 372 373 f.size = make([]uint64, total) 374 for i := range f.size { 375 if f.size[i], err = readUint64(r); err != nil { 376 return nil, err 377 } 378 } 379 } 380 381 id, err := r.ReadByte() 382 if err != nil { 383 return nil, fmt.Errorf("readUnpackInfo: ReadByte error: %w", err) 384 } 385 386 if id == idCRC { 387 if u.digest, err = readCRC(r, folders); err != nil { 388 return nil, err 389 } 390 391 id, err = r.ReadByte() 392 if err != nil { 393 return nil, fmt.Errorf("readUnpackInfo: ReadByte error: %w", err) 394 } 395 } 396 397 if id != idEnd { 398 return nil, errUnexpectedID 399 } 400 401 return u, nil 402 } 403 404 //nolint:cyclop,funlen 405 func readSubStreamsInfo(r util.Reader, folder []*folder) (*subStreamsInfo, error) { 406 s := new(subStreamsInfo) 407 408 id, err := r.ReadByte() 409 if err != nil { 410 return nil, fmt.Errorf("readSubStreamsInfo: ReadByte error: %w", err) 411 } 412 413 s.streams = make([]uint64, len(folder)) 414 if id == idNumUnpackStream { 415 for i := range s.streams { 416 if s.streams[i], err = readUint64(r); err != nil { 417 return nil, err 418 } 419 } 420 421 id, err = r.ReadByte() 422 if err != nil { 423 return nil, fmt.Errorf("readSubStreamsInfo: ReadByte error: %w", err) 424 } 425 } else { 426 for i := range s.streams { 427 s.streams[i] = 1 428 } 429 } 430 431 // Count the files in each stream 432 files := uint64(0) 433 for _, v := range s.streams { 434 files += v 435 } 436 437 if id == idSize { 438 s.size = make([]uint64, files) 439 k := 0 440 441 for i := range s.streams { 442 total := uint64(0) 443 444 for j := uint64(1); j < s.streams[i]; j++ { 445 if s.size[k], err = readUint64(r); err != nil { 446 return nil, err 447 } 448 449 total += s.size[k] 450 k++ 451 } 452 453 s.size[k] = folder[i].unpackSize() - total 454 k++ 455 } 456 457 id, err = r.ReadByte() 458 if err != nil { 459 return nil, fmt.Errorf("readSubStreamsInfo: ReadByte error: %w", err) 460 } 461 } 462 463 if id == idCRC { 464 if s.digest, err = readCRC(r, files); err != nil { 465 return nil, err 466 } 467 468 id, err = r.ReadByte() 469 if err != nil { 470 return nil, fmt.Errorf("readSubStreamsInfo: ReadByte error: %w", err) 471 } 472 } 473 474 if id != idEnd { 475 return nil, errUnexpectedID 476 } 477 478 return s, nil 479 } 480 481 //nolint:cyclop 482 func readStreamsInfo(r util.Reader) (*streamsInfo, error) { 483 s := new(streamsInfo) 484 485 id, err := r.ReadByte() 486 if err != nil { 487 return nil, fmt.Errorf("readStreamsInfo: ReadByte error: %w", err) 488 } 489 490 if id == idPackInfo { 491 if s.packInfo, err = readPackInfo(r); err != nil { 492 return nil, err 493 } 494 495 id, err = r.ReadByte() 496 if err != nil { 497 return nil, fmt.Errorf("readStreamsInfo: ReadByte error: %w", err) 498 } 499 } 500 501 if id == idUnpackInfo { 502 if s.unpackInfo, err = readUnpackInfo(r); err != nil { 503 return nil, err 504 } 505 506 id, err = r.ReadByte() 507 if err != nil { 508 return nil, fmt.Errorf("readStreamsInfo: ReadByte error: %w", err) 509 } 510 } 511 512 if id == idSubStreamsInfo { 513 if s.unpackInfo == nil { 514 return nil, errMissingUnpackInfo 515 } 516 517 if s.subStreamsInfo, err = readSubStreamsInfo(r, s.unpackInfo.folder); err != nil { 518 return nil, err 519 } 520 521 id, err = r.ReadByte() 522 if err != nil { 523 return nil, fmt.Errorf("readStreamsInfo: ReadByte error: %w", err) 524 } 525 } 526 527 if id != idEnd { 528 return nil, errUnexpectedID 529 } 530 531 return s, nil 532 } 533 534 func readTimes(r util.Reader, count uint64) ([]time.Time, error) { 535 defined, err := readOptionalBool(r, count) 536 if err != nil { 537 return nil, err 538 } 539 540 external, err := r.ReadByte() 541 if err != nil { 542 return nil, fmt.Errorf("readTimes: ReadByte error: %w", err) 543 } 544 545 if external > 0 { 546 _, err := readUint64(r) 547 if err != nil { 548 return nil, err 549 } 550 // TODO Apparently we seek to this read offset and read the 551 // folder information from there. Not clear if the offset is 552 // absolute for the whole file, or relative to some known 553 // position in the file. Cowardly waiting for an example 554 return nil, errors.New("sevenzip: TODO readTimes external") //nolint:goerr113 555 } 556 557 times := make([]time.Time, count) 558 559 for i := range defined { 560 if defined[i] { 561 var ft windows.Filetime 562 if err := binary.Read(r, binary.LittleEndian, &ft); err != nil { 563 return nil, fmt.Errorf("readTimes: Read error: %w", err) 564 } 565 566 times[i] = time.Unix(0, ft.Nanoseconds()).UTC() 567 } 568 } 569 570 return times, nil 571 } 572 573 func splitNull(data []byte, atEOF bool) (advance int, token []byte, err error) { 574 if atEOF && len(data) == 0 { 575 return 0, nil, nil 576 } 577 578 if i := bytes.IndexRune(data, rune(0)); i >= 0 { 579 return i + 1, data[0:i], nil 580 } 581 582 if atEOF { 583 return len(data), data, nil 584 } 585 586 return 587 } 588 589 func readNames(r util.Reader, count, length uint64) ([]string, error) { 590 external, err := r.ReadByte() 591 if err != nil { 592 return nil, fmt.Errorf("readNames: ReadByte error: %w", err) 593 } 594 595 if external > 0 { 596 _, err := readUint64(r) 597 if err != nil { 598 return nil, err 599 } 600 // TODO Apparently we seek to this read offset and read the 601 // folder information from there. Not clear if the offset is 602 // absolute for the whole file, or relative to some known 603 // position in the file. Cowardly waiting for an example 604 return nil, errors.New("sevenzip: TODO readNames external") //nolint:goerr113 605 } 606 607 utf16le := unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM) 608 scanner := bufio.NewScanner(transform.NewReader(io.LimitReader(r, int64(length-1)), utf16le.NewDecoder())) 609 scanner.Split(splitNull) 610 611 names, i := make([]string, 0, count), uint64(0) 612 for scanner.Scan() { 613 names = append(names, scanner.Text()) 614 i++ 615 } 616 617 if err = scanner.Err(); err != nil { 618 return nil, fmt.Errorf("readNames: Scan error: %w", err) 619 } 620 621 if i != count { 622 return nil, errors.New("sevenzip: wrong number of filenames") 623 } 624 625 return names, nil 626 } 627 628 func readAttributes(r util.Reader, count uint64) ([]uint32, error) { 629 defined, err := readOptionalBool(r, count) 630 if err != nil { 631 return nil, err 632 } 633 634 external, err := r.ReadByte() 635 if err != nil { 636 return nil, fmt.Errorf("readAttributes: ReadByte error: %w", err) 637 } 638 639 if external > 0 { 640 _, err := readUint64(r) 641 if err != nil { 642 return nil, err 643 } 644 // TODO Apparently we seek to this read offset and read the 645 // folder information from there. Not clear if the offset is 646 // absolute for the whole file, or relative to some known 647 // position in the file. Cowardly waiting for an example 648 return nil, errors.New("sevenzip: TODO readAttributes external") //nolint:goerr113 649 } 650 651 attributes := make([]uint32, count) 652 653 for i := range defined { 654 if defined[i] { 655 if err := binary.Read(r, binary.LittleEndian, &attributes[i]); err != nil { 656 return nil, fmt.Errorf("readAttributes: Read error: %w", err) 657 } 658 } 659 } 660 661 return attributes, nil 662 } 663 664 //nolint:cyclop,funlen,gocognit,gocyclo 665 func readFilesInfo(r util.Reader) (*filesInfo, error) { 666 f := new(filesInfo) 667 668 files, err := readUint64(r) 669 if err != nil { 670 return nil, err 671 } 672 673 f.file = make([]FileHeader, files) 674 675 var emptyStreams uint64 676 677 for { 678 property, err := r.ReadByte() 679 if err != nil { 680 return nil, fmt.Errorf("readFilesInfo: ReadByte error: %w", err) 681 } 682 683 if property == idEnd { 684 break 685 } 686 687 length, err := readUint64(r) 688 if err != nil { 689 return nil, err 690 } 691 692 switch property { 693 case idEmptyStream: 694 empty, err := readBool(r, files) 695 if err != nil { 696 return nil, err 697 } 698 699 for i := range f.file { 700 f.file[i].isEmptyStream = empty[i] 701 702 if empty[i] { 703 emptyStreams++ 704 } 705 } 706 case idEmptyFile: 707 empty, err := readBool(r, emptyStreams) 708 if err != nil { 709 return nil, err 710 } 711 712 j := 0 713 714 for i := range f.file { 715 if f.file[i].isEmptyStream { 716 f.file[i].isEmptyFile = empty[j] 717 j++ 718 } 719 } 720 case idCTime: 721 times, err := readTimes(r, files) 722 if err != nil { 723 return nil, err 724 } 725 726 for i, t := range times { 727 f.file[i].Created = t 728 } 729 case idATime: 730 times, err := readTimes(r, files) 731 if err != nil { 732 return nil, err 733 } 734 735 for i, t := range times { 736 f.file[i].Accessed = t 737 } 738 case idMTime: 739 times, err := readTimes(r, files) 740 if err != nil { 741 return nil, err 742 } 743 744 for i, t := range times { 745 f.file[i].Modified = t 746 } 747 case idName: 748 names, err := readNames(r, files, length) 749 if err != nil { 750 return nil, err 751 } 752 753 for i, n := range names { 754 f.file[i].Name = n 755 } 756 case idWinAttributes: 757 attributes, err := readAttributes(r, files) 758 if err != nil { 759 return nil, err 760 } 761 762 for i, a := range attributes { 763 f.file[i].Attributes = a 764 } 765 case idStartPos: 766 return nil, errors.New("sevenzip: TODO idStartPos") //nolint:goerr113 767 case idDummy: 768 if _, err := io.CopyN(io.Discard, r, int64(length)); err != nil { 769 return nil, fmt.Errorf("readFilesInfo: CopyN error: %w", err) 770 } 771 default: 772 return nil, errUnexpectedID 773 } 774 } 775 776 return f, nil 777 } 778 779 //nolint:cyclop,funlen 780 func readHeader(r util.Reader) (*header, error) { 781 h := new(header) 782 783 id, err := r.ReadByte() 784 if err != nil { 785 return nil, fmt.Errorf("readHeader: ReadByte error: %w", err) 786 } 787 788 if id == idArchiveProperties { 789 return nil, errors.New("sevenzip: TODO idArchiveProperties") //nolint:goerr113,revive 790 791 //nolint:govet 792 id, err = r.ReadByte() 793 if err != nil { 794 return nil, fmt.Errorf("readHeader: ReadByte error: %w", err) 795 } 796 } 797 798 if id == idAdditionalStreamsInfo { 799 return nil, errors.New("sevenzip: TODO idAdditionalStreamsInfo") //nolint:goerr113,revive 800 801 //nolint:govet 802 id, err = r.ReadByte() 803 if err != nil { 804 return nil, fmt.Errorf("readHeader: ReadByte error: %w", err) 805 } 806 } 807 808 if id == idMainStreamsInfo { 809 if h.streamsInfo, err = readStreamsInfo(r); err != nil { 810 return nil, err 811 } 812 813 id, err = r.ReadByte() 814 if err != nil { 815 return nil, fmt.Errorf("readHeader: ReadByte error: %w", err) 816 } 817 } 818 819 if id == idFilesInfo { 820 if h.filesInfo, err = readFilesInfo(r); err != nil { 821 return nil, err 822 } 823 824 id, err = r.ReadByte() 825 if err != nil { 826 return nil, fmt.Errorf("readHeader: ReadByte error: %w", err) 827 } 828 } 829 830 if id != idEnd { 831 return nil, errUnexpectedID 832 } 833 834 if h.streamsInfo == nil || h.filesInfo == nil { 835 return h, nil 836 } 837 838 j := 0 839 840 for i := range h.filesInfo.file { 841 if h.filesInfo.file[i].isEmptyStream { 842 continue 843 } 844 845 if h.streamsInfo.subStreamsInfo != nil { 846 h.filesInfo.file[i].CRC32 = h.streamsInfo.subStreamsInfo.digest[j] 847 } 848 849 _, h.filesInfo.file[i].UncompressedSize = h.streamsInfo.FileFolderAndSize(j) 850 j++ 851 } 852 853 return h, nil 854 } 855 856 func readEncodedHeader(r util.Reader) (*header, error) { 857 if id, err := r.ReadByte(); err != nil || id != idHeader { 858 if err != nil { 859 return nil, fmt.Errorf("readEncodedHeader: ReadByte error: %w", err) 860 } 861 862 return nil, errUnexpectedID 863 } 864 865 header, err := readHeader(r) 866 if err != nil { 867 return nil, err 868 } 869 870 return header, nil 871 }