github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/archive/zip/writer.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "io/fs" 15 "strings" 16 "unicode/utf8" 17 ) 18 19 var ( 20 errLongName = errors.New("zip: FileHeader.Name too long") 21 errLongExtra = errors.New("zip: FileHeader.Extra too long") 22 ) 23 24 // Writer implements a zip file writer. 25 type Writer struct { 26 cw *countWriter 27 dir []*header 28 last *fileWriter 29 closed bool 30 compressors map[uint16]Compressor 31 comment string 32 33 // testHookCloseSizeOffset if non-nil is called with the size 34 // of offset of the central directory at Close. 35 testHookCloseSizeOffset func(size, offset uint64) 36 } 37 38 type header struct { 39 *FileHeader 40 offset uint64 41 raw bool 42 } 43 44 // NewWriter returns a new [Writer] writing a zip file to w. 45 func NewWriter(w io.Writer) *Writer { 46 return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} 47 } 48 49 // SetOffset sets the offset of the beginning of the zip data within the 50 // underlying writer. It should be used when the zip data is appended to an 51 // existing file, such as a binary executable. 52 // It must be called before any data is written. 53 func (w *Writer) SetOffset(n int64) { 54 if w.cw.count != 0 { 55 panic("zip: SetOffset called after data was written") 56 } 57 w.cw.count = n 58 } 59 60 // Flush flushes any buffered data to the underlying writer. 61 // Calling Flush is not normally necessary; calling Close is sufficient. 62 func (w *Writer) Flush() error { 63 return w.cw.w.(*bufio.Writer).Flush() 64 } 65 66 // SetComment sets the end-of-central-directory comment field. 67 // It can only be called before [Writer.Close]. 68 func (w *Writer) SetComment(comment string) error { 69 if len(comment) > uint16max { 70 return errors.New("zip: Writer.Comment too long") 71 } 72 w.comment = comment 73 return nil 74 } 75 76 // Close finishes writing the zip file by writing the central directory. 77 // It does not close the underlying writer. 78 func (w *Writer) Close() error { 79 if w.last != nil && !w.last.closed { 80 if err := w.last.close(); err != nil { 81 return err 82 } 83 w.last = nil 84 } 85 if w.closed { 86 return errors.New("zip: writer closed twice") 87 } 88 w.closed = true 89 90 // write central directory 91 start := w.cw.count 92 for _, h := range w.dir { 93 var buf [directoryHeaderLen]byte 94 b := writeBuf(buf[:]) 95 b.uint32(uint32(directoryHeaderSignature)) 96 b.uint16(h.CreatorVersion) 97 b.uint16(h.ReaderVersion) 98 b.uint16(h.Flags) 99 b.uint16(h.Method) 100 b.uint16(h.ModifiedTime) 101 b.uint16(h.ModifiedDate) 102 b.uint32(h.CRC32) 103 if h.isZip64() || h.offset >= uint32max { 104 // the file needs a zip64 header. store maxint in both 105 // 32 bit size fields (and offset later) to signal that the 106 // zip64 extra header should be used. 107 b.uint32(uint32max) // compressed size 108 b.uint32(uint32max) // uncompressed size 109 110 // append a zip64 extra block to Extra 111 var buf [28]byte // 2x uint16 + 3x uint64 112 eb := writeBuf(buf[:]) 113 eb.uint16(zip64ExtraID) 114 eb.uint16(24) // size = 3x uint64 115 eb.uint64(h.UncompressedSize64) 116 eb.uint64(h.CompressedSize64) 117 eb.uint64(h.offset) 118 h.Extra = append(h.Extra, buf[:]...) 119 } else { 120 b.uint32(h.CompressedSize) 121 b.uint32(h.UncompressedSize) 122 } 123 124 b.uint16(uint16(len(h.Name))) 125 b.uint16(uint16(len(h.Extra))) 126 b.uint16(uint16(len(h.Comment))) 127 b = b[4:] // skip disk number start and internal file attr (2x uint16) 128 b.uint32(h.ExternalAttrs) 129 if h.offset > uint32max { 130 b.uint32(uint32max) 131 } else { 132 b.uint32(uint32(h.offset)) 133 } 134 if _, err := w.cw.Write(buf[:]); err != nil { 135 return err 136 } 137 if _, err := io.WriteString(w.cw, h.Name); err != nil { 138 return err 139 } 140 if _, err := w.cw.Write(h.Extra); err != nil { 141 return err 142 } 143 if _, err := io.WriteString(w.cw, h.Comment); err != nil { 144 return err 145 } 146 } 147 end := w.cw.count 148 149 records := uint64(len(w.dir)) 150 size := uint64(end - start) 151 offset := uint64(start) 152 153 if f := w.testHookCloseSizeOffset; f != nil { 154 f(size, offset) 155 } 156 157 if records >= uint16max || size >= uint32max || offset >= uint32max { 158 var buf [directory64EndLen + directory64LocLen]byte 159 b := writeBuf(buf[:]) 160 161 // zip64 end of central directory record 162 b.uint32(directory64EndSignature) 163 b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) 164 b.uint16(zipVersion45) // version made by 165 b.uint16(zipVersion45) // version needed to extract 166 b.uint32(0) // number of this disk 167 b.uint32(0) // number of the disk with the start of the central directory 168 b.uint64(records) // total number of entries in the central directory on this disk 169 b.uint64(records) // total number of entries in the central directory 170 b.uint64(size) // size of the central directory 171 b.uint64(offset) // offset of start of central directory with respect to the starting disk number 172 173 // zip64 end of central directory locator 174 b.uint32(directory64LocSignature) 175 b.uint32(0) // number of the disk with the start of the zip64 end of central directory 176 b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record 177 b.uint32(1) // total number of disks 178 179 if _, err := w.cw.Write(buf[:]); err != nil { 180 return err 181 } 182 183 // store max values in the regular end record to signal 184 // that the zip64 values should be used instead 185 records = uint16max 186 size = uint32max 187 offset = uint32max 188 } 189 190 // write end record 191 var buf [directoryEndLen]byte 192 b := writeBuf(buf[:]) 193 b.uint32(uint32(directoryEndSignature)) 194 b = b[4:] // skip over disk number and first disk number (2x uint16) 195 b.uint16(uint16(records)) // number of entries this disk 196 b.uint16(uint16(records)) // number of entries total 197 b.uint32(uint32(size)) // size of directory 198 b.uint32(uint32(offset)) // start of directory 199 b.uint16(uint16(len(w.comment))) // byte size of EOCD comment 200 if _, err := w.cw.Write(buf[:]); err != nil { 201 return err 202 } 203 if _, err := io.WriteString(w.cw, w.comment); err != nil { 204 return err 205 } 206 207 return w.cw.w.(*bufio.Writer).Flush() 208 } 209 210 // Create adds a file to the zip file using the provided name. 211 // It returns a [Writer] to which the file contents should be written. 212 // The file contents will be compressed using the [Deflate] method. 213 // The name must be a relative path: it must not start with a drive 214 // letter (e.g. C:) or leading slash, and only forward slashes are 215 // allowed. To create a directory instead of a file, add a trailing 216 // slash to the name. 217 // The file's contents must be written to the [io.Writer] before the next 218 // call to [Writer.Create], [Writer.CreateHeader], or [Writer.Close]. 219 func (w *Writer) Create(name string) (io.Writer, error) { 220 header := &FileHeader{ 221 Name: name, 222 Method: Deflate, 223 } 224 return w.CreateHeader(header) 225 } 226 227 // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string 228 // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, 229 // or any other common encoding). 230 func detectUTF8(s string) (valid, require bool) { 231 for i := 0; i < len(s); { 232 r, size := utf8.DecodeRuneInString(s[i:]) 233 i += size 234 // Officially, ZIP uses CP-437, but many readers use the system's 235 // local character encoding. Most encoding are compatible with a large 236 // subset of CP-437, which itself is ASCII-like. 237 // 238 // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those 239 // characters with localized currency and overline characters. 240 if r < 0x20 || r > 0x7d || r == 0x5c { 241 if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { 242 return false, false 243 } 244 require = true 245 } 246 } 247 return true, require 248 } 249 250 // prepare performs the bookkeeping operations required at the start of 251 // CreateHeader and CreateRaw. 252 func (w *Writer) prepare(fh *FileHeader) error { 253 if w.last != nil && !w.last.closed { 254 if err := w.last.close(); err != nil { 255 return err 256 } 257 } 258 if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { 259 // See https://golang.org/issue/11144 confusion. 260 return errors.New("archive/zip: invalid duplicate FileHeader") 261 } 262 return nil 263 } 264 265 // CreateHeader adds a file to the zip archive using the provided [FileHeader] 266 // for the file metadata. [Writer] takes ownership of fh and may mutate 267 // its fields. The caller must not modify fh after calling [Writer.CreateHeader]. 268 // 269 // This returns a [Writer] to which the file contents should be written. 270 // The file's contents must be written to the io.Writer before the next 271 // call to [Writer.Create], [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close]. 272 func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { 273 if err := w.prepare(fh); err != nil { 274 return nil, err 275 } 276 277 // The ZIP format has a sad state of affairs regarding character encoding. 278 // Officially, the name and comment fields are supposed to be encoded 279 // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 280 // flag bit is set. However, there are several problems: 281 // 282 // * Many ZIP readers still do not support UTF-8. 283 // * If the UTF-8 flag is cleared, several readers simply interpret the 284 // name and comment fields as whatever the local system encoding is. 285 // 286 // In order to avoid breaking readers without UTF-8 support, 287 // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. 288 // However, if the strings require multibyte UTF-8 encoding and is a 289 // valid UTF-8 string, then we set the UTF-8 bit. 290 // 291 // For the case, where the user explicitly wants to specify the encoding 292 // as UTF-8, they will need to set the flag bit themselves. 293 utf8Valid1, utf8Require1 := detectUTF8(fh.Name) 294 utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) 295 switch { 296 case fh.NonUTF8: 297 fh.Flags &^= 0x800 298 case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): 299 fh.Flags |= 0x800 300 } 301 302 fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte 303 fh.ReaderVersion = zipVersion20 304 305 // If Modified is set, this takes precedence over MS-DOS timestamp fields. 306 if !fh.Modified.IsZero() { 307 // Contrary to the FileHeader.SetModTime method, we intentionally 308 // do not convert to UTC, because we assume the user intends to encode 309 // the date using the specified timezone. A user may want this control 310 // because many legacy ZIP readers interpret the timestamp according 311 // to the local timezone. 312 // 313 // The timezone is only non-UTC if a user directly sets the Modified 314 // field directly themselves. All other approaches sets UTC. 315 fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) 316 317 // Use "extended timestamp" format since this is what Info-ZIP uses. 318 // Nearly every major ZIP implementation uses a different format, 319 // but at least most seem to be able to understand the other formats. 320 // 321 // This format happens to be identical for both local and central header 322 // if modification time is the only timestamp being encoded. 323 var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) 324 mt := uint32(fh.Modified.Unix()) 325 eb := writeBuf(mbuf[:]) 326 eb.uint16(extTimeExtraID) 327 eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) 328 eb.uint8(1) // Flags: ModTime 329 eb.uint32(mt) // ModTime 330 fh.Extra = append(fh.Extra, mbuf[:]...) 331 } 332 333 var ( 334 ow io.Writer 335 fw *fileWriter 336 ) 337 h := &header{ 338 FileHeader: fh, 339 offset: uint64(w.cw.count), 340 } 341 342 if strings.HasSuffix(fh.Name, "/") { 343 // Set the compression method to Store to ensure data length is truly zero, 344 // which the writeHeader method always encodes for the size fields. 345 // This is necessary as most compression formats have non-zero lengths 346 // even when compressing an empty string. 347 fh.Method = Store 348 fh.Flags &^= 0x8 // we will not write a data descriptor 349 350 // Explicitly clear sizes as they have no meaning for directories. 351 fh.CompressedSize = 0 352 fh.CompressedSize64 = 0 353 fh.UncompressedSize = 0 354 fh.UncompressedSize64 = 0 355 356 ow = dirWriter{} 357 } else { 358 fh.Flags |= 0x8 // we will write a data descriptor 359 360 fw = &fileWriter{ 361 zipw: w.cw, 362 compCount: &countWriter{w: w.cw}, 363 crc32: crc32.NewIEEE(), 364 } 365 comp := w.compressor(fh.Method) 366 if comp == nil { 367 return nil, ErrAlgorithm 368 } 369 var err error 370 fw.comp, err = comp(fw.compCount) 371 if err != nil { 372 return nil, err 373 } 374 fw.rawCount = &countWriter{w: fw.comp} 375 fw.header = h 376 ow = fw 377 } 378 w.dir = append(w.dir, h) 379 if err := writeHeader(w.cw, h); err != nil { 380 return nil, err 381 } 382 // If we're creating a directory, fw is nil. 383 w.last = fw 384 return ow, nil 385 } 386 387 func writeHeader(w io.Writer, h *header) error { 388 const maxUint16 = 1<<16 - 1 389 if len(h.Name) > maxUint16 { 390 return errLongName 391 } 392 if len(h.Extra) > maxUint16 { 393 return errLongExtra 394 } 395 396 var buf [fileHeaderLen]byte 397 b := writeBuf(buf[:]) 398 b.uint32(uint32(fileHeaderSignature)) 399 b.uint16(h.ReaderVersion) 400 b.uint16(h.Flags) 401 b.uint16(h.Method) 402 b.uint16(h.ModifiedTime) 403 b.uint16(h.ModifiedDate) 404 // In raw mode (caller does the compression), the values are either 405 // written here or in the trailing data descriptor based on the header 406 // flags. 407 if h.raw && !h.hasDataDescriptor() { 408 b.uint32(h.CRC32) 409 b.uint32(uint32(min(h.CompressedSize64, uint32max))) 410 b.uint32(uint32(min(h.UncompressedSize64, uint32max))) 411 } else { 412 // When this package handle the compression, these values are 413 // always written to the trailing data descriptor. 414 b.uint32(0) // crc32 415 b.uint32(0) // compressed size 416 b.uint32(0) // uncompressed size 417 } 418 b.uint16(uint16(len(h.Name))) 419 b.uint16(uint16(len(h.Extra))) 420 if _, err := w.Write(buf[:]); err != nil { 421 return err 422 } 423 if _, err := io.WriteString(w, h.Name); err != nil { 424 return err 425 } 426 _, err := w.Write(h.Extra) 427 return err 428 } 429 430 // CreateRaw adds a file to the zip archive using the provided [FileHeader] and 431 // returns a [Writer] to which the file contents should be written. The file's 432 // contents must be written to the io.Writer before the next call to [Writer.Create], 433 // [Writer.CreateHeader], [Writer.CreateRaw], or [Writer.Close]. 434 // 435 // In contrast to [Writer.CreateHeader], the bytes passed to Writer are not compressed. 436 // 437 // CreateRaw's argument is stored in w. If the argument is a pointer to the embedded 438 // [FileHeader] in a [File] obtained from a [Reader] created from in-memory data, 439 // then w will refer to all of that memory. 440 func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) { 441 if err := w.prepare(fh); err != nil { 442 return nil, err 443 } 444 445 fh.CompressedSize = uint32(min(fh.CompressedSize64, uint32max)) 446 fh.UncompressedSize = uint32(min(fh.UncompressedSize64, uint32max)) 447 448 h := &header{ 449 FileHeader: fh, 450 offset: uint64(w.cw.count), 451 raw: true, 452 } 453 w.dir = append(w.dir, h) 454 if err := writeHeader(w.cw, h); err != nil { 455 return nil, err 456 } 457 458 if strings.HasSuffix(fh.Name, "/") { 459 w.last = nil 460 return dirWriter{}, nil 461 } 462 463 fw := &fileWriter{ 464 header: h, 465 zipw: w.cw, 466 } 467 w.last = fw 468 return fw, nil 469 } 470 471 // Copy copies the file f (obtained from a [Reader]) into w. It copies the raw 472 // form directly bypassing decompression, compression, and validation. 473 func (w *Writer) Copy(f *File) error { 474 r, err := f.OpenRaw() 475 if err != nil { 476 return err 477 } 478 // Copy the FileHeader so w doesn't store a pointer to the data 479 // of f's entire archive. See #65499. 480 fh := f.FileHeader 481 fw, err := w.CreateRaw(&fh) 482 if err != nil { 483 return err 484 } 485 _, err = io.Copy(fw, r) 486 return err 487 } 488 489 // RegisterCompressor registers or overrides a custom compressor for a specific 490 // method ID. If a compressor for a given method is not found, [Writer] will 491 // default to looking up the compressor at the package level. 492 func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { 493 if w.compressors == nil { 494 w.compressors = make(map[uint16]Compressor) 495 } 496 w.compressors[method] = comp 497 } 498 499 // AddFS adds the files from fs.FS to the archive. 500 // It walks the directory tree starting at the root of the filesystem 501 // adding each file to the zip using deflate while maintaining the directory structure. 502 func (w *Writer) AddFS(fsys fs.FS) error { 503 return fs.WalkDir(fsys, ".", func(name string, d fs.DirEntry, err error) error { 504 if err != nil { 505 return err 506 } 507 if d.IsDir() { 508 return nil 509 } 510 info, err := d.Info() 511 if err != nil { 512 return err 513 } 514 if !info.Mode().IsRegular() { 515 return errors.New("zip: cannot add non-regular file") 516 } 517 h, err := FileInfoHeader(info) 518 if err != nil { 519 return err 520 } 521 h.Name = name 522 h.Method = Deflate 523 fw, err := w.CreateHeader(h) 524 if err != nil { 525 return err 526 } 527 f, err := fsys.Open(name) 528 if err != nil { 529 return err 530 } 531 defer f.Close() 532 _, err = io.Copy(fw, f) 533 return err 534 }) 535 } 536 537 func (w *Writer) compressor(method uint16) Compressor { 538 comp := w.compressors[method] 539 if comp == nil { 540 comp = compressor(method) 541 } 542 return comp 543 } 544 545 type dirWriter struct{} 546 547 func (dirWriter) Write(b []byte) (int, error) { 548 if len(b) == 0 { 549 return 0, nil 550 } 551 return 0, errors.New("zip: write to directory") 552 } 553 554 type fileWriter struct { 555 *header 556 zipw io.Writer 557 rawCount *countWriter 558 comp io.WriteCloser 559 compCount *countWriter 560 crc32 hash.Hash32 561 closed bool 562 } 563 564 func (w *fileWriter) Write(p []byte) (int, error) { 565 if w.closed { 566 return 0, errors.New("zip: write to closed file") 567 } 568 if w.raw { 569 return w.zipw.Write(p) 570 } 571 w.crc32.Write(p) 572 return w.rawCount.Write(p) 573 } 574 575 func (w *fileWriter) close() error { 576 if w.closed { 577 return errors.New("zip: file closed twice") 578 } 579 w.closed = true 580 if w.raw { 581 return w.writeDataDescriptor() 582 } 583 if err := w.comp.Close(); err != nil { 584 return err 585 } 586 587 // update FileHeader 588 fh := w.header.FileHeader 589 fh.CRC32 = w.crc32.Sum32() 590 fh.CompressedSize64 = uint64(w.compCount.count) 591 fh.UncompressedSize64 = uint64(w.rawCount.count) 592 593 if fh.isZip64() { 594 fh.CompressedSize = uint32max 595 fh.UncompressedSize = uint32max 596 fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions 597 } else { 598 fh.CompressedSize = uint32(fh.CompressedSize64) 599 fh.UncompressedSize = uint32(fh.UncompressedSize64) 600 } 601 602 return w.writeDataDescriptor() 603 } 604 605 func (w *fileWriter) writeDataDescriptor() error { 606 if !w.hasDataDescriptor() { 607 return nil 608 } 609 // Write data descriptor. This is more complicated than one would 610 // think, see e.g. comments in zipfile.c:putextended() and 611 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. 612 // The approach here is to write 8 byte sizes if needed without 613 // adding a zip64 extra in the local header (too late anyway). 614 var buf []byte 615 if w.isZip64() { 616 buf = make([]byte, dataDescriptor64Len) 617 } else { 618 buf = make([]byte, dataDescriptorLen) 619 } 620 b := writeBuf(buf) 621 b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X 622 b.uint32(w.CRC32) 623 if w.isZip64() { 624 b.uint64(w.CompressedSize64) 625 b.uint64(w.UncompressedSize64) 626 } else { 627 b.uint32(w.CompressedSize) 628 b.uint32(w.UncompressedSize) 629 } 630 _, err := w.zipw.Write(buf) 631 return err 632 } 633 634 type countWriter struct { 635 w io.Writer 636 count int64 637 } 638 639 func (w *countWriter) Write(p []byte) (int, error) { 640 n, err := w.w.Write(p) 641 w.count += int64(n) 642 return n, err 643 } 644 645 type nopCloser struct { 646 io.Writer 647 } 648 649 func (w nopCloser) Close() error { 650 return nil 651 } 652 653 type writeBuf []byte 654 655 func (b *writeBuf) uint8(v uint8) { 656 (*b)[0] = v 657 *b = (*b)[1:] 658 } 659 660 func (b *writeBuf) uint16(v uint16) { 661 binary.LittleEndian.PutUint16(*b, v) 662 *b = (*b)[2:] 663 } 664 665 func (b *writeBuf) uint32(v uint32) { 666 binary.LittleEndian.PutUint32(*b, v) 667 *b = (*b)[4:] 668 } 669 670 func (b *writeBuf) uint64(v uint64) { 671 binary.LittleEndian.PutUint64(*b, v) 672 *b = (*b)[8:] 673 }