github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/archive/zip/writer.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package zip 6 7 import ( 8 "bufio" 9 "encoding/binary" 10 "errors" 11 "hash" 12 "hash/crc32" 13 "io" 14 "strings" 15 "unicode/utf8" 16 ) 17 18 var ( 19 errLongName = errors.New("zip: FileHeader.Name too long") 20 errLongExtra = errors.New("zip: FileHeader.Extra too long") 21 ) 22 23 // Writer implements a zip file writer. 24 type Writer struct { 25 cw *countWriter 26 dir []*header 27 last *fileWriter 28 closed bool 29 compressors map[uint16]Compressor 30 comment string 31 32 // testHookCloseSizeOffset if non-nil is called with the size 33 // of offset of the central directory at Close. 34 testHookCloseSizeOffset func(size, offset uint64) 35 } 36 37 type header struct { 38 *FileHeader 39 offset uint64 40 raw bool 41 } 42 43 // NewWriter returns a new Writer writing a zip file to w. 44 func NewWriter(w io.Writer) *Writer { 45 return &Writer{cw: &countWriter{w: bufio.NewWriter(w)}} 46 } 47 48 // SetOffset sets the offset of the beginning of the zip data within the 49 // underlying writer. It should be used when the zip data is appended to an 50 // existing file, such as a binary executable. 51 // It must be called before any data is written. 52 func (w *Writer) SetOffset(n int64) { 53 if w.cw.count != 0 { 54 panic("zip: SetOffset called after data was written") 55 } 56 w.cw.count = n 57 } 58 59 // Flush flushes any buffered data to the underlying writer. 60 // Calling Flush is not normally necessary; calling Close is sufficient. 61 func (w *Writer) Flush() error { 62 return w.cw.w.(*bufio.Writer).Flush() 63 } 64 65 // SetComment sets the end-of-central-directory comment field. 66 // It can only be called before Close. 67 func (w *Writer) SetComment(comment string) error { 68 if len(comment) > uint16max { 69 return errors.New("zip: Writer.Comment too long") 70 } 71 w.comment = comment 72 return nil 73 } 74 75 // Close finishes writing the zip file by writing the central directory. 76 // It does not close the underlying writer. 77 func (w *Writer) Close() error { 78 if w.last != nil && !w.last.closed { 79 if err := w.last.close(); err != nil { 80 return err 81 } 82 w.last = nil 83 } 84 if w.closed { 85 return errors.New("zip: writer closed twice") 86 } 87 w.closed = true 88 89 // write central directory 90 start := w.cw.count 91 for _, h := range w.dir { 92 var buf [directoryHeaderLen]byte 93 b := writeBuf(buf[:]) 94 b.uint32(uint32(directoryHeaderSignature)) 95 b.uint16(h.CreatorVersion) 96 b.uint16(h.ReaderVersion) 97 b.uint16(h.Flags) 98 b.uint16(h.Method) 99 b.uint16(h.ModifiedTime) 100 b.uint16(h.ModifiedDate) 101 b.uint32(h.CRC32) 102 if h.isZip64() || h.offset >= uint32max { 103 // the file needs a zip64 header. store maxint in both 104 // 32 bit size fields (and offset later) to signal that the 105 // zip64 extra header should be used. 106 b.uint32(uint32max) // compressed size 107 b.uint32(uint32max) // uncompressed size 108 109 // append a zip64 extra block to Extra 110 var buf [28]byte // 2x uint16 + 3x uint64 111 eb := writeBuf(buf[:]) 112 eb.uint16(zip64ExtraID) 113 eb.uint16(24) // size = 3x uint64 114 eb.uint64(h.UncompressedSize64) 115 eb.uint64(h.CompressedSize64) 116 eb.uint64(h.offset) 117 h.Extra = append(h.Extra, buf[:]...) 118 } else { 119 b.uint32(h.CompressedSize) 120 b.uint32(h.UncompressedSize) 121 } 122 123 b.uint16(uint16(len(h.Name))) 124 b.uint16(uint16(len(h.Extra))) 125 b.uint16(uint16(len(h.Comment))) 126 b = b[4:] // skip disk number start and internal file attr (2x uint16) 127 b.uint32(h.ExternalAttrs) 128 if h.offset > uint32max { 129 b.uint32(uint32max) 130 } else { 131 b.uint32(uint32(h.offset)) 132 } 133 if _, err := w.cw.Write(buf[:]); err != nil { 134 return err 135 } 136 if _, err := io.WriteString(w.cw, h.Name); err != nil { 137 return err 138 } 139 if _, err := w.cw.Write(h.Extra); err != nil { 140 return err 141 } 142 if _, err := io.WriteString(w.cw, h.Comment); err != nil { 143 return err 144 } 145 } 146 end := w.cw.count 147 148 records := uint64(len(w.dir)) 149 size := uint64(end - start) 150 offset := uint64(start) 151 152 if f := w.testHookCloseSizeOffset; f != nil { 153 f(size, offset) 154 } 155 156 if records >= uint16max || size >= uint32max || offset >= uint32max { 157 var buf [directory64EndLen + directory64LocLen]byte 158 b := writeBuf(buf[:]) 159 160 // zip64 end of central directory record 161 b.uint32(directory64EndSignature) 162 b.uint64(directory64EndLen - 12) // length minus signature (uint32) and length fields (uint64) 163 b.uint16(zipVersion45) // version made by 164 b.uint16(zipVersion45) // version needed to extract 165 b.uint32(0) // number of this disk 166 b.uint32(0) // number of the disk with the start of the central directory 167 b.uint64(records) // total number of entries in the central directory on this disk 168 b.uint64(records) // total number of entries in the central directory 169 b.uint64(size) // size of the central directory 170 b.uint64(offset) // offset of start of central directory with respect to the starting disk number 171 172 // zip64 end of central directory locator 173 b.uint32(directory64LocSignature) 174 b.uint32(0) // number of the disk with the start of the zip64 end of central directory 175 b.uint64(uint64(end)) // relative offset of the zip64 end of central directory record 176 b.uint32(1) // total number of disks 177 178 if _, err := w.cw.Write(buf[:]); err != nil { 179 return err 180 } 181 182 // store max values in the regular end record to signal 183 // that the zip64 values should be used instead 184 records = uint16max 185 size = uint32max 186 offset = uint32max 187 } 188 189 // write end record 190 var buf [directoryEndLen]byte 191 b := writeBuf(buf[:]) 192 b.uint32(uint32(directoryEndSignature)) 193 b = b[4:] // skip over disk number and first disk number (2x uint16) 194 b.uint16(uint16(records)) // number of entries this disk 195 b.uint16(uint16(records)) // number of entries total 196 b.uint32(uint32(size)) // size of directory 197 b.uint32(uint32(offset)) // start of directory 198 b.uint16(uint16(len(w.comment))) // byte size of EOCD comment 199 if _, err := w.cw.Write(buf[:]); err != nil { 200 return err 201 } 202 if _, err := io.WriteString(w.cw, w.comment); err != nil { 203 return err 204 } 205 206 return w.cw.w.(*bufio.Writer).Flush() 207 } 208 209 // Create adds a file to the zip file using the provided name. 210 // It returns a Writer to which the file contents should be written. 211 // The file contents will be compressed using the Deflate method. 212 // The name must be a relative path: it must not start with a drive 213 // letter (e.g. C:) or leading slash, and only forward slashes are 214 // allowed. To create a directory instead of a file, add a trailing 215 // slash to the name. 216 // The file's contents must be written to the io.Writer before the next 217 // call to Create, CreateHeader, or Close. 218 func (w *Writer) Create(name string) (io.Writer, error) { 219 header := &FileHeader{ 220 Name: name, 221 Method: Deflate, 222 } 223 return w.CreateHeader(header) 224 } 225 226 // detectUTF8 reports whether s is a valid UTF-8 string, and whether the string 227 // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, 228 // or any other common encoding). 229 func detectUTF8(s string) (valid, require bool) { 230 for i := 0; i < len(s); { 231 r, size := utf8.DecodeRuneInString(s[i:]) 232 i += size 233 // Officially, ZIP uses CP-437, but many readers use the system's 234 // local character encoding. Most encoding are compatible with a large 235 // subset of CP-437, which itself is ASCII-like. 236 // 237 // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those 238 // characters with localized currency and overline characters. 239 if r < 0x20 || r > 0x7d || r == 0x5c { 240 if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) { 241 return false, false 242 } 243 require = true 244 } 245 } 246 return true, require 247 } 248 249 // prepare performs the bookkeeping operations required at the start of 250 // CreateHeader and CreateRaw. 251 func (w *Writer) prepare(fh *FileHeader) error { 252 if w.last != nil && !w.last.closed { 253 if err := w.last.close(); err != nil { 254 return err 255 } 256 } 257 if len(w.dir) > 0 && w.dir[len(w.dir)-1].FileHeader == fh { 258 // See https://golang.org/issue/11144 confusion. 259 return errors.New("archive/zip: invalid duplicate FileHeader") 260 } 261 return nil 262 } 263 264 // CreateHeader adds a file to the zip archive using the provided FileHeader 265 // for the file metadata. Writer takes ownership of fh and may mutate 266 // its fields. The caller must not modify fh after calling CreateHeader. 267 // 268 // This returns a Writer to which the file contents should be written. 269 // The file's contents must be written to the io.Writer before the next 270 // call to Create, CreateHeader, CreateRaw, or Close. 271 func (w *Writer) CreateHeader(fh *FileHeader) (io.Writer, error) { 272 if err := w.prepare(fh); err != nil { 273 return nil, err 274 } 275 276 // The ZIP format has a sad state of affairs regarding character encoding. 277 // Officially, the name and comment fields are supposed to be encoded 278 // in CP-437 (which is mostly compatible with ASCII), unless the UTF-8 279 // flag bit is set. However, there are several problems: 280 // 281 // * Many ZIP readers still do not support UTF-8. 282 // * If the UTF-8 flag is cleared, several readers simply interpret the 283 // name and comment fields as whatever the local system encoding is. 284 // 285 // In order to avoid breaking readers without UTF-8 support, 286 // we avoid setting the UTF-8 flag if the strings are CP-437 compatible. 287 // However, if the strings require multibyte UTF-8 encoding and is a 288 // valid UTF-8 string, then we set the UTF-8 bit. 289 // 290 // For the case, where the user explicitly wants to specify the encoding 291 // as UTF-8, they will need to set the flag bit themselves. 292 utf8Valid1, utf8Require1 := detectUTF8(fh.Name) 293 utf8Valid2, utf8Require2 := detectUTF8(fh.Comment) 294 switch { 295 case fh.NonUTF8: 296 fh.Flags &^= 0x800 297 case (utf8Require1 || utf8Require2) && (utf8Valid1 && utf8Valid2): 298 fh.Flags |= 0x800 299 } 300 301 fh.CreatorVersion = fh.CreatorVersion&0xff00 | zipVersion20 // preserve compatibility byte 302 fh.ReaderVersion = zipVersion20 303 304 // If Modified is set, this takes precedence over MS-DOS timestamp fields. 305 if !fh.Modified.IsZero() { 306 // Contrary to the FileHeader.SetModTime method, we intentionally 307 // do not convert to UTC, because we assume the user intends to encode 308 // the date using the specified timezone. A user may want this control 309 // because many legacy ZIP readers interpret the timestamp according 310 // to the local timezone. 311 // 312 // The timezone is only non-UTC if a user directly sets the Modified 313 // field directly themselves. All other approaches sets UTC. 314 fh.ModifiedDate, fh.ModifiedTime = timeToMsDosTime(fh.Modified) 315 316 // Use "extended timestamp" format since this is what Info-ZIP uses. 317 // Nearly every major ZIP implementation uses a different format, 318 // but at least most seem to be able to understand the other formats. 319 // 320 // This format happens to be identical for both local and central header 321 // if modification time is the only timestamp being encoded. 322 var mbuf [9]byte // 2*SizeOf(uint16) + SizeOf(uint8) + SizeOf(uint32) 323 mt := uint32(fh.Modified.Unix()) 324 eb := writeBuf(mbuf[:]) 325 eb.uint16(extTimeExtraID) 326 eb.uint16(5) // Size: SizeOf(uint8) + SizeOf(uint32) 327 eb.uint8(1) // Flags: ModTime 328 eb.uint32(mt) // ModTime 329 fh.Extra = append(fh.Extra, mbuf[:]...) 330 } 331 332 var ( 333 ow io.Writer 334 fw *fileWriter 335 ) 336 h := &header{ 337 FileHeader: fh, 338 offset: uint64(w.cw.count), 339 } 340 341 if strings.HasSuffix(fh.Name, "/") { 342 // Set the compression method to Store to ensure data length is truly zero, 343 // which the writeHeader method always encodes for the size fields. 344 // This is necessary as most compression formats have non-zero lengths 345 // even when compressing an empty string. 346 fh.Method = Store 347 fh.Flags &^= 0x8 // we will not write a data descriptor 348 349 // Explicitly clear sizes as they have no meaning for directories. 350 fh.CompressedSize = 0 351 fh.CompressedSize64 = 0 352 fh.UncompressedSize = 0 353 fh.UncompressedSize64 = 0 354 355 ow = dirWriter{} 356 } else { 357 fh.Flags |= 0x8 // we will write a data descriptor 358 359 fw = &fileWriter{ 360 zipw: w.cw, 361 compCount: &countWriter{w: w.cw}, 362 crc32: crc32.NewIEEE(), 363 } 364 comp := w.compressor(fh.Method) 365 if comp == nil { 366 return nil, ErrAlgorithm 367 } 368 var err error 369 fw.comp, err = comp(fw.compCount) 370 if err != nil { 371 return nil, err 372 } 373 fw.rawCount = &countWriter{w: fw.comp} 374 fw.header = h 375 ow = fw 376 } 377 w.dir = append(w.dir, h) 378 if err := writeHeader(w.cw, h); err != nil { 379 return nil, err 380 } 381 // If we're creating a directory, fw is nil. 382 w.last = fw 383 return ow, nil 384 } 385 386 func writeHeader(w io.Writer, h *header) error { 387 const maxUint16 = 1<<16 - 1 388 if len(h.Name) > maxUint16 { 389 return errLongName 390 } 391 if len(h.Extra) > maxUint16 { 392 return errLongExtra 393 } 394 395 var buf [fileHeaderLen]byte 396 b := writeBuf(buf[:]) 397 b.uint32(uint32(fileHeaderSignature)) 398 b.uint16(h.ReaderVersion) 399 b.uint16(h.Flags) 400 b.uint16(h.Method) 401 b.uint16(h.ModifiedTime) 402 b.uint16(h.ModifiedDate) 403 // In raw mode (caller does the compression), the values are either 404 // written here or in the trailing data descriptor based on the header 405 // flags. 406 if h.raw && !h.hasDataDescriptor() { 407 b.uint32(h.CRC32) 408 b.uint32(uint32(min64(h.CompressedSize64, uint32max))) 409 b.uint32(uint32(min64(h.UncompressedSize64, uint32max))) 410 } else { 411 // When this package handle the compression, these values are 412 // always written to the trailing data descriptor. 413 b.uint32(0) // crc32 414 b.uint32(0) // compressed size 415 b.uint32(0) // uncompressed size 416 } 417 b.uint16(uint16(len(h.Name))) 418 b.uint16(uint16(len(h.Extra))) 419 if _, err := w.Write(buf[:]); err != nil { 420 return err 421 } 422 if _, err := io.WriteString(w, h.Name); err != nil { 423 return err 424 } 425 _, err := w.Write(h.Extra) 426 return err 427 } 428 429 func min64(x, y uint64) uint64 { 430 if x < y { 431 return x 432 } 433 return y 434 } 435 436 // CreateRaw adds a file to the zip archive using the provided FileHeader and 437 // returns a Writer to which the file contents should be written. The file's 438 // contents must be written to the io.Writer before the next call to Create, 439 // CreateHeader, CreateRaw, or Close. 440 // 441 // In contrast to CreateHeader, the bytes passed to Writer are not compressed. 442 func (w *Writer) CreateRaw(fh *FileHeader) (io.Writer, error) { 443 if err := w.prepare(fh); err != nil { 444 return nil, err 445 } 446 447 fh.CompressedSize = uint32(min64(fh.CompressedSize64, uint32max)) 448 fh.UncompressedSize = uint32(min64(fh.UncompressedSize64, uint32max)) 449 450 h := &header{ 451 FileHeader: fh, 452 offset: uint64(w.cw.count), 453 raw: true, 454 } 455 w.dir = append(w.dir, h) 456 if err := writeHeader(w.cw, h); err != nil { 457 return nil, err 458 } 459 460 if strings.HasSuffix(fh.Name, "/") { 461 w.last = nil 462 return dirWriter{}, nil 463 } 464 465 fw := &fileWriter{ 466 header: h, 467 zipw: w.cw, 468 } 469 w.last = fw 470 return fw, nil 471 } 472 473 // Copy copies the file f (obtained from a Reader) into w. It copies the raw 474 // form directly bypassing decompression, compression, and validation. 475 func (w *Writer) Copy(f *File) error { 476 r, err := f.OpenRaw() 477 if err != nil { 478 return err 479 } 480 fw, err := w.CreateRaw(&f.FileHeader) 481 if err != nil { 482 return err 483 } 484 _, err = io.Copy(fw, r) 485 return err 486 } 487 488 // RegisterCompressor registers or overrides a custom compressor for a specific 489 // method ID. If a compressor for a given method is not found, Writer will 490 // default to looking up the compressor at the package level. 491 func (w *Writer) RegisterCompressor(method uint16, comp Compressor) { 492 if w.compressors == nil { 493 w.compressors = make(map[uint16]Compressor) 494 } 495 w.compressors[method] = comp 496 } 497 498 func (w *Writer) compressor(method uint16) Compressor { 499 comp := w.compressors[method] 500 if comp == nil { 501 comp = compressor(method) 502 } 503 return comp 504 } 505 506 type dirWriter struct{} 507 508 func (dirWriter) Write(b []byte) (int, error) { 509 if len(b) == 0 { 510 return 0, nil 511 } 512 return 0, errors.New("zip: write to directory") 513 } 514 515 type fileWriter struct { 516 *header 517 zipw io.Writer 518 rawCount *countWriter 519 comp io.WriteCloser 520 compCount *countWriter 521 crc32 hash.Hash32 522 closed bool 523 } 524 525 func (w *fileWriter) Write(p []byte) (int, error) { 526 if w.closed { 527 return 0, errors.New("zip: write to closed file") 528 } 529 if w.raw { 530 return w.zipw.Write(p) 531 } 532 w.crc32.Write(p) 533 return w.rawCount.Write(p) 534 } 535 536 func (w *fileWriter) close() error { 537 if w.closed { 538 return errors.New("zip: file closed twice") 539 } 540 w.closed = true 541 if w.raw { 542 return w.writeDataDescriptor() 543 } 544 if err := w.comp.Close(); err != nil { 545 return err 546 } 547 548 // update FileHeader 549 fh := w.header.FileHeader 550 fh.CRC32 = w.crc32.Sum32() 551 fh.CompressedSize64 = uint64(w.compCount.count) 552 fh.UncompressedSize64 = uint64(w.rawCount.count) 553 554 if fh.isZip64() { 555 fh.CompressedSize = uint32max 556 fh.UncompressedSize = uint32max 557 fh.ReaderVersion = zipVersion45 // requires 4.5 - File uses ZIP64 format extensions 558 } else { 559 fh.CompressedSize = uint32(fh.CompressedSize64) 560 fh.UncompressedSize = uint32(fh.UncompressedSize64) 561 } 562 563 return w.writeDataDescriptor() 564 } 565 566 func (w *fileWriter) writeDataDescriptor() error { 567 if !w.hasDataDescriptor() { 568 return nil 569 } 570 // Write data descriptor. This is more complicated than one would 571 // think, see e.g. comments in zipfile.c:putextended() and 572 // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=7073588. 573 // The approach here is to write 8 byte sizes if needed without 574 // adding a zip64 extra in the local header (too late anyway). 575 var buf []byte 576 if w.isZip64() { 577 buf = make([]byte, dataDescriptor64Len) 578 } else { 579 buf = make([]byte, dataDescriptorLen) 580 } 581 b := writeBuf(buf) 582 b.uint32(dataDescriptorSignature) // de-facto standard, required by OS X 583 b.uint32(w.CRC32) 584 if w.isZip64() { 585 b.uint64(w.CompressedSize64) 586 b.uint64(w.UncompressedSize64) 587 } else { 588 b.uint32(w.CompressedSize) 589 b.uint32(w.UncompressedSize) 590 } 591 _, err := w.zipw.Write(buf) 592 return err 593 } 594 595 type countWriter struct { 596 w io.Writer 597 count int64 598 } 599 600 func (w *countWriter) Write(p []byte) (int, error) { 601 n, err := w.w.Write(p) 602 w.count += int64(n) 603 return n, err 604 } 605 606 type nopCloser struct { 607 io.Writer 608 } 609 610 func (w nopCloser) Close() error { 611 return nil 612 } 613 614 type writeBuf []byte 615 616 func (b *writeBuf) uint8(v uint8) { 617 (*b)[0] = v 618 *b = (*b)[1:] 619 } 620 621 func (b *writeBuf) uint16(v uint16) { 622 binary.LittleEndian.PutUint16(*b, v) 623 *b = (*b)[2:] 624 } 625 626 func (b *writeBuf) uint32(v uint32) { 627 binary.LittleEndian.PutUint32(*b, v) 628 *b = (*b)[4:] 629 } 630 631 func (b *writeBuf) uint64(v uint64) { 632 binary.LittleEndian.PutUint64(*b, v) 633 *b = (*b)[8:] 634 }