github.com/mattn/go@v0.0.0-20171011075504-07f7db3ea99f/src/archive/tar/writer.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package tar 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "path" 12 "sort" 13 "strconv" 14 "strings" 15 "time" 16 ) 17 18 // Writer provides sequential writing of a tar archive. 19 // Write.WriteHeader begins a new file with the provided Header, 20 // and then Writer can be treated as an io.Writer to supply that file's data. 21 type Writer struct { 22 w io.Writer 23 pad int64 // Amount of padding to write after current file entry 24 curr fileWriter // Writer for current file entry 25 hdr Header // Shallow copy of Header that is safe for mutations 26 blk block // Buffer to use as temporary local storage 27 28 // err is a persistent error. 29 // It is only the responsibility of every exported method of Writer to 30 // ensure that this error is sticky. 31 err error 32 } 33 34 // NewWriter creates a new Writer writing to w. 35 func NewWriter(w io.Writer) *Writer { 36 return &Writer{w: w, curr: ®FileWriter{w, 0}} 37 } 38 39 type fileWriter interface { 40 io.Writer 41 fileState 42 43 ReadFrom(io.Reader) (int64, error) 44 } 45 46 // Flush finishes writing the current file's block padding. 47 // The current file must be fully written before Flush can be called. 48 // 49 // Deprecated: This is unnecessary as the next call to WriteHeader or Close 50 // will implicitly flush out the file's padding. 51 func (tw *Writer) Flush() error { 52 if tw.err != nil { 53 return tw.err 54 } 55 if nb := tw.curr.LogicalRemaining(); nb > 0 { 56 return fmt.Errorf("tar: missed writing %d bytes", nb) 57 } 58 if _, tw.err = tw.w.Write(zeroBlock[:tw.pad]); tw.err != nil { 59 return tw.err 60 } 61 tw.pad = 0 62 return nil 63 } 64 65 // WriteHeader writes hdr and prepares to accept the file's contents. 66 // The Header.Size determines how many bytes can be written for the next file. 67 // If the current file is not fully written, then this returns an error. 68 // This implicitly flushes any padding necessary before writing the header. 69 func (tw *Writer) WriteHeader(hdr *Header) error { 70 if err := tw.Flush(); err != nil { 71 return err 72 } 73 tw.hdr = *hdr // Shallow copy of Header 74 75 // Round ModTime and ignore AccessTime and ChangeTime unless 76 // the format is explicitly chosen. 77 // This ensures nominal usage of WriteHeader (without specifying the format) 78 // does not always result in the PAX format being chosen, which 79 // causes a 1KiB increase to every header. 80 if tw.hdr.Format == FormatUnknown { 81 tw.hdr.ModTime = tw.hdr.ModTime.Round(time.Second) 82 tw.hdr.AccessTime = time.Time{} 83 tw.hdr.ChangeTime = time.Time{} 84 } 85 86 allowedFormats, paxHdrs, err := tw.hdr.allowedFormats() 87 switch { 88 case allowedFormats.has(FormatUSTAR): 89 tw.err = tw.writeUSTARHeader(&tw.hdr) 90 return tw.err 91 case allowedFormats.has(FormatPAX): 92 tw.err = tw.writePAXHeader(&tw.hdr, paxHdrs) 93 return tw.err 94 case allowedFormats.has(FormatGNU): 95 tw.err = tw.writeGNUHeader(&tw.hdr) 96 return tw.err 97 default: 98 return err // Non-fatal error 99 } 100 } 101 102 func (tw *Writer) writeUSTARHeader(hdr *Header) error { 103 // Check if we can use USTAR prefix/suffix splitting. 104 var namePrefix string 105 if prefix, suffix, ok := splitUSTARPath(hdr.Name); ok { 106 namePrefix, hdr.Name = prefix, suffix 107 } 108 109 // Pack the main header. 110 var f formatter 111 blk := tw.templateV7Plus(hdr, f.formatString, f.formatOctal) 112 f.formatString(blk.USTAR().Prefix(), namePrefix) 113 blk.SetFormat(FormatUSTAR) 114 if f.err != nil { 115 return f.err // Should never happen since header is validated 116 } 117 return tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag) 118 } 119 120 func (tw *Writer) writePAXHeader(hdr *Header, paxHdrs map[string]string) error { 121 realName, realSize := hdr.Name, hdr.Size 122 123 // Handle sparse files. 124 var spd sparseDatas 125 var spb []byte 126 if len(hdr.SparseHoles) > 0 { 127 sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map 128 sph = alignSparseEntries(sph, hdr.Size) 129 spd = invertSparseEntries(sph, hdr.Size) 130 131 // Format the sparse map. 132 hdr.Size = 0 // Replace with encoded size 133 spb = append(strconv.AppendInt(spb, int64(len(spd)), 10), '\n') 134 for _, s := range spd { 135 hdr.Size += s.Length 136 spb = append(strconv.AppendInt(spb, s.Offset, 10), '\n') 137 spb = append(strconv.AppendInt(spb, s.Length, 10), '\n') 138 } 139 pad := blockPadding(int64(len(spb))) 140 spb = append(spb, zeroBlock[:pad]...) 141 hdr.Size += int64(len(spb)) // Accounts for encoded sparse map 142 143 // Add and modify appropriate PAX records. 144 dir, file := path.Split(realName) 145 hdr.Name = path.Join(dir, "GNUSparseFile.0", file) 146 paxHdrs[paxGNUSparseMajor] = "1" 147 paxHdrs[paxGNUSparseMinor] = "0" 148 paxHdrs[paxGNUSparseName] = realName 149 paxHdrs[paxGNUSparseRealSize] = strconv.FormatInt(realSize, 10) 150 paxHdrs[paxSize] = strconv.FormatInt(hdr.Size, 10) 151 delete(paxHdrs, paxPath) // Recorded by paxGNUSparseName 152 } 153 154 // Write PAX records to the output. 155 isGlobal := hdr.Typeflag == TypeXGlobalHeader 156 if len(paxHdrs) > 0 || isGlobal { 157 // Sort keys for deterministic ordering. 158 var keys []string 159 for k := range paxHdrs { 160 keys = append(keys, k) 161 } 162 sort.Strings(keys) 163 164 // Write each record to a buffer. 165 var buf bytes.Buffer 166 for _, k := range keys { 167 rec, err := formatPAXRecord(k, paxHdrs[k]) 168 if err != nil { 169 return err 170 } 171 buf.WriteString(rec) 172 } 173 174 // Write the extended header file. 175 var name string 176 var flag byte 177 if isGlobal { 178 name = "GlobalHead.0.0" 179 flag = TypeXGlobalHeader 180 } else { 181 dir, file := path.Split(realName) 182 name = path.Join(dir, "PaxHeaders.0", file) 183 flag = TypeXHeader 184 } 185 data := buf.String() 186 if err := tw.writeRawFile(name, data, flag, FormatPAX); err != nil || isGlobal { 187 return err // Global headers return here 188 } 189 } 190 191 // Pack the main header. 192 var f formatter // Ignore errors since they are expected 193 fmtStr := func(b []byte, s string) { f.formatString(b, toASCII(s)) } 194 blk := tw.templateV7Plus(hdr, fmtStr, f.formatOctal) 195 blk.SetFormat(FormatPAX) 196 if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { 197 return err 198 } 199 200 // Write the sparse map and setup the sparse writer if necessary. 201 if len(spd) > 0 { 202 // Use tw.curr since the sparse map is accounted for in hdr.Size. 203 if _, err := tw.curr.Write(spb); err != nil { 204 return err 205 } 206 tw.curr = &sparseFileWriter{tw.curr, spd, 0} 207 } 208 return nil 209 } 210 211 func (tw *Writer) writeGNUHeader(hdr *Header) error { 212 // Use long-link files if Name or Linkname exceeds the field size. 213 const longName = "././@LongLink" 214 if len(hdr.Name) > nameSize { 215 data := hdr.Name + "\x00" 216 if err := tw.writeRawFile(longName, data, TypeGNULongName, FormatGNU); err != nil { 217 return err 218 } 219 } 220 if len(hdr.Linkname) > nameSize { 221 data := hdr.Linkname + "\x00" 222 if err := tw.writeRawFile(longName, data, TypeGNULongLink, FormatGNU); err != nil { 223 return err 224 } 225 } 226 227 // Pack the main header. 228 var f formatter // Ignore errors since they are expected 229 var spd sparseDatas 230 var spb []byte 231 blk := tw.templateV7Plus(hdr, f.formatString, f.formatNumeric) 232 if !hdr.AccessTime.IsZero() { 233 f.formatNumeric(blk.GNU().AccessTime(), hdr.AccessTime.Unix()) 234 } 235 if !hdr.ChangeTime.IsZero() { 236 f.formatNumeric(blk.GNU().ChangeTime(), hdr.ChangeTime.Unix()) 237 } 238 if hdr.Typeflag == TypeGNUSparse { 239 sph := append([]SparseEntry{}, hdr.SparseHoles...) // Copy sparse map 240 sph = alignSparseEntries(sph, hdr.Size) 241 spd = invertSparseEntries(sph, hdr.Size) 242 243 // Format the sparse map. 244 formatSPD := func(sp sparseDatas, sa sparseArray) sparseDatas { 245 for i := 0; len(sp) > 0 && i < sa.MaxEntries(); i++ { 246 f.formatNumeric(sa.Entry(i).Offset(), sp[0].Offset) 247 f.formatNumeric(sa.Entry(i).Length(), sp[0].Length) 248 sp = sp[1:] 249 } 250 if len(sp) > 0 { 251 sa.IsExtended()[0] = 1 252 } 253 return sp 254 } 255 sp2 := formatSPD(spd, blk.GNU().Sparse()) 256 for len(sp2) > 0 { 257 var spHdr block 258 sp2 = formatSPD(sp2, spHdr.Sparse()) 259 spb = append(spb, spHdr[:]...) 260 } 261 262 // Update size fields in the header block. 263 realSize := hdr.Size 264 hdr.Size = 0 // Encoded size; does not account for encoded sparse map 265 for _, s := range spd { 266 hdr.Size += s.Length 267 } 268 copy(blk.V7().Size(), zeroBlock[:]) // Reset field 269 f.formatNumeric(blk.V7().Size(), hdr.Size) 270 f.formatNumeric(blk.GNU().RealSize(), realSize) 271 } 272 blk.SetFormat(FormatGNU) 273 if err := tw.writeRawHeader(blk, hdr.Size, hdr.Typeflag); err != nil { 274 return err 275 } 276 277 // Write the extended sparse map and setup the sparse writer if necessary. 278 if len(spd) > 0 { 279 // Use tw.w since the sparse map is not accounted for in hdr.Size. 280 if _, err := tw.w.Write(spb); err != nil { 281 return err 282 } 283 tw.curr = &sparseFileWriter{tw.curr, spd, 0} 284 } 285 return nil 286 } 287 288 type ( 289 stringFormatter func([]byte, string) 290 numberFormatter func([]byte, int64) 291 ) 292 293 // templateV7Plus fills out the V7 fields of a block using values from hdr. 294 // It also fills out fields (uname, gname, devmajor, devminor) that are 295 // shared in the USTAR, PAX, and GNU formats using the provided formatters. 296 // 297 // The block returned is only valid until the next call to 298 // templateV7Plus or writeRawFile. 299 func (tw *Writer) templateV7Plus(hdr *Header, fmtStr stringFormatter, fmtNum numberFormatter) *block { 300 tw.blk.Reset() 301 302 modTime := hdr.ModTime 303 if modTime.IsZero() { 304 modTime = time.Unix(0, 0) 305 } 306 307 v7 := tw.blk.V7() 308 v7.TypeFlag()[0] = hdr.Typeflag 309 fmtStr(v7.Name(), hdr.Name) 310 fmtStr(v7.LinkName(), hdr.Linkname) 311 fmtNum(v7.Mode(), hdr.Mode) 312 fmtNum(v7.UID(), int64(hdr.Uid)) 313 fmtNum(v7.GID(), int64(hdr.Gid)) 314 fmtNum(v7.Size(), hdr.Size) 315 fmtNum(v7.ModTime(), modTime.Unix()) 316 317 ustar := tw.blk.USTAR() 318 fmtStr(ustar.UserName(), hdr.Uname) 319 fmtStr(ustar.GroupName(), hdr.Gname) 320 fmtNum(ustar.DevMajor(), hdr.Devmajor) 321 fmtNum(ustar.DevMinor(), hdr.Devminor) 322 323 return &tw.blk 324 } 325 326 // writeRawFile writes a minimal file with the given name and flag type. 327 // It uses format to encode the header format and will write data as the body. 328 // It uses default values for all of the other fields (as BSD and GNU tar does). 329 func (tw *Writer) writeRawFile(name, data string, flag byte, format Format) error { 330 tw.blk.Reset() 331 332 // Best effort for the filename. 333 name = toASCII(name) 334 if len(name) > nameSize { 335 name = name[:nameSize] 336 } 337 name = strings.TrimRight(name, "/") 338 339 var f formatter 340 v7 := tw.blk.V7() 341 v7.TypeFlag()[0] = flag 342 f.formatString(v7.Name(), name) 343 f.formatOctal(v7.Mode(), 0) 344 f.formatOctal(v7.UID(), 0) 345 f.formatOctal(v7.GID(), 0) 346 f.formatOctal(v7.Size(), int64(len(data))) // Must be < 8GiB 347 f.formatOctal(v7.ModTime(), 0) 348 tw.blk.SetFormat(format) 349 if f.err != nil { 350 return f.err // Only occurs if size condition is violated 351 } 352 353 // Write the header and data. 354 if err := tw.writeRawHeader(&tw.blk, int64(len(data)), flag); err != nil { 355 return err 356 } 357 _, err := io.WriteString(tw, data) 358 return err 359 } 360 361 // writeRawHeader writes the value of blk, regardless of its value. 362 // It sets up the Writer such that it can accept a file of the given size. 363 // If the flag is a special header-only flag, then the size is treated as zero. 364 func (tw *Writer) writeRawHeader(blk *block, size int64, flag byte) error { 365 if err := tw.Flush(); err != nil { 366 return err 367 } 368 if _, err := tw.w.Write(blk[:]); err != nil { 369 return err 370 } 371 if isHeaderOnlyType(flag) { 372 size = 0 373 } 374 tw.curr = ®FileWriter{tw.w, size} 375 tw.pad = blockPadding(size) 376 return nil 377 } 378 379 // splitUSTARPath splits a path according to USTAR prefix and suffix rules. 380 // If the path is not splittable, then it will return ("", "", false). 381 func splitUSTARPath(name string) (prefix, suffix string, ok bool) { 382 length := len(name) 383 if length <= nameSize || !isASCII(name) { 384 return "", "", false 385 } else if length > prefixSize+1 { 386 length = prefixSize + 1 387 } else if name[length-1] == '/' { 388 length-- 389 } 390 391 i := strings.LastIndex(name[:length], "/") 392 nlen := len(name) - i - 1 // nlen is length of suffix 393 plen := i // plen is length of prefix 394 if i <= 0 || nlen > nameSize || nlen == 0 || plen > prefixSize { 395 return "", "", false 396 } 397 return name[:i], name[i+1:], true 398 } 399 400 // Write writes to the current file in the tar archive. 401 // Write returns the error ErrWriteTooLong if more than 402 // Header.Size bytes are written after WriteHeader. 403 // 404 // If the current file is sparse, then the regions marked as a hole 405 // must be written as NUL-bytes. 406 // 407 // Calling Write on special types like TypeLink, TypeSymlink, TypeChar, 408 // TypeBlock, TypeDir, and TypeFifo returns (0, ErrWriteTooLong) regardless 409 // of what the Header.Size claims. 410 func (tw *Writer) Write(b []byte) (int, error) { 411 if tw.err != nil { 412 return 0, tw.err 413 } 414 n, err := tw.curr.Write(b) 415 if err != nil && err != ErrWriteTooLong { 416 tw.err = err 417 } 418 return n, err 419 } 420 421 // ReadFrom populates the content of the current file by reading from r. 422 // The bytes read must match the number of remaining bytes in the current file. 423 // 424 // If the current file is sparse and r is an io.ReadSeeker, 425 // then ReadFrom uses Seek to skip past holes defined in Header.SparseHoles, 426 // assuming that skipped regions are all NULs. 427 // This always reads the last byte to ensure r is the right size. 428 func (tw *Writer) ReadFrom(r io.Reader) (int64, error) { 429 if tw.err != nil { 430 return 0, tw.err 431 } 432 n, err := tw.curr.ReadFrom(r) 433 if err != nil && err != ErrWriteTooLong { 434 tw.err = err 435 } 436 return n, err 437 } 438 439 // Close closes the tar archive by flushing the padding, and writing the footer. 440 // If the current file (from a prior call to WriteHeader) is not fully written, 441 // then this returns an error. 442 func (tw *Writer) Close() error { 443 if tw.err == ErrWriteAfterClose { 444 return nil 445 } 446 if tw.err != nil { 447 return tw.err 448 } 449 450 // Trailer: two zero blocks. 451 err := tw.Flush() 452 for i := 0; i < 2 && err == nil; i++ { 453 _, err = tw.w.Write(zeroBlock[:]) 454 } 455 456 // Ensure all future actions are invalid. 457 tw.err = ErrWriteAfterClose 458 return err // Report IO errors 459 } 460 461 // regFileWriter is a fileWriter for writing data to a regular file entry. 462 type regFileWriter struct { 463 w io.Writer // Underlying Writer 464 nb int64 // Number of remaining bytes to write 465 } 466 467 func (fw *regFileWriter) Write(b []byte) (n int, err error) { 468 overwrite := int64(len(b)) > fw.nb 469 if overwrite { 470 b = b[:fw.nb] 471 } 472 if len(b) > 0 { 473 n, err = fw.w.Write(b) 474 fw.nb -= int64(n) 475 } 476 switch { 477 case err != nil: 478 return n, err 479 case overwrite: 480 return n, ErrWriteTooLong 481 default: 482 return n, nil 483 } 484 } 485 486 func (fw *regFileWriter) ReadFrom(r io.Reader) (int64, error) { 487 return io.Copy(struct{ io.Writer }{fw}, r) 488 } 489 490 func (fw regFileWriter) LogicalRemaining() int64 { 491 return fw.nb 492 } 493 func (fw regFileWriter) PhysicalRemaining() int64 { 494 return fw.nb 495 } 496 497 // sparseFileWriter is a fileWriter for writing data to a sparse file entry. 498 type sparseFileWriter struct { 499 fw fileWriter // Underlying fileWriter 500 sp sparseDatas // Normalized list of data fragments 501 pos int64 // Current position in sparse file 502 } 503 504 func (sw *sparseFileWriter) Write(b []byte) (n int, err error) { 505 overwrite := int64(len(b)) > sw.LogicalRemaining() 506 if overwrite { 507 b = b[:sw.LogicalRemaining()] 508 } 509 510 b0 := b 511 endPos := sw.pos + int64(len(b)) 512 for endPos > sw.pos && err == nil { 513 var nf int // Bytes written in fragment 514 dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() 515 if sw.pos < dataStart { // In a hole fragment 516 bf := b[:min(int64(len(b)), dataStart-sw.pos)] 517 nf, err = zeroWriter{}.Write(bf) 518 } else { // In a data fragment 519 bf := b[:min(int64(len(b)), dataEnd-sw.pos)] 520 nf, err = sw.fw.Write(bf) 521 } 522 b = b[nf:] 523 sw.pos += int64(nf) 524 if sw.pos >= dataEnd && len(sw.sp) > 1 { 525 sw.sp = sw.sp[1:] // Ensure last fragment always remains 526 } 527 } 528 529 n = len(b0) - len(b) 530 switch { 531 case err == ErrWriteTooLong: 532 return n, errMissData // Not possible; implies bug in validation logic 533 case err != nil: 534 return n, err 535 case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: 536 return n, errUnrefData // Not possible; implies bug in validation logic 537 case overwrite: 538 return n, ErrWriteTooLong 539 default: 540 return n, nil 541 } 542 } 543 544 func (sw *sparseFileWriter) ReadFrom(r io.Reader) (n int64, err error) { 545 rs, ok := r.(io.ReadSeeker) 546 if ok { 547 if _, err := rs.Seek(0, io.SeekCurrent); err != nil { 548 ok = false // Not all io.Seeker can really seek 549 } 550 } 551 if !ok { 552 return io.Copy(struct{ io.Writer }{sw}, r) 553 } 554 555 var readLastByte bool 556 pos0 := sw.pos 557 for sw.LogicalRemaining() > 0 && !readLastByte && err == nil { 558 var nf int64 // Size of fragment 559 dataStart, dataEnd := sw.sp[0].Offset, sw.sp[0].endOffset() 560 if sw.pos < dataStart { // In a hole fragment 561 nf = dataStart - sw.pos 562 if sw.PhysicalRemaining() == 0 { 563 readLastByte = true 564 nf-- 565 } 566 _, err = rs.Seek(nf, io.SeekCurrent) 567 } else { // In a data fragment 568 nf = dataEnd - sw.pos 569 nf, err = io.CopyN(sw.fw, rs, nf) 570 } 571 sw.pos += nf 572 if sw.pos >= dataEnd && len(sw.sp) > 1 { 573 sw.sp = sw.sp[1:] // Ensure last fragment always remains 574 } 575 } 576 577 // If the last fragment is a hole, then seek to 1-byte before EOF, and 578 // read a single byte to ensure the file is the right size. 579 if readLastByte && err == nil { 580 _, err = mustReadFull(rs, []byte{0}) 581 sw.pos++ 582 } 583 584 n = sw.pos - pos0 585 switch { 586 case err == io.EOF: 587 return n, io.ErrUnexpectedEOF 588 case err == ErrWriteTooLong: 589 return n, errMissData // Not possible; implies bug in validation logic 590 case err != nil: 591 return n, err 592 case sw.LogicalRemaining() == 0 && sw.PhysicalRemaining() > 0: 593 return n, errUnrefData // Not possible; implies bug in validation logic 594 default: 595 return n, ensureEOF(rs) 596 } 597 } 598 599 func (sw sparseFileWriter) LogicalRemaining() int64 { 600 return sw.sp[len(sw.sp)-1].endOffset() - sw.pos 601 } 602 func (sw sparseFileWriter) PhysicalRemaining() int64 { 603 return sw.fw.PhysicalRemaining() 604 } 605 606 // zeroWriter may only be written with NULs, otherwise it returns errWriteHole. 607 type zeroWriter struct{} 608 609 func (zeroWriter) Write(b []byte) (int, error) { 610 for i, c := range b { 611 if c != 0 { 612 return i, errWriteHole 613 } 614 } 615 return len(b), nil 616 } 617 618 // ensureEOF checks whether r is at EOF, reporting ErrWriteTooLong if not so. 619 func ensureEOF(r io.Reader) error { 620 n, err := tryReadFull(r, []byte{0}) 621 switch { 622 case n > 0: 623 return ErrWriteTooLong 624 case err == io.EOF: 625 return nil 626 default: 627 return err 628 } 629 }