github.com/tiagovtristao/plz@v13.4.0+incompatible/tools/jarcat/zip/writer.go (about) 1 // Package zip implements functions for jarcat that manipulate .zip files. 2 package zip 3 4 import ( 5 "bytes" 6 "encoding/binary" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "os" 11 "path" 12 "path/filepath" 13 "sort" 14 "strings" 15 "time" 16 17 "gopkg.in/op/go-logging.v1" 18 19 "github.com/thought-machine/please/src/fs" 20 "third_party/go/zip" 21 ) 22 23 var log = logging.MustGetLogger("zip") 24 var modTime = time.Date(2001, time.January, 1, 0, 0, 0, 0, time.UTC) 25 26 // fileHeaderLen is the length of a file header in a zipfile. 27 // We need to know this to adjust alignment. 28 const fileHeaderLen = 30 29 30 // A File represents an output zipfile. 31 type File struct { 32 f io.WriteCloser 33 w *zip.Writer 34 filename string 35 input string 36 // Include and Exclude are prefixes of filenames to include or exclude from the zipfile. 37 Include, Exclude []string 38 // Strict controls whether we deny duplicate files or not. 39 // Zipfiles can readily contain duplicates, if this is true we reject them unless they are identical. 40 // If false we allow duplicates and leave it to someone else to handle. 41 Strict bool 42 // RenameDirs is a map of directories to rename, from the old name to the new one. 43 RenameDirs map[string]string 44 // StripPrefix is a prefix that is stripped off any files added with AddFiles. 45 StripPrefix string 46 // Suffix is the suffix of files that we include while scanning. 47 Suffix []string 48 // ExcludeSuffix is a list of suffixes that are excluded from the file scan. 49 ExcludeSuffix []string 50 // StoreSuffix is a list of file suffixes that will be stored instead of deflated. 51 StoreSuffix []string 52 // IncludeOther will make the file scan include other files that are not part of a zip file. 53 IncludeOther bool 54 // AddInitPy will make the writer add __init__.py files to all directories that don't already have one on close. 55 AddInitPy bool 56 // StripPy will strip .py files when there is a corresponding .pyc 57 StripPy bool 58 // DirEntries makes the writer add empty directory entries. 59 DirEntries bool 60 // Align aligns entries to a multiple of this many bytes. 61 Align int 62 // Prefix stores all files with this prefix. 63 Prefix string 64 // files tracks the files that we've written so far. 65 files map[string]fileRecord 66 // concatenatedFiles tracks the files that are built up as we go. 67 concatenatedFiles map[string][]byte 68 } 69 70 // A fileRecord records some information about a file that we use to check if they're exact duplicates. 71 type fileRecord struct { 72 ZipFile string 73 CompressedSize64 uint64 74 UncompressedSize64 uint64 75 CRC32 uint32 76 } 77 78 // NewFile constructs and returns a new File. 79 func NewFile(output string, strict bool) *File { 80 f, err := os.Create(output) 81 if err != nil { 82 log.Fatalf("Failed to open output file: %s", err) 83 } 84 return &File{ 85 f: f, 86 w: zip.NewWriter(f), 87 filename: output, 88 Strict: strict, 89 files: map[string]fileRecord{}, 90 concatenatedFiles: map[string][]byte{}, 91 } 92 } 93 94 // Close must be called before the File is destroyed. 95 func (f *File) Close() { 96 f.handleConcatenatedFiles() 97 if f.AddInitPy { 98 if err := f.AddInitPyFiles(); err != nil { 99 log.Fatalf("%s", err) 100 } 101 } 102 if err := f.w.Close(); err != nil { 103 log.Fatalf("Failed to finalise zip file: %s", err) 104 } 105 if err := f.f.Close(); err != nil { 106 log.Fatalf("Failed to close file: %s", err) 107 } 108 } 109 110 // AddZipFile copies the contents of a zip file into the new zipfile. 111 func (f *File) AddZipFile(filepath string) error { 112 r, err := zip.OpenReader(filepath) 113 if err != nil { 114 return err 115 } 116 defer r.Close() 117 118 // Reopen file to get a directly readable version without decompression. 119 r2, err := os.Open(filepath) 120 if err != nil { 121 return err 122 } 123 defer r2.Close() 124 125 // Need to know all the filenames upfront if we're stripping sources. 126 filelist := map[string]struct{}{} 127 if f.StripPy { 128 for _, rf := range r.File { 129 filelist[rf.Name] = struct{}{} 130 } 131 } 132 133 for _, rf := range r.File { 134 log.Debug("Found file %s (from %s)", rf.Name, filepath) 135 if !f.shouldInclude(rf.Name) { 136 continue 137 } 138 // This directory is very awkward. We need to merge the contents by concatenating them, 139 // we can't replace them or leave them out. 140 if strings.HasPrefix(rf.Name, "META-INF/services/") || 141 strings.HasPrefix(rf.Name, "META-INF/spring") || 142 rf.Name == "META-INF/please_sourcemap" || 143 // akka libs each have their own reference.conf. if you are using 144 // akka as a lib-only (e.g akka-remote), those need to be merged together 145 rf.Name == "reference.conf" { 146 if err := f.concatenateFile(rf); err != nil { 147 return err 148 } 149 continue 150 } 151 hasTrailingSlash := strings.HasSuffix(rf.Name, "/") 152 isDir := hasTrailingSlash || rf.FileInfo().IsDir() 153 if isDir && !hasTrailingSlash { 154 rf.Name = rf.Name + "/" 155 } 156 if existing, present := f.files[rf.Name]; present { 157 // Allow duplicates of directories. Seemingly the best way to identify them is that 158 // they end in a trailing slash. 159 if isDir { 160 continue 161 } 162 // Allow skipping existing files that are exactly the same as the added ones. 163 // It's unnecessarily awkward to insist on not ever doubling up on a dependency. 164 // TODO(pebers): Bit of a hack ignoring it when CRC is 0, would be better to add 165 // the correct CRC when added through WriteFile. 166 if existing.CRC32 == rf.CRC32 || existing.CRC32 == 0 { 167 log.Info("Skipping %s / %s: already added (from %s)", filepath, rf.Name, existing.ZipFile) 168 continue 169 } 170 if f.Strict { 171 log.Error("Duplicate file %s (from %s, already added from %s); crc %d / %d", rf.Name, filepath, existing.ZipFile, rf.CRC32, existing.CRC32) 172 return fmt.Errorf("File %s already added to destination zip file (from %s)", rf.Name, existing.ZipFile) 173 } 174 continue 175 } 176 for before, after := range f.RenameDirs { 177 if strings.HasPrefix(rf.Name, before) { 178 rf.Name = path.Join(after, strings.TrimPrefix(rf.Name, before)) 179 if isDir { 180 rf.Name = rf.Name + "/" 181 } 182 break 183 } 184 } 185 if f.StripPrefix != "" { 186 rf.Name = strings.TrimPrefix(rf.Name, f.StripPrefix) 187 } 188 if f.Prefix != "" { 189 rf.Name = path.Join(f.Prefix, rf.Name) 190 } 191 if f.StripPy && strings.HasSuffix(rf.Name, ".py") { 192 pyc := rf.Name + "c" 193 if f.HasExistingFile(pyc) { 194 log.Debug("Skipping %s since %s exists", rf.Name, pyc) 195 continue 196 } else if _, present := filelist[pyc]; present { 197 log.Debug("Skipping %s since %s exists in this archive", rf.Name, pyc) 198 continue 199 } 200 } 201 // Java tools don't seem to like writing a data descriptor for stored items. 202 // Unsure if this is a limitation of the format or a problem of those tools. 203 rf.Flags = 0 204 f.addExistingFile(rf.Name, filepath, rf.CompressedSize64, rf.UncompressedSize64, rf.CRC32) 205 206 start, err := rf.DataOffset() 207 if err != nil { 208 return err 209 } 210 if _, err := r2.Seek(start, 0); err != nil { 211 return err 212 } 213 if err := f.addFile(&rf.FileHeader, r2, rf.CRC32); err != nil { 214 return err 215 } 216 } 217 return nil 218 } 219 220 // walk is a callback to walk a file tree and add all files found in it. 221 func (f *File) walk(path string, isDir bool, mode os.FileMode) error { 222 if path != f.input && (mode&os.ModeSymlink) != 0 { 223 if resolved, err := filepath.EvalSymlinks(path); err != nil { 224 return err 225 } else if isDir { 226 // TODO(peterebden): Is this case still needed? 227 return fs.WalkMode(resolved, f.walk) 228 } 229 } 230 if samePaths(path, f.filename) { 231 return nil 232 } else if !isDir { 233 if !f.matchesSuffix(path, f.ExcludeSuffix) { 234 if f.matchesSuffix(path, f.Suffix) { 235 log.Debug("Adding zip file %s", path) 236 if err := f.AddZipFile(path); err != nil { 237 return fmt.Errorf("Error adding %s to zipfile: %s", path, err) 238 } 239 } else if f.IncludeOther && !f.HasExistingFile(path) { 240 if f.StripPy && strings.HasSuffix(path, ".py") && f.HasExistingFile(path+"c") { 241 log.Debug("Skipping %s since %sc exists", path, path) 242 return nil 243 } 244 log.Debug("Including existing non-zip file %s", path) 245 if info, err := os.Lstat(path); err != nil { 246 return err 247 } else if b, err := ioutil.ReadFile(path); err != nil { 248 return fmt.Errorf("Error reading %s to zipfile: %s", path, err) 249 } else if err := f.StripBytecodeTimestamp(path, b); err != nil { 250 return err 251 } else if err := f.WriteFile(path, b, info.Mode()&os.ModePerm); err != nil { 252 return err 253 } 254 } 255 } 256 } else if (len(f.Suffix) == 0 || f.AddInitPy) && path != "." && f.DirEntries { // Only add directory entries in "dumb" mode. 257 log.Debug("Adding directory entry %s/", path) 258 if err := f.WriteDir(path); err != nil { 259 return err 260 } 261 } 262 return nil 263 } 264 265 // samePaths returns true if two paths are the same (taking relative/absolute paths into account). 266 func samePaths(a, b string) bool { 267 if path.IsAbs(a) && path.IsAbs(b) { 268 return a == b 269 } 270 wd, _ := os.Getwd() 271 if !path.IsAbs(a) { 272 a = path.Join(wd, a) 273 } 274 if !path.IsAbs(b) { 275 b = path.Join(wd, b) 276 } 277 return a == b 278 } 279 280 // AddFiles walks the given directory and adds any zip files (determined by suffix) that it finds within. 281 func (f *File) AddFiles(in string) error { 282 f.input = in 283 return fs.WalkMode(in, f.walk) 284 } 285 286 // shouldExcludeSuffix returns true if the given filename has a suffix that should be excluded. 287 func (f *File) matchesSuffix(path string, suffixes []string) bool { 288 for _, suffix := range suffixes { 289 if suffix != "" && strings.HasSuffix(path, suffix) { 290 return true 291 } 292 } 293 return false 294 } 295 296 // shouldInclude returns true if the given filename should be included according to the include / exclude sets of this File. 297 func (f *File) shouldInclude(name string) bool { 298 for _, excl := range f.Exclude { 299 if matched, _ := filepath.Match(excl, name); matched { 300 log.Debug("Skipping %s (excluded by %s)", name, excl) 301 return false 302 } else if matched, _ := filepath.Match(excl, filepath.Base(name)); matched { 303 log.Debug("Skipping %s (excluded by %s)", name, excl) 304 return false 305 } 306 } 307 if len(f.Include) == 0 { 308 return true 309 } 310 for _, incl := range f.Include { 311 if matched, _ := filepath.Match(incl, name); matched || strings.HasPrefix(name, incl) { 312 return true 313 } 314 } 315 log.Debug("Skipping %s (didn't match any includes)", name) 316 return false 317 } 318 319 // AddInitPyFiles adds an __init__.py file to every directory in the zip file that doesn't already have one. 320 func (f *File) AddInitPyFiles() error { 321 s := make([]string, 0, len(f.files)) 322 sos := map[string]struct{}{} 323 for p := range f.files { 324 s = append(s, p) 325 // We use this to check that we don't shadow files that look importable. 326 if strings.HasSuffix(p, ".so") { 327 p = strings.TrimSuffix(p, ".so") 328 if idx := strings.LastIndex(p, ".cpython-"); idx != -1 { 329 p = p[:idx] 330 } 331 sos[p] = struct{}{} 332 } 333 } 334 sort.Strings(s) 335 for _, p := range s { 336 for d := filepath.Dir(p); d != "."; d = filepath.Dir(d) { 337 if filepath.Base(d) == "__pycache__" { 338 break // Don't need to add an __init__.py here. 339 } 340 initPyPath := path.Join(d, "__init__.py") 341 // Don't write one at the root, it's not necessary. 342 if _, present := f.files[initPyPath]; present || initPyPath == "__init__.py" { 343 break 344 } else if _, present := f.files[initPyPath+"c"]; present { 345 // If we already have a pyc / pyo we don't need the __init__.py as well. 346 break 347 } else if _, present := f.files[initPyPath+"o"]; present { 348 break 349 } else if _, present := f.files[d+".py"]; present { 350 break 351 } else if _, present := sos[d]; present { 352 break 353 } 354 log.Debug("Adding %s", initPyPath) 355 f.files[initPyPath] = fileRecord{} 356 if err := f.WriteFile(initPyPath, []byte{}, 0644); err != nil { 357 return err 358 } 359 } 360 } 361 return nil 362 } 363 364 // AddManifest adds a manifest to the given zip writer with a Main-Class entry (and a couple of others) 365 func (f *File) AddManifest(mainClass string) error { 366 manifest := fmt.Sprintf("Manifest-Version: 1.0\nMain-Class: %s\n", mainClass) 367 return f.WriteFile("META-INF/MANIFEST.MF", []byte(manifest), 0644) 368 } 369 370 // HasExistingFile returns true if the writer has already written the given file. 371 func (f *File) HasExistingFile(name string) bool { 372 _, present := f.files[name] 373 return present 374 } 375 376 // addExistingFile adds a record for an existing file, although doesn't write any contents. 377 func (f *File) addExistingFile(name, file string, compressedSize, uncompressedSize uint64, crc uint32) { 378 f.files[name] = fileRecord{file, compressedSize, uncompressedSize, crc} 379 } 380 381 // concatenateFile adds a file to the zip which is concatenated with any existing content with the same name. 382 // Writing is deferred since we obviously can't append to it later. 383 func (f *File) concatenateFile(zf *zip.File) error { 384 r, err := zf.Open() 385 if err != nil { 386 return err 387 } 388 defer r.Close() 389 var buf bytes.Buffer 390 if _, err := io.Copy(&buf, r); err != nil { 391 return err 392 } 393 contents := buf.Bytes() 394 if !bytes.HasSuffix(contents, []byte{'\n'}) { 395 contents = append(contents, '\n') 396 } 397 f.concatenatedFiles[zf.Name] = append(f.concatenatedFiles[zf.Name], contents...) 398 return nil 399 } 400 401 // handleConcatenatedFiles appends concatenated files to the archive's directory for writing. 402 func (f *File) handleConcatenatedFiles() error { 403 // Must do it in a deterministic order 404 files := make([]string, 0, len(f.concatenatedFiles)) 405 for name := range f.concatenatedFiles { 406 files = append(files, name) 407 } 408 sort.Strings(files) 409 for _, name := range files { 410 if err := f.WriteFile(name, f.concatenatedFiles[name], 0644); err != nil { 411 return err 412 } 413 } 414 return nil 415 } 416 417 // addFile writes a file to the new writer. 418 func (f *File) addFile(fh *zip.FileHeader, r io.Reader, crc uint32) error { 419 f.align(fh) 420 fh.Flags = 0 // we're not writing a data descriptor after the file 421 comp := func(w io.Writer) (io.WriteCloser, error) { return nopCloser{w}, nil } 422 fh.SetModTime(modTime) 423 fw, err := f.w.CreateHeaderWithCompressor(fh, comp, fixedCrc32{value: crc}) 424 if err == nil { 425 _, err = io.CopyN(fw, r, int64(fh.CompressedSize64)) 426 } 427 return err 428 } 429 430 // WriteFile writes a complete file to the writer. 431 func (f *File) WriteFile(filename string, data []byte, mode os.FileMode) error { 432 filename = path.Join(f.Prefix, filename) 433 fh := zip.FileHeader{ 434 Name: filename, 435 Method: zip.Deflate, 436 } 437 fh.SetMode(mode) 438 fh.SetModTime(modTime) 439 440 for _, ext := range f.StoreSuffix { 441 if strings.HasSuffix(filename, ext) { 442 fh.Method = zip.Store 443 break 444 } 445 } 446 447 f.align(&fh) 448 if fw, err := f.w.CreateHeader(&fh); err != nil { 449 return err 450 } else if _, err := fw.Write(data); err != nil { 451 return err 452 } 453 f.addExistingFile(filename, filename, 0, 0, 0) 454 return nil 455 } 456 457 // align writes any necessary bytes to align the next file. 458 func (f *File) align(h *zip.FileHeader) { 459 if f.Align != 0 && h.Method == zip.Store { 460 // We have to allow space for writing the header, so we predict what the offset will be after it. 461 fileStart := f.w.Offset() + fileHeaderLen + len(h.Name) + len(h.Extra) 462 if overlap := fileStart % f.Align; overlap != 0 { 463 if err := f.w.WriteRaw(bytes.Repeat([]byte{0}, f.Align-overlap)); err != nil { 464 log.Error("Failed to pad file: %s", err) 465 } 466 } 467 } 468 } 469 470 // WriteDir writes a directory entry to the writer. 471 func (f *File) WriteDir(filename string) error { 472 filename = path.Join(f.Prefix, filename) 473 filename += "/" // Must have trailing slash to tell it it's a directory. 474 fh := zip.FileHeader{ 475 Name: filename, 476 Method: zip.Store, 477 } 478 fh.SetModTime(modTime) 479 if _, err := f.w.CreateHeader(&fh); err != nil { 480 return err 481 } 482 f.addExistingFile(filename, filename, 0, 0, 0) 483 return nil 484 } 485 486 // WritePreamble writes a preamble to the zipfile. 487 func (f *File) WritePreamble(preamble []byte) error { 488 return f.w.WriteRaw(preamble) 489 } 490 491 // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file. 492 // This is important so our output is deterministic. 493 func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error { 494 if strings.HasSuffix(filename, ".pyc") || strings.HasSuffix(filename, ".pyo") { 495 if len(contents) < 12 { 496 log.Warning("Invalid bytecode file, will not strip timestamp") 497 } else if f.isPy37(contents) { 498 // Check whether this is hash verified. This is probably unlikely since we don't 499 // pass appropriate flags but at this point it doesn't hurt to check. 500 if (contents[4] & 1) != 0 { 501 // Is hash verified. It should never be checked though. 502 contents[4] &^= 2 503 } else { 504 // Timestamp verified, zero it out. 505 f.zeroPycTimestamp(contents, 8) 506 } 507 } else { 508 // The .pyc format starts with a two-byte magic number, a \r\n, then a four-byte 509 // timestamp. It is that timestamp we are interested in; we overwrite it with 510 // the same mtime we use in the zipfile directory (it's important that it is 511 // deterministic, but also that it matches, otherwise zipimport complains). 512 f.zeroPycTimestamp(contents, 4) 513 } 514 } 515 return nil 516 } 517 518 // isPy37 determines if the leading magic number in a .pyc corresponds to Python 3.7. 519 // This is important to us because the structure changed (see PEP 552) and we have to handle that. 520 func (f *File) isPy37(b []byte) bool { 521 i := (int(b[1]) << 8) + int(b[0]) 522 // Python 2 versions use magic numbers in the 20-60,000 range. Ensure it's not one of them. 523 return i >= 3394 && i < 10000 524 } 525 526 // zeroPycTimestamp zeroes out a .pyc timestamp at a given offset. 527 func (f *File) zeroPycTimestamp(contents []byte, offset int) { 528 var buf bytes.Buffer 529 binary.Write(&buf, binary.LittleEndian, modTime.Unix()) 530 b := buf.Bytes() 531 contents[offset+0] = b[0] 532 contents[offset+1] = b[1] 533 contents[offset+2] = b[2] 534 contents[offset+3] = b[3] 535 } 536 537 type nopCloser struct { 538 io.Writer 539 } 540 541 func (w nopCloser) Close() error { 542 return nil 543 } 544 545 // fixedCrc32 implements a Hash32 interface that just writes out a predetermined value. 546 // this is really cheating of course but serves our purposes here. 547 type fixedCrc32 struct { 548 value uint32 549 } 550 551 func (crc fixedCrc32) Write(p []byte) (n int, err error) { 552 return len(p), nil 553 } 554 555 func (crc fixedCrc32) Sum(b []byte) []byte { 556 buf := make([]byte, 4) 557 binary.LittleEndian.PutUint32(buf, crc.value) 558 return b 559 } 560 561 func (crc fixedCrc32) Sum32() uint32 { 562 return crc.value 563 } 564 565 func (crc fixedCrc32) Reset() { 566 } 567 568 func (crc fixedCrc32) Size() int { 569 return 32 570 } 571 572 func (crc fixedCrc32) BlockSize() int { 573 return 32 574 }