github.phpd.cn/thought-machine/please@v12.2.0+incompatible/tools/jarcat/zip/writer.go (about) 1 // Package zip implements functions for jarcat that manipulate .zip files. 2 package zip 3 4 import ( 5 "bytes" 6 "encoding/binary" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "os" 11 "path" 12 "path/filepath" 13 "sort" 14 "strings" 15 "time" 16 17 "gopkg.in/op/go-logging.v1" 18 19 "fs" 20 "third_party/go/zip" 21 ) 22 23 var log = logging.MustGetLogger("zip") 24 var modTime = time.Date(2001, time.January, 1, 0, 0, 0, 0, time.UTC) 25 26 // fileHeaderLen is the length of a file header in a zipfile. 27 // We need to know this to adjust alignment. 28 const fileHeaderLen = 30 29 30 // A File represents an output zipfile. 31 type File struct { 32 f io.WriteCloser 33 w *zip.Writer 34 filename string 35 input string 36 // Include and Exclude are prefixes of filenames to include or exclude from the zipfile. 37 Include, Exclude []string 38 // Strict controls whether we deny duplicate files or not. 39 // Zipfiles can readily contain duplicates, if this is true we reject them unless they are identical. 40 // If false we allow duplicates and leave it to someone else to handle. 41 Strict bool 42 // RenameDirs is a map of directories to rename, from the old name to the new one. 43 RenameDirs map[string]string 44 // StripPrefix is a prefix that is stripped off any files added with AddFiles. 45 StripPrefix string 46 // Suffix is the suffix of files that we include while scanning. 47 Suffix []string 48 // ExcludeSuffix is a list of suffixes that are excluded from the file scan. 49 ExcludeSuffix []string 50 // StoreSuffix is a list of file suffixes that will be stored instead of deflated. 51 StoreSuffix []string 52 // IncludeOther will make the file scan include other files that are not part of a zip file. 53 IncludeOther bool 54 // AddInitPy will make the writer add __init__.py files to all directories that don't already have one on close. 55 AddInitPy bool 56 // DirEntries makes the writer add empty directory entries. 57 DirEntries bool 58 // Align aligns entries to a multiple of this many bytes. 59 Align int 60 // Prefix stores all files with this prefix. 61 Prefix string 62 // files tracks the files that we've written so far. 63 files map[string]fileRecord 64 // concatenatedFiles tracks the files that are built up as we go. 65 concatenatedFiles map[string][]byte 66 } 67 68 // A fileRecord records some information about a file that we use to check if they're exact duplicates. 69 type fileRecord struct { 70 ZipFile string 71 CompressedSize64 uint64 72 UncompressedSize64 uint64 73 CRC32 uint32 74 } 75 76 // NewFile constructs and returns a new File. 77 func NewFile(output string, strict bool) *File { 78 f, err := os.Create(output) 79 if err != nil { 80 log.Fatalf("Failed to open output file: %s", err) 81 } 82 return &File{ 83 f: f, 84 w: zip.NewWriter(f), 85 filename: output, 86 Strict: strict, 87 files: map[string]fileRecord{}, 88 concatenatedFiles: map[string][]byte{}, 89 } 90 } 91 92 // Close must be called before the File is destroyed. 93 func (f *File) Close() { 94 f.handleConcatenatedFiles() 95 if f.AddInitPy { 96 if err := f.AddInitPyFiles(); err != nil { 97 log.Fatalf("%s", err) 98 } 99 } 100 if err := f.w.Close(); err != nil { 101 log.Fatalf("Failed to finalise zip file: %s", err) 102 } 103 if err := f.f.Close(); err != nil { 104 log.Fatalf("Failed to close file: %s", err) 105 } 106 } 107 108 // AddZipFile copies the contents of a zip file into the new zipfile. 109 func (f *File) AddZipFile(filepath string) error { 110 r, err := zip.OpenReader(filepath) 111 if err != nil { 112 return err 113 } 114 defer r.Close() 115 116 // Reopen file to get a directly readable version without decompression. 117 r2, err := os.Open(filepath) 118 if err != nil { 119 return err 120 } 121 defer r2.Close() 122 123 for _, rf := range r.File { 124 log.Debug("Found file %s (from %s)", rf.Name, filepath) 125 if !f.shouldInclude(rf.Name) { 126 continue 127 } 128 // This directory is very awkward. We need to merge the contents by concatenating them, 129 // we can't replace them or leave them out. 130 if strings.HasPrefix(rf.Name, "META-INF/services/") || 131 strings.HasPrefix(rf.Name, "META-INF/spring") || 132 rf.Name == "META-INF/please_sourcemap" || 133 // akka libs each have their own reference.conf. if you are using 134 // akka as a lib-only (e.g akka-remote), those need to be merged together 135 rf.Name == "reference.conf" { 136 if err := f.concatenateFile(rf); err != nil { 137 return err 138 } 139 continue 140 } 141 hasTrailingSlash := strings.HasSuffix(rf.Name, "/") 142 isDir := hasTrailingSlash || rf.FileInfo().IsDir() 143 if isDir && !hasTrailingSlash { 144 rf.Name = rf.Name + "/" 145 } 146 if existing, present := f.files[rf.Name]; present { 147 // Allow duplicates of directories. Seemingly the best way to identify them is that 148 // they end in a trailing slash. 149 if isDir { 150 continue 151 } 152 // Allow skipping existing files that are exactly the same as the added ones. 153 // It's unnecessarily awkward to insist on not ever doubling up on a dependency. 154 // TODO(pebers): Bit of a hack ignoring it when CRC is 0, would be better to add 155 // the correct CRC when added through WriteFile. 156 if existing.CRC32 == rf.CRC32 || existing.CRC32 == 0 { 157 log.Info("Skipping %s / %s: already added (from %s)", filepath, rf.Name, existing.ZipFile) 158 continue 159 } 160 if f.Strict { 161 log.Error("Duplicate file %s (from %s, already added from %s); crc %d / %d", rf.Name, filepath, existing.ZipFile, rf.CRC32, existing.CRC32) 162 return fmt.Errorf("File %s already added to destination zip file (from %s)", rf.Name, existing.ZipFile) 163 } 164 continue 165 } 166 for before, after := range f.RenameDirs { 167 if strings.HasPrefix(rf.Name, before) { 168 rf.Name = path.Join(after, strings.TrimPrefix(rf.Name, before)) 169 if isDir { 170 rf.Name = rf.Name + "/" 171 } 172 break 173 } 174 } 175 if f.StripPrefix != "" { 176 rf.Name = strings.TrimPrefix(rf.Name, f.StripPrefix) 177 } 178 if f.Prefix != "" { 179 rf.Name = path.Join(f.Prefix, rf.Name) 180 } 181 // Java tools don't seem to like writing a data descriptor for stored items. 182 // Unsure if this is a limitation of the format or a problem of those tools. 183 rf.Flags = 0 184 f.addExistingFile(rf.Name, filepath, rf.CompressedSize64, rf.UncompressedSize64, rf.CRC32) 185 186 start, err := rf.DataOffset() 187 if err != nil { 188 return err 189 } 190 if _, err := r2.Seek(start, 0); err != nil { 191 return err 192 } 193 if err := f.addFile(&rf.FileHeader, r2, rf.CRC32); err != nil { 194 return err 195 } 196 } 197 return nil 198 } 199 200 // walk is a callback to walk a file tree and add all files found in it. 201 func (f *File) walk(path string, isDir bool, mode os.FileMode) error { 202 if path != f.input && (mode&os.ModeSymlink) != 0 { 203 if resolved, err := filepath.EvalSymlinks(path); err != nil { 204 return err 205 } else if isDir { 206 // TODO(peterebden): Is this case still needed? 207 return fs.WalkMode(resolved, f.walk) 208 } 209 } 210 if path == f.filename { 211 return nil 212 } else if !isDir { 213 if !f.matchesSuffix(path, f.ExcludeSuffix) { 214 if f.matchesSuffix(path, f.Suffix) { 215 log.Debug("Adding zip file %s", path) 216 if err := f.AddZipFile(path); err != nil { 217 return fmt.Errorf("Error adding %s to zipfile: %s", path, err) 218 } 219 } else if f.IncludeOther && !f.HasExistingFile(path) { 220 log.Debug("Including existing non-zip file %s", path) 221 if info, err := os.Lstat(path); err != nil { 222 return err 223 } else if b, err := ioutil.ReadFile(path); err != nil { 224 return fmt.Errorf("Error reading %s to zipfile: %s", path, err) 225 } else if err := f.StripBytecodeTimestamp(path, b); err != nil { 226 return err 227 } else if err := f.WriteFile(path, b, info.Mode()&os.ModePerm); err != nil { 228 return err 229 } 230 } 231 } 232 } else if (len(f.Suffix) == 0 || f.AddInitPy) && path != "." && f.DirEntries { // Only add directory entries in "dumb" mode. 233 log.Debug("Adding directory entry %s/", path) 234 if err := f.WriteDir(path); err != nil { 235 return err 236 } 237 } 238 return nil 239 } 240 241 // AddFiles walks the given directory and adds any zip files (determined by suffix) that it finds within. 242 func (f *File) AddFiles(in string) error { 243 f.input = in 244 return fs.WalkMode(in, f.walk) 245 } 246 247 // shouldExcludeSuffix returns true if the given filename has a suffix that should be excluded. 248 func (f *File) matchesSuffix(path string, suffixes []string) bool { 249 for _, suffix := range suffixes { 250 if suffix != "" && strings.HasSuffix(path, suffix) { 251 return true 252 } 253 } 254 return false 255 } 256 257 // shouldInclude returns true if the given filename should be included according to the include / exclude sets of this File. 258 func (f *File) shouldInclude(name string) bool { 259 for _, excl := range f.Exclude { 260 if matched, _ := filepath.Match(excl, name); matched { 261 log.Debug("Skipping %s (excluded by %s)", name, excl) 262 return false 263 } else if matched, _ := filepath.Match(excl, filepath.Base(name)); matched { 264 log.Debug("Skipping %s (excluded by %s)", name, excl) 265 return false 266 } 267 } 268 if len(f.Include) == 0 { 269 return true 270 } 271 for _, incl := range f.Include { 272 if matched, _ := filepath.Match(incl, name); matched || strings.HasPrefix(name, incl) { 273 return true 274 } 275 } 276 log.Debug("Skipping %s (didn't match any includes)", name) 277 return false 278 } 279 280 // AddInitPyFiles adds an __init__.py file to every directory in the zip file that doesn't already have one. 281 func (f *File) AddInitPyFiles() error { 282 s := make([]string, 0, len(f.files)) 283 for p := range f.files { 284 s = append(s, p) 285 } 286 sort.Strings(s) 287 for _, p := range s { 288 for d := filepath.Dir(p); d != "."; d = filepath.Dir(d) { 289 if filepath.Base(d) == "__pycache__" { 290 break // Don't need to add an __init__.py here. 291 } 292 initPyPath := path.Join(d, "__init__.py") 293 // Don't write one at the root, it's not necessary. 294 if _, present := f.files[initPyPath]; present || initPyPath == "__init__.py" { 295 break 296 } else if _, present := f.files[initPyPath+"c"]; present { 297 // If we already have a pyc / pyo we don't need the __init__.py as well. 298 break 299 } else if _, present := f.files[initPyPath+"o"]; present { 300 break 301 } 302 log.Debug("Adding %s", initPyPath) 303 f.files[initPyPath] = fileRecord{} 304 if err := f.WriteFile(initPyPath, []byte{}, 0644); err != nil { 305 return err 306 } 307 } 308 } 309 return nil 310 } 311 312 // AddManifest adds a manifest to the given zip writer with a Main-Class entry (and a couple of others) 313 func (f *File) AddManifest(mainClass string) error { 314 manifest := fmt.Sprintf("Manifest-Version: 1.0\nMain-Class: %s\n", mainClass) 315 return f.WriteFile("META-INF/MANIFEST.MF", []byte(manifest), 0644) 316 } 317 318 // HasExistingFile returns true if the writer has already written the given file. 319 func (f *File) HasExistingFile(name string) bool { 320 _, present := f.files[name] 321 return present 322 } 323 324 // addExistingFile adds a record for an existing file, although doesn't write any contents. 325 func (f *File) addExistingFile(name, file string, compressedSize, uncompressedSize uint64, crc uint32) { 326 f.files[name] = fileRecord{file, compressedSize, uncompressedSize, crc} 327 } 328 329 // concatenateFile adds a file to the zip which is concatenated with any existing content with the same name. 330 // Writing is deferred since we obviously can't append to it later. 331 func (f *File) concatenateFile(zf *zip.File) error { 332 r, err := zf.Open() 333 if err != nil { 334 return err 335 } 336 defer r.Close() 337 var buf bytes.Buffer 338 if _, err := io.Copy(&buf, r); err != nil { 339 return err 340 } 341 contents := buf.Bytes() 342 if !bytes.HasSuffix(contents, []byte{'\n'}) { 343 contents = append(contents, '\n') 344 } 345 f.concatenatedFiles[zf.Name] = append(f.concatenatedFiles[zf.Name], contents...) 346 return nil 347 } 348 349 // handleConcatenatedFiles appends concatenated files to the archive's directory for writing. 350 func (f *File) handleConcatenatedFiles() error { 351 // Must do it in a deterministic order 352 files := make([]string, 0, len(f.concatenatedFiles)) 353 for name := range f.concatenatedFiles { 354 files = append(files, name) 355 } 356 sort.Strings(files) 357 for _, name := range files { 358 if err := f.WriteFile(name, f.concatenatedFiles[name], 0644); err != nil { 359 return err 360 } 361 } 362 return nil 363 } 364 365 // addFile writes a file to the new writer. 366 func (f *File) addFile(fh *zip.FileHeader, r io.Reader, crc uint32) error { 367 f.align(fh) 368 fh.Flags = 0 // we're not writing a data descriptor after the file 369 comp := func(w io.Writer) (io.WriteCloser, error) { return nopCloser{w}, nil } 370 fh.SetModTime(modTime) 371 fw, err := f.w.CreateHeaderWithCompressor(fh, comp, fixedCrc32{value: crc}) 372 if err == nil { 373 _, err = io.CopyN(fw, r, int64(fh.CompressedSize64)) 374 } 375 return err 376 } 377 378 // WriteFile writes a complete file to the writer. 379 func (f *File) WriteFile(filename string, data []byte, mode os.FileMode) error { 380 filename = path.Join(f.Prefix, filename) 381 fh := zip.FileHeader{ 382 Name: filename, 383 Method: zip.Deflate, 384 } 385 fh.SetMode(mode) 386 fh.SetModTime(modTime) 387 388 for _, ext := range f.StoreSuffix { 389 if strings.HasSuffix(filename, ext) { 390 fh.Method = zip.Store 391 break 392 } 393 } 394 395 f.align(&fh) 396 if fw, err := f.w.CreateHeader(&fh); err != nil { 397 return err 398 } else if _, err := fw.Write(data); err != nil { 399 return err 400 } 401 f.addExistingFile(filename, filename, 0, 0, 0) 402 return nil 403 } 404 405 // align writes any necessary bytes to align the next file. 406 func (f *File) align(h *zip.FileHeader) { 407 if f.Align != 0 && h.Method == zip.Store { 408 // We have to allow space for writing the header, so we predict what the offset will be after it. 409 fileStart := f.w.Offset() + fileHeaderLen + len(h.Name) + len(h.Extra) 410 if overlap := fileStart % f.Align; overlap != 0 { 411 if err := f.w.WriteRaw(bytes.Repeat([]byte{0}, f.Align-overlap)); err != nil { 412 log.Error("Failed to pad file: %s", err) 413 } 414 } 415 } 416 } 417 418 // WriteDir writes a directory entry to the writer. 419 func (f *File) WriteDir(filename string) error { 420 filename = path.Join(f.Prefix, filename) 421 filename += "/" // Must have trailing slash to tell it it's a directory. 422 fh := zip.FileHeader{ 423 Name: filename, 424 Method: zip.Store, 425 } 426 fh.SetModTime(modTime) 427 if _, err := f.w.CreateHeader(&fh); err != nil { 428 return err 429 } 430 f.addExistingFile(filename, filename, 0, 0, 0) 431 return nil 432 } 433 434 // WritePreamble writes a preamble to the zipfile. 435 func (f *File) WritePreamble(preamble []byte) error { 436 return f.w.WriteRaw(preamble) 437 } 438 439 // StripBytecodeTimestamp strips a timestamp from a .pyc or .pyo file. 440 // This is important so our output is deterministic. 441 func (f *File) StripBytecodeTimestamp(filename string, contents []byte) error { 442 if strings.HasSuffix(filename, ".pyc") || strings.HasSuffix(filename, ".pyo") { 443 if len(contents) < 8 { 444 log.Warning("Invalid bytecode file, will not strip timestamp") 445 } else { 446 // The .pyc format starts with a two-byte magic number, a \r\n, then a four-byte 447 // timestamp. It is that timestamp we are interested in; we overwrite it with 448 // the same mtime we use in the zipfile directory (it's important that it is 449 // deterministic, but also that it matches, otherwise zipimport complains). 450 var buf bytes.Buffer 451 binary.Write(&buf, binary.LittleEndian, modTime.Unix()) 452 b := buf.Bytes() 453 contents[4] = b[0] 454 contents[5] = b[1] 455 contents[6] = b[2] 456 contents[7] = b[3] 457 } 458 } 459 return nil 460 } 461 462 type nopCloser struct { 463 io.Writer 464 } 465 466 func (w nopCloser) Close() error { 467 return nil 468 } 469 470 // fixedCrc32 implements a Hash32 interface that just writes out a predetermined value. 471 // this is really cheating of course but serves our purposes here. 472 type fixedCrc32 struct { 473 value uint32 474 } 475 476 func (crc fixedCrc32) Write(p []byte) (n int, err error) { 477 return len(p), nil 478 } 479 480 func (crc fixedCrc32) Sum(b []byte) []byte { 481 buf := make([]byte, 4) 482 binary.LittleEndian.PutUint32(buf, crc.value) 483 return b 484 } 485 486 func (crc fixedCrc32) Sum32() uint32 { 487 return crc.value 488 } 489 490 func (crc fixedCrc32) Reset() { 491 } 492 493 func (crc fixedCrc32) Size() int { 494 return 32 495 } 496 497 func (crc fixedCrc32) BlockSize() int { 498 return 32 499 }