golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/gorebuild/io.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package main 6 7 import ( 8 "archive/tar" 9 "archive/zip" 10 "bufio" 11 "bytes" 12 "compress/gzip" 13 "crypto/sha256" 14 "encoding/json" 15 "fmt" 16 "hash/crc32" 17 "io" 18 "io/fs" 19 "net/http" 20 "os" 21 "path/filepath" 22 "sort" 23 "strconv" 24 "strings" 25 ) 26 27 // SHA256 returns the hexadecimal SHA256 hash of data. 28 func SHA256(data []byte) string { 29 sum := sha256.Sum256(data) 30 return fmt.Sprintf("%x", sum[:]) 31 } 32 33 // Get returns the content at the named URL. 34 func Get(log *Log, url string) (data []byte, err error) { 35 defer func() { 36 if err != nil && log != nil { 37 log.Printf("%s", err) 38 } 39 }() 40 41 resp, err := http.Get(url) 42 if err != nil { 43 return nil, err 44 } 45 defer resp.Body.Close() 46 if resp.StatusCode != 200 { 47 return nil, fmt.Errorf("get %s: %s", url, resp.Status) 48 } 49 data, err = io.ReadAll(resp.Body) 50 if err != nil { 51 return nil, fmt.Errorf("get %s: %s", url, err) 52 } 53 if log != nil { 54 log.Printf("downloaded %s", url) 55 } 56 return data, nil 57 } 58 59 // GerritTarGz returns a .tar.gz file corresponding to the named repo and ref on Go's Gerrit server. 60 func GerritTarGz(log *Log, repo, ref string) ([]byte, error) { 61 return Get(log, "https://go.googlesource.com/"+repo+"/+archive/"+ref+".tar.gz") 62 } 63 64 // A DLRelease is the JSON for a release, returned by go.dev/dl. 65 type DLRelease struct { 66 Version string `json:"version"` 67 Stable bool `json:"stable"` 68 Files []*DLFile `json:"files"` 69 } 70 71 // A DLFile is the JSON for a file, returned by go.dev/dl. 72 type DLFile struct { 73 Name string `json:"filename"` 74 GOOS string `json:"os"` 75 GOARCH string `json:"arch"` 76 Version string `json:"version"` 77 SHA256 string `json:"sha256"` 78 Size int64 `json:"size"` 79 Kind string `json:"kind"` // "archive", "installer", "source" 80 } 81 82 // DLReleases returns the release list from go.dev/dl. 83 func DLReleases(log *Log) ([]*DLRelease, error) { 84 var all []*DLRelease 85 data, err := Get(log, "https://go.dev/dl/?mode=json&include=all") 86 if err != nil { 87 return nil, err 88 } 89 if err := json.Unmarshal(data, &all); err != nil { 90 return nil, fmt.Errorf("unmarshaling releases JSON: %v", err) 91 } 92 93 for _, r := range all { 94 for _, f := range r.Files { 95 if f.GOARCH == "armv6l" { 96 f.GOARCH = "arm" 97 } 98 } 99 } 100 return all, nil 101 } 102 103 // OpenTarGz returns a tar.Reader for the given tgz data. 104 func OpenTarGz(tgz []byte) (*tar.Reader, error) { 105 zr, err := gzip.NewReader(bytes.NewReader(tgz)) 106 if err != nil { 107 return nil, err 108 } 109 return tar.NewReader(zr), nil 110 } 111 112 // UnpackTarGz unpacks the given tgz data into the named directory. 113 // On error the directory may contain partial contents. 114 func UnpackTarGz(dir string, tgz []byte) error { 115 if err := os.MkdirAll(dir, 0777); err != nil { 116 return err 117 } 118 tr, err := OpenTarGz(tgz) 119 if err != nil { 120 return err 121 } 122 for { 123 hdr, err := tr.Next() 124 if err != nil { 125 if err == io.EOF { 126 break 127 } 128 return err 129 } 130 if hdr.Typeflag == tar.TypeDir { 131 // Ignore directories entirely 132 continue 133 } 134 name := filepath.FromSlash(hdr.Name) 135 if name != filepath.Clean(name) || strings.HasPrefix(name, "..") || filepath.IsAbs(name) { 136 return fmt.Errorf("invalid name in tgz: %#q", hdr.Name) 137 } 138 targ := filepath.Join(dir, name) 139 if err := os.MkdirAll(filepath.Dir(targ), 0777); err != nil { 140 return err 141 } 142 f, err := os.OpenFile(targ, os.O_CREATE|os.O_WRONLY, fs.FileMode(hdr.Mode&0777)) 143 if err != nil { 144 return err 145 } 146 if _, err := io.Copy(f, tr); err != nil { 147 f.Close() 148 return err 149 } 150 if err := f.Close(); err != nil { 151 return err 152 } 153 } 154 return nil 155 } 156 157 // OpenZip returns a zip.Reader for the given zip data. 158 func OpenZip(zipdata []byte) (*zip.Reader, error) { 159 return zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata))) 160 } 161 162 // UnpackZip unpacks the given zip data into the named directory. 163 // On error the directory may contain partial contents. 164 func UnpackZip(dir string, zipdata []byte) error { 165 if err := os.MkdirAll(dir, 0777); err != nil { 166 return err 167 } 168 zr, err := OpenZip(zipdata) 169 if err != nil { 170 return err 171 } 172 for _, zf := range zr.File { 173 if strings.HasSuffix(zf.Name, "/") { 174 // Ignore directories entirely 175 continue 176 } 177 name := filepath.FromSlash(zf.Name) 178 if name != filepath.Clean(name) || strings.HasPrefix(name, "..") || filepath.IsAbs(name) { 179 return fmt.Errorf("invalid name in zip: %#q", zf.Name) 180 } 181 targ := filepath.Join(dir, name) 182 if err := os.MkdirAll(filepath.Dir(targ), 0777); err != nil { 183 return err 184 } 185 f, err := os.OpenFile(targ, os.O_CREATE|os.O_WRONLY, 0666) 186 if err != nil { 187 return err 188 } 189 zr, err := zf.Open() 190 if err != nil { 191 f.Close() 192 return err 193 } 194 _, err = io.Copy(f, zr) 195 zr.Close() 196 if err != nil { 197 f.Close() 198 return err 199 } 200 if err := f.Close(); err != nil { 201 return err 202 } 203 } 204 return nil 205 } 206 207 // A Fixer is a transformation on file content applied during indexing. 208 // It lets us edit away permitted differences between files, such as code 209 // signatures that cannot be reproduced without the signing keys. 210 type Fixer = func(*Log, string, []byte) []byte 211 212 // A TarFile summarizes a single file in a tar archive: 213 // it records the exact header and the SHA256 of the content. 214 type TarFile struct { 215 tar.Header 216 SHA256 string 217 } 218 219 // A ZipFile summarizes a single file in a zip archive: 220 // it records the exact header and the SHA256 of the content. 221 type ZipFile struct { 222 zip.FileHeader 223 SHA256 string 224 } 225 226 // A CpioFile represents a single file in a CPIO archive. 227 type CpioFile struct { 228 Name string 229 Mode fs.FileMode 230 Size int64 231 SHA256 string 232 } 233 234 // IndexTarGz parses tgz as a gzip-compressed tar file and returns an index of its content. 235 // If fix is non-nil, it is applied to file content before indexing. 236 // This lets us strip code signatures that cannot be reproduced. 237 func IndexTarGz(log *Log, tgz []byte, fix Fixer) map[string]*TarFile { 238 tr, err := OpenTarGz(tgz) 239 if err != nil { 240 log.Printf("%v", err) 241 return nil 242 } 243 ix := make(map[string]*TarFile) 244 for { 245 hdr, err := tr.Next() 246 if err != nil { 247 if err == io.EOF { 248 break 249 } 250 log.Printf("reading tgz: %v", err) 251 return nil 252 } 253 if hdr.Typeflag == tar.TypeDir { 254 // Ignore directories entirely 255 continue 256 } 257 data, err := io.ReadAll(tr) 258 if err != nil { 259 log.Printf("reading %s from tgz: %v", hdr.Name, err) 260 return nil 261 } 262 if fix != nil { 263 data = fix(log, hdr.Name, data) 264 hdr.Size = int64(len(data)) 265 } 266 ix[hdr.Name] = &TarFile{*hdr, SHA256(data)} 267 } 268 return ix 269 } 270 271 // IndexZip parses zipdata as a zip archive and returns an index of its content. 272 // If fix is non-nil, it is applied to file content before indexing. 273 // This lets us strip code signatures that cannot be reproduced. 274 func IndexZip(log *Log, zipdata []byte, fix Fixer) map[string]*ZipFile { 275 zr, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata))) 276 if err != nil { 277 log.Printf("%v", err) 278 return nil 279 } 280 ix := make(map[string]*ZipFile) 281 for _, hdr := range zr.File { 282 if strings.HasSuffix(hdr.Name, "/") { 283 // Ignore directories entirely 284 continue 285 } 286 rc, err := hdr.Open() 287 if err != nil { 288 log.Printf("%v", err) 289 return nil 290 } 291 data, err := io.ReadAll(rc) 292 rc.Close() 293 if err != nil { 294 log.Printf("%v", err) 295 return nil 296 } 297 if fix != nil { 298 data = fix(log, hdr.Name, data) 299 hdr.CRC32 = crc32.ChecksumIEEE(data) 300 hdr.UncompressedSize = uint32(len(data)) 301 hdr.UncompressedSize64 = uint64(len(data)) 302 } 303 ix[hdr.Name] = &ZipFile{hdr.FileHeader, SHA256(data)} 304 } 305 return ix 306 } 307 308 // IndexCpioGz parses data as a gzip-compressed cpio file and returns an index of its content. 309 // If fix is non-nil, it is applied to file content before indexing. 310 // This lets us strip code signatures that cannot be reproduced. 311 func IndexCpioGz(log *Log, data []byte, fix Fixer) map[string]*CpioFile { 312 zr, err := gzip.NewReader(bytes.NewReader(data)) 313 if err != nil { 314 log.Printf("%v", err) 315 return nil 316 } 317 br := bufio.NewReader(zr) 318 319 const hdrSize = 76 320 321 ix := make(map[string]*CpioFile) 322 hdr := make([]byte, hdrSize) 323 for { 324 _, err := io.ReadFull(br, hdr) 325 if err != nil { 326 if err == io.EOF { 327 break 328 } 329 log.Printf("reading archive: %v", err) 330 return nil 331 } 332 333 // https://www.mkssoftware.com/docs/man4/cpio.4.asp 334 // 335 // hdr[0:6] "070707" 336 // hdr[6:12] device number (all numbers '0'-padded octal) 337 // hdr[12:18] inode number 338 // hdr[18:24] mode 339 // hdr[24:30] uid 340 // hdr[30:36] gid 341 // hdr[36:42] nlink 342 // hdr[42:48] rdev 343 // hdr[48:59] mtime 344 // hdr[59:65] name length 345 // hdr[65:76] file size 346 347 if !allOctal(hdr[:]) || string(hdr[:6]) != "070707" { 348 log.Printf("reading archive: malformed entry") 349 return nil 350 } 351 mode, _ := strconv.ParseInt(string(hdr[18:24]), 8, 64) 352 nameLen, _ := strconv.ParseInt(string(hdr[59:65]), 8, 64) 353 size, _ := strconv.ParseInt(string(hdr[65:76]), 8, 64) 354 nameBuf := make([]byte, nameLen) 355 if _, err := io.ReadFull(br, nameBuf); err != nil { 356 log.Printf("reading archive: %v", err) 357 return nil 358 } 359 if nameLen == 0 || nameBuf[nameLen-1] != 0 { 360 log.Printf("reading archive: malformed entry") 361 return nil 362 } 363 name := string(nameBuf[:nameLen-1]) 364 365 // The MKS cpio page says "TRAILER!!" 366 // but the Apple pkg files use "TRAILER!!!". 367 if name == "TRAILER!!!" { 368 break 369 } 370 371 fmode := fs.FileMode(mode & 0777) 372 if mode&040000 != 0 { 373 fmode |= fs.ModeDir 374 } 375 376 data, err := io.ReadAll(io.LimitReader(br, size)) 377 if err != nil { 378 log.Printf("reading archive: %v", err) 379 return nil 380 } 381 if size != int64(len(data)) { 382 log.Printf("reading archive: short file") 383 return nil 384 } 385 386 if fmode&fs.ModeDir != 0 { 387 continue 388 } 389 390 if fix != nil { 391 data = fix(log, name, data) 392 size = int64(len(data)) 393 } 394 ix[name] = &CpioFile{name, fmode, size, SHA256(data)} 395 } 396 return ix 397 } 398 399 // allOctal reports whether x is entirely ASCII octal digits. 400 func allOctal(x []byte) bool { 401 for _, b := range x { 402 if b < '0' || '7' < b { 403 return false 404 } 405 } 406 return true 407 } 408 409 // DiffArchive diffs the archives 'rebuild' and 'posted' based on their indexes. 410 // It reports to log any files that appear only in one or the other. 411 // For files that appear in both, DiffArchive calls check, which should 412 // log any differences found and report whether the files match. 413 // It reports whether the archives match. 414 // If either of rebuild or posted is nil, DiffArchive returns false without logging, 415 // assuming that the code that returned the nil archive took care of reporting the problem. 416 func DiffArchive[File1, File2 any](log *Log, 417 rebuilt map[string]File1, posted map[string]File2, 418 check func(*Log, File1, File2) bool) bool { 419 420 if rebuilt == nil || posted == nil { 421 return false 422 } 423 424 // Build list of all names; will have duplicates. 425 var names []string 426 for name := range rebuilt { 427 names = append(names, name) 428 } 429 for name := range posted { 430 names = append(names, name) 431 } 432 sort.Strings(names) 433 434 match := true 435 for _, name := range names { 436 fr, okr := rebuilt[name] 437 fp, okp := posted[name] 438 if !okr && !okp { // duplicate name 439 continue 440 } 441 if !okp { 442 log.Printf("%s: missing from posted archive", name) 443 match = false 444 continue 445 } 446 if !okr { 447 log.Printf("%s: unexpected file in posted archive", name) 448 match = false 449 continue 450 } 451 delete(rebuilt, name) 452 delete(posted, name) 453 454 if !check(log, fr, fp) { 455 match = false 456 } 457 } 458 return match 459 } 460 461 // DiffTarGz diffs the tgz files rebuilt and posted, reporting any differences to log 462 // and applying fix to files before comparing them. 463 // It reports whether the archives match. 464 func DiffTarGz(log *Log, rebuilt, posted []byte, fix Fixer) bool { 465 n := 0 466 check := func(log *Log, rebuilt, posted *TarFile) bool { 467 match := true 468 name := rebuilt.Name 469 field := func(what string, rebuilt, posted any) { 470 if posted != rebuilt { 471 if n++; n <= 100 { 472 log.Printf("%s: rebuilt %s = %v, posted = %v", name, what, rebuilt, posted) 473 } else if n == 101 { 474 log.Printf("eliding additional diffs ...") 475 } 476 match = false 477 } 478 } 479 r := rebuilt 480 p := posted 481 field("typeflag", r.Typeflag, p.Typeflag) 482 field("linkname", r.Linkname, p.Linkname) 483 field("mode", r.Mode, p.Mode) 484 field("uid", r.Uid, p.Uid) 485 field("gid", r.Gid, p.Gid) 486 field("uname", r.Uname, p.Uname) 487 field("gname", r.Gname, p.Gname) 488 field("mtime", r.ModTime, p.ModTime) 489 field("atime", r.AccessTime, p.AccessTime) 490 field("ctime", r.ChangeTime, p.ChangeTime) 491 field("devmajor", r.Devmajor, p.Devmajor) 492 field("devminor", r.Devminor, p.Devminor) 493 for k, vhdr := range r.PAXRecords { 494 field("PAX:"+k, vhdr, p.PAXRecords[k]) 495 } 496 for k, vf := range p.PAXRecords { 497 if vhdr, ok := r.PAXRecords[k]; !ok { 498 field("PAX:"+k, vhdr, vf) 499 } 500 } 501 field("format", r.Format, p.Format) 502 field("size", r.Size, p.Size) 503 field("content", r.SHA256, p.SHA256) 504 return match 505 } 506 507 return DiffArchive(log, IndexTarGz(log, rebuilt, fix), IndexTarGz(log, posted, fix), check) 508 } 509 510 // DiffZip diffs the zip files rebuilt and posted, reporting any differences to log 511 // and applying fix to files before comparing them. 512 // It reports whether the archives match. 513 func DiffZip(log *Log, rebuilt, posted []byte, fix Fixer) bool { 514 n := 0 515 check := func(log *Log, rebuilt, posted *ZipFile) bool { 516 match := true 517 name := rebuilt.Name 518 field := func(what string, rebuilt, posted any) { 519 if posted != rebuilt { 520 if n++; n <= 100 { 521 log.Printf("%s: rebuilt %s = %v, posted = %v", name, what, rebuilt, posted) 522 } else if n == 101 { 523 log.Printf("eliding additional diffs ...") 524 } 525 match = false 526 } 527 } 528 r := rebuilt 529 p := posted 530 531 field("comment", r.Comment, p.Comment) 532 field("nonutf8", r.NonUTF8, p.NonUTF8) 533 field("creatorversion", r.CreatorVersion, p.CreatorVersion) 534 field("readerversion", r.ReaderVersion, p.ReaderVersion) 535 field("flags", r.Flags, p.Flags) 536 field("method", r.Method, p.Method) 537 // Older versions of Go produce unequal Modified times in archive/zip, 538 // presumably due to some kind of archive/zip parsing error, 539 // or perhaps due to the Extra field being doubled below. 540 // The problem does not happen with Go 1.20. 541 // To allow people to use older Go versions to run gorebuild, 542 // we only check the actual time instant, not the location, in Modified. 543 field("modifiedUnix", r.Modified.UnixNano(), p.Modified.UnixNano()) 544 field("mtime", r.ModifiedTime, p.ModifiedTime) 545 field("mdate", r.ModifiedDate, p.ModifiedDate) 546 if len(p.Extra) == 2*len(r.Extra) && string(p.Extra) == string(r.Extra)+string(r.Extra) { 547 // Mac signing rewrites the zip file, which ends up doubling 548 // the Extra field due to go.dev/issue/61572. 549 // Allow that. 550 } else { 551 field("extra", fmt.Sprintf("%x", r.Extra), fmt.Sprintf("%x", p.Extra)) 552 } 553 field("crc32", r.CRC32, p.CRC32) 554 field("xattrs", r.ExternalAttrs, p.ExternalAttrs) 555 field("usize32", r.UncompressedSize, p.UncompressedSize) 556 field("usize64", r.UncompressedSize64, p.UncompressedSize64) 557 field("content", r.SHA256, p.SHA256) 558 return match 559 } 560 561 return DiffArchive(log, IndexZip(log, rebuilt, fix), IndexZip(log, posted, fix), check) 562 }