golang.org/x/build@v0.0.0-20240506185731-218518f32b70/cmd/gorebuild/darwin.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"bytes"
     9  	"compress/zlib"
    10  	"debug/macho"
    11  	"encoding/binary"
    12  	"encoding/xml"
    13  	"io"
    14  	"io/fs"
    15  	"strings"
    16  )
    17  
    18  // StripDarwinSig parses data as a Mach-O executable, strips the macOS code signature from it,
    19  // and returns the resulting Mach-O executable. It edits data directly, in addition to returning
    20  // a shortened version.
    21  // If data is not a Mach-O executable, StripDarwinSig silently returns it unaltered.
    22  func StripDarwinSig(log *Log, name string, data []byte) []byte {
    23  	// Binaries only expected in bin and pkg/tool.
    24  	// This is an archive path, not a host file system path, so always forward slash.
    25  	if !strings.Contains(name, "/bin/") && !strings.Contains(name, "/pkg/tool/") {
    26  		return data
    27  	}
    28  	// Check 64-bit Mach-O magic before trying to parse, to keep log quiet.
    29  	if len(data) < 4 || string(data[:4]) != "\xcf\xfa\xed\xfe" {
    30  		return data
    31  	}
    32  
    33  	h, err := macho.NewFile(bytes.NewReader(data))
    34  	if err != nil {
    35  		log.Printf("macho %s: %v", name, err)
    36  		return data
    37  	}
    38  	if len(h.Loads) < 4 {
    39  		log.Printf("macho %s: too few loads", name)
    40  		return data
    41  	}
    42  
    43  	// at returns the uint32 at the given data offset.
    44  	// If the offset is out of range, at returns 0.
    45  	le := binary.LittleEndian
    46  	at := func(off int) uint32 {
    47  		if off < 0 || off+4 < 0 || off+4 > len(data) {
    48  			log.Printf("macho %s: offset out of bounds", name)
    49  			return 0
    50  		}
    51  		return le.Uint32(data[off : off+4])
    52  	}
    53  
    54  	// LC_CODE_SIGNATURE must be the last load.
    55  	raw := h.Loads[len(h.Loads)-1].Raw()
    56  	const LC_CODE_SIGNATURE = 0x1d
    57  	if len(raw) != 16 || le.Uint32(raw[0:]) != LC_CODE_SIGNATURE || le.Uint32(raw[4:]) != 16 {
    58  		// OK not to have a signature. No logging.
    59  		return data
    60  	}
    61  	sigOff := le.Uint32(raw[8:])
    62  	sigSize := le.Uint32(raw[12:])
    63  	if int64(sigOff) >= int64(len(data)) {
    64  		log.Printf("macho %s: invalid signature", name)
    65  		return data
    66  	}
    67  
    68  	// Find __LINKEDIT segment (3rd or 4th load, usually).
    69  	// Each load command has its size as the second uint32 of the command.
    70  	// We maintain the offset in the file as we walk, since we need to edit
    71  	// the loads later.
    72  	off := 32
    73  	load := 0
    74  	for {
    75  		if load >= len(h.Loads) {
    76  			log.Printf("macho %s: cannot find __LINKEDIT", name)
    77  			return data
    78  		}
    79  		lc64, ok := h.Loads[load].(*macho.Segment)
    80  		if ok && lc64.Name == "__LINKEDIT" {
    81  			break
    82  		}
    83  		off += int(at(off + 4))
    84  		load++
    85  	}
    86  	if at(off) != uint32(macho.LoadCmdSegment64) {
    87  		log.Printf("macho %s: confused finding __LINKEDIT", name)
    88  		return data
    89  	}
    90  	linkOff := off + 4 + 4 + 16 + 8 // skip cmd, len, name, addr
    91  	if linkOff < 0 || linkOff+32 < 0 || linkOff+32 > len(data) {
    92  		log.Printf("macho %s: confused finding __LINKEDIT", name)
    93  		return data
    94  	}
    95  	for ; load < len(h.Loads)-1; load++ {
    96  		off += int(at(off + 4))
    97  	}
    98  	if off < 0 || off+16 < 0 || off+16 > len(data) {
    99  		log.Printf("macho %s: confused finding signature load", name)
   100  		return data
   101  	}
   102  
   103  	// Point of no return: edit data to strip signature.
   104  
   105  	// Delete LC_CODE_SIGNATURE entry in load table
   106  	le.PutUint32(data[16:], at(16)-1)  // ncmd--
   107  	le.PutUint32(data[20:], at(20)-16) // cmdsz -= 16
   108  	copy(data[off:], make([]byte, 16)) // clear LC_CODE_SIGNATURE
   109  
   110  	// Update __LINKEDIT file and memory size to not include signature.
   111  	//	filesz -= sigSize
   112  	//	memsz = filesz
   113  	// We can't do memsz -= sigSize because the Apple signer rounds memsz
   114  	// to a page boundary. Go always sets memsz = filesz (unrounded).
   115  	fileSize := le.Uint64(data[linkOff+16:]) - uint64(sigSize)
   116  	le.PutUint64(data[linkOff:], fileSize)    // memsz
   117  	le.PutUint64(data[linkOff+16:], fileSize) // filesize
   118  
   119  	// Remove signature bytes at end of file.
   120  	data = data[:sigOff]
   121  
   122  	return data
   123  }
   124  
   125  // DiffDarwinPkg diffs the content of the macOS pkg and tgz files provided,
   126  // logging differences. It returns true if the files were successfully parsed
   127  // and contain the same files, false otherwise.
   128  //
   129  // The pkg file is expected to have paths beginning with ./usr/local/go instead of go.
   130  // The pkg file is allowed to have an extra /etc/paths.d/go file.
   131  func DiffDarwinPkg(log *Log, tgz, pkg []byte) bool {
   132  	check := func(log *Log, rebuilt *TarFile, posted *CpioFile) bool {
   133  		match := true
   134  		name := rebuilt.Name
   135  		field := func(what string, rebuilt, posted any) {
   136  			if posted != rebuilt {
   137  				log.Printf("%s: rebuilt %s = %v, posted = %v", name, what, rebuilt, posted)
   138  				match = false
   139  			}
   140  		}
   141  		r := rebuilt
   142  		p := posted
   143  		field("name", r.Name, p.Name)
   144  		field("size", r.Size, p.Size)
   145  		field("mode", fs.FileMode(r.Mode&0777), p.Mode)
   146  		field("content", r.SHA256, p.SHA256)
   147  		return match
   148  	}
   149  
   150  	return DiffArchive(log, IndexTarGz(log, tgz, nil), indexPkg(log, pkg, nil), check)
   151  }
   152  
   153  // indexPkg returns an index of the pkg file for comparison with a tgz file.
   154  func indexPkg(log *Log, data []byte, fix Fixer) map[string]*CpioFile {
   155  	payload := pkgPayload(log, data)
   156  	if payload == nil {
   157  		return nil
   158  	}
   159  	ix := IndexCpioGz(log, payload, fix)
   160  	if ix == nil {
   161  		return nil
   162  	}
   163  
   164  	// Delete ./etc/paths.d/go, which is not in the tgz,
   165  	// and trim ./usr/local/go/ down to just go/.
   166  	delete(ix, "./etc/paths.d/go")
   167  	for name, f := range ix {
   168  		if strings.HasPrefix(name, "./usr/local/") {
   169  			delete(ix, name)
   170  			name = strings.TrimPrefix(name, "./usr/local/")
   171  			f.Name = name
   172  			ix[name] = f
   173  		}
   174  	}
   175  	return ix
   176  }
   177  
   178  // A minimal xar parser, enough to read macOS .pkg files.
   179  // Package golang.org/x/build/internal/task also has one
   180  // for its internal needs.
   181  //
   182  // See https://en.wikipedia.org/wiki/Xar_(archiver)
   183  // and https://github.com/mackyle/xar/wiki/xarformat.
   184  
   185  // xarHeader is the main XML data structure for the xar header.
   186  type xarHeader struct {
   187  	XMLName xml.Name `xml:"xar"`
   188  	TOC     xarTOC   `xml:"toc"`
   189  }
   190  
   191  // xarTOC is the table of contents.
   192  type xarTOC struct {
   193  	Files []*xarFile `xml:"file"`
   194  }
   195  
   196  // xarFile is a single file in the table of contents.
   197  // Directories have Type "directory" and contain other files.
   198  type xarFile struct {
   199  	Data  xarFileData `xml:"data"`
   200  	Name  string      `xml:"name"`
   201  	Type  string      `xml:"type"` // "file", "directory"
   202  	Files []*xarFile  `xml:"file"`
   203  }
   204  
   205  // xarFileData is the metadata describing a single file.
   206  type xarFileData struct {
   207  	Length   int64       `xml:"length"`
   208  	Offset   int64       `xml:"offset"`
   209  	Size     int64       `xml:"size"`
   210  	Encoding xarEncoding `xml:"encoding"`
   211  }
   212  
   213  // xarEncoding has an attribute giving the encoding for a file's content.
   214  type xarEncoding struct {
   215  	Style string `xml:"style,attr"`
   216  }
   217  
   218  // pkgPayload parses data as a macOS pkg file for the Go installer
   219  // and returns the content of the file org.golang.go.pkg/Payload.
   220  func pkgPayload(log *Log, data []byte) []byte {
   221  	if len(data) < 28 || string(data[0:4]) != "xar!" {
   222  		log.Printf("not an XAR file format (missing a 28+ byte header with 'xar!' magic number)")
   223  		return nil
   224  	}
   225  	be := binary.BigEndian
   226  	hdrSize := be.Uint16(data[4:])
   227  	vers := be.Uint16(data[6:])
   228  	tocCSize := be.Uint64(data[8:])
   229  	tocUSize := be.Uint64(data[16:])
   230  
   231  	if vers != 1 {
   232  		log.Printf("bad xar version %d", vers)
   233  		return nil
   234  	}
   235  	if int(hdrSize) >= len(data) || uint64(len(data))-uint64(hdrSize) < tocCSize {
   236  		log.Printf("xar header bounds not in file")
   237  		return nil
   238  	}
   239  
   240  	data = data[hdrSize:]
   241  	chdr, data := data[:tocCSize], data[tocCSize:]
   242  
   243  	// Header is zlib-compressed XML.
   244  	zr, err := zlib.NewReader(bytes.NewReader(chdr))
   245  	if err != nil {
   246  		log.Printf("reading xar header: %v", err)
   247  		return nil
   248  	}
   249  	defer zr.Close()
   250  	hdrXML := make([]byte, tocUSize+1)
   251  	n, err := io.ReadFull(zr, hdrXML)
   252  	if uint64(n) != tocUSize {
   253  		log.Printf("invalid xar header size %d", n)
   254  		return nil
   255  	}
   256  	if err != io.ErrUnexpectedEOF {
   257  		log.Printf("reading xar header: %v", err)
   258  		return nil
   259  	}
   260  	hdrXML = hdrXML[:tocUSize]
   261  	var hdr xarHeader
   262  	if err := xml.Unmarshal(hdrXML, &hdr); err != nil {
   263  		log.Printf("unmarshaling xar header: %v", err)
   264  		return nil
   265  	}
   266  
   267  	// Walk TOC file tree to find org.golang.go.pkg/Payload.
   268  	for _, f := range hdr.TOC.Files {
   269  		if f.Name == "org.golang.go.pkg" && f.Type == "directory" {
   270  			for _, f := range f.Files {
   271  				if f.Name == "Payload" {
   272  					if f.Type != "file" {
   273  						log.Printf("bad xar payload type %s", f.Type)
   274  						return nil
   275  					}
   276  					if f.Data.Encoding.Style != "application/octet-stream" {
   277  						log.Printf("bad xar encoding %s", f.Data.Encoding.Style)
   278  						return nil
   279  					}
   280  					if f.Data.Offset >= int64(len(data)) || f.Data.Size > int64(len(data))-f.Data.Offset {
   281  						log.Printf("xar payload bounds not in file")
   282  						return nil
   283  					}
   284  					return data[f.Data.Offset:][:f.Data.Size]
   285  				}
   286  			}
   287  		}
   288  	}
   289  	log.Printf("payload not found")
   290  	return nil
   291  }