github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/cmd/splitdwarf/splitdwarf.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !js && !nacl && !plan9 && !solaris && !windows
     6  // +build !js,!nacl,!plan9,!solaris,!windows
     7  
     8  /*
     9  Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
    10  executable into the "dSYM" file expected by lldb and ports of gdb
    11  on OSX.
    12  
    13  Usage: splitdwarf osxMachoFile [ osxDsymFile ]
    14  
    15  Unless a dSYM file name is provided on the command line,
    16  splitdwarf will place it where the OSX tools expect it, in
    17  "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
    18  creating directories as necessary.
    19  */
    20  package main // import "golang.org/x/tools/cmd/splitdwarf"
    21  
    22  import (
    23  	"crypto/sha256"
    24  	"fmt"
    25  	"io"
    26  	"os"
    27  	"path/filepath"
    28  	"strings"
    29  	"syscall"
    30  
    31  	"golang.org/x/tools/cmd/splitdwarf/internal/macho"
    32  )
    33  
const (
	// pageAlign is the base-2 logarithm of the page size used when laying
	// out the output file; it is used as a shift count (1 << pageAlign)
	// wherever offsets, addresses, and sizes are rounded to page boundaries.
	pageAlign = 12 // 4096 = 1 << 12
)
    37  
    38  func note(format string, why ...interface{}) {
    39  	fmt.Fprintf(os.Stderr, format+"\n", why...)
    40  }
    41  
    42  func fail(format string, why ...interface{}) {
    43  	note(format, why...)
    44  	os.Exit(1)
    45  }
    46  
    47  // splitdwarf inputexe [ outputdwarf ]
    48  func main() {
    49  	if len(os.Args) < 2 || len(os.Args) > 3 {
    50  		fmt.Printf(`
    51  Usage: %s input_exe [ output_dsym ]
    52  Reads the executable input_exe, uncompresses and copies debugging
    53  information into output_dsym. If output_dsym is not specified,
    54  the path
    55        input_exe.dSYM/Contents/Resources/DWARF/input_exe
    56  is used instead.  That is the path that gdb and lldb expect
    57  on OSX.  Input_exe needs a UUID segment; if that is missing,
    58  then one is created and added.  In that case, the permissions
    59  for input_exe need to allow writing.
    60  `, os.Args[0])
    61  		return
    62  	}
    63  
    64  	// Read input, find DWARF, be sure it looks right
    65  	inputExe := os.Args[1]
    66  	exeFile, err := os.Open(inputExe)
    67  	if err != nil {
    68  		fail("%v", err)
    69  	}
    70  	exeMacho, err := macho.NewFile(exeFile)
    71  	if err != nil {
    72  		fail("(internal) Couldn't create macho, %v", err)
    73  	}
    74  	// Postpone dealing with output till input is known-good
    75  
    76  	// describe(&exeMacho.FileTOC)
    77  
    78  	// Offsets into __LINKEDIT:
    79  	//
    80  	// Command LC_SYMTAB =
    81  	//  (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
    82  	// struct {
    83  	//  StringTableIndex uint32
    84  	//  Type, SectionIndex uint8
    85  	//  Description uint16
    86  	//  Value uint64
    87  	// }
    88  	//
    89  	// (2) string table offset and size.  Strings are zero-byte terminated.  First must be " ".
    90  	//
    91  	// Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
    92  	//   IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
    93  	//
    94  	// Section __TEXT.__symbol_stub1.
    95  	//   Offset and size (Reserved2) locate and describe a table for this section.
    96  	//   Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
    97  	//   (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
    98  	//
    99  	// Section __DATA.__nl_symbol_ptr.
   100  	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
   101  	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
   102  	//
   103  	// Section __DATA.__la_symbol_ptr.
   104  	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
   105  	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
   106  	//
   107  
   108  	// Create a File for the output dwarf.
   109  	// Copy header, file type is MH_DSYM
   110  	// Copy the relevant load commands
   111  
   112  	// LoadCmdUuid
   113  	// Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
   114  	// Segment __PAGEZERO
   115  	// Segment __TEXT (zero the size, zero the offset of each section)
   116  	// Segment __DATA (zero the size, zero the offset of each section)
   117  	// Segment __LINKEDIT (contains the symbols and strings from Symtab)
   118  	// Segment __DWARF (uncompressed)
   119  
   120  	var uuid *macho.Uuid
   121  	for _, l := range exeMacho.Loads {
   122  		switch l.Command() {
   123  		case macho.LcUuid:
   124  			uuid = l.(*macho.Uuid)
   125  		}
   126  	}
   127  
   128  	// Ensure a given load is not nil
   129  	nonnilC := func(l macho.Load, s string) {
   130  		if l == nil {
   131  			fail("input file %s lacks load command %s", inputExe, s)
   132  		}
   133  	}
   134  
   135  	// Find a segment by name and ensure it is not nil
   136  	nonnilS := func(s string) *macho.Segment {
   137  		l := exeMacho.Segment(s)
   138  		if l == nil {
   139  			fail("input file %s lacks segment %s", inputExe, s)
   140  		}
   141  		return l
   142  	}
   143  
   144  	newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
   145  
   146  	symtab := exeMacho.Symtab
   147  	dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
   148  	nonnilC(symtab, "symtab")
   149  	nonnilC(dysymtab, "dysymtab")
   150  	text := nonnilS("__TEXT")
   151  	data := nonnilS("__DATA")
   152  	linkedit := nonnilS("__LINKEDIT")
   153  	pagezero := nonnilS("__PAGEZERO")
   154  
   155  	newtext := text.CopyZeroed()
   156  	newdata := data.CopyZeroed()
   157  	newsymtab := symtab.Copy()
   158  
   159  	// Linkedit segment contain symbols and strings;
   160  	// Symtab refers to offsets into linkedit.
   161  	// This next bit initializes newsymtab and sets up data structures for the linkedit segment
   162  	linkeditsyms := []macho.Nlist64{}
   163  	linkeditstrings := []string{}
   164  
   165  	// Linkedit will begin at the second page, i.e., offset is one page from beginning
   166  	// Symbols come first
   167  	linkeditsymbase := uint32(1) << pageAlign
   168  
   169  	// Strings come second, offset by the number of symbols times their size.
   170  	// Only those symbols from dysymtab.defsym are written into the debugging information.
   171  	linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
   172  
   173  	// The first two bytes of the strings are reserved for space, null (' ', \000)
   174  	linkeditstringcur := uint32(2)
   175  
   176  	newsymtab.Syms = newsymtab.Syms[:0]
   177  	newsymtab.Symoff = linkeditsymbase
   178  	newsymtab.Stroff = linkeditstringbase
   179  	newsymtab.Nsyms = dysymtab.Nextdefsym
   180  	for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
   181  		ii := i + dysymtab.Iextdefsym
   182  		oldsym := symtab.Syms[ii]
   183  		newsymtab.Syms = append(newsymtab.Syms, oldsym)
   184  
   185  		linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: linkeditstringcur,
   186  			Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
   187  		linkeditstringcur += uint32(len(oldsym.Name)) + 1
   188  		linkeditstrings = append(linkeditstrings, oldsym.Name)
   189  	}
   190  	newsymtab.Strsize = linkeditstringcur
   191  
   192  	exeNeedsUuid := uuid == nil
   193  	if exeNeedsUuid {
   194  		uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
   195  		uuid.Len = uuid.LoadSize(newtoc)
   196  		copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
   197  		uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
   198  		uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
   199  	}
   200  	newtoc.AddLoad(uuid)
   201  
   202  	// For the specified segment (assumed to be in exeMacho) make a copy of its
   203  	// sections with appropriate fields zeroed out, and append them to the
   204  	// currently-last segment in newtoc.
   205  	copyZOdSections := func(g *macho.Segment) {
   206  		for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
   207  			s := exeMacho.Sections[i].Copy()
   208  			s.Offset = 0
   209  			s.Reloff = 0
   210  			s.Nreloc = 0
   211  			newtoc.AddSection(s)
   212  		}
   213  	}
   214  
   215  	newtoc.AddLoad(newsymtab)
   216  	newtoc.AddSegment(pagezero)
   217  	newtoc.AddSegment(newtext)
   218  	copyZOdSections(text)
   219  	newtoc.AddSegment(newdata)
   220  	copyZOdSections(data)
   221  
   222  	newlinkedit := linkedit.Copy()
   223  	newlinkedit.Offset = uint64(linkeditsymbase)
   224  	newlinkedit.Filesz = uint64(linkeditstringcur)
   225  	newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
   226  	newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
   227  	// The rest should copy over fine.
   228  	newtoc.AddSegment(newlinkedit)
   229  
   230  	dwarf := nonnilS("__DWARF")
   231  	newdwarf := dwarf.CopyZeroed()
   232  	newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
   233  	newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
   234  	newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
   235  	newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
   236  	newtoc.AddSegment(newdwarf)
   237  
   238  	// Map out Dwarf sections (that is, this is section descriptors, not their contents).
   239  	offset := uint32(newdwarf.Offset)
   240  	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
   241  		o := exeMacho.Sections[i]
   242  		s := o.Copy()
   243  		s.Offset = offset
   244  		us := o.UncompressedSize()
   245  		if s.Size < us {
   246  			s.Size = uint64(us)
   247  			s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
   248  		}
   249  		offset += uint32(us)
   250  		if strings.HasPrefix(s.Name, "__z") {
   251  			s.Name = "__" + s.Name[3:] // remove "z"
   252  		}
   253  		s.Reloff = 0
   254  		s.Nreloc = 0
   255  		newtoc.AddSection(s)
   256  	}
   257  
   258  	// Write segments/sections.
   259  	// Only dwarf and linkedit contain anything interesting.
   260  
   261  	// Memory map the output file to get the buffer directly.
   262  	outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
   263  	if len(os.Args) > 2 {
   264  		outDwarf = os.Args[2]
   265  	} else {
   266  		err := os.MkdirAll(outDwarf, 0755)
   267  		if err != nil {
   268  			fail("%v", err)
   269  		}
   270  		outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
   271  	}
   272  	dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
   273  
   274  	// (1) Linkedit segment
   275  	// Symbol table
   276  	offset = uint32(newlinkedit.Offset)
   277  	for i := range linkeditsyms {
   278  		if exeMacho.Magic == macho.Magic64 {
   279  			offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
   280  		} else {
   281  			offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
   282  		}
   283  	}
   284  
   285  	// Initial two bytes of string table, followed by actual zero-terminated strings.
   286  	buffer[linkeditstringbase] = ' '
   287  	buffer[linkeditstringbase+1] = 0
   288  	offset = linkeditstringbase + 2
   289  	for _, str := range linkeditstrings {
   290  		for i := 0; i < len(str); i++ {
   291  			buffer[offset] = str[i]
   292  			offset++
   293  		}
   294  		buffer[offset] = 0
   295  		offset++
   296  	}
   297  
   298  	// (2) DWARF segment
   299  	ioff := newdwarf.Firstsect - dwarf.Firstsect
   300  	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
   301  		s := exeMacho.Sections[i]
   302  		j := i + ioff
   303  		s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
   304  	}
   305  
   306  	// Because "text" overlaps the header and the loads, write them afterwards, just in case.
   307  	// Write header.
   308  	newtoc.Put(buffer)
   309  
   310  	err = syscall.Munmap(buffer)
   311  	if err != nil {
   312  		fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
   313  	}
   314  	err = dwarfFile.Close()
   315  	if err != nil {
   316  		fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
   317  	}
   318  
   319  	if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
   320  		hdr := exeMacho.FileTOC.FileHeader
   321  		oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
   322  		hdr.NCommands += 1
   323  		hdr.SizeCommands += uuid.LoadSize(newtoc)
   324  
   325  		mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
   326  		if err != nil {
   327  			fail("Updating UUID in binary failed, %v", err)
   328  		}
   329  		exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
   330  			syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
   331  		if err != nil {
   332  			fail("Mmap of %s for UUID update failed, %v", inputExe, err)
   333  		}
   334  		_ = hdr.Put(exebuf, newtoc.ByteOrder)
   335  		_ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
   336  		err = syscall.Munmap(exebuf)
   337  		if err != nil {
   338  			fail("Munmap of %s for UUID update failed, %v", inputExe, err)
   339  		}
   340  	}
   341  }
   342  
   343  // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
   344  // and returns the file descriptor and mapped buffer.
   345  func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
   346  	dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
   347  	if err != nil {
   348  		fail("Open for mmap failed, %v", err)
   349  	}
   350  	err = os.Truncate(outDwarf, size)
   351  	if err != nil {
   352  		fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
   353  	}
   354  	buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
   355  	if err != nil {
   356  		fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
   357  	}
   358  	return dwarfFile, buffer
   359  }
   360  
   361  func describe(exem *macho.FileTOC) {
   362  	note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
   363  	for i, l := range exem.Loads {
   364  		if s, ok := l.(*macho.Segment); ok {
   365  			fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
   366  				s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
   367  			for j := uint32(0); j < s.Nsect; j++ {
   368  				c := exem.Sections[j+s.Firstsect]
   369  				fmt.Printf("   Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
   370  			}
   371  		} else {
   372  			fmt.Printf("Load %d is %v\n", i, l)
   373  		}
   374  	}
   375  	if exem.SizeCommands != exem.LoadSize() {
   376  		fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
   377  	} else {
   378  		note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
   379  	}
   380  	note("File size is %d", exem.FileSize())
   381  }
   382  
   383  // contentuuid returns a UUID derived from (some of) the content of an executable.
   384  // specifically included are the non-DWARF sections, specifically excluded are things
   385  // that surely depend on the presence or absence of DWARF sections (e.g., section
   386  // numbers, positions with file, number of load commands).
   387  // (It was considered desirable if this was insensitive to the presence of the
   388  // __DWARF segment, however because it is not last, it moves other segments,
   389  // whose contents appear to contain file offset references.)
   390  func contentuuid(exem *macho.FileTOC) []byte {
   391  	h := sha256.New()
   392  	for _, l := range exem.Loads {
   393  		if l.Command() == macho.LcUuid {
   394  			continue
   395  		}
   396  		if s, ok := l.(*macho.Segment); ok {
   397  			if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
   398  				continue
   399  			}
   400  			for j := uint32(0); j < s.Nsect; j++ {
   401  				c := exem.Sections[j+s.Firstsect]
   402  				io.Copy(h, c.Open())
   403  			}
   404  		} // Getting dependence on other load commands right is fiddly.
   405  	}
   406  	return h.Sum(nil)
   407  }