github.com/jhump/golang-x-tools@v0.0.0-20220218190644-4958d6d39439/cmd/splitdwarf/splitdwarf.go (about)

     1  // Copyright 2018 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:build !js && !nacl && !plan9 && !solaris && !windows
     6  // +build !js,!nacl,!plan9,!solaris,!windows
     7  
     8  /*
     9  
    10  Splitdwarf uncompresses and copies the DWARF segment of a Mach-O
    11  executable into the "dSYM" file expected by lldb and ports of gdb
    12  on OSX.
    13  
    14  Usage: splitdwarf osxMachoFile [ osxDsymFile ]
    15  
    16  Unless a dSYM file name is provided on the command line,
    17  splitdwarf will place it where the OSX tools expect it, in
    18  "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>",
    19  creating directories as necessary.
    20  
    21  */
    22  package main // import "github.com/jhump/golang-x-tools/cmd/splitdwarf"
    23  
    24  import (
    25  	"crypto/sha256"
    26  	"fmt"
    27  	"io"
    28  	"os"
    29  	"path/filepath"
    30  	"strings"
    31  	"syscall"
    32  
    33  	"github.com/jhump/golang-x-tools/cmd/splitdwarf/internal/macho"
    34  )
    35  
    36  const (
    37  	pageAlign = 12 // 4096 = 1 << 12
    38  )
    39  
    40  func note(format string, why ...interface{}) {
    41  	fmt.Fprintf(os.Stderr, format+"\n", why...)
    42  }
    43  
    44  func fail(format string, why ...interface{}) {
    45  	note(format, why...)
    46  	os.Exit(1)
    47  }
    48  
    49  // splitdwarf inputexe [ outputdwarf ]
    50  func main() {
    51  	if len(os.Args) < 2 || len(os.Args) > 3 {
    52  		fmt.Printf(`
    53  Usage: %s input_exe [ output_dsym ]
    54  Reads the executable input_exe, uncompresses and copies debugging
    55  information into output_dsym. If output_dsym is not specified,
    56  the path
    57        input_exe.dSYM/Contents/Resources/DWARF/input_exe
    58  is used instead.  That is the path that gdb and lldb expect
    59  on OSX.  Input_exe needs a UUID segment; if that is missing,
    60  then one is created and added.  In that case, the permissions
    61  for input_exe need to allow writing.
    62  `, os.Args[0])
    63  		return
    64  	}
    65  
    66  	// Read input, find DWARF, be sure it looks right
    67  	inputExe := os.Args[1]
    68  	exeFile, err := os.Open(inputExe)
    69  	if err != nil {
    70  		fail("%v", err)
    71  	}
    72  	exeMacho, err := macho.NewFile(exeFile)
    73  	if err != nil {
    74  		fail("(internal) Couldn't create macho, %v", err)
    75  	}
    76  	// Postpone dealing with output till input is known-good
    77  
    78  	// describe(&exeMacho.FileTOC)
    79  
    80  	// Offsets into __LINKEDIT:
    81  	//
    82  	// Command LC_SYMTAB =
    83  	//  (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries
    84  	// struct {
    85  	//  StringTableIndex uint32
    86  	//  Type, SectionIndex uint8
    87  	//  Description uint16
    88  	//  Value uint64
    89  	// }
    90  	//
    91  	// (2) string table offset and size.  Strings are zero-byte terminated.  First must be " ".
    92  	//
    93  	// Command LC_DYSYMTAB = indices within symtab (above), except for IndSym
    94  	//   IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab.
    95  	//
    96  	// Section __TEXT.__symbol_stub1.
    97  	//   Offset and size (Reserved2) locate and describe a table for thios section.
    98  	//   Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table.
    99  	//   (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline])
   100  	//
   101  	// Section __DATA.__nl_symbol_ptr.
   102  	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
   103  	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
   104  	//
   105  	// Section __DATA.__la_symbol_ptr.
   106  	//   Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset)
   107  	//   Some of these symbols appear to be duplicates of other indirect symbols appearing early
   108  	//
   109  
   110  	// Create a File for the output dwarf.
   111  	// Copy header, file type is MH_DSYM
   112  	// Copy the relevant load commands
   113  
   114  	// LoadCmdUuid
   115  	// Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these).
   116  	// Segment __PAGEZERO
   117  	// Segment __TEXT (zero the size, zero the offset of each section)
   118  	// Segment __DATA (zero the size, zero the offset of each section)
   119  	// Segment __LINKEDIT (contains the symbols and strings from Symtab)
   120  	// Segment __DWARF (uncompressed)
   121  
   122  	var uuid *macho.Uuid
   123  	for _, l := range exeMacho.Loads {
   124  		switch l.Command() {
   125  		case macho.LcUuid:
   126  			uuid = l.(*macho.Uuid)
   127  		}
   128  	}
   129  
   130  	// Ensure a given load is not nil
   131  	nonnilC := func(l macho.Load, s string) {
   132  		if l == nil {
   133  			fail("input file %s lacks load command %s", inputExe, s)
   134  		}
   135  	}
   136  
   137  	// Find a segment by name and ensure it is not nil
   138  	nonnilS := func(s string) *macho.Segment {
   139  		l := exeMacho.Segment(s)
   140  		if l == nil {
   141  			fail("input file %s lacks segment %s", inputExe, s)
   142  		}
   143  		return l
   144  	}
   145  
   146  	newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0)
   147  
   148  	symtab := exeMacho.Symtab
   149  	dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output
   150  	nonnilC(symtab, "symtab")
   151  	nonnilC(dysymtab, "dysymtab")
   152  	text := nonnilS("__TEXT")
   153  	data := nonnilS("__DATA")
   154  	linkedit := nonnilS("__LINKEDIT")
   155  	pagezero := nonnilS("__PAGEZERO")
   156  
   157  	newtext := text.CopyZeroed()
   158  	newdata := data.CopyZeroed()
   159  	newsymtab := symtab.Copy()
   160  
   161  	// Linkedit segment contain symbols and strings;
   162  	// Symtab refers to offsets into linkedit.
   163  	// This next bit initializes newsymtab and sets up data structures for the linkedit segment
   164  	linkeditsyms := []macho.Nlist64{}
   165  	linkeditstrings := []string{}
   166  
   167  	// Linkedit will begin at the second page, i.e., offset is one page from beginning
   168  	// Symbols come first
   169  	linkeditsymbase := uint32(1) << pageAlign
   170  
   171  	// Strings come second, offset by the number of symbols times their size.
   172  	// Only those symbols from dysymtab.defsym are written into the debugging information.
   173  	linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym
   174  
   175  	// The first two bytes of the strings are reserved for space, null (' ', \000)
   176  	linkeditstringcur := uint32(2)
   177  
   178  	newsymtab.Syms = newsymtab.Syms[:0]
   179  	newsymtab.Symoff = linkeditsymbase
   180  	newsymtab.Stroff = linkeditstringbase
   181  	newsymtab.Nsyms = dysymtab.Nextdefsym
   182  	for i := uint32(0); i < dysymtab.Nextdefsym; i++ {
   183  		ii := i + dysymtab.Iextdefsym
   184  		oldsym := symtab.Syms[ii]
   185  		newsymtab.Syms = append(newsymtab.Syms, oldsym)
   186  
   187  		linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur),
   188  			Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value})
   189  		linkeditstringcur += uint32(len(oldsym.Name)) + 1
   190  		linkeditstrings = append(linkeditstrings, oldsym.Name)
   191  	}
   192  	newsymtab.Strsize = linkeditstringcur
   193  
   194  	exeNeedsUuid := uuid == nil
   195  	if exeNeedsUuid {
   196  		uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}}
   197  		uuid.Len = uuid.LoadSize(newtoc)
   198  		copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16])
   199  		uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3
   200  		uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1
   201  	}
   202  	newtoc.AddLoad(uuid)
   203  
   204  	// For the specified segment (assumed to be in exeMacho) make a copy of its
   205  	// sections with appropriate fields zeroed out, and append them to the
   206  	// currently-last segment in newtoc.
   207  	copyZOdSections := func(g *macho.Segment) {
   208  		for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ {
   209  			s := exeMacho.Sections[i].Copy()
   210  			s.Offset = 0
   211  			s.Reloff = 0
   212  			s.Nreloc = 0
   213  			newtoc.AddSection(s)
   214  		}
   215  	}
   216  
   217  	newtoc.AddLoad(newsymtab)
   218  	newtoc.AddSegment(pagezero)
   219  	newtoc.AddSegment(newtext)
   220  	copyZOdSections(text)
   221  	newtoc.AddSegment(newdata)
   222  	copyZOdSections(data)
   223  
   224  	newlinkedit := linkedit.Copy()
   225  	newlinkedit.Offset = uint64(linkeditsymbase)
   226  	newlinkedit.Filesz = uint64(linkeditstringcur)
   227  	newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file
   228  	newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign)
   229  	// The rest should copy over fine.
   230  	newtoc.AddSegment(newlinkedit)
   231  
   232  	dwarf := nonnilS("__DWARF")
   233  	newdwarf := dwarf.CopyZeroed()
   234  	newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign)
   235  	newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1)
   236  	newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file.
   237  	newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign)
   238  	newtoc.AddSegment(newdwarf)
   239  
   240  	// Map out Dwarf sections (that is, this is section descriptors, not their contents).
   241  	offset := uint32(newdwarf.Offset)
   242  	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
   243  		o := exeMacho.Sections[i]
   244  		s := o.Copy()
   245  		s.Offset = offset
   246  		us := o.UncompressedSize()
   247  		if s.Size < us {
   248  			s.Size = uint64(us)
   249  			s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes.
   250  		}
   251  		offset += uint32(us)
   252  		if strings.HasPrefix(s.Name, "__z") {
   253  			s.Name = "__" + s.Name[3:] // remove "z"
   254  		}
   255  		s.Reloff = 0
   256  		s.Nreloc = 0
   257  		newtoc.AddSection(s)
   258  	}
   259  
   260  	// Write segments/sections.
   261  	// Only dwarf and linkedit contain anything interesting.
   262  
   263  	// Memory map the output file to get the buffer directly.
   264  	outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF"
   265  	if len(os.Args) > 2 {
   266  		outDwarf = os.Args[2]
   267  	} else {
   268  		err := os.MkdirAll(outDwarf, 0755)
   269  		if err != nil {
   270  			fail("%v", err)
   271  		}
   272  		outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe))
   273  	}
   274  	dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize()))
   275  
   276  	// (1) Linkedit segment
   277  	// Symbol table
   278  	offset = uint32(newlinkedit.Offset)
   279  	for i := range linkeditsyms {
   280  		if exeMacho.Magic == macho.Magic64 {
   281  			offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder)
   282  		} else {
   283  			offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder)
   284  		}
   285  	}
   286  
   287  	// Initial two bytes of string table, followed by actual zero-terminated strings.
   288  	buffer[linkeditstringbase] = ' '
   289  	buffer[linkeditstringbase+1] = 0
   290  	offset = linkeditstringbase + 2
   291  	for _, str := range linkeditstrings {
   292  		for i := 0; i < len(str); i++ {
   293  			buffer[offset] = str[i]
   294  			offset++
   295  		}
   296  		buffer[offset] = 0
   297  		offset++
   298  	}
   299  
   300  	// (2) DWARF segment
   301  	ioff := newdwarf.Firstsect - dwarf.Firstsect
   302  	for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ {
   303  		s := exeMacho.Sections[i]
   304  		j := i + ioff
   305  		s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:])
   306  	}
   307  
   308  	// Because "text" overlaps the header and the loads, write them afterwards, just in case.
   309  	// Write header.
   310  	newtoc.Put(buffer)
   311  
   312  	err = syscall.Munmap(buffer)
   313  	if err != nil {
   314  		fail("Munmap %s for dwarf output failed, %v", outDwarf, err)
   315  	}
   316  	err = dwarfFile.Close()
   317  	if err != nil {
   318  		fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err)
   319  	}
   320  
   321  	if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command
   322  		hdr := exeMacho.FileTOC.FileHeader
   323  		oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize()
   324  		hdr.NCommands += 1
   325  		hdr.SizeCommands += uuid.LoadSize(newtoc)
   326  
   327  		mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0)
   328  		if err != nil {
   329  			fail("Updating UUID in binary failed, %v", err)
   330  		}
   331  		exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)),
   332  			syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
   333  		if err != nil {
   334  			fail("Mmap of %s for UUID update failed, %v", inputExe, err)
   335  		}
   336  		_ = hdr.Put(exebuf, newtoc.ByteOrder)
   337  		_ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder)
   338  		err = syscall.Munmap(exebuf)
   339  		if err != nil {
   340  			fail("Munmap of %s for UUID update failed, %v", inputExe, err)
   341  		}
   342  	}
   343  }
   344  
   345  // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file,
   346  // and returns the file descriptor and mapped buffer.
   347  func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) {
   348  	dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666)
   349  	if err != nil {
   350  		fail("Open for mmap failed, %v", err)
   351  	}
   352  	err = os.Truncate(outDwarf, size)
   353  	if err != nil {
   354  		fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err)
   355  	}
   356  	buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED)
   357  	if err != nil {
   358  		fail("Mmap %s for dwarf output update failed, %v", outDwarf, err)
   359  	}
   360  	return dwarfFile, buffer
   361  }
   362  
   363  func describe(exem *macho.FileTOC) {
   364  	note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags))
   365  	for i, l := range exem.Loads {
   366  		if s, ok := l.(*macho.Segment); ok {
   367  			fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name,
   368  				s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect)
   369  			for j := uint32(0); j < s.Nsect; j++ {
   370  				c := exem.Sections[j+s.Firstsect]
   371  				fmt.Printf("   Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3)
   372  			}
   373  		} else {
   374  			fmt.Printf("Load %d is %v\n", i, l)
   375  		}
   376  	}
   377  	if exem.SizeCommands != exem.LoadSize() {
   378  		fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize())
   379  	} else {
   380  		note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize())
   381  	}
   382  	note("File size is %d", exem.FileSize())
   383  }
   384  
   385  // contentuuid returns a UUID derived from (some of) the content of an executable.
   386  // specifically included are the non-DWARF sections, specifically excluded are things
   387  // that surely depend on the presence or absence of DWARF sections (e.g., section
   388  // numbers, positions with file, number of load commands).
   389  // (It was considered desirable if this was insensitive to the presence of the
   390  // __DWARF segment, however because it is not last, it moves other segments,
   391  // whose contents appear to contain file offset references.)
   392  func contentuuid(exem *macho.FileTOC) []byte {
   393  	h := sha256.New()
   394  	for _, l := range exem.Loads {
   395  		if l.Command() == macho.LcUuid {
   396  			continue
   397  		}
   398  		if s, ok := l.(*macho.Segment); ok {
   399  			if s.Name == "__DWARF" || s.Name == "__PAGEZERO" {
   400  				continue
   401  			}
   402  			for j := uint32(0); j < s.Nsect; j++ {
   403  				c := exem.Sections[j+s.Firstsect]
   404  				io.Copy(h, c.Open())
   405  			}
   406  		} // Getting dependence on other load commands right is fiddly.
   407  	}
   408  	return h.Sum(nil)
   409  }