github.com/cockroachdb/tools@v0.0.0-20230222021103-a6d27438930d/cmd/splitdwarf/splitdwarf.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:build !js && !nacl && !plan9 && !solaris && !windows 6 // +build !js,!nacl,!plan9,!solaris,!windows 7 8 /* 9 Splitdwarf uncompresses and copies the DWARF segment of a Mach-O 10 executable into the "dSYM" file expected by lldb and ports of gdb 11 on OSX. 12 13 Usage: splitdwarf osxMachoFile [ osxDsymFile ] 14 15 Unless a dSYM file name is provided on the command line, 16 splitdwarf will place it where the OSX tools expect it, in 17 "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>", 18 creating directories as necessary. 19 */ 20 package main // import "golang.org/x/tools/cmd/splitdwarf" 21 22 import ( 23 "crypto/sha256" 24 "fmt" 25 "io" 26 "os" 27 "path/filepath" 28 "strings" 29 "syscall" 30 31 "golang.org/x/tools/cmd/splitdwarf/internal/macho" 32 ) 33 34 const ( 35 pageAlign = 12 // 4096 = 1 << 12 36 ) 37 38 func note(format string, why ...interface{}) { 39 fmt.Fprintf(os.Stderr, format+"\n", why...) 40 } 41 42 func fail(format string, why ...interface{}) { 43 note(format, why...) 44 os.Exit(1) 45 } 46 47 // splitdwarf inputexe [ outputdwarf ] 48 func main() { 49 if len(os.Args) < 2 || len(os.Args) > 3 { 50 fmt.Printf(` 51 Usage: %s input_exe [ output_dsym ] 52 Reads the executable input_exe, uncompresses and copies debugging 53 information into output_dsym. If output_dsym is not specified, 54 the path 55 input_exe.dSYM/Contents/Resources/DWARF/input_exe 56 is used instead. That is the path that gdb and lldb expect 57 on OSX. Input_exe needs a UUID segment; if that is missing, 58 then one is created and added. In that case, the permissions 59 for input_exe need to allow writing. 60 `, os.Args[0]) 61 return 62 } 63 64 // Read input, find DWARF, be sure it looks right 65 inputExe := os.Args[1] 66 exeFile, err := os.Open(inputExe) 67 if err != nil { 68 fail("%v", err) 69 } 70 exeMacho, err := macho.NewFile(exeFile) 71 if err != nil { 72 fail("(internal) Couldn't create macho, %v", err) 73 } 74 // Postpone dealing with output till input is known-good 75 76 // describe(&exeMacho.FileTOC) 77 78 // Offsets into __LINKEDIT: 79 // 80 // Command LC_SYMTAB = 81 // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries 82 // struct { 83 // StringTableIndex uint32 84 // Type, SectionIndex uint8 85 // Description uint16 86 // Value uint64 87 // } 88 // 89 // (2) string table offset and size. Strings are zero-byte terminated. First must be " ". 90 // 91 // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym 92 // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab. 93 // 94 // Section __TEXT.__symbol_stub1. 95 // Offset and size (Reserved2) locate and describe a table for this section. 96 // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table. 97 // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline]) 98 // 99 // Section __DATA.__nl_symbol_ptr. 100 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 101 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 102 // 103 // Section __DATA.__la_symbol_ptr. 104 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 105 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 106 // 107 108 // Create a File for the output dwarf. 109 // Copy header, file type is MH_DSYM 110 // Copy the relevant load commands 111 112 // LoadCmdUuid 113 // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these). 114 // Segment __PAGEZERO 115 // Segment __TEXT (zero the size, zero the offset of each section) 116 // Segment __DATA (zero the size, zero the offset of each section) 117 // Segment __LINKEDIT (contains the symbols and strings from Symtab) 118 // Segment __DWARF (uncompressed) 119 120 var uuid *macho.Uuid 121 for _, l := range exeMacho.Loads { 122 switch l.Command() { 123 case macho.LcUuid: 124 uuid = l.(*macho.Uuid) 125 } 126 } 127 128 // Ensure a given load is not nil 129 nonnilC := func(l macho.Load, s string) { 130 if l == nil { 131 fail("input file %s lacks load command %s", inputExe, s) 132 } 133 } 134 135 // Find a segment by name and ensure it is not nil 136 nonnilS := func(s string) *macho.Segment { 137 l := exeMacho.Segment(s) 138 if l == nil { 139 fail("input file %s lacks segment %s", inputExe, s) 140 } 141 return l 142 } 143 144 newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0) 145 146 symtab := exeMacho.Symtab 147 dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output 148 nonnilC(symtab, "symtab") 149 nonnilC(dysymtab, "dysymtab") 150 text := nonnilS("__TEXT") 151 data := nonnilS("__DATA") 152 linkedit := nonnilS("__LINKEDIT") 153 pagezero := nonnilS("__PAGEZERO") 154 155 newtext := text.CopyZeroed() 156 newdata := data.CopyZeroed() 157 newsymtab := symtab.Copy() 158 159 // Linkedit segment contain symbols and strings; 160 // Symtab refers to offsets into linkedit. 161 // This next bit initializes newsymtab and sets up data structures for the linkedit segment 162 linkeditsyms := []macho.Nlist64{} 163 linkeditstrings := []string{} 164 165 // Linkedit will begin at the second page, i.e., offset is one page from beginning 166 // Symbols come first 167 linkeditsymbase := uint32(1) << pageAlign 168 169 // Strings come second, offset by the number of symbols times their size. 170 // Only those symbols from dysymtab.defsym are written into the debugging information. 171 linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym 172 173 // The first two bytes of the strings are reserved for space, null (' ', \000) 174 linkeditstringcur := uint32(2) 175 176 newsymtab.Syms = newsymtab.Syms[:0] 177 newsymtab.Symoff = linkeditsymbase 178 newsymtab.Stroff = linkeditstringbase 179 newsymtab.Nsyms = dysymtab.Nextdefsym 180 for i := uint32(0); i < dysymtab.Nextdefsym; i++ { 181 ii := i + dysymtab.Iextdefsym 182 oldsym := symtab.Syms[ii] 183 newsymtab.Syms = append(newsymtab.Syms, oldsym) 184 185 linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: linkeditstringcur, 186 Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value}) 187 linkeditstringcur += uint32(len(oldsym.Name)) + 1 188 linkeditstrings = append(linkeditstrings, oldsym.Name) 189 } 190 newsymtab.Strsize = linkeditstringcur 191 192 exeNeedsUuid := uuid == nil 193 if exeNeedsUuid { 194 uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}} 195 uuid.Len = uuid.LoadSize(newtoc) 196 copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16]) 197 uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3 198 uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1 199 } 200 newtoc.AddLoad(uuid) 201 202 // For the specified segment (assumed to be in exeMacho) make a copy of its 203 // sections with appropriate fields zeroed out, and append them to the 204 // currently-last segment in newtoc. 205 copyZOdSections := func(g *macho.Segment) { 206 for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ { 207 s := exeMacho.Sections[i].Copy() 208 s.Offset = 0 209 s.Reloff = 0 210 s.Nreloc = 0 211 newtoc.AddSection(s) 212 } 213 } 214 215 newtoc.AddLoad(newsymtab) 216 newtoc.AddSegment(pagezero) 217 newtoc.AddSegment(newtext) 218 copyZOdSections(text) 219 newtoc.AddSegment(newdata) 220 copyZOdSections(data) 221 222 newlinkedit := linkedit.Copy() 223 newlinkedit.Offset = uint64(linkeditsymbase) 224 newlinkedit.Filesz = uint64(linkeditstringcur) 225 newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file 226 newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign) 227 // The rest should copy over fine. 228 newtoc.AddSegment(newlinkedit) 229 230 dwarf := nonnilS("__DWARF") 231 newdwarf := dwarf.CopyZeroed() 232 newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign) 233 newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1) 234 newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file. 235 newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign) 236 newtoc.AddSegment(newdwarf) 237 238 // Map out Dwarf sections (that is, this is section descriptors, not their contents). 239 offset := uint32(newdwarf.Offset) 240 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 241 o := exeMacho.Sections[i] 242 s := o.Copy() 243 s.Offset = offset 244 us := o.UncompressedSize() 245 if s.Size < us { 246 s.Size = uint64(us) 247 s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes. 248 } 249 offset += uint32(us) 250 if strings.HasPrefix(s.Name, "__z") { 251 s.Name = "__" + s.Name[3:] // remove "z" 252 } 253 s.Reloff = 0 254 s.Nreloc = 0 255 newtoc.AddSection(s) 256 } 257 258 // Write segments/sections. 259 // Only dwarf and linkedit contain anything interesting. 260 261 // Memory map the output file to get the buffer directly. 262 outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF" 263 if len(os.Args) > 2 { 264 outDwarf = os.Args[2] 265 } else { 266 err := os.MkdirAll(outDwarf, 0755) 267 if err != nil { 268 fail("%v", err) 269 } 270 outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe)) 271 } 272 dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize())) 273 274 // (1) Linkedit segment 275 // Symbol table 276 offset = uint32(newlinkedit.Offset) 277 for i := range linkeditsyms { 278 if exeMacho.Magic == macho.Magic64 { 279 offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder) 280 } else { 281 offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder) 282 } 283 } 284 285 // Initial two bytes of string table, followed by actual zero-terminated strings. 286 buffer[linkeditstringbase] = ' ' 287 buffer[linkeditstringbase+1] = 0 288 offset = linkeditstringbase + 2 289 for _, str := range linkeditstrings { 290 for i := 0; i < len(str); i++ { 291 buffer[offset] = str[i] 292 offset++ 293 } 294 buffer[offset] = 0 295 offset++ 296 } 297 298 // (2) DWARF segment 299 ioff := newdwarf.Firstsect - dwarf.Firstsect 300 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 301 s := exeMacho.Sections[i] 302 j := i + ioff 303 s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:]) 304 } 305 306 // Because "text" overlaps the header and the loads, write them afterwards, just in case. 307 // Write header. 308 newtoc.Put(buffer) 309 310 err = syscall.Munmap(buffer) 311 if err != nil { 312 fail("Munmap %s for dwarf output failed, %v", outDwarf, err) 313 } 314 err = dwarfFile.Close() 315 if err != nil { 316 fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err) 317 } 318 319 if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command 320 hdr := exeMacho.FileTOC.FileHeader 321 oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize() 322 hdr.NCommands += 1 323 hdr.SizeCommands += uuid.LoadSize(newtoc) 324 325 mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0) 326 if err != nil { 327 fail("Updating UUID in binary failed, %v", err) 328 } 329 exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)), 330 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 331 if err != nil { 332 fail("Mmap of %s for UUID update failed, %v", inputExe, err) 333 } 334 _ = hdr.Put(exebuf, newtoc.ByteOrder) 335 _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder) 336 err = syscall.Munmap(exebuf) 337 if err != nil { 338 fail("Munmap of %s for UUID update failed, %v", inputExe, err) 339 } 340 } 341 } 342 343 // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file, 344 // and returns the file descriptor and mapped buffer. 345 func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) { 346 dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) 347 if err != nil { 348 fail("Open for mmap failed, %v", err) 349 } 350 err = os.Truncate(outDwarf, size) 351 if err != nil { 352 fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err) 353 } 354 buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 355 if err != nil { 356 fail("Mmap %s for dwarf output update failed, %v", outDwarf, err) 357 } 358 return dwarfFile, buffer 359 } 360 361 func describe(exem *macho.FileTOC) { 362 note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags)) 363 for i, l := range exem.Loads { 364 if s, ok := l.(*macho.Segment); ok { 365 fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name, 366 s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect) 367 for j := uint32(0); j < s.Nsect; j++ { 368 c := exem.Sections[j+s.Firstsect] 369 fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3) 370 } 371 } else { 372 fmt.Printf("Load %d is %v\n", i, l) 373 } 374 } 375 if exem.SizeCommands != exem.LoadSize() { 376 fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize()) 377 } else { 378 note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize()) 379 } 380 note("File size is %d", exem.FileSize()) 381 } 382 383 // contentuuid returns a UUID derived from (some of) the content of an executable. 384 // specifically included are the non-DWARF sections, specifically excluded are things 385 // that surely depend on the presence or absence of DWARF sections (e.g., section 386 // numbers, positions with file, number of load commands). 387 // (It was considered desirable if this was insensitive to the presence of the 388 // __DWARF segment, however because it is not last, it moves other segments, 389 // whose contents appear to contain file offset references.) 390 func contentuuid(exem *macho.FileTOC) []byte { 391 h := sha256.New() 392 for _, l := range exem.Loads { 393 if l.Command() == macho.LcUuid { 394 continue 395 } 396 if s, ok := l.(*macho.Segment); ok { 397 if s.Name == "__DWARF" || s.Name == "__PAGEZERO" { 398 continue 399 } 400 for j := uint32(0); j < s.Nsect; j++ { 401 c := exem.Sections[j+s.Firstsect] 402 io.Copy(h, c.Open()) 403 } 404 } // Getting dependence on other load commands right is fiddly. 405 } 406 return h.Sum(nil) 407 }