github.com/april1989/origin-go-tools@v0.0.32/cmd/splitdwarf/splitdwarf.go (about) 1 // Copyright 2018 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // +build !js,!nacl,!plan9,!solaris,!windows 6 7 /* 8 9 Splitdwarf uncompresses and copies the DWARF segment of a Mach-O 10 executable into the "dSYM" file expected by lldb and ports of gdb 11 on OSX. 12 13 Usage: splitdwarf osxMachoFile [ osxDsymFile ] 14 15 Unless a dSYM file name is provided on the command line, 16 splitdwarf will place it where the OSX tools expect it, in 17 "<osxMachoFile>.dSYM/Contents/Resources/DWARF/<osxMachoFile>", 18 creating directories as necessary. 19 20 */ 21 package main // import "github.com/april1989/origin-go-tools/cmd/splitdwarf" 22 23 import ( 24 "crypto/sha256" 25 "fmt" 26 "io" 27 "os" 28 "path/filepath" 29 "strings" 30 "syscall" 31 32 "github.com/april1989/origin-go-tools/cmd/splitdwarf/internal/macho" 33 ) 34 35 const ( 36 pageAlign = 12 // 4096 = 1 << 12 37 ) 38 39 func note(format string, why ...interface{}) { 40 fmt.Fprintf(os.Stderr, format+"\n", why...) 41 } 42 43 func fail(format string, why ...interface{}) { 44 note(format, why...) 45 os.Exit(1) 46 } 47 48 // splitdwarf inputexe [ outputdwarf ] 49 func main() { 50 if len(os.Args) < 2 || len(os.Args) > 3 { 51 fmt.Printf(` 52 Usage: %s input_exe [ output_dsym ] 53 Reads the executable input_exe, uncompresses and copies debugging 54 information into output_dsym. If output_dsym is not specified, 55 the path 56 input_exe.dSYM/Contents/Resources/DWARF/input_exe 57 is used instead. That is the path that gdb and lldb expect 58 on OSX. Input_exe needs a UUID segment; if that is missing, 59 then one is created and added. In that case, the permissions 60 for input_exe need to allow writing. 61 `, os.Args[0]) 62 return 63 } 64 65 // Read input, find DWARF, be sure it looks right 66 inputExe := os.Args[1] 67 exeFile, err := os.Open(inputExe) 68 if err != nil { 69 fail("%v", err) 70 } 71 exeMacho, err := macho.NewFile(exeFile) 72 if err != nil { 73 fail("(internal) Couldn't create macho, %v", err) 74 } 75 // Postpone dealing with output till input is known-good 76 77 // describe(&exeMacho.FileTOC) 78 79 // Offsets into __LINKEDIT: 80 // 81 // Command LC_SYMTAB = 82 // (1) number of symbols at file offset (within link edit section) of 16-byte symbol table entries 83 // struct { 84 // StringTableIndex uint32 85 // Type, SectionIndex uint8 86 // Description uint16 87 // Value uint64 88 // } 89 // 90 // (2) string table offset and size. Strings are zero-byte terminated. First must be " ". 91 // 92 // Command LC_DYSYMTAB = indices within symtab (above), except for IndSym 93 // IndSym Offset = file offset (within link edit section) of 4-byte indices within symtab. 94 // 95 // Section __TEXT.__symbol_stub1. 96 // Offset and size (Reserved2) locate and describe a table for thios section. 97 // Symbols beginning at IndirectSymIndex (Reserved1) (see LC_DYSYMTAB.IndSymOffset) refer to this table. 98 // (These table entries are apparently PLTs [Procedure Linkage Table/Trampoline]) 99 // 100 // Section __DATA.__nl_symbol_ptr. 101 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 102 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 103 // 104 // Section __DATA.__la_symbol_ptr. 105 // Reserved1 seems to be an index within the Indirect symbols (see LC_DYSYMTAB.IndSymOffset) 106 // Some of these symbols appear to be duplicates of other indirect symbols appearing early 107 // 108 109 // Create a File for the output dwarf. 110 // Copy header, file type is MH_DSYM 111 // Copy the relevant load commands 112 113 // LoadCmdUuid 114 // Symtab -- very abbreviated (Use DYSYMTAB Iextdefsym, Nextdefsym to identify these). 115 // Segment __PAGEZERO 116 // Segment __TEXT (zero the size, zero the offset of each section) 117 // Segment __DATA (zero the size, zero the offset of each section) 118 // Segment __LINKEDIT (contains the symbols and strings from Symtab) 119 // Segment __DWARF (uncompressed) 120 121 var uuid *macho.Uuid 122 for _, l := range exeMacho.Loads { 123 switch l.Command() { 124 case macho.LcUuid: 125 uuid = l.(*macho.Uuid) 126 } 127 } 128 129 // Ensure a given load is not nil 130 nonnilC := func(l macho.Load, s string) { 131 if l == nil { 132 fail("input file %s lacks load command %s", inputExe, s) 133 } 134 } 135 136 // Find a segment by name and ensure it is not nil 137 nonnilS := func(s string) *macho.Segment { 138 l := exeMacho.Segment(s) 139 if l == nil { 140 fail("input file %s lacks segment %s", inputExe, s) 141 } 142 return l 143 } 144 145 newtoc := exeMacho.FileTOC.DerivedCopy(macho.MhDsym, 0) 146 147 symtab := exeMacho.Symtab 148 dysymtab := exeMacho.Dysymtab // Not appearing in output, but necessary to construct output 149 nonnilC(symtab, "symtab") 150 nonnilC(dysymtab, "dysymtab") 151 text := nonnilS("__TEXT") 152 data := nonnilS("__DATA") 153 linkedit := nonnilS("__LINKEDIT") 154 pagezero := nonnilS("__PAGEZERO") 155 156 newtext := text.CopyZeroed() 157 newdata := data.CopyZeroed() 158 newsymtab := symtab.Copy() 159 160 // Linkedit segment contain symbols and strings; 161 // Symtab refers to offsets into linkedit. 162 // This next bit initializes newsymtab and sets up data structures for the linkedit segment 163 linkeditsyms := []macho.Nlist64{} 164 linkeditstrings := []string{} 165 166 // Linkedit will begin at the second page, i.e., offset is one page from beginning 167 // Symbols come first 168 linkeditsymbase := uint32(1) << pageAlign 169 170 // Strings come second, offset by the number of symbols times their size. 171 // Only those symbols from dysymtab.defsym are written into the debugging information. 172 linkeditstringbase := linkeditsymbase + exeMacho.FileTOC.SymbolSize()*dysymtab.Nextdefsym 173 174 // The first two bytes of the strings are reserved for space, null (' ', \000) 175 linkeditstringcur := uint32(2) 176 177 newsymtab.Syms = newsymtab.Syms[:0] 178 newsymtab.Symoff = linkeditsymbase 179 newsymtab.Stroff = linkeditstringbase 180 newsymtab.Nsyms = dysymtab.Nextdefsym 181 for i := uint32(0); i < dysymtab.Nextdefsym; i++ { 182 ii := i + dysymtab.Iextdefsym 183 oldsym := symtab.Syms[ii] 184 newsymtab.Syms = append(newsymtab.Syms, oldsym) 185 186 linkeditsyms = append(linkeditsyms, macho.Nlist64{Name: uint32(linkeditstringcur), 187 Type: oldsym.Type, Sect: oldsym.Sect, Desc: oldsym.Desc, Value: oldsym.Value}) 188 linkeditstringcur += uint32(len(oldsym.Name)) + 1 189 linkeditstrings = append(linkeditstrings, oldsym.Name) 190 } 191 newsymtab.Strsize = linkeditstringcur 192 193 exeNeedsUuid := uuid == nil 194 if exeNeedsUuid { 195 uuid = &macho.Uuid{macho.UuidCmd{LoadCmd: macho.LcUuid}} 196 uuid.Len = uuid.LoadSize(newtoc) 197 copy(uuid.Id[0:], contentuuid(&exeMacho.FileTOC)[0:16]) 198 uuid.Id[6] = uuid.Id[6]&^0xf0 | 0x40 // version 4 (pseudo-random); see section 4.1.3 199 uuid.Id[8] = uuid.Id[8]&^0xc0 | 0x80 // variant bits; see section 4.1.1 200 } 201 newtoc.AddLoad(uuid) 202 203 // For the specified segment (assumed to be in exeMacho) make a copy of its 204 // sections with appropriate fields zeroed out, and append them to the 205 // currently-last segment in newtoc. 206 copyZOdSections := func(g *macho.Segment) { 207 for i := g.Firstsect; i < g.Firstsect+g.Nsect; i++ { 208 s := exeMacho.Sections[i].Copy() 209 s.Offset = 0 210 s.Reloff = 0 211 s.Nreloc = 0 212 newtoc.AddSection(s) 213 } 214 } 215 216 newtoc.AddLoad(newsymtab) 217 newtoc.AddSegment(pagezero) 218 newtoc.AddSegment(newtext) 219 copyZOdSections(text) 220 newtoc.AddSegment(newdata) 221 copyZOdSections(data) 222 223 newlinkedit := linkedit.Copy() 224 newlinkedit.Offset = uint64(linkeditsymbase) 225 newlinkedit.Filesz = uint64(linkeditstringcur) 226 newlinkedit.Addr = macho.RoundUp(newdata.Addr+newdata.Memsz, 1<<pageAlign) // Follows data sections in file 227 newlinkedit.Memsz = macho.RoundUp(newlinkedit.Filesz, 1<<pageAlign) 228 // The rest should copy over fine. 229 newtoc.AddSegment(newlinkedit) 230 231 dwarf := nonnilS("__DWARF") 232 newdwarf := dwarf.CopyZeroed() 233 newdwarf.Offset = macho.RoundUp(newlinkedit.Offset+newlinkedit.Filesz, 1<<pageAlign) 234 newdwarf.Filesz = dwarf.UncompressedSize(&exeMacho.FileTOC, 1) 235 newdwarf.Addr = newlinkedit.Addr + newlinkedit.Memsz // Follows linkedit sections in file. 236 newdwarf.Memsz = macho.RoundUp(newdwarf.Filesz, 1<<pageAlign) 237 newtoc.AddSegment(newdwarf) 238 239 // Map out Dwarf sections (that is, this is section descriptors, not their contents). 240 offset := uint32(newdwarf.Offset) 241 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 242 o := exeMacho.Sections[i] 243 s := o.Copy() 244 s.Offset = offset 245 us := o.UncompressedSize() 246 if s.Size < us { 247 s.Size = uint64(us) 248 s.Align = 0 // This is apparently true for debugging sections; not sure if it generalizes. 249 } 250 offset += uint32(us) 251 if strings.HasPrefix(s.Name, "__z") { 252 s.Name = "__" + s.Name[3:] // remove "z" 253 } 254 s.Reloff = 0 255 s.Nreloc = 0 256 newtoc.AddSection(s) 257 } 258 259 // Write segments/sections. 260 // Only dwarf and linkedit contain anything interesting. 261 262 // Memory map the output file to get the buffer directly. 263 outDwarf := inputExe + ".dSYM/Contents/Resources/DWARF" 264 if len(os.Args) > 2 { 265 outDwarf = os.Args[2] 266 } else { 267 err := os.MkdirAll(outDwarf, 0755) 268 if err != nil { 269 fail("%v", err) 270 } 271 outDwarf = filepath.Join(outDwarf, filepath.Base(inputExe)) 272 } 273 dwarfFile, buffer := CreateMmapFile(outDwarf, int64(newtoc.FileSize())) 274 275 // (1) Linkedit segment 276 // Symbol table 277 offset = uint32(newlinkedit.Offset) 278 for i := range linkeditsyms { 279 if exeMacho.Magic == macho.Magic64 { 280 offset += linkeditsyms[i].Put64(buffer[offset:], newtoc.ByteOrder) 281 } else { 282 offset += linkeditsyms[i].Put32(buffer[offset:], newtoc.ByteOrder) 283 } 284 } 285 286 // Initial two bytes of string table, followed by actual zero-terminated strings. 287 buffer[linkeditstringbase] = ' ' 288 buffer[linkeditstringbase+1] = 0 289 offset = linkeditstringbase + 2 290 for _, str := range linkeditstrings { 291 for i := 0; i < len(str); i++ { 292 buffer[offset] = str[i] 293 offset++ 294 } 295 buffer[offset] = 0 296 offset++ 297 } 298 299 // (2) DWARF segment 300 ioff := newdwarf.Firstsect - dwarf.Firstsect 301 for i := dwarf.Firstsect; i < dwarf.Firstsect+dwarf.Nsect; i++ { 302 s := exeMacho.Sections[i] 303 j := i + ioff 304 s.PutUncompressedData(buffer[newtoc.Sections[j].Offset:]) 305 } 306 307 // Because "text" overlaps the header and the loads, write them afterwards, just in case. 308 // Write header. 309 newtoc.Put(buffer) 310 311 err = syscall.Munmap(buffer) 312 if err != nil { 313 fail("Munmap %s for dwarf output failed, %v", outDwarf, err) 314 } 315 err = dwarfFile.Close() 316 if err != nil { 317 fail("Close %s for dwarf output after mmap/munmap failed, %v", outDwarf, err) 318 } 319 320 if exeNeedsUuid { // Map the original exe, modify the header, and write the UUID command 321 hdr := exeMacho.FileTOC.FileHeader 322 oldCommandEnd := hdr.SizeCommands + newtoc.HdrSize() 323 hdr.NCommands += 1 324 hdr.SizeCommands += uuid.LoadSize(newtoc) 325 326 mapf, err := os.OpenFile(inputExe, os.O_RDWR, 0) 327 if err != nil { 328 fail("Updating UUID in binary failed, %v", err) 329 } 330 exebuf, err := syscall.Mmap(int(mapf.Fd()), 0, int(macho.RoundUp(uint64(hdr.SizeCommands), 1<<pageAlign)), 331 syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 332 if err != nil { 333 fail("Mmap of %s for UUID update failed, %v", inputExe, err) 334 } 335 _ = hdr.Put(exebuf, newtoc.ByteOrder) 336 _ = uuid.Put(exebuf[oldCommandEnd:], newtoc.ByteOrder) 337 err = syscall.Munmap(exebuf) 338 if err != nil { 339 fail("Munmap of %s for UUID update failed, %v", inputExe, err) 340 } 341 } 342 } 343 344 // CreateMmapFile creates the file 'outDwarf' of the specified size, mmaps that file, 345 // and returns the file descriptor and mapped buffer. 346 func CreateMmapFile(outDwarf string, size int64) (*os.File, []byte) { 347 dwarfFile, err := os.OpenFile(outDwarf, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0666) 348 if err != nil { 349 fail("Open for mmap failed, %v", err) 350 } 351 err = os.Truncate(outDwarf, size) 352 if err != nil { 353 fail("Truncate/extend of %s to %d bytes failed, %v", dwarfFile, size, err) 354 } 355 buffer, err := syscall.Mmap(int(dwarfFile.Fd()), 0, int(size), syscall.PROT_READ|syscall.PROT_WRITE, syscall.MAP_FILE|syscall.MAP_SHARED) 356 if err != nil { 357 fail("Mmap %s for dwarf output update failed, %v", outDwarf, err) 358 } 359 return dwarfFile, buffer 360 } 361 362 func describe(exem *macho.FileTOC) { 363 note("Type = %s, Flags=0x%x", exem.Type, uint32(exem.Flags)) 364 for i, l := range exem.Loads { 365 if s, ok := l.(*macho.Segment); ok { 366 fmt.Printf("Load %d is Segment %s, offset=0x%x, filesz=%d, addr=0x%x, memsz=%d, nsect=%d\n", i, s.Name, 367 s.Offset, s.Filesz, s.Addr, s.Memsz, s.Nsect) 368 for j := uint32(0); j < s.Nsect; j++ { 369 c := exem.Sections[j+s.Firstsect] 370 fmt.Printf(" Section %s, offset=0x%x, size=%d, addr=0x%x, flags=0x%x, nreloc=%d, res1=%d, res2=%d, res3=%d\n", c.Name, c.Offset, c.Size, c.Addr, c.Flags, c.Nreloc, c.Reserved1, c.Reserved2, c.Reserved3) 371 } 372 } else { 373 fmt.Printf("Load %d is %v\n", i, l) 374 } 375 } 376 if exem.SizeCommands != exem.LoadSize() { 377 fail("recorded command size %d does not equal computed command size %d", exem.SizeCommands, exem.LoadSize()) 378 } else { 379 note("recorded command size %d, computed command size %d", exem.SizeCommands, exem.LoadSize()) 380 } 381 note("File size is %d", exem.FileSize()) 382 } 383 384 // contentuuid returns a UUID derived from (some of) the content of an executable. 385 // specifically included are the non-DWARF sections, specifically excluded are things 386 // that surely depend on the presence or absence of DWARF sections (e.g., section 387 // numbers, positions with file, number of load commands). 388 // (It was considered desirable if this was insensitive to the presence of the 389 // __DWARF segment, however because it is not last, it moves other segments, 390 // whose contents appear to contain file offset references.) 391 func contentuuid(exem *macho.FileTOC) []byte { 392 h := sha256.New() 393 for _, l := range exem.Loads { 394 if l.Command() == macho.LcUuid { 395 continue 396 } 397 if s, ok := l.(*macho.Segment); ok { 398 if s.Name == "__DWARF" || s.Name == "__PAGEZERO" { 399 continue 400 } 401 for j := uint32(0); j < s.Nsect; j++ { 402 c := exem.Sections[j+s.Firstsect] 403 io.Copy(h, c.Open()) 404 } 405 } // Getting dependence on other load commands right is fiddly. 406 } 407 return h.Sum(nil) 408 }