github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/builder/sizes.go (about) 1 package builder 2 3 import ( 4 "bytes" 5 "debug/dwarf" 6 "debug/elf" 7 "debug/macho" 8 "debug/pe" 9 "encoding/binary" 10 "fmt" 11 "io" 12 "os" 13 "path/filepath" 14 "regexp" 15 "sort" 16 "strings" 17 18 "github.com/aykevl/go-wasm" 19 "github.com/tinygo-org/tinygo/goenv" 20 ) 21 22 // Set to true to print extra debug logs. 23 const sizesDebug = false 24 25 // programSize contains size statistics per package of a compiled program. 26 type programSize struct { 27 Packages map[string]packageSize 28 Code uint64 29 ROData uint64 30 Data uint64 31 BSS uint64 32 } 33 34 // sortedPackageNames returns the list of package names (ProgramSize.Packages) 35 // sorted alphabetically. 36 func (ps *programSize) sortedPackageNames() []string { 37 names := make([]string, 0, len(ps.Packages)) 38 for name := range ps.Packages { 39 names = append(names, name) 40 } 41 sort.Strings(names) 42 return names 43 } 44 45 // Flash usage in regular microcontrollers. 46 func (ps *programSize) Flash() uint64 { 47 return ps.Code + ps.ROData + ps.Data 48 } 49 50 // Static RAM usage in regular microcontrollers. 51 func (ps *programSize) RAM() uint64 { 52 return ps.Data + ps.BSS 53 } 54 55 // packageSize contains the size of a package, calculated from the linked object 56 // file. 57 type packageSize struct { 58 Code uint64 59 ROData uint64 60 Data uint64 61 BSS uint64 62 } 63 64 // Flash usage in regular microcontrollers. 65 func (ps *packageSize) Flash() uint64 { 66 return ps.Code + ps.ROData + ps.Data 67 } 68 69 // Static RAM usage in regular microcontrollers. 70 func (ps *packageSize) RAM() uint64 { 71 return ps.Data + ps.BSS 72 } 73 74 // A mapping of a single chunk of code or data to a file path. 
type addressLine struct {
	Address    uint64
	Length     uint64 // length of this chunk
	Align      uint64 // (maximum) alignment of this line
	File       string // file path as stored in DWARF
	IsVariable bool   // true if this is a variable (or constant), false if it is code
}

// Sections defined in the input file. This struct defines them in a
// filetype-agnostic way but roughly follow the ELF types (.text, .data, .bss,
// etc).
type memorySection struct {
	Type    memoryType
	Address uint64
	Size    uint64
	Align   uint64
}

// memoryType is the kind of memory a memorySection occupies. The zero value
// means "no type".
type memoryType int

const (
	memoryCode memoryType = iota + 1
	memoryData
	memoryROData
	memoryBSS
	memoryStack
)

// String returns a short human-readable name for the memory type. Values
// outside the declared range are rendered as "memoryType(N)" instead of
// indexing past the end of the name table.
func (t memoryType) String() string {
	names := [...]string{
		0:            "-",
		memoryCode:   "code",
		memoryData:   "data",
		memoryROData: "rodata",
		memoryBSS:    "bss",
		memoryStack:  "stack",
	}
	if t < 0 || int(t) >= len(names) {
		return fmt.Sprintf("memoryType(%d)", int(t))
	}
	return names[t]
}

// Regular expressions to match particular symbol names. These are not stored as
// DWARF variables because they have no mapping to source code global variables.
var (
	// Various globals that aren't a variable but nonetheless need to be stored
	// somewhere:
	//   alloc:  heap allocations during init interpretation
	//   pack:   data created when storing a constant in an interface for example
	//   string: buffer behind strings
	packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)
)

// readProgramSizeFromDWARF reads the source location for each line of code and
// each variable in the program, as far as this is stored in the DWARF debug
// information.
128 func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset, codeAlignment uint64, skipTombstone bool) ([]addressLine, error) { 129 r := data.Reader() 130 var lines []*dwarf.LineFile 131 var addresses []addressLine 132 for { 133 e, err := r.Next() 134 if err != nil { 135 return nil, err 136 } 137 if e == nil { 138 break 139 } 140 switch e.Tag { 141 case dwarf.TagCompileUnit: 142 // Found a compile unit. 143 // We can read the .debug_line section using it, which contains a 144 // mapping for most instructions to their file/line/column - even 145 // for inlined functions! 146 lr, err := data.LineReader(e) 147 if err != nil { 148 return nil, err 149 } 150 lines = lr.Files() 151 var lineEntry = dwarf.LineEntry{ 152 EndSequence: true, 153 } 154 155 // Line tables are organized as sequences of line entries until an 156 // end sequence. A single line table can contain multiple such 157 // sequences. The last line entry is an EndSequence to indicate the 158 // end. 159 for { 160 // Read the next .debug_line entry. 161 prevLineEntry := lineEntry 162 err := lr.Next(&lineEntry) 163 if err != nil { 164 if err == io.EOF { 165 break 166 } 167 return nil, err 168 } 169 170 if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone { 171 // Tombstone value. This symbol has been removed, for 172 // example by the --gc-sections linker flag. It is still 173 // here in the debug information because the linker can't 174 // just remove this reference. 175 // Read until the next EndSequence so that this sequence is 176 // skipped. 177 // For more details, see (among others): 178 // https://reviews.llvm.org/D84825 179 // The value 0 can however really occur in object files, 180 // that typically start at address 0. So don't skip 181 // tombstone values in object files (like when parsing MachO 182 // files). 
183 for { 184 err := lr.Next(&lineEntry) 185 if err != nil { 186 return nil, err 187 } 188 if lineEntry.EndSequence { 189 break 190 } 191 } 192 } 193 194 if !prevLineEntry.EndSequence { 195 // The chunk describes the code from prevLineEntry to 196 // lineEntry. 197 line := addressLine{ 198 Address: prevLineEntry.Address + codeOffset, 199 Length: lineEntry.Address - prevLineEntry.Address, 200 Align: codeAlignment, 201 File: prevLineEntry.File.Name, 202 } 203 if line.Length != 0 { 204 addresses = append(addresses, line) 205 } 206 } 207 } 208 case dwarf.TagVariable: 209 // Global variable (or constant). Most of these are not actually 210 // stored in the binary, because they have been optimized out. Only 211 // the ones with a location are still present. 212 r.SkipChildren() 213 214 file := e.AttrField(dwarf.AttrDeclFile) 215 location := e.AttrField(dwarf.AttrLocation) 216 globalType := e.AttrField(dwarf.AttrType) 217 if file == nil || location == nil || globalType == nil { 218 // Doesn't contain the requested information. 219 continue 220 } 221 222 // Try to parse the location. While this could in theory be a very 223 // complex expression, usually it's just a DW_OP_addr opcode 224 // followed by an address. 225 addr, err := readDWARFConstant(r.AddressSize(), location.Val.([]uint8)) 226 if err != nil { 227 continue // ignore the error, we don't know what to do with it 228 } 229 230 // Parse the type of the global variable, which (importantly) 231 // contains the variable size. We're not interested in the type, 232 // only in the size. 233 typ, err := data.Type(globalType.Val.(dwarf.Offset)) 234 if err != nil { 235 return nil, err 236 } 237 238 // Read alignment, if it's stored as part of the debug information. 
239 var alignment uint64 240 if attr := e.AttrField(dwarf.AttrAlignment); attr != nil { 241 alignment = uint64(attr.Val.(int64)) 242 } 243 244 addresses = append(addresses, addressLine{ 245 Address: addr, 246 Length: uint64(typ.Size()), 247 Align: alignment, 248 File: lines[file.Val.(int64)].Name, 249 IsVariable: true, 250 }) 251 default: 252 r.SkipChildren() 253 } 254 } 255 return addresses, nil 256 } 257 258 // Parse a DWARF constant. For addresses, this is usually a very simple 259 // expression. 260 func readDWARFConstant(addressSize int, bytecode []byte) (uint64, error) { 261 var addr uint64 262 for len(bytecode) != 0 { 263 op := bytecode[0] 264 bytecode = bytecode[1:] 265 switch op { 266 case 0x03: // DW_OP_addr 267 switch addressSize { 268 case 2: 269 addr = uint64(binary.LittleEndian.Uint16(bytecode)) 270 case 4: 271 addr = uint64(binary.LittleEndian.Uint32(bytecode)) 272 case 8: 273 addr = binary.LittleEndian.Uint64(bytecode) 274 default: 275 panic("unexpected address size") 276 } 277 bytecode = bytecode[addressSize:] 278 case 0x23: // DW_OP_plus_uconst 279 offset, n := readULEB128(bytecode) 280 addr += offset 281 bytecode = bytecode[n:] 282 default: 283 return 0, fmt.Errorf("unknown DWARF opcode: 0x%x", op) 284 } 285 } 286 return addr, nil 287 } 288 289 // Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer 290 func readULEB128(buf []byte) (result uint64, n int) { 291 var shift uint8 292 for { 293 b := buf[n] 294 n++ 295 result |= uint64(b&0x7f) << shift 296 if b&0x80 == 0 { 297 break 298 } 299 shift += 7 300 } 301 return 302 } 303 304 // Read a MachO object file and return a line table. 305 // Also return an index from symbol name to start address in the line table. 
306 func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) { 307 // Some constants from mach-o/nlist.h 308 // See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html 309 const ( 310 N_STAB = 0xe0 311 N_TYPE = 0x0e // bitmask for N_TYPE field 312 N_SECT = 0xe // one of the possible type in the N_TYPE field 313 ) 314 315 // Read DWARF from the given object file. 316 file, err := macho.Open(path) 317 if err != nil { 318 return nil, nil, err 319 } 320 defer file.Close() 321 dwarf, err := file.DWARF() 322 if err != nil { 323 return nil, nil, err 324 } 325 lines, err := readProgramSizeFromDWARF(dwarf, 0, 0, false) 326 if err != nil { 327 return nil, nil, err 328 } 329 330 // Make a map from start addresses to indices in the line table (because the 331 // line table is a slice, not a map). 332 addressToLine := make(map[uint64]int, len(lines)) 333 for i, line := range lines { 334 if _, ok := addressToLine[line.Address]; ok { 335 addressToLine[line.Address] = -1 336 continue 337 } 338 addressToLine[line.Address] = i 339 } 340 341 // Make a map that for each symbol gives the start index in the line table. 342 addresses := make(map[string]int, len(addressToLine)) 343 for _, symbol := range file.Symtab.Syms { 344 if symbol.Type&N_STAB != 0 { 345 continue // STABS entry, ignore 346 } 347 if symbol.Type&0x0e != N_SECT { 348 continue // undefined symbol 349 } 350 if index, ok := addressToLine[symbol.Value]; ok && index >= 0 { 351 if _, ok := addresses[symbol.Name]; ok { 352 // There is a duplicate. Mark it as unavailable. 353 addresses[symbol.Name] = -1 354 continue 355 } 356 addresses[symbol.Name] = index 357 } 358 } 359 360 return addresses, lines, nil 361 } 362 363 // loadProgramSize calculate a program/data size breakdown of each package for a 364 // given ELF file. 
365 // If the file doesn't contain DWARF debug information, the returned program 366 // size will still have valid summaries but won't have complete size information 367 // per package. 368 func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) { 369 // Open the binary file. 370 f, err := os.Open(path) 371 if err != nil { 372 return nil, err 373 } 374 defer f.Close() 375 376 // This stores all chunks of addresses found in the binary. 377 var addresses []addressLine 378 379 // Load the binary file, which could be in a number of file formats. 380 var sections []memorySection 381 if file, err := elf.NewFile(f); err == nil { 382 var codeAlignment uint64 383 switch file.Machine { 384 case elf.EM_ARM: 385 codeAlignment = 4 // usually 2, but can be 4 386 } 387 // Read DWARF information. The error is intentionally ignored. 388 data, _ := file.DWARF() 389 if data != nil { 390 addresses, err = readProgramSizeFromDWARF(data, 0, codeAlignment, true) 391 if err != nil { 392 // However, _do_ report an error here. Something must have gone 393 // wrong while trying to parse DWARF data. 394 return nil, err 395 } 396 } 397 398 // Read the ELF symbols for some more chunks of location information. 399 // Some globals (such as strings) aren't stored in the DWARF debug 400 // information and therefore need to be obtained in a different way. 401 allSymbols, err := file.Symbols() 402 if err != nil { 403 return nil, err 404 } 405 for _, symbol := range allSymbols { 406 symType := elf.ST_TYPE(symbol.Info) 407 if symbol.Size == 0 { 408 continue 409 } 410 if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE { 411 continue 412 } 413 if symbol.Section >= elf.SHN_LORESERVE { 414 // Not a regular section, so skip it. 415 // One example is elf.SHN_ABS, which is used for symbols 416 // declared with an absolute value such as the memset function 417 // on the ESP32 which is defined in the mask ROM. 
418 continue 419 } 420 section := file.Sections[symbol.Section] 421 if section.Flags&elf.SHF_ALLOC == 0 { 422 continue 423 } 424 if packageSymbolRegexp.MatchString(symbol.Name) || symbol.Name == "__isr_vector" { 425 addresses = append(addresses, addressLine{ 426 Address: symbol.Value, 427 Length: symbol.Size, 428 File: symbol.Name, 429 IsVariable: true, 430 }) 431 } 432 } 433 434 // Load allocated sections. 435 for _, section := range file.Sections { 436 if section.Flags&elf.SHF_ALLOC == 0 { 437 continue 438 } 439 if section.Type == elf.SHT_NOBITS { 440 if section.Name == ".stack" { 441 // TinyGo emits stack sections on microcontroller using the 442 // ".stack" name. 443 // This is a bit ugly, but I don't think there is a way to 444 // mark the stack section in a linker script. 445 sections = append(sections, memorySection{ 446 Address: section.Addr, 447 Size: section.Size, 448 Align: section.Addralign, 449 Type: memoryStack, 450 }) 451 } else { 452 // Regular .bss section. 453 sections = append(sections, memorySection{ 454 Address: section.Addr, 455 Size: section.Size, 456 Align: section.Addralign, 457 Type: memoryBSS, 458 }) 459 } 460 } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 { 461 // .text 462 sections = append(sections, memorySection{ 463 Address: section.Addr, 464 Size: section.Size, 465 Align: section.Addralign, 466 Type: memoryCode, 467 }) 468 } else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 { 469 // .data 470 sections = append(sections, memorySection{ 471 Address: section.Addr, 472 Size: section.Size, 473 Align: section.Addralign, 474 Type: memoryData, 475 }) 476 } else if section.Type == elf.SHT_PROGBITS { 477 // .rodata 478 sections = append(sections, memorySection{ 479 Address: section.Addr, 480 Size: section.Size, 481 Align: section.Addralign, 482 Type: memoryROData, 483 }) 484 } 485 } 486 } else if file, err := macho.NewFile(f); err == nil { 487 // Read segments, for use while 
reading through sections. 488 segments := map[string]*macho.Segment{} 489 for _, load := range file.Loads { 490 switch load := load.(type) { 491 case *macho.Segment: 492 segments[load.Name] = load 493 } 494 } 495 496 // Read MachO sections. 497 for _, section := range file.Sections { 498 sectionType := section.Flags & 0xff 499 sectionFlags := section.Flags >> 8 500 segment := segments[section.Seg] 501 // For the constants used here, see: 502 // https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/BinaryFormat/MachO.h 503 if sectionFlags&0x800000 != 0 { // S_ATTR_PURE_INSTRUCTIONS 504 // Section containing only instructions. 505 sections = append(sections, memorySection{ 506 Address: section.Addr, 507 Size: uint64(section.Size), 508 Align: uint64(section.Align), 509 Type: memoryCode, 510 }) 511 } else if sectionType == 1 { // S_ZEROFILL 512 // Section filled with zeroes on demand. 513 sections = append(sections, memorySection{ 514 Address: section.Addr, 515 Size: uint64(section.Size), 516 Align: uint64(section.Align), 517 Type: memoryBSS, 518 }) 519 } else if segment.Maxprot&0b011 == 0b001 { // --r (read-only data) 520 // Protection doesn't allow writes, so mark this section read-only. 521 sections = append(sections, memorySection{ 522 Address: section.Addr, 523 Size: uint64(section.Size), 524 Align: uint64(section.Align), 525 Type: memoryROData, 526 }) 527 } else { 528 // The rest is assumed to be regular data. 529 sections = append(sections, memorySection{ 530 Address: section.Addr, 531 Size: uint64(section.Size), 532 Align: uint64(section.Align), 533 Type: memoryData, 534 }) 535 } 536 } 537 538 // Read DWARF information. 539 // The data isn't stored directly in the binary as in most executable 540 // formats. Instead, it is left in the object files that were used as a 541 // basis for linking. The executable does however contain STABS debug 542 // information that points to the source object file and is used by 543 // debuggers. 
544 // For more information: 545 // http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme 546 var objSymbolNames map[string]int 547 var objAddresses []addressLine 548 var previousSymbol macho.Symbol 549 for _, symbol := range file.Symtab.Syms { 550 // STABS constants, from mach-o/stab.h: 551 // https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html 552 const ( 553 N_GSYM = 0x20 554 N_FUN = 0x24 555 N_STSYM = 0x26 556 N_SO = 0x64 557 N_OSO = 0x66 558 ) 559 if symbol.Type == N_OSO { 560 // Found an object file. Now try to parse it. 561 objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name) 562 if err != nil && sizesDebug { 563 // Errors are normally ignored. If there is an error, it's 564 // simply treated as that the DWARF is not available. 565 fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err) 566 } 567 } else if symbol.Type == N_FUN { 568 // Found a function. 569 // The way this is encoded is a bit weird. MachO symbols don't 570 // have a length. What I've found is that the length is encoded 571 // by first having a N_FUN symbol as usual, and then having a 572 // symbol with a zero-length name that has the value not set to 573 // the address of the symbol but to the length. So in order to 574 // get both the address and the length, we look for a symbol 575 // with a name followed by a symbol without a name. 576 if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" { 577 // Functions are encoded as many small chunks in the line 578 // table (one or a few instructions per source line). But 579 // the symbol length covers the whole symbols, over many 580 // lines and possibly including inlined functions. So we 581 // continue to iterate through the objAddresses slice until 582 // we've found all the source lines that are part of this 583 // symbol. 
584 address := previousSymbol.Value 585 length := symbol.Value 586 if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 { 587 for length > 0 { 588 line := objAddresses[index] 589 line.Address = address 590 if line.Length > length { 591 // Line extends beyond the end of te symbol? 592 // Weird, shouldn't happen. 593 break 594 } 595 addresses = append(addresses, line) 596 index++ 597 length -= line.Length 598 address += line.Length 599 } 600 } 601 } 602 } else if symbol.Type == N_GSYM || symbol.Type == N_STSYM { 603 // Global variables. 604 if index, ok := objSymbolNames[symbol.Name]; ok { 605 address := objAddresses[index] 606 address.Address = symbol.Value 607 addresses = append(addresses, address) 608 } 609 } 610 previousSymbol = symbol 611 } 612 } else if file, err := pe.NewFile(f); err == nil { 613 // Read DWARF information. The error is intentionally ignored. 614 data, _ := file.DWARF() 615 if data != nil { 616 addresses, err = readProgramSizeFromDWARF(data, 0, 0, true) 617 if err != nil { 618 // However, _do_ report an error here. Something must have gone 619 // wrong while trying to parse DWARF data. 620 return nil, err 621 } 622 } 623 624 // Read COFF sections. 625 optionalHeader := file.OptionalHeader.(*pe.OptionalHeader64) 626 for _, section := range file.Sections { 627 // For more information: 628 // https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header 629 const ( 630 IMAGE_SCN_CNT_CODE = 0x00000020 631 IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040 632 IMAGE_SCN_MEM_DISCARDABLE = 0x02000000 633 IMAGE_SCN_MEM_READ = 0x40000000 634 IMAGE_SCN_MEM_WRITE = 0x80000000 635 ) 636 if section.Characteristics&IMAGE_SCN_MEM_DISCARDABLE != 0 { 637 // Debug sections, etc. 
638 continue 639 } 640 address := uint64(section.VirtualAddress) + optionalHeader.ImageBase 641 if section.Characteristics&IMAGE_SCN_CNT_CODE != 0 { 642 // .text 643 sections = append(sections, memorySection{ 644 Address: address, 645 Size: uint64(section.VirtualSize), 646 Type: memoryCode, 647 }) 648 } else if section.Characteristics&IMAGE_SCN_CNT_INITIALIZED_DATA != 0 { 649 if section.Characteristics&IMAGE_SCN_MEM_WRITE != 0 { 650 // .data 651 sections = append(sections, memorySection{ 652 Address: address, 653 Size: uint64(section.Size), 654 Type: memoryData, 655 }) 656 if section.Size < section.VirtualSize { 657 // Equivalent of a .bss section. 658 // Note: because of how the PE/COFF format is 659 // structured, not all zero-initialized data is marked 660 // as such. A portion may be at the end of the .data 661 // section and is thus marked as initialized data. 662 sections = append(sections, memorySection{ 663 Address: address + uint64(section.Size), 664 Size: uint64(section.VirtualSize) - uint64(section.Size), 665 Type: memoryBSS, 666 }) 667 } 668 } else if section.Characteristics&IMAGE_SCN_MEM_READ != 0 { 669 // .rdata, .buildid, .pdata 670 sections = append(sections, memorySection{ 671 Address: address, 672 Size: uint64(section.VirtualSize), 673 Type: memoryROData, 674 }) 675 } 676 } 677 } 678 } else if file, err := wasm.Parse(f); err == nil { 679 // File is in WebAssembly format. 680 681 // Put code at a very high address, so that it won't conflict with the 682 // data in the memory section. 683 const codeOffset = 0x8000_0000_0000_0000 684 685 // Read DWARF information. The error is intentionally ignored. 686 data, _ := file.DWARF() 687 if data != nil { 688 addresses, err = readProgramSizeFromDWARF(data, codeOffset, 0, true) 689 if err != nil { 690 // However, _do_ report an error here. Something must have gone 691 // wrong while trying to parse DWARF data. 
692 return nil, err 693 } 694 } 695 696 var linearMemorySize uint64 697 for _, section := range file.Sections { 698 switch section := section.(type) { 699 case *wasm.SectionCode: 700 sections = append(sections, memorySection{ 701 Address: codeOffset, 702 Size: uint64(section.Size()), 703 Type: memoryCode, 704 }) 705 case *wasm.SectionMemory: 706 // This value is used when processing *wasm.SectionData (which 707 // always comes after *wasm.SectionMemory). 708 linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024 709 case *wasm.SectionData: 710 // Data sections contain initial values for linear memory. 711 // First load the list of data sections, and sort them by 712 // address for easier processing. 713 var dataSections []memorySection 714 for _, entry := range section.Entries { 715 address, err := wasm.Eval(bytes.NewBuffer(entry.Offset)) 716 if err != nil { 717 return nil, fmt.Errorf("could not parse data section address: %w", err) 718 } 719 dataSections = append(dataSections, memorySection{ 720 Address: uint64(address[0].(int32)), 721 Size: uint64(len(entry.Data)), 722 Type: memoryData, 723 }) 724 } 725 sort.Slice(dataSections, func(i, j int) bool { 726 return dataSections[i].Address < dataSections[j].Address 727 }) 728 729 // And now add all data sections for linear memory. 730 // Parts that are in the slice of data sections are added as 731 // memoryData, and parts that are not are added as memoryBSS. 732 addr := uint64(0) 733 for _, section := range dataSections { 734 if addr < section.Address { 735 sections = append(sections, memorySection{ 736 Address: addr, 737 Size: section.Address - addr, 738 Type: memoryBSS, 739 }) 740 } 741 if addr > section.Address { 742 // This might be allowed, I'm not sure. 743 // It certainly doesn't make a lot of sense. 
744 return nil, fmt.Errorf("overlapping data section") 745 } 746 // addr == section.Address 747 sections = append(sections, section) 748 addr = section.Address + section.Size 749 } 750 if addr < linearMemorySize { 751 sections = append(sections, memorySection{ 752 Address: addr, 753 Size: linearMemorySize - addr, 754 Type: memoryBSS, 755 }) 756 } 757 } 758 } 759 } else { 760 return nil, fmt.Errorf("could not parse file: %w", err) 761 } 762 763 // Sort the slice of address chunks by address, so that we can iterate 764 // through it to calculate section sizes. 765 sort.Slice(addresses, func(i, j int) bool { 766 if addresses[i].Address == addresses[j].Address { 767 // Very rarely, there might be duplicate addresses. 768 // If that happens, sort the largest chunks first. 769 return addresses[i].Length > addresses[j].Length 770 } 771 return addresses[i].Address < addresses[j].Address 772 }) 773 774 // Now finally determine the binary/RAM size usage per package by going 775 // through each allocated section. 
776 sizes := make(map[string]packageSize) 777 for _, section := range sections { 778 switch section.Type { 779 case memoryCode: 780 readSection(section, addresses, func(path string, size uint64, isVariable bool) { 781 field := sizes[path] 782 if isVariable { 783 field.ROData += size 784 } else { 785 field.Code += size 786 } 787 sizes[path] = field 788 }, packagePathMap) 789 case memoryROData: 790 readSection(section, addresses, func(path string, size uint64, isVariable bool) { 791 field := sizes[path] 792 field.ROData += size 793 sizes[path] = field 794 }, packagePathMap) 795 case memoryData: 796 readSection(section, addresses, func(path string, size uint64, isVariable bool) { 797 field := sizes[path] 798 field.Data += size 799 sizes[path] = field 800 }, packagePathMap) 801 case memoryBSS: 802 readSection(section, addresses, func(path string, size uint64, isVariable bool) { 803 field := sizes[path] 804 field.BSS += size 805 sizes[path] = field 806 }, packagePathMap) 807 case memoryStack: 808 // We store the C stack as a pseudo-package. 809 sizes["C stack"] = packageSize{ 810 BSS: section.Size, 811 } 812 } 813 } 814 815 // ...and summarize the results. 816 program := &programSize{ 817 Packages: sizes, 818 } 819 for _, pkg := range sizes { 820 program.Code += pkg.Code 821 program.ROData += pkg.ROData 822 program.Data += pkg.Data 823 program.BSS += pkg.BSS 824 } 825 return program, nil 826 } 827 828 // readSection determines for each byte in this section to which package it 829 // belongs. It reports this usage through the addSize callback. 830 func readSection(section memorySection, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) { 831 // The addr variable tracks at which address we are while going through this 832 // section. We start at the beginning. 
833 addr := section.Address 834 sectionEnd := section.Address + section.Size 835 if sizesDebug { 836 fmt.Printf("%08x..%08x %5d: %s\n", addr, sectionEnd, section.Size, section.Type) 837 } 838 for _, line := range addresses { 839 if line.Address < section.Address || line.Address+line.Length > sectionEnd { 840 // Check that this line is entirely within the section. 841 // Don't bother dealing with line entries that cross sections (that 842 // seems rather unlikely anyway). 843 continue 844 } 845 if addr < line.Address { 846 // There is a gap: there is a space between the current and the 847 // previous line entry. 848 // Check whether this is caused by alignment requirements. 849 addrAligned := (addr + line.Align - 1) &^ (line.Align - 1) 850 if line.Align > 1 && addrAligned >= line.Address { 851 // It is, assume that's what causes the gap. 852 addSize("(padding)", line.Address-addr, true) 853 } else { 854 addSize("(unknown)", line.Address-addr, false) 855 if sizesDebug { 856 fmt.Printf("%08x..%08x %5d: unknown (gap), alignment=%d\n", addr, line.Address, line.Address-addr, line.Align) 857 } 858 } 859 addr = line.Address 860 } 861 if addr > line.Address+line.Length { 862 // The current line is already covered by a previous line entry. 863 // Simply skip it. 864 continue 865 } 866 // At this point, addr falls within the current line (probably at the 867 // start). 868 length := line.Length 869 if addr > line.Address { 870 // There is some overlap: the previous line entry already covered 871 // part of this line entry. So reduce the length to add to the 872 // remaining bit of the line entry. 873 length = line.Length - (addr - line.Address) 874 } 875 // Finally, mark this chunk of memory as used by the given package. 876 addSize(findPackagePath(line.File, packagePathMap), length, line.IsVariable) 877 addr = line.Address + line.Length 878 } 879 if addr < sectionEnd { 880 // There is a gap at the end of the section. 
881 addrAligned := (addr + section.Align - 1) &^ (section.Align - 1) 882 if section.Align > 1 && addrAligned >= sectionEnd { 883 // The gap is caused by the section alignment. 884 // For example, if a .rodata section ends with a non-aligned string. 885 addSize("(padding)", sectionEnd-addr, true) 886 } else { 887 addSize("(unknown)", sectionEnd-addr, false) 888 if sizesDebug { 889 fmt.Printf("%08x..%08x %5d: unknown (end), alignment=%d\n", addr, sectionEnd, sectionEnd-addr, section.Align) 890 } 891 } 892 } 893 } 894 895 // findPackagePath returns the Go package (or a pseudo package) for the given 896 // path. It uses some heuristics, for example for some C libraries. 897 func findPackagePath(path string, packagePathMap map[string]string) string { 898 // Check whether this path is part of one of the compiled packages. 899 packagePath, ok := packagePathMap[filepath.Dir(path)] 900 if !ok { 901 if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")) { 902 // Emit C libraries (in the lib subdirectory of TinyGo) as a single 903 // package, with a "C" prefix. For example: "C compiler-rt" for the 904 // compiler runtime library from LLVM. 905 packagePath = "C " + strings.Split(strings.TrimPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")), string(os.PathSeparator))[1] 906 } else if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "llvm-project")) { 907 packagePath = "C compiler-rt" 908 } else if packageSymbolRegexp.MatchString(path) { 909 // Parse symbol names like main$alloc or runtime$string. 910 packagePath = path[:strings.LastIndex(path, "$")] 911 } else if path == "__isr_vector" { 912 packagePath = "C interrupt vector" 913 } else if path == "<Go type>" { 914 packagePath = "Go types" 915 } else if path == "<Go interface assert>" { 916 // Interface type assert, generated by the interface lowering pass. 
917 packagePath = "Go interface assert" 918 } else if path == "<Go interface method>" { 919 // Interface method wrapper (switch over all concrete types), 920 // generated by the interface lowering pass. 921 packagePath = "Go interface method" 922 } else if path == "<stdin>" { 923 // This can happen when the source code (in Go) doesn't have a 924 // source file and uses "-" as the location. Somewhere this is 925 // converted to "<stdin>". 926 // Convert this back to the "-" string. Eventually, this should be 927 // fixed in the compiler. 928 packagePath = "-" 929 } else { 930 // This is some other path. Not sure what it is, so just emit its directory. 931 packagePath = filepath.Dir(path) // fallback 932 } 933 } 934 return packagePath 935 }