github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/builder/sizes.go (about)

     1  package builder
     2  
     3  import (
     4  	"bytes"
     5  	"debug/dwarf"
     6  	"debug/elf"
     7  	"debug/macho"
     8  	"debug/pe"
     9  	"encoding/binary"
    10  	"fmt"
    11  	"io"
    12  	"os"
    13  	"path/filepath"
    14  	"regexp"
    15  	"sort"
    16  	"strings"
    17  
    18  	"github.com/aykevl/go-wasm"
    19  	"github.com/tinygo-org/tinygo/goenv"
    20  )
    21  
// sizesDebug enables extra debug logs while calculating program sizes. Set it
// to true to print them.
const sizesDebug = false
    24  
    25  // programSize contains size statistics per package of a compiled program.
    26  type programSize struct {
    27  	Packages map[string]packageSize
    28  	Code     uint64
    29  	ROData   uint64
    30  	Data     uint64
    31  	BSS      uint64
    32  }
    33  
    34  // sortedPackageNames returns the list of package names (ProgramSize.Packages)
    35  // sorted alphabetically.
    36  func (ps *programSize) sortedPackageNames() []string {
    37  	names := make([]string, 0, len(ps.Packages))
    38  	for name := range ps.Packages {
    39  		names = append(names, name)
    40  	}
    41  	sort.Strings(names)
    42  	return names
    43  }
    44  
    45  // Flash usage in regular microcontrollers.
    46  func (ps *programSize) Flash() uint64 {
    47  	return ps.Code + ps.ROData + ps.Data
    48  }
    49  
    50  // Static RAM usage in regular microcontrollers.
    51  func (ps *programSize) RAM() uint64 {
    52  	return ps.Data + ps.BSS
    53  }
    54  
    55  // packageSize contains the size of a package, calculated from the linked object
    56  // file.
    57  type packageSize struct {
    58  	Code   uint64
    59  	ROData uint64
    60  	Data   uint64
    61  	BSS    uint64
    62  }
    63  
    64  // Flash usage in regular microcontrollers.
    65  func (ps *packageSize) Flash() uint64 {
    66  	return ps.Code + ps.ROData + ps.Data
    67  }
    68  
    69  // Static RAM usage in regular microcontrollers.
    70  func (ps *packageSize) RAM() uint64 {
    71  	return ps.Data + ps.BSS
    72  }
    73  
// addressLine is a mapping of a single chunk of code or data to a file path.
// Chunks that are taken from the symbol table instead of DWARF store the
// symbol name in File.
type addressLine struct {
	Address    uint64 // start address of this chunk
	Length     uint64 // length of this chunk
	Align      uint64 // (maximum) alignment of this line
	File       string // file path as stored in DWARF
	IsVariable bool   // true if this is a variable (or constant), false if it is code
}
    82  
// memorySection describes a section defined in the input file. This struct
// defines sections in a filetype-agnostic way but roughly follows the ELF
// types (.text, .data, .bss, etc).
type memorySection struct {
	Type    memoryType // kind of memory this section occupies
	Address uint64     // start address of the section
	Size    uint64     // size of the section; the section ends at Address+Size
	Align   uint64     // alignment as reported by the file format; left zero where it isn't read
}
    92  
    93  type memoryType int
    94  
    95  const (
    96  	memoryCode memoryType = iota + 1
    97  	memoryData
    98  	memoryROData
    99  	memoryBSS
   100  	memoryStack
   101  )
   102  
   103  func (t memoryType) String() string {
   104  	return [...]string{
   105  		0:            "-",
   106  		memoryCode:   "code",
   107  		memoryData:   "data",
   108  		memoryROData: "rodata",
   109  		memoryBSS:    "bss",
   110  		memoryStack:  "stack",
   111  	}[t]
   112  }
   113  
// Regular expressions to match particular symbol names. These are not stored as
// DWARF variables because they have no mapping to source code global variables.
var (
	// Various globals that aren't a variable but nonetheless need to be stored
	// somewhere:
	//   alloc:  heap allocations during init interpretation
	//   pack:   data created when storing a constant in an interface for example
	//   string: buffer behind strings
	// The symbol name must end in "$alloc", "$pack" or "$string", optionally
	// followed by a numeric suffix such as ".123".
	packageSymbolRegexp = regexp.MustCompile(`\$(alloc|pack|string)(\.[0-9]+)?$`)
)
   124  
   125  // readProgramSizeFromDWARF reads the source location for each line of code and
   126  // each variable in the program, as far as this is stored in the DWARF debug
   127  // information.
   128  func readProgramSizeFromDWARF(data *dwarf.Data, codeOffset, codeAlignment uint64, skipTombstone bool) ([]addressLine, error) {
   129  	r := data.Reader()
   130  	var lines []*dwarf.LineFile
   131  	var addresses []addressLine
   132  	for {
   133  		e, err := r.Next()
   134  		if err != nil {
   135  			return nil, err
   136  		}
   137  		if e == nil {
   138  			break
   139  		}
   140  		switch e.Tag {
   141  		case dwarf.TagCompileUnit:
   142  			// Found a compile unit.
   143  			// We can read the .debug_line section using it, which contains a
   144  			// mapping for most instructions to their file/line/column - even
   145  			// for inlined functions!
   146  			lr, err := data.LineReader(e)
   147  			if err != nil {
   148  				return nil, err
   149  			}
   150  			lines = lr.Files()
   151  			var lineEntry = dwarf.LineEntry{
   152  				EndSequence: true,
   153  			}
   154  
   155  			// Line tables are organized as sequences of line entries until an
   156  			// end sequence. A single line table can contain multiple such
   157  			// sequences. The last line entry is an EndSequence to indicate the
   158  			// end.
   159  			for {
   160  				// Read the next .debug_line entry.
   161  				prevLineEntry := lineEntry
   162  				err := lr.Next(&lineEntry)
   163  				if err != nil {
   164  					if err == io.EOF {
   165  						break
   166  					}
   167  					return nil, err
   168  				}
   169  
   170  				if prevLineEntry.EndSequence && lineEntry.Address == 0 && skipTombstone {
   171  					// Tombstone value. This symbol has been removed, for
   172  					// example by the --gc-sections linker flag. It is still
   173  					// here in the debug information because the linker can't
   174  					// just remove this reference.
   175  					// Read until the next EndSequence so that this sequence is
   176  					// skipped.
   177  					// For more details, see (among others):
   178  					// https://reviews.llvm.org/D84825
   179  					// The value 0 can however really occur in object files,
   180  					// that typically start at address 0. So don't skip
   181  					// tombstone values in object files (like when parsing MachO
   182  					// files).
   183  					for {
   184  						err := lr.Next(&lineEntry)
   185  						if err != nil {
   186  							return nil, err
   187  						}
   188  						if lineEntry.EndSequence {
   189  							break
   190  						}
   191  					}
   192  				}
   193  
   194  				if !prevLineEntry.EndSequence {
   195  					// The chunk describes the code from prevLineEntry to
   196  					// lineEntry.
   197  					line := addressLine{
   198  						Address: prevLineEntry.Address + codeOffset,
   199  						Length:  lineEntry.Address - prevLineEntry.Address,
   200  						Align:   codeAlignment,
   201  						File:    prevLineEntry.File.Name,
   202  					}
   203  					if line.Length != 0 {
   204  						addresses = append(addresses, line)
   205  					}
   206  				}
   207  			}
   208  		case dwarf.TagVariable:
   209  			// Global variable (or constant). Most of these are not actually
   210  			// stored in the binary, because they have been optimized out. Only
   211  			// the ones with a location are still present.
   212  			r.SkipChildren()
   213  
   214  			file := e.AttrField(dwarf.AttrDeclFile)
   215  			location := e.AttrField(dwarf.AttrLocation)
   216  			globalType := e.AttrField(dwarf.AttrType)
   217  			if file == nil || location == nil || globalType == nil {
   218  				// Doesn't contain the requested information.
   219  				continue
   220  			}
   221  
   222  			// Try to parse the location. While this could in theory be a very
   223  			// complex expression, usually it's just a DW_OP_addr opcode
   224  			// followed by an address.
   225  			addr, err := readDWARFConstant(r.AddressSize(), location.Val.([]uint8))
   226  			if err != nil {
   227  				continue // ignore the error, we don't know what to do with it
   228  			}
   229  
   230  			// Parse the type of the global variable, which (importantly)
   231  			// contains the variable size. We're not interested in the type,
   232  			// only in the size.
   233  			typ, err := data.Type(globalType.Val.(dwarf.Offset))
   234  			if err != nil {
   235  				return nil, err
   236  			}
   237  
   238  			// Read alignment, if it's stored as part of the debug information.
   239  			var alignment uint64
   240  			if attr := e.AttrField(dwarf.AttrAlignment); attr != nil {
   241  				alignment = uint64(attr.Val.(int64))
   242  			}
   243  
   244  			addresses = append(addresses, addressLine{
   245  				Address:    addr,
   246  				Length:     uint64(typ.Size()),
   247  				Align:      alignment,
   248  				File:       lines[file.Val.(int64)].Name,
   249  				IsVariable: true,
   250  			})
   251  		default:
   252  			r.SkipChildren()
   253  		}
   254  	}
   255  	return addresses, nil
   256  }
   257  
   258  // Parse a DWARF constant. For addresses, this is usually a very simple
   259  // expression.
   260  func readDWARFConstant(addressSize int, bytecode []byte) (uint64, error) {
   261  	var addr uint64
   262  	for len(bytecode) != 0 {
   263  		op := bytecode[0]
   264  		bytecode = bytecode[1:]
   265  		switch op {
   266  		case 0x03: // DW_OP_addr
   267  			switch addressSize {
   268  			case 2:
   269  				addr = uint64(binary.LittleEndian.Uint16(bytecode))
   270  			case 4:
   271  				addr = uint64(binary.LittleEndian.Uint32(bytecode))
   272  			case 8:
   273  				addr = binary.LittleEndian.Uint64(bytecode)
   274  			default:
   275  				panic("unexpected address size")
   276  			}
   277  			bytecode = bytecode[addressSize:]
   278  		case 0x23: // DW_OP_plus_uconst
   279  			offset, n := readULEB128(bytecode)
   280  			addr += offset
   281  			bytecode = bytecode[n:]
   282  		default:
   283  			return 0, fmt.Errorf("unknown DWARF opcode: 0x%x", op)
   284  		}
   285  	}
   286  	return addr, nil
   287  }
   288  
   289  // Source: https://en.wikipedia.org/wiki/LEB128#Decode_unsigned_integer
   290  func readULEB128(buf []byte) (result uint64, n int) {
   291  	var shift uint8
   292  	for {
   293  		b := buf[n]
   294  		n++
   295  		result |= uint64(b&0x7f) << shift
   296  		if b&0x80 == 0 {
   297  			break
   298  		}
   299  		shift += 7
   300  	}
   301  	return
   302  }
   303  
   304  // Read a MachO object file and return a line table.
   305  // Also return an index from symbol name to start address in the line table.
   306  func readMachOSymbolAddresses(path string) (map[string]int, []addressLine, error) {
   307  	// Some constants from mach-o/nlist.h
   308  	// See: https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/nlist.h.auto.html
   309  	const (
   310  		N_STAB = 0xe0
   311  		N_TYPE = 0x0e // bitmask for N_TYPE field
   312  		N_SECT = 0xe  // one of the possible type in the N_TYPE field
   313  	)
   314  
   315  	// Read DWARF from the given object file.
   316  	file, err := macho.Open(path)
   317  	if err != nil {
   318  		return nil, nil, err
   319  	}
   320  	defer file.Close()
   321  	dwarf, err := file.DWARF()
   322  	if err != nil {
   323  		return nil, nil, err
   324  	}
   325  	lines, err := readProgramSizeFromDWARF(dwarf, 0, 0, false)
   326  	if err != nil {
   327  		return nil, nil, err
   328  	}
   329  
   330  	// Make a map from start addresses to indices in the line table (because the
   331  	// line table is a slice, not a map).
   332  	addressToLine := make(map[uint64]int, len(lines))
   333  	for i, line := range lines {
   334  		if _, ok := addressToLine[line.Address]; ok {
   335  			addressToLine[line.Address] = -1
   336  			continue
   337  		}
   338  		addressToLine[line.Address] = i
   339  	}
   340  
   341  	// Make a map that for each symbol gives the start index in the line table.
   342  	addresses := make(map[string]int, len(addressToLine))
   343  	for _, symbol := range file.Symtab.Syms {
   344  		if symbol.Type&N_STAB != 0 {
   345  			continue // STABS entry, ignore
   346  		}
   347  		if symbol.Type&0x0e != N_SECT {
   348  			continue // undefined symbol
   349  		}
   350  		if index, ok := addressToLine[symbol.Value]; ok && index >= 0 {
   351  			if _, ok := addresses[symbol.Name]; ok {
   352  				// There is a duplicate. Mark it as unavailable.
   353  				addresses[symbol.Name] = -1
   354  				continue
   355  			}
   356  			addresses[symbol.Name] = index
   357  		}
   358  	}
   359  
   360  	return addresses, lines, nil
   361  }
   362  
   363  // loadProgramSize calculate a program/data size breakdown of each package for a
   364  // given ELF file.
   365  // If the file doesn't contain DWARF debug information, the returned program
   366  // size will still have valid summaries but won't have complete size information
   367  // per package.
   368  func loadProgramSize(path string, packagePathMap map[string]string) (*programSize, error) {
   369  	// Open the binary file.
   370  	f, err := os.Open(path)
   371  	if err != nil {
   372  		return nil, err
   373  	}
   374  	defer f.Close()
   375  
   376  	// This stores all chunks of addresses found in the binary.
   377  	var addresses []addressLine
   378  
   379  	// Load the binary file, which could be in a number of file formats.
   380  	var sections []memorySection
   381  	if file, err := elf.NewFile(f); err == nil {
   382  		var codeAlignment uint64
   383  		switch file.Machine {
   384  		case elf.EM_ARM:
   385  			codeAlignment = 4 // usually 2, but can be 4
   386  		}
   387  		// Read DWARF information. The error is intentionally ignored.
   388  		data, _ := file.DWARF()
   389  		if data != nil {
   390  			addresses, err = readProgramSizeFromDWARF(data, 0, codeAlignment, true)
   391  			if err != nil {
   392  				// However, _do_ report an error here. Something must have gone
   393  				// wrong while trying to parse DWARF data.
   394  				return nil, err
   395  			}
   396  		}
   397  
   398  		// Read the ELF symbols for some more chunks of location information.
   399  		// Some globals (such as strings) aren't stored in the DWARF debug
   400  		// information and therefore need to be obtained in a different way.
   401  		allSymbols, err := file.Symbols()
   402  		if err != nil {
   403  			return nil, err
   404  		}
   405  		for _, symbol := range allSymbols {
   406  			symType := elf.ST_TYPE(symbol.Info)
   407  			if symbol.Size == 0 {
   408  				continue
   409  			}
   410  			if symType != elf.STT_FUNC && symType != elf.STT_OBJECT && symType != elf.STT_NOTYPE {
   411  				continue
   412  			}
   413  			if symbol.Section >= elf.SHN_LORESERVE {
   414  				// Not a regular section, so skip it.
   415  				// One example is elf.SHN_ABS, which is used for symbols
   416  				// declared with an absolute value such as the memset function
   417  				// on the ESP32 which is defined in the mask ROM.
   418  				continue
   419  			}
   420  			section := file.Sections[symbol.Section]
   421  			if section.Flags&elf.SHF_ALLOC == 0 {
   422  				continue
   423  			}
   424  			if packageSymbolRegexp.MatchString(symbol.Name) || symbol.Name == "__isr_vector" {
   425  				addresses = append(addresses, addressLine{
   426  					Address:    symbol.Value,
   427  					Length:     symbol.Size,
   428  					File:       symbol.Name,
   429  					IsVariable: true,
   430  				})
   431  			}
   432  		}
   433  
   434  		// Load allocated sections.
   435  		for _, section := range file.Sections {
   436  			if section.Flags&elf.SHF_ALLOC == 0 {
   437  				continue
   438  			}
   439  			if section.Type == elf.SHT_NOBITS {
   440  				if section.Name == ".stack" {
   441  					// TinyGo emits stack sections on microcontroller using the
   442  					// ".stack" name.
   443  					// This is a bit ugly, but I don't think there is a way to
   444  					// mark the stack section in a linker script.
   445  					sections = append(sections, memorySection{
   446  						Address: section.Addr,
   447  						Size:    section.Size,
   448  						Align:   section.Addralign,
   449  						Type:    memoryStack,
   450  					})
   451  				} else {
   452  					// Regular .bss section.
   453  					sections = append(sections, memorySection{
   454  						Address: section.Addr,
   455  						Size:    section.Size,
   456  						Align:   section.Addralign,
   457  						Type:    memoryBSS,
   458  					})
   459  				}
   460  			} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_EXECINSTR != 0 {
   461  				// .text
   462  				sections = append(sections, memorySection{
   463  					Address: section.Addr,
   464  					Size:    section.Size,
   465  					Align:   section.Addralign,
   466  					Type:    memoryCode,
   467  				})
   468  			} else if section.Type == elf.SHT_PROGBITS && section.Flags&elf.SHF_WRITE != 0 {
   469  				// .data
   470  				sections = append(sections, memorySection{
   471  					Address: section.Addr,
   472  					Size:    section.Size,
   473  					Align:   section.Addralign,
   474  					Type:    memoryData,
   475  				})
   476  			} else if section.Type == elf.SHT_PROGBITS {
   477  				// .rodata
   478  				sections = append(sections, memorySection{
   479  					Address: section.Addr,
   480  					Size:    section.Size,
   481  					Align:   section.Addralign,
   482  					Type:    memoryROData,
   483  				})
   484  			}
   485  		}
   486  	} else if file, err := macho.NewFile(f); err == nil {
   487  		// Read segments, for use while reading through sections.
   488  		segments := map[string]*macho.Segment{}
   489  		for _, load := range file.Loads {
   490  			switch load := load.(type) {
   491  			case *macho.Segment:
   492  				segments[load.Name] = load
   493  			}
   494  		}
   495  
   496  		// Read MachO sections.
   497  		for _, section := range file.Sections {
   498  			sectionType := section.Flags & 0xff
   499  			sectionFlags := section.Flags >> 8
   500  			segment := segments[section.Seg]
   501  			// For the constants used here, see:
   502  			// https://github.com/llvm/llvm-project/blob/release/14.x/llvm/include/llvm/BinaryFormat/MachO.h
   503  			if sectionFlags&0x800000 != 0 { // S_ATTR_PURE_INSTRUCTIONS
   504  				// Section containing only instructions.
   505  				sections = append(sections, memorySection{
   506  					Address: section.Addr,
   507  					Size:    uint64(section.Size),
   508  					Align:   uint64(section.Align),
   509  					Type:    memoryCode,
   510  				})
   511  			} else if sectionType == 1 { // S_ZEROFILL
   512  				// Section filled with zeroes on demand.
   513  				sections = append(sections, memorySection{
   514  					Address: section.Addr,
   515  					Size:    uint64(section.Size),
   516  					Align:   uint64(section.Align),
   517  					Type:    memoryBSS,
   518  				})
   519  			} else if segment.Maxprot&0b011 == 0b001 { // --r (read-only data)
   520  				// Protection doesn't allow writes, so mark this section read-only.
   521  				sections = append(sections, memorySection{
   522  					Address: section.Addr,
   523  					Size:    uint64(section.Size),
   524  					Align:   uint64(section.Align),
   525  					Type:    memoryROData,
   526  				})
   527  			} else {
   528  				// The rest is assumed to be regular data.
   529  				sections = append(sections, memorySection{
   530  					Address: section.Addr,
   531  					Size:    uint64(section.Size),
   532  					Align:   uint64(section.Align),
   533  					Type:    memoryData,
   534  				})
   535  			}
   536  		}
   537  
   538  		// Read DWARF information.
   539  		// The data isn't stored directly in the binary as in most executable
   540  		// formats. Instead, it is left in the object files that were used as a
   541  		// basis for linking. The executable does however contain STABS debug
   542  		// information that points to the source object file and is used by
   543  		// debuggers.
   544  		// For more information:
   545  		// http://wiki.dwarfstd.org/index.php?title=Apple%27s_%22Lazy%22_DWARF_Scheme
   546  		var objSymbolNames map[string]int
   547  		var objAddresses []addressLine
   548  		var previousSymbol macho.Symbol
   549  		for _, symbol := range file.Symtab.Syms {
   550  			// STABS constants, from mach-o/stab.h:
   551  			// https://opensource.apple.com/source/xnu/xnu-7195.141.2/EXTERNAL_HEADERS/mach-o/stab.h.auto.html
   552  			const (
   553  				N_GSYM  = 0x20
   554  				N_FUN   = 0x24
   555  				N_STSYM = 0x26
   556  				N_SO    = 0x64
   557  				N_OSO   = 0x66
   558  			)
   559  			if symbol.Type == N_OSO {
   560  				// Found an object file. Now try to parse it.
   561  				objSymbolNames, objAddresses, err = readMachOSymbolAddresses(symbol.Name)
   562  				if err != nil && sizesDebug {
   563  					// Errors are normally ignored. If there is an error, it's
   564  					// simply treated as that the DWARF is not available.
   565  					fmt.Fprintf(os.Stderr, "could not read DWARF from file %s: %s\n", symbol.Name, err)
   566  				}
   567  			} else if symbol.Type == N_FUN {
   568  				// Found a function.
   569  				// The way this is encoded is a bit weird. MachO symbols don't
   570  				// have a length. What I've found is that the length is encoded
   571  				// by first having a N_FUN symbol as usual, and then having a
   572  				// symbol with a zero-length name that has the value not set to
   573  				// the address of the symbol but to the length. So in order to
   574  				// get both the address and the length, we look for a symbol
   575  				// with a name followed by a symbol without a name.
   576  				if symbol.Name == "" && previousSymbol.Type == N_FUN && previousSymbol.Name != "" {
   577  					// Functions are encoded as many small chunks in the line
   578  					// table (one or a few instructions per source line). But
   579  					// the symbol length covers the whole symbols, over many
   580  					// lines and possibly including inlined functions. So we
   581  					// continue to iterate through the objAddresses slice until
   582  					// we've found all the source lines that are part of this
   583  					// symbol.
   584  					address := previousSymbol.Value
   585  					length := symbol.Value
   586  					if index, ok := objSymbolNames[previousSymbol.Name]; ok && index >= 0 {
   587  						for length > 0 {
   588  							line := objAddresses[index]
   589  							line.Address = address
   590  							if line.Length > length {
   591  								// Line extends beyond the end of te symbol?
   592  								// Weird, shouldn't happen.
   593  								break
   594  							}
   595  							addresses = append(addresses, line)
   596  							index++
   597  							length -= line.Length
   598  							address += line.Length
   599  						}
   600  					}
   601  				}
   602  			} else if symbol.Type == N_GSYM || symbol.Type == N_STSYM {
   603  				// Global variables.
   604  				if index, ok := objSymbolNames[symbol.Name]; ok {
   605  					address := objAddresses[index]
   606  					address.Address = symbol.Value
   607  					addresses = append(addresses, address)
   608  				}
   609  			}
   610  			previousSymbol = symbol
   611  		}
   612  	} else if file, err := pe.NewFile(f); err == nil {
   613  		// Read DWARF information. The error is intentionally ignored.
   614  		data, _ := file.DWARF()
   615  		if data != nil {
   616  			addresses, err = readProgramSizeFromDWARF(data, 0, 0, true)
   617  			if err != nil {
   618  				// However, _do_ report an error here. Something must have gone
   619  				// wrong while trying to parse DWARF data.
   620  				return nil, err
   621  			}
   622  		}
   623  
   624  		// Read COFF sections.
   625  		optionalHeader := file.OptionalHeader.(*pe.OptionalHeader64)
   626  		for _, section := range file.Sections {
   627  			// For more information:
   628  			// https://docs.microsoft.com/en-us/windows/win32/api/winnt/ns-winnt-image_section_header
   629  			const (
   630  				IMAGE_SCN_CNT_CODE             = 0x00000020
   631  				IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040
   632  				IMAGE_SCN_MEM_DISCARDABLE      = 0x02000000
   633  				IMAGE_SCN_MEM_READ             = 0x40000000
   634  				IMAGE_SCN_MEM_WRITE            = 0x80000000
   635  			)
   636  			if section.Characteristics&IMAGE_SCN_MEM_DISCARDABLE != 0 {
   637  				// Debug sections, etc.
   638  				continue
   639  			}
   640  			address := uint64(section.VirtualAddress) + optionalHeader.ImageBase
   641  			if section.Characteristics&IMAGE_SCN_CNT_CODE != 0 {
   642  				// .text
   643  				sections = append(sections, memorySection{
   644  					Address: address,
   645  					Size:    uint64(section.VirtualSize),
   646  					Type:    memoryCode,
   647  				})
   648  			} else if section.Characteristics&IMAGE_SCN_CNT_INITIALIZED_DATA != 0 {
   649  				if section.Characteristics&IMAGE_SCN_MEM_WRITE != 0 {
   650  					// .data
   651  					sections = append(sections, memorySection{
   652  						Address: address,
   653  						Size:    uint64(section.Size),
   654  						Type:    memoryData,
   655  					})
   656  					if section.Size < section.VirtualSize {
   657  						// Equivalent of a .bss section.
   658  						// Note: because of how the PE/COFF format is
   659  						// structured, not all zero-initialized data is marked
   660  						// as such. A portion may be at the end of the .data
   661  						// section and is thus marked as initialized data.
   662  						sections = append(sections, memorySection{
   663  							Address: address + uint64(section.Size),
   664  							Size:    uint64(section.VirtualSize) - uint64(section.Size),
   665  							Type:    memoryBSS,
   666  						})
   667  					}
   668  				} else if section.Characteristics&IMAGE_SCN_MEM_READ != 0 {
   669  					// .rdata, .buildid, .pdata
   670  					sections = append(sections, memorySection{
   671  						Address: address,
   672  						Size:    uint64(section.VirtualSize),
   673  						Type:    memoryROData,
   674  					})
   675  				}
   676  			}
   677  		}
   678  	} else if file, err := wasm.Parse(f); err == nil {
   679  		// File is in WebAssembly format.
   680  
   681  		// Put code at a very high address, so that it won't conflict with the
   682  		// data in the memory section.
   683  		const codeOffset = 0x8000_0000_0000_0000
   684  
   685  		// Read DWARF information. The error is intentionally ignored.
   686  		data, _ := file.DWARF()
   687  		if data != nil {
   688  			addresses, err = readProgramSizeFromDWARF(data, codeOffset, 0, true)
   689  			if err != nil {
   690  				// However, _do_ report an error here. Something must have gone
   691  				// wrong while trying to parse DWARF data.
   692  				return nil, err
   693  			}
   694  		}
   695  
   696  		var linearMemorySize uint64
   697  		for _, section := range file.Sections {
   698  			switch section := section.(type) {
   699  			case *wasm.SectionCode:
   700  				sections = append(sections, memorySection{
   701  					Address: codeOffset,
   702  					Size:    uint64(section.Size()),
   703  					Type:    memoryCode,
   704  				})
   705  			case *wasm.SectionMemory:
   706  				// This value is used when processing *wasm.SectionData (which
   707  				// always comes after *wasm.SectionMemory).
   708  				linearMemorySize = uint64(section.Entries[0].Limits.Initial) * 64 * 1024
   709  			case *wasm.SectionData:
   710  				// Data sections contain initial values for linear memory.
   711  				// First load the list of data sections, and sort them by
   712  				// address for easier processing.
   713  				var dataSections []memorySection
   714  				for _, entry := range section.Entries {
   715  					address, err := wasm.Eval(bytes.NewBuffer(entry.Offset))
   716  					if err != nil {
   717  						return nil, fmt.Errorf("could not parse data section address: %w", err)
   718  					}
   719  					dataSections = append(dataSections, memorySection{
   720  						Address: uint64(address[0].(int32)),
   721  						Size:    uint64(len(entry.Data)),
   722  						Type:    memoryData,
   723  					})
   724  				}
   725  				sort.Slice(dataSections, func(i, j int) bool {
   726  					return dataSections[i].Address < dataSections[j].Address
   727  				})
   728  
   729  				// And now add all data sections for linear memory.
   730  				// Parts that are in the slice of data sections are added as
   731  				// memoryData, and parts that are not are added as memoryBSS.
   732  				addr := uint64(0)
   733  				for _, section := range dataSections {
   734  					if addr < section.Address {
   735  						sections = append(sections, memorySection{
   736  							Address: addr,
   737  							Size:    section.Address - addr,
   738  							Type:    memoryBSS,
   739  						})
   740  					}
   741  					if addr > section.Address {
   742  						// This might be allowed, I'm not sure.
   743  						// It certainly doesn't make a lot of sense.
   744  						return nil, fmt.Errorf("overlapping data section")
   745  					}
   746  					// addr == section.Address
   747  					sections = append(sections, section)
   748  					addr = section.Address + section.Size
   749  				}
   750  				if addr < linearMemorySize {
   751  					sections = append(sections, memorySection{
   752  						Address: addr,
   753  						Size:    linearMemorySize - addr,
   754  						Type:    memoryBSS,
   755  					})
   756  				}
   757  			}
   758  		}
   759  	} else {
   760  		return nil, fmt.Errorf("could not parse file: %w", err)
   761  	}
   762  
   763  	// Sort the slice of address chunks by address, so that we can iterate
   764  	// through it to calculate section sizes.
   765  	sort.Slice(addresses, func(i, j int) bool {
   766  		if addresses[i].Address == addresses[j].Address {
   767  			// Very rarely, there might be duplicate addresses.
   768  			// If that happens, sort the largest chunks first.
   769  			return addresses[i].Length > addresses[j].Length
   770  		}
   771  		return addresses[i].Address < addresses[j].Address
   772  	})
   773  
   774  	// Now finally determine the binary/RAM size usage per package by going
   775  	// through each allocated section.
   776  	sizes := make(map[string]packageSize)
   777  	for _, section := range sections {
   778  		switch section.Type {
   779  		case memoryCode:
   780  			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
   781  				field := sizes[path]
   782  				if isVariable {
   783  					field.ROData += size
   784  				} else {
   785  					field.Code += size
   786  				}
   787  				sizes[path] = field
   788  			}, packagePathMap)
   789  		case memoryROData:
   790  			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
   791  				field := sizes[path]
   792  				field.ROData += size
   793  				sizes[path] = field
   794  			}, packagePathMap)
   795  		case memoryData:
   796  			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
   797  				field := sizes[path]
   798  				field.Data += size
   799  				sizes[path] = field
   800  			}, packagePathMap)
   801  		case memoryBSS:
   802  			readSection(section, addresses, func(path string, size uint64, isVariable bool) {
   803  				field := sizes[path]
   804  				field.BSS += size
   805  				sizes[path] = field
   806  			}, packagePathMap)
   807  		case memoryStack:
   808  			// We store the C stack as a pseudo-package.
   809  			sizes["C stack"] = packageSize{
   810  				BSS: section.Size,
   811  			}
   812  		}
   813  	}
   814  
   815  	// ...and summarize the results.
   816  	program := &programSize{
   817  		Packages: sizes,
   818  	}
   819  	for _, pkg := range sizes {
   820  		program.Code += pkg.Code
   821  		program.ROData += pkg.ROData
   822  		program.Data += pkg.Data
   823  		program.BSS += pkg.BSS
   824  	}
   825  	return program, nil
   826  }
   827  
   828  // readSection determines for each byte in this section to which package it
   829  // belongs. It reports this usage through the addSize callback.
   830  func readSection(section memorySection, addresses []addressLine, addSize func(string, uint64, bool), packagePathMap map[string]string) {
   831  	// The addr variable tracks at which address we are while going through this
   832  	// section. We start at the beginning.
   833  	addr := section.Address
   834  	sectionEnd := section.Address + section.Size
   835  	if sizesDebug {
   836  		fmt.Printf("%08x..%08x %5d: %s\n", addr, sectionEnd, section.Size, section.Type)
   837  	}
   838  	for _, line := range addresses {
   839  		if line.Address < section.Address || line.Address+line.Length > sectionEnd {
   840  			// Check that this line is entirely within the section.
   841  			// Don't bother dealing with line entries that cross sections (that
   842  			// seems rather unlikely anyway).
   843  			continue
   844  		}
   845  		if addr < line.Address {
   846  			// There is a gap: there is a space between the current and the
   847  			// previous line entry.
   848  			// Check whether this is caused by alignment requirements.
   849  			addrAligned := (addr + line.Align - 1) &^ (line.Align - 1)
   850  			if line.Align > 1 && addrAligned >= line.Address {
   851  				// It is, assume that's what causes the gap.
   852  				addSize("(padding)", line.Address-addr, true)
   853  			} else {
   854  				addSize("(unknown)", line.Address-addr, false)
   855  				if sizesDebug {
   856  					fmt.Printf("%08x..%08x %5d:  unknown (gap), alignment=%d\n", addr, line.Address, line.Address-addr, line.Align)
   857  				}
   858  			}
   859  			addr = line.Address
   860  		}
   861  		if addr > line.Address+line.Length {
   862  			// The current line is already covered by a previous line entry.
   863  			// Simply skip it.
   864  			continue
   865  		}
   866  		// At this point, addr falls within the current line (probably at the
   867  		// start).
   868  		length := line.Length
   869  		if addr > line.Address {
   870  			// There is some overlap: the previous line entry already covered
   871  			// part of this line entry. So reduce the length to add to the
   872  			// remaining bit of the line entry.
   873  			length = line.Length - (addr - line.Address)
   874  		}
   875  		// Finally, mark this chunk of memory as used by the given package.
   876  		addSize(findPackagePath(line.File, packagePathMap), length, line.IsVariable)
   877  		addr = line.Address + line.Length
   878  	}
   879  	if addr < sectionEnd {
   880  		// There is a gap at the end of the section.
   881  		addrAligned := (addr + section.Align - 1) &^ (section.Align - 1)
   882  		if section.Align > 1 && addrAligned >= sectionEnd {
   883  			// The gap is caused by the section alignment.
   884  			// For example, if a .rodata section ends with a non-aligned string.
   885  			addSize("(padding)", sectionEnd-addr, true)
   886  		} else {
   887  			addSize("(unknown)", sectionEnd-addr, false)
   888  			if sizesDebug {
   889  				fmt.Printf("%08x..%08x %5d:  unknown (end), alignment=%d\n", addr, sectionEnd, sectionEnd-addr, section.Align)
   890  			}
   891  		}
   892  	}
   893  }
   894  
   895  // findPackagePath returns the Go package (or a pseudo package) for the given
   896  // path. It uses some heuristics, for example for some C libraries.
   897  func findPackagePath(path string, packagePathMap map[string]string) string {
   898  	// Check whether this path is part of one of the compiled packages.
   899  	packagePath, ok := packagePathMap[filepath.Dir(path)]
   900  	if !ok {
   901  		if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")) {
   902  			// Emit C libraries (in the lib subdirectory of TinyGo) as a single
   903  			// package, with a "C" prefix. For example: "C compiler-rt" for the
   904  			// compiler runtime library from LLVM.
   905  			packagePath = "C " + strings.Split(strings.TrimPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "lib")), string(os.PathSeparator))[1]
   906  		} else if strings.HasPrefix(path, filepath.Join(goenv.Get("TINYGOROOT"), "llvm-project")) {
   907  			packagePath = "C compiler-rt"
   908  		} else if packageSymbolRegexp.MatchString(path) {
   909  			// Parse symbol names like main$alloc or runtime$string.
   910  			packagePath = path[:strings.LastIndex(path, "$")]
   911  		} else if path == "__isr_vector" {
   912  			packagePath = "C interrupt vector"
   913  		} else if path == "<Go type>" {
   914  			packagePath = "Go types"
   915  		} else if path == "<Go interface assert>" {
   916  			// Interface type assert, generated by the interface lowering pass.
   917  			packagePath = "Go interface assert"
   918  		} else if path == "<Go interface method>" {
   919  			// Interface method wrapper (switch over all concrete types),
   920  			// generated by the interface lowering pass.
   921  			packagePath = "Go interface method"
   922  		} else if path == "<stdin>" {
   923  			// This can happen when the source code (in Go) doesn't have a
   924  			// source file and uses "-" as the location. Somewhere this is
   925  			// converted to "<stdin>".
   926  			// Convert this back to the "-" string. Eventually, this should be
   927  			// fixed in the compiler.
   928  			packagePath = "-"
   929  		} else {
   930  			// This is some other path. Not sure what it is, so just emit its directory.
   931  			packagePath = filepath.Dir(path) // fallback
   932  		}
   933  	}
   934  	return packagePath
   935  }