github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/bin/pef/pef.go (about)

     1  // Package pef provides access to PEF (Preferred Executable Format) files.
     2  package pef
     3  
     4  import (
     5  	"bytes"
     6  	"encoding/binary"
     7  	"fmt"
     8  	"io"
     9  	"math"
    10  	"os"
    11  	"sort"
    12  	"time"
    13  
    14  	"github.com/decomp/exp/bin"
    15  	"github.com/pkg/errors"
    16  )
    17  
    18  // Register PEF format.
    19  func init() {
    20  	// Preferred Executable Format (PEF) format.
    21  	//
    22  	//    4A 6F 79 21 70 65 66 66  |Joy!peff|
    23  	const magic = "Joy!peff"
    24  	bin.RegisterFormat("pef", magic, Parse)
    25  }
    26  
    27  // ParseFile parses the given PEF binary executable, reading from path.
    28  func ParseFile(path string) (*bin.File, error) {
    29  	f, err := os.Open(path)
    30  	if err != nil {
    31  		return nil, errors.WithStack(err)
    32  	}
    33  	defer f.Close()
    34  	return Parse(f)
    35  }
    36  
    37  // Parse parses the given PEF binary executable, reading from r.
    38  //
    39  // Users are responsible for closing r.
    40  func Parse(r io.ReaderAt) (*bin.File, error) {
    41  	// Open PEF file.
    42  	f, err := NewFile(r)
    43  	if err != nil {
    44  		return nil, errors.WithStack(err)
    45  	}
    46  
    47  	// Parse machine architecture.
    48  	file := &bin.File{}
    49  	for _, container := range f.Containers {
    50  		var arch bin.Arch
    51  		switch container.Architecture {
    52  		case "pwpc":
    53  			arch = bin.ArchPowerPC_32
    54  		default:
    55  			panic(fmt.Errorf("support for machine architecture %q not yet implemented", container.Architecture))
    56  		}
    57  		if file.Arch != 0 && arch != file.Arch {
    58  			panic(fmt.Errorf("support for multiple machine architectures not yet implemented; prev %q, new %q", file.Arch, arch))
    59  		}
    60  		file.Arch = arch
    61  	}
    62  
    63  	// Parse sections.
    64  	for _, container := range f.Containers {
    65  		for _, s := range container.Sections {
    66  			data, err := s.Data()
    67  			if err != nil {
    68  				return nil, errors.WithStack(err)
    69  			}
    70  			perm := parsePerm(s.SectionKind)
    71  			offset := container.Offset + uint64(s.ContainerOffset)
    72  			sect := &bin.Section{
    73  				Addr:     bin.Address(s.DefaultAddress),
    74  				Offset:   offset,
    75  				Data:     data,
    76  				FileSize: int(s.PackedSize),
    77  				MemSize:  int(s.TotalSize),
    78  				Perm:     perm,
    79  			}
    80  			file.Sections = append(file.Sections, sect)
    81  		}
    82  	}
    83  	// Sort sections.
    84  	less := func(i, j int) bool {
    85  		if file.Sections[i].Addr == file.Sections[j].Addr {
    86  			if len(file.Sections[i].Data) > len(file.Sections[j].Data) {
    87  				// prioritize longer sections with identical addresses.
    88  				return true
    89  			}
    90  			return file.Sections[i].Name < file.Sections[j].Name
    91  		}
    92  		return file.Sections[i].Addr < file.Sections[j].Addr
    93  	}
    94  	sort.Slice(file.Sections, less)
    95  
    96  	return file, nil
    97  }
    98  
    99  // NewFile creates a new File for accessing a PEF binary in an underlying
   100  // reader.
   101  //
   102  // Users are responsible for closing r.
   103  func NewFile(r io.ReaderAt) (*File, error) {
   104  	f, err := parseFile(r)
   105  	if err != nil {
   106  		return nil, errors.WithStack(err)
   107  	}
   108  	return f, nil
   109  }
   110  
// A File is a PEF file, made up of the containers found in it.
type File struct {
	// PEF containers, in the order they were parsed from the file.
	Containers []*Container
}
   116  
   117  // parseFile parses and returns a PEF file.
   118  func parseFile(r io.ReaderAt) (*File, error) {
   119  	var offset int64
   120  	f := &File{}
   121  	for {
   122  		sr := io.NewSectionReader(r, offset, math.MaxInt64)
   123  		container, n, err := parseContainer(sr)
   124  		if err != nil {
   125  			if errors.Cause(err) == io.EOF {
   126  				break
   127  			}
   128  			return nil, errors.WithStack(err)
   129  		}
   130  		offset += n
   131  		f.Containers = append(f.Containers, container)
   132  	}
   133  	return f, nil
   134  }
   135  
// A Container is a PEF container: a container header together with the
// sections described by its section headers.
type Container struct {
	// PEF container header.
	*ContainerHeader
	// File offset of the container.
	Offset uint64
	// PEF sections, in section header order.
	Sections []*Section
}
   145  
   146  // parseContainer parses and returns a PEF container.
   147  func parseContainer(r io.ReaderAt) (*Container, int64, error) {
   148  	// Overview of the structure of a PEF container.
   149  	//
   150  	//    Container header
   151  	//    Section headers; zero or more
   152  	//    Section name table
   153  	//    Section contents; zero or more
   154  
   155  	// Parse PEF container header.
   156  	var offset int64
   157  	hdr, n, err := parseContainerHeader(r)
   158  	if err != nil {
   159  		return nil, 0, errors.WithStack(err)
   160  	}
   161  	offset += n
   162  	container := &Container{
   163  		ContainerHeader: hdr,
   164  	}
   165  
   166  	// Parse section headers.
   167  	for i := uint16(0); i < hdr.SectionCount; i++ {
   168  		sr := io.NewSectionReader(r, offset, math.MaxInt64)
   169  		sectHdr, n, err := parseSectionHeader(sr)
   170  		if err != nil {
   171  			return nil, 0, errors.WithStack(err)
   172  		}
   173  		offset += n
   174  		sect := &Section{
   175  			SectionHeader: sectHdr,
   176  			ReaderAt:      io.NewSectionReader(r, int64(sectHdr.ContainerOffset), int64(sectHdr.PackedSize)),
   177  		}
   178  		container.Sections = append(container.Sections, sect)
   179  	}
   180  
   181  	// The PEF container section name table contains the names of the sections
   182  	// stored as C-style null-terminated character strings. The strings have no
   183  	// specified alignment. Note that the section name table must immediately
   184  	// follow the section headers in the container.
   185  
   186  	// TODO: Parse string table.
   187  
   188  	// Parse Loader section.
   189  	for _, sect := range container.Sections {
   190  		if sect.SectionKind == kindLoader {
   191  			if err := parseLoaderSection(sect); err != nil {
   192  				return nil, 0, errors.WithStack(err)
   193  			}
   194  		}
   195  	}
   196  
   197  	// Calculate offset based on the end of the last section.
   198  	for _, sect := range container.Sections {
   199  		x := int64(sect.SectionHeader.ContainerOffset + sect.SectionHeader.PackedSize)
   200  		if offset < x {
   201  			offset = x
   202  		}
   203  	}
   204  	// Adjust offset based on alignment.
   205  	//
   206  	// When the container is not file-mapped, the overall container alignment is
   207  	// 16 bytes.
   208  	//
   209  	// ref: https://web.archive.org/web/20020111211702/http://developer.apple.com:80/techpubs/mac/runtimehtml/RTArch-92.html
   210  	if rem := offset % 16; rem != 0 {
   211  		offset += 16 - rem
   212  	}
   213  	return container, offset, nil
   214  }
   215  
// A ContainerHeader represents a single PEF container header, with the
// four-character tags converted to strings and the creation timestamp
// converted to a time.Time.
//
// ref: https://web.archive.org/web/20020219190852/http://developer.apple.com:80/techpubs/mac/runtimehtml/RTArch-91.html
type ContainerHeader struct {
	// Magic header: "Joy!"
	Tag1 string
	// Magic header: "peff"
	Tag2 string
	// Machine architecture.
	//    "pwpc" for PowerPC
	//    "m68k" for Motorola 68K
	Architecture string
	// PEF container format version.
	FormatVersion uint32
	// PEF container creation date; stored in the file as the number of seconds
	// since January 1, 1904 and interpreted as UTC when parsed.
	DateTimeStamp time.Time
	// Old definition version.
	OldDefVersion uint32
	// Old implementation version.
	OldImpVersion uint32
	// Current version.
	CurrentVersion uint32
	// Number of sections; determines how many section headers are parsed.
	SectionCount uint16
	// Number of instantiated sections.
	InstSectionCount uint16
}
   243  
   244  // parseContainerHeader parses and returns a PEF container header.
   245  func parseContainerHeader(r io.ReaderAt) (*ContainerHeader, int64, error) {
   246  	// PEF container header.
   247  	//
   248  	// ref: https://web.archive.org/web/20020219190852/http://developer.apple.com:80/techpubs/mac/runtimehtml/RTArch-91.html
   249  	const containerHeaderSize = 40
   250  	type containerHeader struct {
   251  		Tag1          [4]byte // 4 bytes; "Joy!"
   252  		Tag2          [4]byte // 4 bytes; "peff"
   253  		Architecture  [4]byte // 4 bytes; "pwpc" or "m68k"
   254  		FormatVersion uint32
   255  		// In Macintosh time-measurement scheme (number of seconds measured from
   256  		// January 1, 1904).
   257  		DateTimeStamp    uint32
   258  		OldDefVersion    uint32
   259  		OldImpVersion    uint32
   260  		CurrentVersion   uint32
   261  		SectionCount     uint16
   262  		InstSectionCount uint16
   263  		_                uint32 // reserved
   264  	}
   265  	buf := make([]byte, containerHeaderSize)
   266  	if _, err := r.ReadAt(buf, 0); err != nil {
   267  		return nil, 0, errors.WithStack(err)
   268  	}
   269  	v := &containerHeader{}
   270  	if err := binary.Read(bytes.NewReader(buf), binary.BigEndian, v); err != nil {
   271  		return nil, 0, errors.WithStack(err)
   272  	}
   273  	epoch := time.Date(1904, 1, 1, 0, 0, 0, 0, time.UTC)
   274  	dur := time.Duration(v.DateTimeStamp) * time.Second
   275  	date := epoch.Add(dur)
   276  	hdr := &ContainerHeader{
   277  		Tag1:             string(v.Tag1[:]),
   278  		Tag2:             string(v.Tag2[:]),
   279  		Architecture:     string(v.Architecture[:]),
   280  		FormatVersion:    v.FormatVersion,
   281  		DateTimeStamp:    date,
   282  		OldDefVersion:    v.OldDefVersion,
   283  		OldImpVersion:    v.OldImpVersion,
   284  		CurrentVersion:   v.CurrentVersion,
   285  		SectionCount:     v.SectionCount,
   286  		InstSectionCount: v.InstSectionCount,
   287  	}
   288  	return hdr, containerHeaderSize, nil
   289  }
   290  
// A Section is a PEF section: its header plus a reader giving access to the
// section's contents as stored in the container.
type Section struct {
	// PEF section header.
	*SectionHeader
	// Reader over the stored section contents; offset 0 is the first byte of
	// the section and the length is the section's PackedSize.
	io.ReaderAt
}
   297  
   298  // Data reads and returns the contents of the PEF section.
   299  func (sect *Section) Data() ([]byte, error) {
   300  	buf := make([]byte, sect.PackedSize)
   301  	if _, err := sect.ReadAt(buf, 0); err != nil {
   302  		return nil, errors.WithStack(err)
   303  	}
   304  	return buf, nil
   305  }
   306  
// A SectionHeader is a PEF section header.
//
// The struct is decoded directly from the big-endian on-disk representation by
// parseSectionHeader, so the order and types of its fields must match the file
// layout.
type SectionHeader struct {
	// Offset from start of section name table to section name; or -1 if
	// section has no name.
	NameOffset int32
	// Preferred address at which to place the section's instance.
	DefaultAddress uint32
	// Size in bytes required by the section's contents at execution time. For a
	// code section, this size is merely the size of the executable code. For a
	// data section, this size indicates the sum of the size of the initialized
	// data plus the size of any zero-initialized data. Zero-initialized data
	// appears at the end of a section's contents and its length is exactly the
	// difference of the TotalSize and UnpackedSize values.
	//
	// For noninstantiated sections, this field is ignored.
	TotalSize uint32
	// Size of the section's contents that is explicitly initialized from the
	// container. For code sections, this field is the size of the executable
	// code. For an unpacked data section, this field indicates only the size of
	// the initialized data. For packed data this is the size to which the
	// compressed contents expand. The UnpackedSize value also defines the
	// boundary between the explicitly initialized portion and the zero-
	// initialized portion.
	//
	// For noninstantiated sections, this field is ignored.
	UnpackedSize uint32
	// Size in bytes of a section's contents in the container. For code sections,
	// this field is the size of the executable code. For an unpacked data
	// section, this field indicates only the size of the initialized data. For a
	// packed data section this field is the size of the pattern description
	// contained in the section.
	PackedSize uint32
	// Offset from the beginning of the container to the start of the section's
	// contents.
	ContainerOffset uint32
	// Indicates the type of section as well as any special attributes; see the
	// kind* constants.
	SectionKind uint8
	// Controls how the section information is shared among processes.
	ShareKind uint8
	// Indicates the desired alignment for instantiated sections in memory as a
	// power of 2.
	Alignment uint8
}
   350  
   351  // parseSectionHeader parses and returns a PEF section header.
   352  func parseSectionHeader(r io.ReaderAt) (*SectionHeader, int64, error) {
   353  	// PEF section header.
   354  	//
   355  	// ref: https://web.archive.org/web/20020111211702/http://developer.apple.com:80/techpubs/mac/runtimehtml/RTArch-92.html
   356  	const sectionHeaderSize = 28
   357  	buf := make([]byte, sectionHeaderSize)
   358  	if _, err := r.ReadAt(buf, 0); err != nil {
   359  		return nil, 0, errors.WithStack(err)
   360  	}
   361  	hdr := &SectionHeader{}
   362  	if err := binary.Read(bytes.NewReader(buf), binary.BigEndian, hdr); err != nil {
   363  		return nil, 0, errors.WithStack(err)
   364  	}
   365  	if hdr.NameOffset != -1 {
   366  		panic("support for section name table not yet implemented")
   367  	}
   368  	return hdr, sectionHeaderSize, nil
   369  }
   370  
   371  // Section kinds.
   372  const (
   373  	// Read-only executable code.
   374  	kindCode = 0
   375  	// Read/write data.
   376  	kindUnpackedData = 1
   377  	// Read/write data.
   378  	kindPatternInitializedData = 2
   379  	// Read-only data.
   380  	kindConstant = 3
   381  	// Contains information about imports, exports, and entry points.
   382  	kindLoader = 4
   383  	// Reserved for future use.
   384  	kindDebug = 5
   385  	// Read/write, executable code.
   386  	kindExecutableData = 6
   387  	// Reserved for future use.
   388  	kindException = 7
   389  	// Reserved for future use.
   390  	kindTraceback = 8
   391  )
   392  
   393  // parsePerm returns the memory access permissions represented by the given PEF
   394  // section kind.
   395  func parsePerm(kind uint8) bin.Perm {
   396  	var perm bin.Perm
   397  	switch kind {
   398  	case kindCode, kindUnpackedData, kindPatternInitializedData, kindConstant, kindExecutableData:
   399  		perm |= bin.PermR
   400  	}
   401  	switch kind {
   402  	case kindUnpackedData, kindPatternInitializedData, kindExecutableData:
   403  		perm |= bin.PermW
   404  	}
   405  	switch kind {
   406  	case kindCode, kindExecutableData:
   407  		perm |= bin.PermX
   408  	}
   409  	return perm
   410  }
   411  
   412  // parseLoaderSection parses the given Loader section.
   413  func parseLoaderSection(sect *Section) error {
   414  	// Overview of the structure of a PEF Loader section.
   415  	//
   416  	//    Loader header
   417  	//    Imported library table
   418  	//    Imported symbol table
   419  	//    Relocation headers table
   420  	//    Relocations
   421  	//    Loader string table
   422  	//    Export hash table
   423  	//    Export key table
   424  	//    Exported symbol table
   425  	const loaderHeaderSize = 56
   426  	type LoaderHeader struct {
   427  		MainSection              int32
   428  		MainOffset               uint32
   429  		InitSection              int32
   430  		InitOffset               uint32
   431  		TermSection              int32
   432  		TermOffset               uint32
   433  		ImportedLibraryCount     uint32
   434  		TotalImportedSymbolCount uint32
   435  		RelocSectionCount        uint32
   436  		RelocInstrOffset         uint32
   437  		LoaderStringsOffset      uint32
   438  		ExportHashOffset         uint32
   439  		ExportHashTablePower     uint32
   440  		ExportedSymbolCount      uint32
   441  	}
   442  	buf := make([]byte, loaderHeaderSize)
   443  	if _, err := sect.ReadAt(buf, 0); err != nil {
   444  		return errors.WithStack(err)
   445  	}
   446  	hdr := &LoaderHeader{}
   447  	if err := binary.Read(bytes.NewReader(buf), binary.BigEndian, hdr); err != nil {
   448  		return errors.WithStack(err)
   449  	}
   450  	return nil
   451  }