github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/hfix/main.go (about)

     1  // The hfix tool fixes the syntax of IDA generated C header files (*.h -> *.h).
     2  package main
     3  
     4  import (
     5  	"bytes"
     6  	"flag"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"log"
    10  	"os"
    11  	"os/exec"
    12  	"regexp"
    13  	"strconv"
    14  	"strings"
    15  	"time"
    16  
    17  	"github.com/mewkiz/pkg/term"
    18  	"github.com/pkg/errors"
    19  )
    20  
    21  // dbg represents a logger with the "hfix:" prefix, which logs debug messages to
    22  // standard error.
    23  var dbg = log.New(os.Stderr, term.BlueBold("hfix:")+" ", 0)
    24  
    25  func usage() {
    26  	const use = `
    27  Fix the syntax of IDA generated C header files (*.h -> *.h).
    28  
    29  Usage:
    30  
    31  	hfix [OPTION]... FILE.h
    32  
    33  Flags:
    34  `
    35  	fmt.Fprint(os.Stderr, use[1:])
    36  	flag.PrintDefaults()
    37  }
    38  
    39  func main() {
    40  	// Parse command line flags.
    41  	var (
    42  		// output specifies the output path.
    43  		output string
    44  		// partial specifies whether to store partially fixed header files.
    45  		partial bool
    46  		// pre specifies whether to store preprocessed header files.
    47  		pre bool
    48  		// quiet specifies whether to suppress non-error messages.
    49  		quiet bool
    50  	)
    51  	flag.StringVar(&output, "o", "", "output path")
    52  	flag.BoolVar(&partial, "partial", false, "store partially fixed header files")
    53  	flag.BoolVar(&pre, "pre", false, "store preprocessed header files")
    54  	flag.BoolVar(&quiet, "q", false, "suppress non-error messages")
    55  	flag.Parse()
    56  	flag.Usage = usage
    57  	flag.Parse()
    58  	if flag.NArg() != 1 {
    59  		flag.Usage()
    60  		os.Exit(1)
    61  	}
    62  	hPath := flag.Arg(0)
    63  	// Mute debug messages if `-q` is set.
    64  	if quiet {
    65  		dbg.SetOutput(ioutil.Discard)
    66  	}
    67  
    68  	// Read file.
    69  	buf, err := ioutil.ReadFile(hPath)
    70  	if err != nil {
    71  		log.Fatalf("%+v", err)
    72  	}
    73  
    74  	// Preprocess input.
    75  	input := string(buf)
    76  	input = preprocess(input)
    77  	if pre {
    78  		if err := ioutil.WriteFile("pre.h", []byte(input), 0644); err != nil {
    79  			log.Fatalf("%+v", err)
    80  		}
    81  	}
    82  
    83  	// Fix syntax of the IDA generated C header file.
    84  	input, err = fix(input)
    85  	if err != nil {
    86  		if partial {
    87  			if err := ioutil.WriteFile("partial.h", []byte(input), 0644); err != nil {
    88  				log.Fatalf("%+v", err)
    89  			}
    90  		}
    91  		log.Fatalf("%+v", err)
    92  	}
    93  
    94  	// Store C header output.
    95  	w := os.Stdout
    96  	if len(output) > 0 {
    97  		f, err := os.Create(output)
    98  		if err != nil {
    99  			log.Fatal(err)
   100  		}
   101  		defer f.Close()
   102  		w = f
   103  	}
   104  	if _, err := w.WriteString(input); err != nil {
   105  		log.Fatalf("%+v", err)
   106  	}
   107  }
   108  
   109  var (
   110  	reEnumSizeSpec = regexp.MustCompile(`(enum [a-zA-Z0-9_$]+) : [a-zA-Z0-9_$]+`)
   111  	reEmptyEnum    = regexp.MustCompile(`enum [a-zA-Z0-9_$]+[\n]{[\n]};[\n]`)
   112  	reAlign        = regexp.MustCompile(`__declspec[(]align[(][0-9]+[)][)] `)
   113  	// Input before:
   114  	//
   115  	//    struct MessageVtbl
   116  	//    {
   117  	//      HRESULT (__stdcall *QueryInterface)(#277 *This, const IID *const riid, void **ppvObject);
   118  	//
   119  	// Input after:
   120  	//
   121  	//    struct MessageVtbl
   122  	//    {
   123  	//      HRESULT (__stdcall *QueryInterface)(MessageVtbl *This, const IID *const riid, void **ppvObject);
   124  	reBrokenTypeRef = regexp.MustCompile(`struct ([a-zA-Z0-9_$]+)[\n]{[\n][^\n#]+(#[0-9]+) [*]This[^\n]+`)
   125  	// Input before:
   126  	//
   127  	//    #pragma pack(push, 8)
   128  	//    #pragma pack(pop)
   129  	//
   130  	// Input after:
   131  	//
   132  	//    empty
   133  	rePragmaPack = regexp.MustCompile(`#pragma pack[(][^)]+[)]`)
   134  	// Input before:
   135  	//
   136  	//	   struct struct_name::$A707B71C060B6D10F73A71917EA8473F::$AA04DEB0C6383F89F13D312A174572A9
   137  	//    {
   138  	//
   139  	// Input after:
   140  	//
   141  	//    empty
   142  	reDupTypeDef = regexp.MustCompile(`[\n](struct|union) ([a-zA-Z0-9_$]+)::[^\n]+[\n]{(.|[\n])+?;[\n][\n]`)
   143  	// Input before:
   144  	//
   145  	//    IDirectDrawClipper::IDirectDrawClipperVtbl
   146  	//
   147  	// Input after:
   148  	//
   149  	//    IDirectDrawClipperVtbl
   150  	reTypeNamespace = regexp.MustCompile(`([a-zA-Z0-9_$]+::)+([a-zA-Z0-9_$]+) `)
   151  	// Input before:
   152  	//
   153  	//    enum enum_name
   154  	//    {
   155  	//      AAA = 0,
   156  	//    };
   157  	//
   158  	// Input after:
   159  	//
   160  	//    enum enum_name
   161  	//    {
   162  	//      AAA = 0,
   163  	//    };
   164  	//
   165  	//    typedef enum enum_name enum_name;
   166  	reTypedefEnum = regexp.MustCompile(`enum ([a-zA-Z0-9_$]+)[\n]{[^}]*};`)
   167  	// Input before:
   168  	//
   169  	//    struct struct_name
   170  	//    {
   171  	//      int x;
   172  	//    };
   173  	//
   174  	// Input after:
   175  	//
   176  	//    struct struct_name
   177  	//    {
   178  	//      int x;
   179  	//    };
   180  	//
   181  	//    typedef struct struct_name struct_name;
   182  	reTypedefStruct = regexp.MustCompile(`struct ([a-zA-Z0-9_$]+)[\n]{(.|[\n])*?[\n]};`)
   183  )
   184  
   185  // preprocess fixes simple syntax errors in the given input C header.
   186  func preprocess(input string) string {
   187  	// Drop enum type size specifiers.
   188  	input = reEnumSizeSpec.ReplaceAllString(input, "$1")
   189  	// Remove empty enums.
   190  	input = reEmptyEnum.ReplaceAllString(input, "")
   191  	// Drop alignment attribute.
   192  	input = reAlign.ReplaceAllString(input, "")
   193  	// Drop __unaligned attribute.
   194  	input = strings.Replace(input, "struct __unaligned ", "struct ", -1)
   195  	// Fix broken type names in structs.
   196  	for {
   197  		subs := reBrokenTypeRef.FindAllStringSubmatch(input, 1)
   198  		if subs == nil {
   199  			break
   200  		}
   201  		for _, sub := range subs {
   202  			// struct type name.
   203  			typ := sub[1] + " "
   204  			// #ID
   205  			id := sub[2] + " "
   206  			input = strings.Replace(input, id, typ, -1)
   207  		}
   208  	}
   209  	// Drop #pragma pack directives.
   210  	input = rePragmaPack.ReplaceAllString(input, "")
   211  	// Drop duplicate struct and union type definitions (identified with hash).
   212  	input = reDupTypeDef.ReplaceAllString(input, "\n")
   213  	// Drop namespace in type names.
   214  	input = reTypeNamespace.ReplaceAllString(input, "$2")
   215  	// Insert enum type definitions.
   216  	input = reTypedefEnum.ReplaceAllString(input, "$0\n\ntypedef enum $1 $1;\n")
   217  	// Insert struct type definitions.
   218  	input = reTypedefStruct.ReplaceAllString(input, "$0\n\ntypedef struct $1 $1;\n")
   219  	// Fix syntax of `noreturn` function attributes.
   220  	input = strings.Replace(input, " __noreturn ", " __attribute__((noreturn)) ", -1)
   221  	// Fix destructor method name.
   222  	input = strings.Replace(input, "type_info::`scalar deleting destructor'", "type_info_delete", -1)
   223  	// Fix constructor name.
   224  	input = strings.Replace(input, "type_info::~type_info", "type_info_create", -1)
   225  	return input
   226  }
   227  
   228  // fix fixes the syntax of the given IDA generated C header file.
   229  func fix(input string) (string, error) {
   230  	for {
   231  		errbuf := &bytes.Buffer{}
   232  		cmd := exec.Command("clang", "-m32", "-x", "c-header", "-Wno-return-type", "-Wno-invalid-noreturn", "-ferror-limit=0", "-o", "-", "-")
   233  		cmd.Stdin = strings.NewReader(input)
   234  		cmd.Stderr = errbuf
   235  		if err := cmd.Run(); err != nil {
   236  			es, err2 := parseErrors(errbuf.String())
   237  			if err2 != nil {
   238  				return input, errors.WithStack(err2)
   239  			}
   240  			if s, ok := replace(input, es); ok {
   241  				input = s
   242  				// To make it easier to break of an infinite loop, if replacements
   243  				// introduce new Clang errors.
   244  				time.Sleep(1 * time.Millisecond)
   245  				continue
   246  			}
   247  			return input, errors.Wrapf(err, "clang error: %v", errbuf)
   248  		}
   249  		return input, nil
   250  	}
   251  }
   252  
   253  // clangError represents an error reported by Clang.
   254  type clangError struct {
   255  	// Line and column number of the error.
   256  	line, col int
   257  	// Error category.
   258  	kind kind
   259  }
   260  
   261  // kind represents the set of Clang error categories.
   262  type kind uint
   263  
   264  // Clang error categories.
   265  const (
   266  	// error: must use 'struct' tag to refer to type ...
   267  	//
   268  	// Input before:
   269  	//
   270  	//    typedef struct_name type_name;
   271  	//
   272  	// Input after:
   273  	//
   274  	//    typedef struct struct_name type_name;
   275  	kindStructTagMissing kind = iota + 1
   276  	// error: must use 'enum' tag to refer to type
   277  	//
   278  	// Input before:
   279  	//
   280  	//    enum_name foo;
   281  	//
   282  	// Input after:
   283  	//
   284  	//    enum enum_name foo;
   285  	kindEnumTagMissing
   286  	// error: must use 'union' tag to refer to type ...
   287  	//
   288  	// Input before:
   289  	//
   290  	//    typedef union_name type_name;
   291  	//
   292  	// Input after:
   293  	//
   294  	//    typedef union union_name type_name;
   295  	kindUnionTagMissing
   296  	// error: unknown type name '_BYTE'; did you mean 'BYTE'
   297  	//
   298  	// Input before:
   299  	//
   300  	//    _BYTE foo;
   301  	//
   302  	// Input after:
   303  	//
   304  	//    BYTE foo;
   305  	kindByteTypeName
   306  	// error: parameter name omitted
   307  	//
   308  	// Input before:
   309  	//
   310  	//    void f(int, int) {}
   311  	//
   312  	// Input after:
   313  	//
   314  	//    void f(int a1, int a2) {}
   315  	kindParamNameMissing
   316  )
   317  
   318  var (
   319  	reError = regexp.MustCompile(`<stdin>:([0-9]+):([0-9]+): (error: [^\n]+)`)
   320  )
   321  
   322  // parseErrors parses the error output reported by Clang.
   323  func parseErrors(errbuf string) ([]clangError, error) {
   324  	var es []clangError
   325  	lines := strings.Split(errbuf, "\n")
   326  	for _, line := range lines {
   327  		if !(strings.HasPrefix(line, "<stdin>:") && strings.Contains(line, " error: ")) {
   328  			continue
   329  		}
   330  		subs := reError.FindStringSubmatch(line)
   331  		if subs == nil {
   332  			return nil, errors.Errorf("unable to locate Clang error in line `%v`", line)
   333  		}
   334  		// Parse line number.
   335  		l, err := strconv.Atoi(subs[1])
   336  		if err != nil {
   337  			return nil, errors.WithStack(err)
   338  		}
   339  		// Parse column number.
   340  		c, err := strconv.Atoi(subs[2])
   341  		if err != nil {
   342  			return nil, errors.WithStack(err)
   343  		}
   344  		e := clangError{line: l - 1, col: c - 1}
   345  		// Parse error message.
   346  		msg := subs[3]
   347  		switch {
   348  		case strings.HasPrefix(msg, "error: must use 'struct' tag to refer to type"):
   349  			e.kind = kindStructTagMissing
   350  		case strings.HasPrefix(msg, "error: must use 'enum' tag to refer to type"):
   351  			e.kind = kindEnumTagMissing
   352  		case strings.HasPrefix(msg, "error: must use 'union' tag to refer to type"):
   353  			e.kind = kindUnionTagMissing
   354  		case strings.HasPrefix(msg, "error: unknown type name '_BYTE'; did you mean 'BYTE'"):
   355  			e.kind = kindByteTypeName
   356  		case strings.HasPrefix(msg, "error: parameter name omitted"):
   357  			e.kind = kindParamNameMissing
   358  		default:
   359  			// Skip unknown Clang error category.
   360  			continue
   361  			//return nil, errors.Errorf("unable to locate error category for Clang error `%v`", msg)
   362  		}
   363  		es = append(es, e)
   364  	}
   365  	return es, nil
   366  }
   367  
   368  // replace fixes the syntax errors identified by Clang in the given input C
   369  // header. The boolean return value indicates that a replacement was made.
   370  func replace(input string, es []clangError) (string, bool) {
   371  	fixed := false
   372  	lines := strings.Split(input, "\n")
   373  	lineFixed := make(map[int]bool)
   374  	for _, e := range es {
   375  		i := e.line
   376  		if lineFixed[i] {
   377  			// Only fix one error per line at the time.
   378  			continue
   379  		}
   380  		line := lines[i]
   381  		switch e.kind {
   382  		case kindStructTagMissing:
   383  			dbg.Printf("replacement made at line %d: kindStructTagMissing", i)
   384  			// insert `struct `
   385  			line = line[:e.col] + "struct " + line[e.col:]
   386  			fixed = true
   387  			lineFixed[i] = true
   388  		case kindEnumTagMissing:
   389  			dbg.Printf("replacement made at line %d: kindEnumTagMissing", i)
   390  			// insert `enum `
   391  			line = line[:e.col] + "enum " + line[e.col:]
   392  			fixed = true
   393  			lineFixed[i] = true
   394  		case kindUnionTagMissing:
   395  			dbg.Printf("replacement made at line %d: kindUnionTagMissing", i)
   396  			// insert `union `
   397  			line = line[:e.col] + "union " + line[e.col:]
   398  			fixed = true
   399  			lineFixed[i] = true
   400  		case kindByteTypeName:
   401  			dbg.Printf("replacement made at line %d: kindByteTypeName", i)
   402  			// replace `_BYTE` with `BYTE`
   403  			line = line[:e.col] + line[e.col+1:]
   404  			fixed = true
   405  			lineFixed[i] = true
   406  		case kindParamNameMissing:
   407  			dbg.Printf("replacement made at line %d: kindParamNameMissing", i)
   408  			// replace `_BYTE` with `BYTE`
   409  			paramName := fmt.Sprintf(" a%d", e.col)
   410  			line = line[:e.col] + paramName + line[e.col:]
   411  			fixed = true
   412  			lineFixed[i] = true
   413  		default:
   414  			panic(fmt.Errorf("support for Clang error kind %v not yet implemented", e.kind))
   415  		}
   416  		lines[i] = line
   417  	}
   418  	return strings.Join(lines, "\n"), fixed
   419  }