github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/hfix/main.go (about) 1 // The hfix tool fixes the syntax of IDA generated C header files (*.h -> *.h). 2 package main 3 4 import ( 5 "bytes" 6 "flag" 7 "fmt" 8 "io/ioutil" 9 "log" 10 "os" 11 "os/exec" 12 "regexp" 13 "strconv" 14 "strings" 15 "time" 16 17 "github.com/mewkiz/pkg/term" 18 "github.com/pkg/errors" 19 ) 20 21 // dbg represents a logger with the "hfix:" prefix, which logs debug messages to 22 // standard error. 23 var dbg = log.New(os.Stderr, term.BlueBold("hfix:")+" ", 0) 24 25 func usage() { 26 const use = ` 27 Fix the syntax of IDA generated C header files (*.h -> *.h). 28 29 Usage: 30 31 hfix [OPTION]... FILE.h 32 33 Flags: 34 ` 35 fmt.Fprint(os.Stderr, use[1:]) 36 flag.PrintDefaults() 37 } 38 39 func main() { 40 // Parse command line flags. 41 var ( 42 // output specifies the output path. 43 output string 44 // partial specifies whether to store partially fixed header files. 45 partial bool 46 // pre specifies whether to store preprocessed header files. 47 pre bool 48 // quiet specifies whether to suppress non-error messages. 49 quiet bool 50 ) 51 flag.StringVar(&output, "o", "", "output path") 52 flag.BoolVar(&partial, "partial", false, "store partially fixed header files") 53 flag.BoolVar(&pre, "pre", false, "store preprocessed header files") 54 flag.BoolVar(&quiet, "q", false, "suppress non-error messages") 55 flag.Parse() 56 flag.Usage = usage 57 flag.Parse() 58 if flag.NArg() != 1 { 59 flag.Usage() 60 os.Exit(1) 61 } 62 hPath := flag.Arg(0) 63 // Mute debug messages if `-q` is set. 64 if quiet { 65 dbg.SetOutput(ioutil.Discard) 66 } 67 68 // Read file. 69 buf, err := ioutil.ReadFile(hPath) 70 if err != nil { 71 log.Fatalf("%+v", err) 72 } 73 74 // Preprocess input. 75 input := string(buf) 76 input = preprocess(input) 77 if pre { 78 if err := ioutil.WriteFile("pre.h", []byte(input), 0644); err != nil { 79 log.Fatalf("%+v", err) 80 } 81 } 82 83 // Fix syntax of the IDA generated C header file. 84 input, err = fix(input) 85 if err != nil { 86 if partial { 87 if err := ioutil.WriteFile("partial.h", []byte(input), 0644); err != nil { 88 log.Fatalf("%+v", err) 89 } 90 } 91 log.Fatalf("%+v", err) 92 } 93 94 // Store C header output. 95 w := os.Stdout 96 if len(output) > 0 { 97 f, err := os.Create(output) 98 if err != nil { 99 log.Fatal(err) 100 } 101 defer f.Close() 102 w = f 103 } 104 if _, err := w.WriteString(input); err != nil { 105 log.Fatalf("%+v", err) 106 } 107 } 108 109 var ( 110 reEnumSizeSpec = regexp.MustCompile(`(enum [a-zA-Z0-9_$]+) : [a-zA-Z0-9_$]+`) 111 reEmptyEnum = regexp.MustCompile(`enum [a-zA-Z0-9_$]+[\n]{[\n]};[\n]`) 112 reAlign = regexp.MustCompile(`__declspec[(]align[(][0-9]+[)][)] `) 113 // Input before: 114 // 115 // struct MessageVtbl 116 // { 117 // HRESULT (__stdcall *QueryInterface)(#277 *This, const IID *const riid, void **ppvObject); 118 // 119 // Input after: 120 // 121 // struct MessageVtbl 122 // { 123 // HRESULT (__stdcall *QueryInterface)(MessageVtbl *This, const IID *const riid, void **ppvObject); 124 reBrokenTypeRef = regexp.MustCompile(`struct ([a-zA-Z0-9_$]+)[\n]{[\n][^\n#]+(#[0-9]+) [*]This[^\n]+`) 125 // Input before: 126 // 127 // #pragma pack(push, 8) 128 // #pragma pack(pop) 129 // 130 // Input after: 131 // 132 // empty 133 rePragmaPack = regexp.MustCompile(`#pragma pack[(][^)]+[)]`) 134 // Input before: 135 // 136 // struct struct_name::$A707B71C060B6D10F73A71917EA8473F::$AA04DEB0C6383F89F13D312A174572A9 137 // { 138 // 139 // Input after: 140 // 141 // empty 142 reDupTypeDef = regexp.MustCompile(`[\n](struct|union) ([a-zA-Z0-9_$]+)::[^\n]+[\n]{(.|[\n])+?;[\n][\n]`) 143 // Input before: 144 // 145 // IDirectDrawClipper::IDirectDrawClipperVtbl 146 // 147 // Input after: 148 // 149 // IDirectDrawClipperVtbl 150 reTypeNamespace = regexp.MustCompile(`([a-zA-Z0-9_$]+::)+([a-zA-Z0-9_$]+) `) 151 // Input before: 152 // 153 // enum enum_name 154 // { 155 // AAA = 0, 156 // }; 157 // 158 // Input after: 159 // 160 // enum enum_name 161 // { 162 // AAA = 0, 163 // }; 164 // 165 // typedef enum enum_name enum_name; 166 reTypedefEnum = regexp.MustCompile(`enum ([a-zA-Z0-9_$]+)[\n]{[^}]*};`) 167 // Input before: 168 // 169 // struct struct_name 170 // { 171 // int x; 172 // }; 173 // 174 // Input after: 175 // 176 // struct struct_name 177 // { 178 // int x; 179 // }; 180 // 181 // typedef struct struct_name struct_name; 182 reTypedefStruct = regexp.MustCompile(`struct ([a-zA-Z0-9_$]+)[\n]{(.|[\n])*?[\n]};`) 183 ) 184 185 // preprocess fixes simple syntax errors in the given input C header. 186 func preprocess(input string) string { 187 // Drop enum type size specifiers. 188 input = reEnumSizeSpec.ReplaceAllString(input, "$1") 189 // Remove empty enums. 190 input = reEmptyEnum.ReplaceAllString(input, "") 191 // Drop alignment attribute. 192 input = reAlign.ReplaceAllString(input, "") 193 // Drop __unaligned attribute. 194 input = strings.Replace(input, "struct __unaligned ", "struct ", -1) 195 // Fix broken type names in structs. 196 for { 197 subs := reBrokenTypeRef.FindAllStringSubmatch(input, 1) 198 if subs == nil { 199 break 200 } 201 for _, sub := range subs { 202 // struct type name. 203 typ := sub[1] + " " 204 // #ID 205 id := sub[2] + " " 206 input = strings.Replace(input, id, typ, -1) 207 } 208 } 209 // Drop #pragma pack directives. 210 input = rePragmaPack.ReplaceAllString(input, "") 211 // Drop duplicate struct and union type definitions (identified with hash). 212 input = reDupTypeDef.ReplaceAllString(input, "\n") 213 // Drop namespace in type names. 214 input = reTypeNamespace.ReplaceAllString(input, "$2") 215 // Insert enum type definitions. 216 input = reTypedefEnum.ReplaceAllString(input, "$0\n\ntypedef enum $1 $1;\n") 217 // Insert struct type definitions. 218 input = reTypedefStruct.ReplaceAllString(input, "$0\n\ntypedef struct $1 $1;\n") 219 // Fix syntax of `noreturn` function attributes. 220 input = strings.Replace(input, " __noreturn ", " __attribute__((noreturn)) ", -1) 221 // Fix destructor method name. 222 input = strings.Replace(input, "type_info::`scalar deleting destructor'", "type_info_delete", -1) 223 // Fix constructor name. 224 input = strings.Replace(input, "type_info::~type_info", "type_info_create", -1) 225 return input 226 } 227 228 // fix fixes the syntax of the given IDA generated C header file. 229 func fix(input string) (string, error) { 230 for { 231 errbuf := &bytes.Buffer{} 232 cmd := exec.Command("clang", "-m32", "-x", "c-header", "-Wno-return-type", "-Wno-invalid-noreturn", "-ferror-limit=0", "-o", "-", "-") 233 cmd.Stdin = strings.NewReader(input) 234 cmd.Stderr = errbuf 235 if err := cmd.Run(); err != nil { 236 es, err2 := parseErrors(errbuf.String()) 237 if err2 != nil { 238 return input, errors.WithStack(err2) 239 } 240 if s, ok := replace(input, es); ok { 241 input = s 242 // To make it easier to break of an infinite loop, if replacements 243 // introduce new Clang errors. 244 time.Sleep(1 * time.Millisecond) 245 continue 246 } 247 return input, errors.Wrapf(err, "clang error: %v", errbuf) 248 } 249 return input, nil 250 } 251 } 252 253 // clangError represents an error reported by Clang. 254 type clangError struct { 255 // Line and column number of the error. 256 line, col int 257 // Error category. 258 kind kind 259 } 260 261 // kind represents the set of Clang error categories. 262 type kind uint 263 264 // Clang error categories. 265 const ( 266 // error: must use 'struct' tag to refer to type ... 267 // 268 // Input before: 269 // 270 // typedef struct_name type_name; 271 // 272 // Input after: 273 // 274 // typedef struct struct_name type_name; 275 kindStructTagMissing kind = iota + 1 276 // error: must use 'enum' tag to refer to type 277 // 278 // Input before: 279 // 280 // enum_name foo; 281 // 282 // Input after: 283 // 284 // enum enum_name foo; 285 kindEnumTagMissing 286 // error: must use 'union' tag to refer to type ... 287 // 288 // Input before: 289 // 290 // typedef union_name type_name; 291 // 292 // Input after: 293 // 294 // typedef union union_name type_name; 295 kindUnionTagMissing 296 // error: unknown type name '_BYTE'; did you mean 'BYTE' 297 // 298 // Input before: 299 // 300 // _BYTE foo; 301 // 302 // Input after: 303 // 304 // BYTE foo; 305 kindByteTypeName 306 // error: parameter name omitted 307 // 308 // Input before: 309 // 310 // void f(int, int) {} 311 // 312 // Input after: 313 // 314 // void f(int a1, int a2) {} 315 kindParamNameMissing 316 ) 317 318 var ( 319 reError = regexp.MustCompile(`<stdin>:([0-9]+):([0-9]+): (error: [^\n]+)`) 320 ) 321 322 // parseErrors parses the error output reported by Clang. 323 func parseErrors(errbuf string) ([]clangError, error) { 324 var es []clangError 325 lines := strings.Split(errbuf, "\n") 326 for _, line := range lines { 327 if !(strings.HasPrefix(line, "<stdin>:") && strings.Contains(line, " error: ")) { 328 continue 329 } 330 subs := reError.FindStringSubmatch(line) 331 if subs == nil { 332 return nil, errors.Errorf("unable to locate Clang error in line `%v`", line) 333 } 334 // Parse line number. 335 l, err := strconv.Atoi(subs[1]) 336 if err != nil { 337 return nil, errors.WithStack(err) 338 } 339 // Parse column number. 340 c, err := strconv.Atoi(subs[2]) 341 if err != nil { 342 return nil, errors.WithStack(err) 343 } 344 e := clangError{line: l - 1, col: c - 1} 345 // Parse error message. 346 msg := subs[3] 347 switch { 348 case strings.HasPrefix(msg, "error: must use 'struct' tag to refer to type"): 349 e.kind = kindStructTagMissing 350 case strings.HasPrefix(msg, "error: must use 'enum' tag to refer to type"): 351 e.kind = kindEnumTagMissing 352 case strings.HasPrefix(msg, "error: must use 'union' tag to refer to type"): 353 e.kind = kindUnionTagMissing 354 case strings.HasPrefix(msg, "error: unknown type name '_BYTE'; did you mean 'BYTE'"): 355 e.kind = kindByteTypeName 356 case strings.HasPrefix(msg, "error: parameter name omitted"): 357 e.kind = kindParamNameMissing 358 default: 359 // Skip unknown Clang error category. 360 continue 361 //return nil, errors.Errorf("unable to locate error category for Clang error `%v`", msg) 362 } 363 es = append(es, e) 364 } 365 return es, nil 366 } 367 368 // replace fixes the syntax errors identified by Clang in the given input C 369 // header. The boolean return value indicates that a replacement was made. 370 func replace(input string, es []clangError) (string, bool) { 371 fixed := false 372 lines := strings.Split(input, "\n") 373 lineFixed := make(map[int]bool) 374 for _, e := range es { 375 i := e.line 376 if lineFixed[i] { 377 // Only fix one error per line at the time. 378 continue 379 } 380 line := lines[i] 381 switch e.kind { 382 case kindStructTagMissing: 383 dbg.Printf("replacement made at line %d: kindStructTagMissing", i) 384 // insert `struct ` 385 line = line[:e.col] + "struct " + line[e.col:] 386 fixed = true 387 lineFixed[i] = true 388 case kindEnumTagMissing: 389 dbg.Printf("replacement made at line %d: kindEnumTagMissing", i) 390 // insert `enum ` 391 line = line[:e.col] + "enum " + line[e.col:] 392 fixed = true 393 lineFixed[i] = true 394 case kindUnionTagMissing: 395 dbg.Printf("replacement made at line %d: kindUnionTagMissing", i) 396 // insert `union ` 397 line = line[:e.col] + "union " + line[e.col:] 398 fixed = true 399 lineFixed[i] = true 400 case kindByteTypeName: 401 dbg.Printf("replacement made at line %d: kindByteTypeName", i) 402 // replace `_BYTE` with `BYTE` 403 line = line[:e.col] + line[e.col+1:] 404 fixed = true 405 lineFixed[i] = true 406 case kindParamNameMissing: 407 dbg.Printf("replacement made at line %d: kindParamNameMissing", i) 408 // replace `_BYTE` with `BYTE` 409 paramName := fmt.Sprintf(" a%d", e.col) 410 line = line[:e.col] + paramName + line[e.col:] 411 fixed = true 412 lineFixed[i] = true 413 default: 414 panic(fmt.Errorf("support for Clang error kind %v not yet implemented", e.kind)) 415 } 416 lines[i] = line 417 } 418 return strings.Join(lines, "\n"), fixed 419 }