github.com/Konstantin8105/c4go@v0.0.0-20240505174241-768bb1c65a51/preprocessor/preprocessor.go (about) 1 package preprocessor 2 3 import ( 4 "bufio" 5 "bytes" 6 "fmt" 7 "io/ioutil" 8 "os" 9 "os/exec" 10 "path/filepath" 11 "strings" 12 "text/scanner" 13 "unicode" 14 "unicode/utf8" 15 16 "github.com/Konstantin8105/c4go/util" 17 ) 18 19 // One simple part of preprocessor code 20 type entity struct { 21 positionInSource int 22 include string 23 other string 24 25 // Zero index of `lines` is look like that: 26 // # 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4 27 // After that 0 or more lines of codes 28 lines []*string 29 } 30 31 func (e *entity) parseComments(comments *[]Comment) { 32 var source bytes.Buffer 33 for i := range e.lines { 34 if i == 0 { 35 continue 36 } 37 source.Write([]byte(*e.lines[i])) 38 source.Write([]byte{'\n'}) 39 } 40 41 var s scanner.Scanner 42 s.Init(strings.NewReader(source.String())) 43 s.Mode = scanner.ScanComments 44 s.Filename = e.include 45 for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() { 46 if scanner.TokenString(tok) == "Comment" { 47 // parse multiline comments to single line comment 48 var lines []string 49 if s.TokenText()[1] == '*' { 50 lines = strings.Split(s.TokenText(), "\n") 51 lines[0] = strings.TrimLeft(lines[0], "/"+"*") 52 lines[len(lines)-1] = strings.TrimRight(lines[len(lines)-1], "*"+"/") 53 for i := range lines { 54 lines[i] = "/" + "/" + lines[i] 55 } 56 } else { 57 lines = append(lines, s.TokenText()) 58 } 59 60 // save comments 61 for _, l := range lines { 62 (*comments) = append(*comments, Comment{ 63 File: e.include, 64 Line: s.Position.Line + e.positionInSource - 1, 65 Comment: l, 66 }) 67 } 68 } 69 } 70 } 71 72 // isSame - check is Same entities 73 func (e *entity) isSame(x *entity) bool { 74 if e.include != x.include { 75 return false 76 } 77 if e.positionInSource != x.positionInSource { 78 return false 79 } 80 if e.other != x.other { 81 return false 82 } 83 if len(e.lines) != len(x.lines) { 84 return false 85 } 86 for k := range e.lines { 87 is := e.lines[k] 88 js := x.lines[k] 89 if len(*is) != len(*js) || *is != *js { 90 return false 91 } 92 } 93 return true 94 } 95 96 // Comment - position of line comment '//...' 97 type Comment struct { 98 File string 99 Line int 100 Comment string 101 } 102 103 // IncludeHeader - struct for C include header 104 type IncludeHeader struct { 105 HeaderName string 106 BaseHeaderName string 107 IsUserSource bool 108 } 109 110 // FilePP a struct with all information about preprocessor C code 111 type FilePP struct { 112 entities []entity 113 pp []byte 114 comments []Comment 115 includes []IncludeHeader 116 } 117 118 // NewFilePP create a struct FilePP with results of analyzing 119 // preprocessor C code 120 func NewFilePP(inputFiles, clangFlags []string, cppCode bool) ( 121 f FilePP, err error) { 122 defer func() { 123 if err != nil { 124 err = fmt.Errorf("preprocess error : %v", err) 125 } 126 }() 127 128 var allItems []entity 129 130 allItems, err = analyzeFiles(inputFiles, clangFlags, cppCode) 131 if err != nil { 132 return 133 } 134 135 // Generate list of user files 136 userSource := map[string]bool{} 137 var us []string 138 us, err = GetIncludeListWithUserSource(inputFiles, clangFlags, cppCode) 139 if err != nil { 140 return 141 } 142 var all []string 143 all, err = GetIncludeFullList(inputFiles, clangFlags, cppCode) 144 if err != nil { 145 return 146 } 147 148 // Generate C header list 149 f.includes = generateIncludeList(us, all) 150 151 for j := range us { 152 userSource[us[j]] = true 153 } 154 155 // Merge the entities 156 var lines []string 157 for i := range allItems { 158 // If found same part of preprocess code, then 159 // don't include in result buffer for transpiling 160 // for avoid duplicate of code 161 var found bool 162 for j := 0; j < i; j++ { 163 if allItems[i].isSame(&allItems[j]) { 164 found = true 165 break 166 } 167 } 168 if found { 169 continue 170 } 171 // Parse comments only for user sources 172 var isUserSource bool 173 if userSource[allItems[i].include] { 174 isUserSource = true 175 } 176 if allItems[i].include[0] == '.' && 177 allItems[i].include[1] == '/' && 178 userSource[allItems[i].include[2:]] { 179 isUserSource = true 180 } 181 if isUserSource { 182 allItems[i].parseComments(&f.comments) 183 } 184 185 // Parameter "other" is not included for avoid like: 186 // ./tests/multi/head.h:4:28: error: invalid line marker flag '2': \ 187 // cannot pop empty include stack 188 // # 2 "./tests/multi/main.c" 2 189 // ^ 190 header := fmt.Sprintf("# %d \"%s\"", 191 allItems[i].positionInSource, allItems[i].include) 192 lines = append(lines, header) 193 if len(allItems[i].lines) > 0 { 194 for ii, l := range allItems[i].lines { 195 if ii == 0 { 196 continue 197 } 198 lines = append(lines, *l) 199 } 200 } 201 f.entities = append(f.entities, allItems[i]) 202 } 203 f.pp = ([]byte)(strings.Join(lines, "\n")) 204 205 { 206 for i := range f.includes { 207 f.includes[i].BaseHeaderName = f.includes[i].HeaderName 208 } 209 // correct include names only for external Includes 210 var ier []string 211 ier, err = GetIeraphyIncludeList(inputFiles, clangFlags, cppCode) 212 213 // cut lines without pattern ". " 214 again: 215 for i := range ier { 216 remove := false 217 if len(ier[i]) == 0 { 218 remove = true 219 } else if ier[i][0] != '.' { 220 remove = true 221 } else if index := strings.Index(ier[i], ". "); index < 0 { 222 remove = true 223 } 224 if remove { 225 ier = append(ier[:i], ier[i+1:]...) 226 goto again 227 } 228 } 229 230 separator := func(line string) (level int, name string) { 231 for i := range line { 232 if line[i] == ' ' { 233 level = i 234 break 235 } 236 } 237 name = line[level+1:] 238 return 239 } 240 241 for i := range f.includes { 242 if f.includes[i].IsUserSource { 243 continue 244 } 245 // find position in Include ierarphy 246 var pos int = -1 247 for j := range ier { 248 if strings.Contains(ier[j], f.includes[i].BaseHeaderName) { 249 pos = j 250 break 251 } 252 } 253 if pos < 0 { 254 continue 255 } 256 257 // find level of line 258 level, _ := separator(ier[pos]) 259 260 for j := pos; j >= 0; j-- { 261 levelJ, nameJ := separator(ier[j]) 262 if levelJ >= level { 263 continue 264 } 265 if f.IsUserSource(nameJ) { 266 break 267 } 268 f.includes[i].BaseHeaderName = nameJ 269 level = levelJ 270 } 271 } 272 } 273 return 274 } 275 276 // GetSource return source of preprocessor C code 277 func (f FilePP) GetSource() []byte { 278 return f.pp 279 } 280 281 // GetComments return comments in preprocessor C code 282 func (f FilePP) GetComments() []Comment { 283 return f.comments 284 } 285 286 // GetIncludeFiles return list of '#include' file in C sources 287 func (f FilePP) GetIncludeFiles() []IncludeHeader { 288 return f.includes 289 } 290 291 // IsUserSource get is it source from user 292 func (f FilePP) IsUserSource(in string) bool { 293 for i := range f.includes { 294 if !f.includes[i].IsUserSource { 295 continue 296 } 297 if !strings.Contains(in, f.includes[i].HeaderName) { 298 continue 299 } 300 return true 301 } 302 return false 303 } 304 305 // GetBaseInclude return base include 306 func (f FilePP) GetBaseInclude(in string) string { 307 for i := range f.includes { 308 if in == f.includes[i].HeaderName { 309 return f.includes[i].BaseHeaderName 310 } 311 } 312 return in 313 } 314 315 // GetSnippet return short part of code inside preprocessor C code 316 func (f FilePP) GetSnippet(file string, 317 line, lineEnd int, 318 col, colEnd int) ( 319 buffer []byte, err error) { 320 defer func() { 321 if err != nil { 322 err = fmt.Errorf("GetSnippet error for `%v` {%v,%v}{%v,%v}. %v", 323 file, 324 line, lineEnd, 325 col, colEnd, 326 err) 327 } 328 }() 329 330 if lineEnd == 0 { 331 lineEnd = line 332 } 333 334 // replace 2,3,4... byte of rune to one byte symbol 335 var t string 336 for _, r := range file { 337 if utf8.RuneLen(r) > 1 { 338 t += "_" 339 continue 340 } 341 t += string(r) 342 } 343 file = t 344 345 again: 346 for i := range f.entities { 347 for j := range f.entities[i].include { 348 if f.entities[i].include[j] != '\\' { 349 continue 350 } 351 if j+3 > len(f.entities[i].include)-1 { 352 continue 353 } 354 wrongSymbol := false 355 var isSymbol2 bool 356 runes := f.entities[i].include[j+1 : j+4] 357 for y, r := range runes { 358 if !unicode.IsDigit(r) { 359 wrongSymbol = true 360 } 361 if y == 0 && r == '2' { 362 isSymbol2 = true 363 } 364 } 365 if !wrongSymbol { 366 if isSymbol2 { 367 f.entities[i].include = f.entities[i].include[:j] + "_" + 368 f.entities[i].include[j+4:] 369 } else { 370 f.entities[i].include = f.entities[i].include[:j] + 371 f.entities[i].include[j+4:] 372 } 373 goto again 374 } 375 } 376 } 377 378 for i := range f.entities { 379 if f.entities[i].include != file { 380 continue 381 } 382 lineEnd := lineEnd 383 if len(f.entities[i].lines)+f.entities[i].positionInSource < lineEnd { 384 continue 385 } 386 l := f.entities[i].lines[lineEnd+1-f.entities[i].positionInSource] 387 if col == 0 && colEnd == 0 { 388 return []byte(*l), nil 389 } 390 if colEnd == 0 { 391 if col-1 < len([]byte(*l)) { 392 return []byte((*l)[col-1:]), nil 393 } 394 err = fmt.Errorf("empty snippet") 395 return 396 } 397 if col <= 0 { 398 col = 1 399 } 400 if colEnd > len((*l)) { 401 return []byte((*l)[col-1:]), nil 402 } 403 return []byte((*l)[col-1 : colEnd]), nil 404 } 405 406 err = fmt.Errorf("snippet is not found") 407 return 408 } 409 410 // analyzeFiles - analyze single file and separation preprocessor code to part 411 func analyzeFiles(inputFiles, clangFlags []string, cppCode bool) ( 412 items []entity, err error) { 413 // See : https://clang.llvm.org/docs/CommandGuide/clang.html 414 // clang -E <file> Run the preprocessor stage. 415 var out bytes.Buffer 416 out, err = getPreprocessSources(inputFiles, clangFlags, cppCode) 417 if err != nil { 418 return 419 } 420 421 // Parsing preprocessor file 422 r := bytes.NewReader(out.Bytes()) 423 scanner := bufio.NewScanner(r) 424 scanner.Split(bufio.ScanLines) 425 // counter - get position of line 426 var counter int 427 // item, items - entity of preprocess file 428 var item *entity 429 430 reg := util.GetRegex("# (\\d+) \".*\".*") 431 432 for scanner.Scan() { 433 line := scanner.Text() 434 if reg.MatchString(line) { 435 if item != (*entity)(nil) { 436 items = append(items, *item) 437 } 438 item, err = parseIncludePreprocessorLine(line) 439 if err != nil { 440 err = fmt.Errorf("cannot parse line : %s with error: %s", line, err) 441 return 442 } 443 if item.positionInSource == 0 { 444 // cannot by less 1 for avoid problem with 445 // identification of "0" AST base element 446 item.positionInSource = 1 447 } 448 item.lines = make([]*string, 0) 449 } 450 counter++ 451 item.lines = append(item.lines, &line) 452 } 453 if item != (*entity)(nil) { 454 items = append(items, *item) 455 } 456 return 457 } 458 459 // See : https://clang.llvm.org/docs/CommandGuide/clang.html 460 // clang -E <file> Run the preprocessor stage. 461 func getPreprocessSources(inputFiles, clangFlags []string, cppCode bool) ( 462 out bytes.Buffer, err error) { 463 // get temp dir 464 dir, err := ioutil.TempDir("", "c4go-union") 465 if err != nil { 466 return 467 } 468 defer func() { _ = os.RemoveAll(dir) }() 469 470 // file name union file 471 var unionFileName = dir + "/" + "unionFileName.c" 472 473 // create a body for union file 474 var unionBody string 475 for i := range inputFiles { 476 var absPath string 477 absPath, err = filepath.Abs(inputFiles[i]) 478 if err != nil { 479 return 480 } 481 unionBody += fmt.Sprintf("#include \"%s\"\n", absPath) 482 } 483 484 // write a union file 485 err = ioutil.WriteFile(unionFileName, []byte(unionBody), 0644) 486 if err != nil { 487 return 488 } 489 490 // Add open source defines 491 clangFlags = append(clangFlags, "-D_GNU_SOURCE") 492 493 // preprocessor clang 494 var stderr bytes.Buffer 495 496 var args []string 497 args = append(args, "-E", "-C") 498 args = append(args, clangFlags...) 499 args = append(args, unionFileName) // All inputFiles 500 501 var outFile bytes.Buffer 502 var cmd *exec.Cmd 503 504 compiler, compilerFlag := Compiler(cppCode) 505 args = append(compilerFlag, args...) 506 cmd = exec.Command(compiler, args...) 507 508 cmd.Stdout = &outFile 509 cmd.Stderr = &stderr 510 err = cmd.Run() 511 if err != nil { 512 err = fmt.Errorf("preprocess for file: %v\nfailed: %v\nStdErr = %v", inputFiles, err, stderr.String()) 513 return 514 } 515 _, err = out.Write(outFile.Bytes()) 516 if err != nil { 517 return 518 } 519 520 return 521 } 522 523 func generateIncludeList(userList, allList []string) ( 524 includes []IncludeHeader) { 525 526 for i := range allList { 527 var isUser bool 528 for j := range userList { 529 if allList[i] == userList[j] { 530 isUser = true 531 break 532 } 533 } 534 includes = append(includes, IncludeHeader{ 535 HeaderName: allList[i], 536 IsUserSource: isUser, 537 }) 538 } 539 540 return 541 } 542 543 // GetIncludeListWithUserSource - Get list of include files 544 // Example: 545 // $ clang -MM -c exit.c 546 // exit.o: exit.c tests.h 547 func GetIncludeListWithUserSource(inputFiles, clangFlags []string, cppCode bool) ( 548 lines []string, err error) { 549 var out string 550 out, err = getIncludeList(inputFiles, clangFlags, []string{"-MM"}, cppCode) 551 if err != nil { 552 return 553 } 554 return parseIncludeList(out) 555 } 556 557 // GetIncludeFullList - Get full list of include files 558 // Example: 559 // $ clang -M -c triangle.c 560 // 561 // triangle.o: triangle.c /usr/include/stdio.h /usr/include/features.h \ 562 // /usr/include/stdc-predef.h /usr/include/x86_64-linux-gnu/sys/cdefs.h \ 563 // /usr/include/x86_64-linux-gnu/bits/wordsize.h \ 564 // /usr/include/x86_64-linux-gnu/gnu/stubs.h \ 565 // /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \ 566 // / ........ and other 567 func GetIncludeFullList(inputFiles, clangFlags []string, cppCode bool) ( 568 lines []string, err error) { 569 var out string 570 out, err = getIncludeList(inputFiles, clangFlags, []string{"-M"}, cppCode) 571 if err != nil { 572 return 573 } 574 return parseIncludeList(out) 575 } 576 577 // GetIeraphyIncludeList - Get list of include files in ierarphy 578 // Example: 579 // clang -MM -H ./tests/math.c 580 // . ./tests/tests.h 581 // .. /usr/include/string.h 582 // ... /usr/include/features.h 583 // .... /usr/include/stdc-predef.h 584 // .... /usr/include/x86_64-linux-gnu/sys/cdefs.h 585 // ..... /usr/include/x86_64-linux-gnu/bits/wordsize.h 586 // .... /usr/include/x86_64-linux-gnu/gnu/stubs.h 587 // ..... /usr/include/x86_64-linux-gnu/gnu/stubs-64.h 588 // ... /usr/lib/llvm-6.0/lib/clang/6.0.0/include/stddef.h 589 // ... /usr/include/xlocale.h 590 // .. /usr/include/math.h 591 // ... /usr/include/x86_64-linux-gnu/bits/math-vector.h 592 func GetIeraphyIncludeList(inputFiles, clangFlags []string, cppCode bool) ( 593 lines []string, err error) { 594 var out string 595 out, err = getIncludeList(inputFiles, clangFlags, []string{"-MM", "-H"}, cppCode) 596 if err != nil { 597 return 598 } 599 return strings.Split(out, "\n"), nil 600 } 601 602 // getIncludeList return stdout lines 603 func getIncludeList(inputFiles, clangFlags []string, flag []string, cppCode bool) ( 604 _ string, err error) { 605 defer func() { 606 if err != nil { 607 err = fmt.Errorf("cannot get Include List : %v", err) 608 } 609 }() 610 var out bytes.Buffer 611 var stderr bytes.Buffer 612 var args []string 613 for i := range inputFiles { 614 inputFiles[i], err = filepath.Abs(inputFiles[i]) 615 if err != nil { 616 return 617 } 618 } 619 args = append(args, flag...) 620 args = append(args, "-c") 621 args = append(args, inputFiles...) 622 args = append(args, clangFlags...) 623 624 defer func() { 625 if err != nil { 626 fmt.Errorf("used next arguments: `%v`. %v", args, err) 627 } 628 }() 629 630 var cmd *exec.Cmd 631 compiler, compilerFlag := Compiler(cppCode) 632 args = append(compilerFlag, args...) 633 cmd = exec.Command(compiler, args...) 634 635 cmd.Stdout = &out 636 cmd.Stderr = &stderr 637 err = cmd.Run() 638 if err != nil { 639 err = fmt.Errorf("preprocess failed: %v\nStdErr = %v", err, stderr.String()) 640 return 641 } 642 643 // add stderr to out, for flags "-MM -H" 644 out.WriteString(stderr.String()) 645 646 // remove warnings 647 // ... /usr/lib/llvm-4.0/bin/../lib/clang/4.0.1/include/stddef.h 648 // .. /usr/include/x86_64-linux-gnu/bits/stdlib-float.h 649 // /home/konstantin/go/src/github.com/Konstantin8105/c4go/testdata/kilo/debug.kilo.c:81:9: warning: '_BSD_SOURCE' macro redefined [-Wmacro-redefined] 650 // #define _BSD_SOURCE 651 // ^ 652 // /usr/include/features.h:188:10: note: previous definition is here 653 // # define _BSD_SOURCE 1 654 // ^ 655 lines := strings.Split(out.String(), "\n") 656 for i := range lines { 657 if strings.Contains(lines[i], "warning:") { 658 lines = lines[:i] 659 break 660 } 661 } 662 663 return strings.Join(lines, "\n"), err 664 }