github.com/Konstantin8105/c4go@v0.0.0-20240505174241-768bb1c65a51/preprocessor/preprocessor.go (about)

     1  package preprocessor
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"os"
     9  	"os/exec"
    10  	"path/filepath"
    11  	"strings"
    12  	"text/scanner"
    13  	"unicode"
    14  	"unicode/utf8"
    15  
    16  	"github.com/Konstantin8105/c4go/util"
    17  )
    18  
    19  // One simple part of preprocessor code
    20  type entity struct {
    21  	positionInSource int
    22  	include          string
    23  	other            string
    24  
    25  	// Zero index of `lines` is look like that:
    26  	// # 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4
    27  	// After that 0 or more lines of codes
    28  	lines []*string
    29  }
    30  
    31  func (e *entity) parseComments(comments *[]Comment) {
    32  	var source bytes.Buffer
    33  	for i := range e.lines {
    34  		if i == 0 {
    35  			continue
    36  		}
    37  		source.Write([]byte(*e.lines[i]))
    38  		source.Write([]byte{'\n'})
    39  	}
    40  
    41  	var s scanner.Scanner
    42  	s.Init(strings.NewReader(source.String()))
    43  	s.Mode = scanner.ScanComments
    44  	s.Filename = e.include
    45  	for tok := s.Scan(); tok != scanner.EOF; tok = s.Scan() {
    46  		if scanner.TokenString(tok) == "Comment" {
    47  			// parse multiline comments to single line comment
    48  			var lines []string
    49  			if s.TokenText()[1] == '*' {
    50  				lines = strings.Split(s.TokenText(), "\n")
    51  				lines[0] = strings.TrimLeft(lines[0], "/"+"*")
    52  				lines[len(lines)-1] = strings.TrimRight(lines[len(lines)-1], "*"+"/")
    53  				for i := range lines {
    54  					lines[i] = "/" + "/" + lines[i]
    55  				}
    56  			} else {
    57  				lines = append(lines, s.TokenText())
    58  			}
    59  
    60  			// save comments
    61  			for _, l := range lines {
    62  				(*comments) = append(*comments, Comment{
    63  					File:    e.include,
    64  					Line:    s.Position.Line + e.positionInSource - 1,
    65  					Comment: l,
    66  				})
    67  			}
    68  		}
    69  	}
    70  }
    71  
    72  // isSame - check is Same entities
    73  func (e *entity) isSame(x *entity) bool {
    74  	if e.include != x.include {
    75  		return false
    76  	}
    77  	if e.positionInSource != x.positionInSource {
    78  		return false
    79  	}
    80  	if e.other != x.other {
    81  		return false
    82  	}
    83  	if len(e.lines) != len(x.lines) {
    84  		return false
    85  	}
    86  	for k := range e.lines {
    87  		is := e.lines[k]
    88  		js := x.lines[k]
    89  		if len(*is) != len(*js) || *is != *js {
    90  			return false
    91  		}
    92  	}
    93  	return true
    94  }
    95  
    96  // Comment - position of line comment '//...'
    97  type Comment struct {
    98  	File    string
    99  	Line    int
   100  	Comment string
   101  }
   102  
   103  // IncludeHeader - struct for C include header
   104  type IncludeHeader struct {
   105  	HeaderName     string
   106  	BaseHeaderName string
   107  	IsUserSource   bool
   108  }
   109  
   110  // FilePP a struct with all information about preprocessor C code
   111  type FilePP struct {
   112  	entities []entity
   113  	pp       []byte
   114  	comments []Comment
   115  	includes []IncludeHeader
   116  }
   117  
   118  // NewFilePP create a struct FilePP with results of analyzing
   119  // preprocessor C code
   120  func NewFilePP(inputFiles, clangFlags []string, cppCode bool) (
   121  	f FilePP, err error) {
   122  	defer func() {
   123  		if err != nil {
   124  			err = fmt.Errorf("preprocess error : %v", err)
   125  		}
   126  	}()
   127  
   128  	var allItems []entity
   129  
   130  	allItems, err = analyzeFiles(inputFiles, clangFlags, cppCode)
   131  	if err != nil {
   132  		return
   133  	}
   134  
   135  	// Generate list of user files
   136  	userSource := map[string]bool{}
   137  	var us []string
   138  	us, err = GetIncludeListWithUserSource(inputFiles, clangFlags, cppCode)
   139  	if err != nil {
   140  		return
   141  	}
   142  	var all []string
   143  	all, err = GetIncludeFullList(inputFiles, clangFlags, cppCode)
   144  	if err != nil {
   145  		return
   146  	}
   147  
   148  	// Generate C header list
   149  	f.includes = generateIncludeList(us, all)
   150  
   151  	for j := range us {
   152  		userSource[us[j]] = true
   153  	}
   154  
   155  	// Merge the entities
   156  	var lines []string
   157  	for i := range allItems {
   158  		// If found same part of preprocess code, then
   159  		// don't include in result buffer for transpiling
   160  		// for avoid duplicate of code
   161  		var found bool
   162  		for j := 0; j < i; j++ {
   163  			if allItems[i].isSame(&allItems[j]) {
   164  				found = true
   165  				break
   166  			}
   167  		}
   168  		if found {
   169  			continue
   170  		}
   171  		// Parse comments only for user sources
   172  		var isUserSource bool
   173  		if userSource[allItems[i].include] {
   174  			isUserSource = true
   175  		}
   176  		if allItems[i].include[0] == '.' &&
   177  			allItems[i].include[1] == '/' &&
   178  			userSource[allItems[i].include[2:]] {
   179  			isUserSource = true
   180  		}
   181  		if isUserSource {
   182  			allItems[i].parseComments(&f.comments)
   183  		}
   184  
   185  		// Parameter "other" is not included for avoid like:
   186  		// ./tests/multi/head.h:4:28: error: invalid line marker flag '2': \
   187  		// cannot pop empty include stack
   188  		// # 2 "./tests/multi/main.c" 2
   189  		//                            ^
   190  		header := fmt.Sprintf("# %d \"%s\"",
   191  			allItems[i].positionInSource, allItems[i].include)
   192  		lines = append(lines, header)
   193  		if len(allItems[i].lines) > 0 {
   194  			for ii, l := range allItems[i].lines {
   195  				if ii == 0 {
   196  					continue
   197  				}
   198  				lines = append(lines, *l)
   199  			}
   200  		}
   201  		f.entities = append(f.entities, allItems[i])
   202  	}
   203  	f.pp = ([]byte)(strings.Join(lines, "\n"))
   204  
   205  	{
   206  		for i := range f.includes {
   207  			f.includes[i].BaseHeaderName = f.includes[i].HeaderName
   208  		}
   209  		// correct include names only for external Includes
   210  		var ier []string
   211  		ier, err = GetIeraphyIncludeList(inputFiles, clangFlags, cppCode)
   212  
   213  		// cut lines without pattern ". "
   214  	again:
   215  		for i := range ier {
   216  			remove := false
   217  			if len(ier[i]) == 0 {
   218  				remove = true
   219  			} else if ier[i][0] != '.' {
   220  				remove = true
   221  			} else if index := strings.Index(ier[i], ". "); index < 0 {
   222  				remove = true
   223  			}
   224  			if remove {
   225  				ier = append(ier[:i], ier[i+1:]...)
   226  				goto again
   227  			}
   228  		}
   229  
   230  		separator := func(line string) (level int, name string) {
   231  			for i := range line {
   232  				if line[i] == ' ' {
   233  					level = i
   234  					break
   235  				}
   236  			}
   237  			name = line[level+1:]
   238  			return
   239  		}
   240  
   241  		for i := range f.includes {
   242  			if f.includes[i].IsUserSource {
   243  				continue
   244  			}
   245  			// find position in Include ierarphy
   246  			var pos int = -1
   247  			for j := range ier {
   248  				if strings.Contains(ier[j], f.includes[i].BaseHeaderName) {
   249  					pos = j
   250  					break
   251  				}
   252  			}
   253  			if pos < 0 {
   254  				continue
   255  			}
   256  
   257  			// find level of line
   258  			level, _ := separator(ier[pos])
   259  
   260  			for j := pos; j >= 0; j-- {
   261  				levelJ, nameJ := separator(ier[j])
   262  				if levelJ >= level {
   263  					continue
   264  				}
   265  				if f.IsUserSource(nameJ) {
   266  					break
   267  				}
   268  				f.includes[i].BaseHeaderName = nameJ
   269  				level = levelJ
   270  			}
   271  		}
   272  	}
   273  	return
   274  }
   275  
   276  // GetSource return source of preprocessor C code
   277  func (f FilePP) GetSource() []byte {
   278  	return f.pp
   279  }
   280  
   281  // GetComments return comments in preprocessor C code
   282  func (f FilePP) GetComments() []Comment {
   283  	return f.comments
   284  }
   285  
   286  // GetIncludeFiles return list of '#include' file in C sources
   287  func (f FilePP) GetIncludeFiles() []IncludeHeader {
   288  	return f.includes
   289  }
   290  
   291  // IsUserSource get is it source from user
   292  func (f FilePP) IsUserSource(in string) bool {
   293  	for i := range f.includes {
   294  		if !f.includes[i].IsUserSource {
   295  			continue
   296  		}
   297  		if !strings.Contains(in, f.includes[i].HeaderName) {
   298  			continue
   299  		}
   300  		return true
   301  	}
   302  	return false
   303  }
   304  
   305  // GetBaseInclude return base include
   306  func (f FilePP) GetBaseInclude(in string) string {
   307  	for i := range f.includes {
   308  		if in == f.includes[i].HeaderName {
   309  			return f.includes[i].BaseHeaderName
   310  		}
   311  	}
   312  	return in
   313  }
   314  
   315  // GetSnippet return short part of code inside preprocessor C code
   316  func (f FilePP) GetSnippet(file string,
   317  	line, lineEnd int,
   318  	col, colEnd int) (
   319  	buffer []byte, err error) {
   320  	defer func() {
   321  		if err != nil {
   322  			err = fmt.Errorf("GetSnippet error for `%v` {%v,%v}{%v,%v}. %v",
   323  				file,
   324  				line, lineEnd,
   325  				col, colEnd,
   326  				err)
   327  		}
   328  	}()
   329  
   330  	if lineEnd == 0 {
   331  		lineEnd = line
   332  	}
   333  
   334  	// replace 2,3,4... byte of rune to one byte symbol
   335  	var t string
   336  	for _, r := range file {
   337  		if utf8.RuneLen(r) > 1 {
   338  			t += "_"
   339  			continue
   340  		}
   341  		t += string(r)
   342  	}
   343  	file = t
   344  
   345  again:
   346  	for i := range f.entities {
   347  		for j := range f.entities[i].include {
   348  			if f.entities[i].include[j] != '\\' {
   349  				continue
   350  			}
   351  			if j+3 > len(f.entities[i].include)-1 {
   352  				continue
   353  			}
   354  			wrongSymbol := false
   355  			var isSymbol2 bool
   356  			runes := f.entities[i].include[j+1 : j+4]
   357  			for y, r := range runes {
   358  				if !unicode.IsDigit(r) {
   359  					wrongSymbol = true
   360  				}
   361  				if y == 0 && r == '2' {
   362  					isSymbol2 = true
   363  				}
   364  			}
   365  			if !wrongSymbol {
   366  				if isSymbol2 {
   367  					f.entities[i].include = f.entities[i].include[:j] + "_" +
   368  						f.entities[i].include[j+4:]
   369  				} else {
   370  					f.entities[i].include = f.entities[i].include[:j] +
   371  						f.entities[i].include[j+4:]
   372  				}
   373  				goto again
   374  			}
   375  		}
   376  	}
   377  
   378  	for i := range f.entities {
   379  		if f.entities[i].include != file {
   380  			continue
   381  		}
   382  		lineEnd := lineEnd
   383  		if len(f.entities[i].lines)+f.entities[i].positionInSource < lineEnd {
   384  			continue
   385  		}
   386  		l := f.entities[i].lines[lineEnd+1-f.entities[i].positionInSource]
   387  		if col == 0 && colEnd == 0 {
   388  			return []byte(*l), nil
   389  		}
   390  		if colEnd == 0 {
   391  			if col-1 < len([]byte(*l)) {
   392  				return []byte((*l)[col-1:]), nil
   393  			}
   394  			err = fmt.Errorf("empty snippet")
   395  			return
   396  		}
   397  		if col <= 0 {
   398  			col = 1
   399  		}
   400  		if colEnd > len((*l)) {
   401  			return []byte((*l)[col-1:]), nil
   402  		}
   403  		return []byte((*l)[col-1 : colEnd]), nil
   404  	}
   405  
   406  	err = fmt.Errorf("snippet is not found")
   407  	return
   408  }
   409  
   410  // analyzeFiles - analyze single file and separation preprocessor code to part
   411  func analyzeFiles(inputFiles, clangFlags []string, cppCode bool) (
   412  	items []entity, err error) {
   413  	// See : https://clang.llvm.org/docs/CommandGuide/clang.html
   414  	// clang -E <file>    Run the preprocessor stage.
   415  	var out bytes.Buffer
   416  	out, err = getPreprocessSources(inputFiles, clangFlags, cppCode)
   417  	if err != nil {
   418  		return
   419  	}
   420  
   421  	// Parsing preprocessor file
   422  	r := bytes.NewReader(out.Bytes())
   423  	scanner := bufio.NewScanner(r)
   424  	scanner.Split(bufio.ScanLines)
   425  	// counter - get position of line
   426  	var counter int
   427  	// item, items - entity of preprocess file
   428  	var item *entity
   429  
   430  	reg := util.GetRegex("# (\\d+) \".*\".*")
   431  
   432  	for scanner.Scan() {
   433  		line := scanner.Text()
   434  		if reg.MatchString(line) {
   435  			if item != (*entity)(nil) {
   436  				items = append(items, *item)
   437  			}
   438  			item, err = parseIncludePreprocessorLine(line)
   439  			if err != nil {
   440  				err = fmt.Errorf("cannot parse line : %s with error: %s", line, err)
   441  				return
   442  			}
   443  			if item.positionInSource == 0 {
   444  				// cannot by less 1 for avoid problem with
   445  				// identification of "0" AST base element
   446  				item.positionInSource = 1
   447  			}
   448  			item.lines = make([]*string, 0)
   449  		}
   450  		counter++
   451  		item.lines = append(item.lines, &line)
   452  	}
   453  	if item != (*entity)(nil) {
   454  		items = append(items, *item)
   455  	}
   456  	return
   457  }
   458  
   459  // See : https://clang.llvm.org/docs/CommandGuide/clang.html
   460  // clang -E <file>    Run the preprocessor stage.
   461  func getPreprocessSources(inputFiles, clangFlags []string, cppCode bool) (
   462  	out bytes.Buffer, err error) {
   463  	// get temp dir
   464  	dir, err := ioutil.TempDir("", "c4go-union")
   465  	if err != nil {
   466  		return
   467  	}
   468  	defer func() { _ = os.RemoveAll(dir) }()
   469  
   470  	// file name union file
   471  	var unionFileName = dir + "/" + "unionFileName.c"
   472  
   473  	// create a body for union file
   474  	var unionBody string
   475  	for i := range inputFiles {
   476  		var absPath string
   477  		absPath, err = filepath.Abs(inputFiles[i])
   478  		if err != nil {
   479  			return
   480  		}
   481  		unionBody += fmt.Sprintf("#include \"%s\"\n", absPath)
   482  	}
   483  
   484  	// write a union file
   485  	err = ioutil.WriteFile(unionFileName, []byte(unionBody), 0644)
   486  	if err != nil {
   487  		return
   488  	}
   489  
   490  	// Add open source defines
   491  	clangFlags = append(clangFlags, "-D_GNU_SOURCE")
   492  
   493  	// preprocessor clang
   494  	var stderr bytes.Buffer
   495  
   496  	var args []string
   497  	args = append(args, "-E", "-C")
   498  	args = append(args, clangFlags...)
   499  	args = append(args, unionFileName) // All inputFiles
   500  
   501  	var outFile bytes.Buffer
   502  	var cmd *exec.Cmd
   503  
   504  	compiler, compilerFlag := Compiler(cppCode)
   505  	args = append(compilerFlag, args...)
   506  	cmd = exec.Command(compiler, args...)
   507  
   508  	cmd.Stdout = &outFile
   509  	cmd.Stderr = &stderr
   510  	err = cmd.Run()
   511  	if err != nil {
   512  		err = fmt.Errorf("preprocess for file: %v\nfailed: %v\nStdErr = %v", inputFiles, err, stderr.String())
   513  		return
   514  	}
   515  	_, err = out.Write(outFile.Bytes())
   516  	if err != nil {
   517  		return
   518  	}
   519  
   520  	return
   521  }
   522  
   523  func generateIncludeList(userList, allList []string) (
   524  	includes []IncludeHeader) {
   525  
   526  	for i := range allList {
   527  		var isUser bool
   528  		for j := range userList {
   529  			if allList[i] == userList[j] {
   530  				isUser = true
   531  				break
   532  			}
   533  		}
   534  		includes = append(includes, IncludeHeader{
   535  			HeaderName:   allList[i],
   536  			IsUserSource: isUser,
   537  		})
   538  	}
   539  
   540  	return
   541  }
   542  
   543  // GetIncludeListWithUserSource - Get list of include files
   544  // Example:
   545  // $ clang  -MM -c exit.c
   546  // exit.o: exit.c tests.h
   547  func GetIncludeListWithUserSource(inputFiles, clangFlags []string, cppCode bool) (
   548  	lines []string, err error) {
   549  	var out string
   550  	out, err = getIncludeList(inputFiles, clangFlags, []string{"-MM"}, cppCode)
   551  	if err != nil {
   552  		return
   553  	}
   554  	return parseIncludeList(out)
   555  }
   556  
   557  // GetIncludeFullList - Get full list of include files
   558  // Example:
   559  // $ clang -M -c triangle.c
   560  //
   561  //	triangle.o: triangle.c /usr/include/stdio.h /usr/include/features.h \
   562  //	  /usr/include/stdc-predef.h /usr/include/x86_64-linux-gnu/sys/cdefs.h \
   563  //	  /usr/include/x86_64-linux-gnu/bits/wordsize.h \
   564  //	  /usr/include/x86_64-linux-gnu/gnu/stubs.h \
   565  //	  /usr/include/x86_64-linux-gnu/gnu/stubs-64.h \
   566  //	  / ........ and other
   567  func GetIncludeFullList(inputFiles, clangFlags []string, cppCode bool) (
   568  	lines []string, err error) {
   569  	var out string
   570  	out, err = getIncludeList(inputFiles, clangFlags, []string{"-M"}, cppCode)
   571  	if err != nil {
   572  		return
   573  	}
   574  	return parseIncludeList(out)
   575  }
   576  
   577  // GetIeraphyIncludeList - Get list of include files in ierarphy
   578  // Example:
   579  // clang -MM -H ./tests/math.c
   580  // . ./tests/tests.h
   581  // .. /usr/include/string.h
   582  // ... /usr/include/features.h
   583  // .... /usr/include/stdc-predef.h
   584  // .... /usr/include/x86_64-linux-gnu/sys/cdefs.h
   585  // ..... /usr/include/x86_64-linux-gnu/bits/wordsize.h
   586  // .... /usr/include/x86_64-linux-gnu/gnu/stubs.h
   587  // ..... /usr/include/x86_64-linux-gnu/gnu/stubs-64.h
   588  // ... /usr/lib/llvm-6.0/lib/clang/6.0.0/include/stddef.h
   589  // ... /usr/include/xlocale.h
   590  // .. /usr/include/math.h
   591  // ... /usr/include/x86_64-linux-gnu/bits/math-vector.h
   592  func GetIeraphyIncludeList(inputFiles, clangFlags []string, cppCode bool) (
   593  	lines []string, err error) {
   594  	var out string
   595  	out, err = getIncludeList(inputFiles, clangFlags, []string{"-MM", "-H"}, cppCode)
   596  	if err != nil {
   597  		return
   598  	}
   599  	return strings.Split(out, "\n"), nil
   600  }
   601  
   602  // getIncludeList return stdout lines
   603  func getIncludeList(inputFiles, clangFlags []string, flag []string, cppCode bool) (
   604  	_ string, err error) {
   605  	defer func() {
   606  		if err != nil {
   607  			err = fmt.Errorf("cannot get Include List : %v", err)
   608  		}
   609  	}()
   610  	var out bytes.Buffer
   611  	var stderr bytes.Buffer
   612  	var args []string
   613  	for i := range inputFiles {
   614  		inputFiles[i], err = filepath.Abs(inputFiles[i])
   615  		if err != nil {
   616  			return
   617  		}
   618  	}
   619  	args = append(args, flag...)
   620  	args = append(args, "-c")
   621  	args = append(args, inputFiles...)
   622  	args = append(args, clangFlags...)
   623  
   624  	defer func() {
   625  		if err != nil {
   626  			fmt.Errorf("used next arguments: `%v`. %v", args, err)
   627  		}
   628  	}()
   629  
   630  	var cmd *exec.Cmd
   631  	compiler, compilerFlag := Compiler(cppCode)
   632  	args = append(compilerFlag, args...)
   633  	cmd = exec.Command(compiler, args...)
   634  
   635  	cmd.Stdout = &out
   636  	cmd.Stderr = &stderr
   637  	err = cmd.Run()
   638  	if err != nil {
   639  		err = fmt.Errorf("preprocess failed: %v\nStdErr = %v", err, stderr.String())
   640  		return
   641  	}
   642  
   643  	// add stderr to out, for flags "-MM -H"
   644  	out.WriteString(stderr.String())
   645  
   646  	// remove warnings
   647  	// ... /usr/lib/llvm-4.0/bin/../lib/clang/4.0.1/include/stddef.h
   648  	// .. /usr/include/x86_64-linux-gnu/bits/stdlib-float.h
   649  	// /home/konstantin/go/src/github.com/Konstantin8105/c4go/testdata/kilo/debug.kilo.c:81:9: warning: '_BSD_SOURCE' macro redefined [-Wmacro-redefined]
   650  	// #define _BSD_SOURCE
   651  	//         ^
   652  	// /usr/include/features.h:188:10: note: previous definition is here
   653  	// # define _BSD_SOURCE    1
   654  	//          ^
   655  	lines := strings.Split(out.String(), "\n")
   656  	for i := range lines {
   657  		if strings.Contains(lines[i], "warning:") {
   658  			lines = lines[:i]
   659  			break
   660  		}
   661  	}
   662  
   663  	return strings.Join(lines, "\n"), err
   664  }