github.com/xzntrc/go-enry/v2@v2.0.0-20230215091818-766cc1d65498/internal/tokenizer/flex/tokenize_c.go (about)

     1  // +build flex
     2  
     3  package flex
     4  
     5  // #include <stdlib.h>
     6  // #include "linguist.h"
     7  // #include "lex.linguist_yy.h"
     8  // int linguist_yywrap(yyscan_t yyscanner) {
     9  // 	return 1;
    10  // }
    11  import "C"
    12  import "unsafe"
    13  
    14  const maxTokenLen = 32 // bytes
    15  
    16  // TokenizeFlex implements tokenizer by calling Flex generated code from linguist in C
    17  // This is a transliteration from C https://github.com/github/linguist/blob/master/ext/linguist/linguist.c#L12
    18  func TokenizeFlex(content []byte) []string {
    19  	var buf C.YY_BUFFER_STATE
    20  	var scanner C.yyscan_t
    21  	var extra C.struct_tokenizer_extra
    22  	var _len C.ulong
    23  	var r C.int
    24  
    25  	_len = C.ulong(len(content))
    26  	cs := C.CBytes(content)
    27  	defer C.free(unsafe.Pointer(cs))
    28  
    29  	C.linguist_yylex_init_extra(&extra, &scanner)
    30  	buf = C.linguist_yy_scan_bytes((*C.char)(cs), _len, scanner)
    31  
    32  	ary := []string{}
    33  	for {
    34  		extra._type = C.NO_ACTION
    35  		extra.token = nil
    36  		r = C.linguist_yylex(scanner)
    37  		switch extra._type {
    38  		case C.NO_ACTION:
    39  			break
    40  		case C.REGULAR_TOKEN:
    41  			_len = C.strlen(extra.token)
    42  			if _len <= maxTokenLen {
    43  				ary = append(ary, C.GoStringN(extra.token, (C.int)(_len)))
    44  			}
    45  			C.free(unsafe.Pointer(extra.token))
    46  			break
    47  		case C.SHEBANG_TOKEN:
    48  			_len = C.strlen(extra.token)
    49  			if _len <= maxTokenLen {
    50  				s := "SHEBANG#!" + C.GoStringN(extra.token, (C.int)(_len))
    51  				ary = append(ary, s)
    52  			}
    53  			C.free(unsafe.Pointer(extra.token))
    54  			break
    55  		case C.SGML_TOKEN:
    56  			_len = C.strlen(extra.token)
    57  			if _len <= maxTokenLen {
    58  				s := C.GoStringN(extra.token, (C.int)(_len)) + ">"
    59  				ary = append(ary, s)
    60  			}
    61  			C.free(unsafe.Pointer(extra.token))
    62  			break
    63  		}
    64  		if r == 0 {
    65  			break
    66  		}
    67  	}
    68  
    69  	C.linguist_yy_delete_buffer(buf, scanner)
    70  	C.linguist_yylex_destroy(scanner)
    71  
    72  	return ary
    73  }