github.com/xzntrc/go-enry/v2@v2.0.0-20230215091818-766cc1d65498/internal/tokenizer/flex/tokenize_c.go (about) 1 // +build flex 2 3 package flex 4 5 // #include <stdlib.h> 6 // #include "linguist.h" 7 // #include "lex.linguist_yy.h" 8 // int linguist_yywrap(yyscan_t yyscanner) { 9 // return 1; 10 // } 11 import "C" 12 import "unsafe" 13 14 const maxTokenLen = 32 // bytes 15 16 // TokenizeFlex implements tokenizer by calling Flex generated code from linguist in C 17 // This is a transliteration from C https://github.com/github/linguist/blob/master/ext/linguist/linguist.c#L12 18 func TokenizeFlex(content []byte) []string { 19 var buf C.YY_BUFFER_STATE 20 var scanner C.yyscan_t 21 var extra C.struct_tokenizer_extra 22 var _len C.ulong 23 var r C.int 24 25 _len = C.ulong(len(content)) 26 cs := C.CBytes(content) 27 defer C.free(unsafe.Pointer(cs)) 28 29 C.linguist_yylex_init_extra(&extra, &scanner) 30 buf = C.linguist_yy_scan_bytes((*C.char)(cs), _len, scanner) 31 32 ary := []string{} 33 for { 34 extra._type = C.NO_ACTION 35 extra.token = nil 36 r = C.linguist_yylex(scanner) 37 switch extra._type { 38 case C.NO_ACTION: 39 break 40 case C.REGULAR_TOKEN: 41 _len = C.strlen(extra.token) 42 if _len <= maxTokenLen { 43 ary = append(ary, C.GoStringN(extra.token, (C.int)(_len))) 44 } 45 C.free(unsafe.Pointer(extra.token)) 46 break 47 case C.SHEBANG_TOKEN: 48 _len = C.strlen(extra.token) 49 if _len <= maxTokenLen { 50 s := "SHEBANG#!" + C.GoStringN(extra.token, (C.int)(_len)) 51 ary = append(ary, s) 52 } 53 C.free(unsafe.Pointer(extra.token)) 54 break 55 case C.SGML_TOKEN: 56 _len = C.strlen(extra.token) 57 if _len <= maxTokenLen { 58 s := C.GoStringN(extra.token, (C.int)(_len)) + ">" 59 ary = append(ary, s) 60 } 61 C.free(unsafe.Pointer(extra.token)) 62 break 63 } 64 if r == 0 { 65 break 66 } 67 } 68 69 C.linguist_yy_delete_buffer(buf, scanner) 70 C.linguist_yylex_destroy(scanner) 71 72 return ary 73 }