github.com/zkry/enry@v1.6.3/internal/tokenizer/tokenize_test.go (about) 1 package tokenizer 2 3 import ( 4 "fmt" 5 "testing" 6 7 "github.com/stretchr/testify/assert" 8 ) 9 10 const ( 11 testContent = `#!/usr/bin/ruby 12 13 #!/usr/bin/env node 14 15 aaa 16 17 #!/usr/bin/env A=B foo=bar awk -f 18 19 #!python 20 21 func Tokenize(content []byte) []string { 22 splitted := bytes.Fields(content) 23 tokens := /* make([]string, 0, len(splitted)) 24 no comment -- comment 25 for _, tokenByte := range splitted { 26 token64 := base64.StdEncoding.EncodeToString(tokenByte) 27 tokens = append(tokens, token64) 28 notcatchasanumber3.5 29 }*/ 30 othercode 31 /* testing multiple 32 33 multiline comments*/ 34 35 <!-- com 36 ment --> 37 <!-- comment 2--> 38 ppp no comment # comment 39 40 "literal1" 41 42 abb (tokenByte, 0xAF02) | ,3.2L 43 44 'literal2' notcatchasanumber3.5 45 46 5 += number * anotherNumber 47 if isTrue && isToo { 48 0b00001000 >> 1 49 } 50 51 return tokens 52 53 oneBool = 3 <= 2 54 varBool = 3<=2> 55 56 #ifndef 57 #i'm not a comment if the single line comment symbol is not followed by a white 58 59 PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4."); 60 61 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> 62 <html xmlns="http://www.w3.org/1999/xhtml"> 63 <head> 64 <title id="hola" class="">This is a XHTML sample file</title> 65 <style type="text/css"><![CDATA[ 66 #example { 67 background-color: yellow; 68 } 69 ]]></style> 70 </head> 71 <body> 72 <div id="example"> 73 Just a simple <strong>XHTML</strong> test page. 74 </div> 75 </body> 76 </html>` 77 ) 78 79 var ( 80 tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "PUBLIC", "W3C", "DTD", "XHTML", "1", "0", 81 "Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1", "strict", "dtd", "<html>", "<head>", "<title>", "class=", 82 "</title>", "<style>", "<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "<strong>", 83 "</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")", "{", "}", "(", ")", ";", "{", ";", 84 "}", "]", "]", "#", "/usr/bin/ruby", "#", "/usr/bin/env", "node", "aaa", "#", "/usr/bin/env", "A", "B", "foo", "bar", "awk", "f", "#", 85 "python", "func", "Tokenize", "content", "byte", "string", "splitted", "bytes.Fields", "content", "tokens", "othercode", "ppp", "no", 86 "comment", "abb", "tokenByte", "notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return", "tokens", 87 "oneBool", "varBool", "#ifndef", "#i", "m", "not", "a", "comment", "if", "the", "single", "line", "comment", "symbol", "is", "not", 88 "followed", "by", "a", "white", "PyErr_SetString", "PyExc_RuntimeError", "html", "PUBLIC", "xmlns", "id", "class", "This", "is", "a", 89 "XHTML", "sample", "file", "type", "#example", "background", "color", "yellow", "id", "Just", "a", "simple", "XHTML", "test", "page.", 90 "-", "|", "+", "&&", "<", "<", "-", "!", "!", "!", "=", "=", "!", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">", 91 "'", ",", ">", "=", ">", "=", "=", ">", "=", ">", ":", ">", "=", ">"} 92 ) 93 94 func TestTokenize(t *testing.T) { 95 tests := []struct { 96 name string 97 content []byte 98 expected []string 99 }{ 100 {name: "content", content: []byte(testContent), expected: tokensFromTestContent}, 101 } 102 103 for _, test := range tests { 104 t.Run(test.name, func(t *testing.T) { 105 tokens := Tokenize(test.content) 106 assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens))) 107 for i, expectedToken := range test.expected { 108 assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken)) 109 } 110 }) 111 } 112 }