github.com/zkry/enry@v1.6.3/internal/tokenizer/tokenize_test.go (about)

     1  package tokenizer
     2  
     3  import (
     4  	"fmt"
     5  	"testing"
     6  
     7  	"github.com/stretchr/testify/assert"
     8  )
     9  
const (
	// testContent is a polyglot fixture fed to Tokenize in TestTokenize.
	// It deliberately mixes shebang lines, Go code with block comments,
	// line comments, HTML/XHTML markup with CDATA, string/char literals,
	// numeric literals (hex, binary, floats), and operators, so the
	// tokenizer's extraction rules for each category are all exercised.
	// NOTE(review): tokensFromTestContent is the golden output for this
	// exact byte sequence — do not edit this literal (including blank
	// lines and trailing whitespace) without regenerating the expected
	// token list.
	testContent = `#!/usr/bin/ruby

#!/usr/bin/env node

aaa

#!/usr/bin/env A=B foo=bar awk -f

#!python

func Tokenize(content []byte) []string {
	splitted := bytes.Fields(content)
	tokens := /* make([]string, 0, len(splitted))
	no comment -- comment
	for _, tokenByte := range splitted {
		token64 := base64.StdEncoding.EncodeToString(tokenByte)
		tokens = append(tokens, token64)
		notcatchasanumber3.5
	}*/
othercode
	/* testing multiple 
	
		multiline comments*/

<!-- com
	ment -->
<!-- comment 2-->
ppp no comment # comment

"literal1"

abb (tokenByte, 0xAF02) | ,3.2L

'literal2' notcatchasanumber3.5

	5 += number * anotherNumber
	if isTrue && isToo {
		0b00001000 >> 1
	}

	return tokens

oneBool = 3 <= 2
varBool = 3<=2>
 
#ifndef
#i'm not a comment if the single line comment symbol is not followed by a white

  PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
    <head>
        <title id="hola" class="">This is a XHTML sample file</title>
        <style type="text/css"><![CDATA[
            #example {
                background-color: yellow;
            }
        ]]></style>
    </head>
    <body>
        <div id="example">
            Just a simple <strong>XHTML</strong> test page.
        </div>
    </body>
</html>`
)
    78  
var (
	// tokensFromTestContent is the golden token sequence Tokenize is
	// expected to produce for testContent, in order. Duplicates (e.g.
	// "DTD", "xhtml1") and repeated punctuation tokens are intentional:
	// the test compares element-by-element, so order and multiplicity
	// both matter. Shebang interpreters are normalized to "SHEBANG#!x",
	// markup tags to "<tag>"/"</tag>" forms, as the leading entries show.
	tokensFromTestContent = []string{"SHEBANG#!ruby", "SHEBANG#!node", "SHEBANG#!awk", "<!DOCTYPE>", "PUBLIC", "W3C", "DTD", "XHTML", "1", "0",
		"Strict", "EN", "http", "www", "w3", "org", "TR", "xhtml1", "DTD", "xhtml1", "strict", "dtd", "<html>", "<head>", "<title>", "class=",
		"</title>", "<style>", "<![CDATA[>", "example", "background", "color", "yellow", "</style>", "</head>", "<body>", "<div>", "<strong>",
		"</strong>", "</div>", "</body>", "</html>", "(", "[", "]", ")", "[", "]", "{", "(", ")", "(", ")", "{", "}", "(", ")", ";", "{", ";",
		"}", "]", "]", "#", "/usr/bin/ruby", "#", "/usr/bin/env", "node", "aaa", "#", "/usr/bin/env", "A", "B", "foo", "bar", "awk", "f", "#",
		"python", "func", "Tokenize", "content", "byte", "string", "splitted", "bytes.Fields", "content", "tokens", "othercode", "ppp", "no",
		"comment", "abb", "tokenByte", "notcatchasanumber", "number", "*", "anotherNumber", "if", "isTrue", "isToo", "b", "return", "tokens",
		"oneBool", "varBool", "#ifndef", "#i", "m", "not", "a", "comment", "if", "the", "single", "line", "comment", "symbol", "is", "not",
		"followed", "by", "a", "white", "PyErr_SetString", "PyExc_RuntimeError", "html", "PUBLIC", "xmlns", "id", "class", "This", "is", "a",
		"XHTML", "sample", "file", "type", "#example", "background", "color", "yellow", "id", "Just", "a", "simple", "XHTML", "test", "page.",
		"-", "|", "+", "&&", "<", "<", "-", "!", "!", "!", "=", "=", "!", ":", "=", ":", "=", ",", ",", "=", ">", ">", "=", "=", "=", "=", ">",
		"'", ",", ">", "=", ">", "=", "=", ">", "=", ">", ":", ">", "=", ">"}
)
    93  
    94  func TestTokenize(t *testing.T) {
    95  	tests := []struct {
    96  		name     string
    97  		content  []byte
    98  		expected []string
    99  	}{
   100  		{name: "content", content: []byte(testContent), expected: tokensFromTestContent},
   101  	}
   102  
   103  	for _, test := range tests {
   104  		t.Run(test.name, func(t *testing.T) {
   105  			tokens := Tokenize(test.content)
   106  			assert.Equal(t, len(test.expected), len(tokens), fmt.Sprintf("token' slice length = %v, want %v", len(test.expected), len(tokens)))
   107  			for i, expectedToken := range test.expected {
   108  				assert.Equal(t, expectedToken, tokens[i], fmt.Sprintf("token = %v, want %v", tokens[i], expectedToken))
   109  			}
   110  		})
   111  	}
   112  }