github.com/Khushbukela/protoreflect@v1.0.1/desc/protoparse/lexer_test.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"io"
     5  	"math"
     6  	"strings"
     7  	"testing"
     8  
     9  	"github.com/jhump/protoreflect/desc/protoparse/ast"
    10  	"github.com/jhump/protoreflect/internal/testutil"
    11  )
    12  
    13  func TestLexer(t *testing.T) {
    14  	l := newTestLexer(strings.NewReader(`
    15  	// comment
    16  
    17  	/*
    18  	 * block comment
    19  	 */ /* inline comment */
    20  
    21  	int32  "\032\x16\n\rfoobar\"zap"		'another\tstring\'s\t'
    22  foo
    23  
    24  	// another comment
    25  	// more and more...
    26  
    27  	service rpc message
    28  	.type
    29  	.f.q.n
    30  	name
    31  	f.q.n
    32  
    33  	.01
    34  	.01e12
    35  	.01e+5
    36  	.033e-1
    37  
    38  	12345
    39  	-12345
    40  	123.1234
    41  	0.123
    42  	012345
    43  	0x2134abcdef30
    44  	-0543
    45  	-0xff76
    46  	101.0102
    47  	202.0203e1
    48  	304.0304e-10
    49  	3.1234e+12
    50  
    51  	{ } + - , ;
    52  
    53  	[option=foo]
    54  	syntax = "proto2";
    55  
    56  	// some strange cases
    57  	1.543 g12 /* trailing line comment */
    58  	000.000
    59  	0.1234 .5678 .
    60  	12e12 1.2345e123412341234
    61  
    62  	Random_identifier_with_numbers_0123456789_and_letters...
    63  	// this is a trailing comment
    64  	// that spans multiple lines
    65  	// over two in fact!
    66  	/*
    67  	 * this is a detached comment
    68  	 * with lots of extra words and stuff...
    69  	 */
    70  
    71  	// this is an attached leading comment
    72  	foo
    73  
    74  	1.23e+20+20
    75  	// a trailing comment for last element
    76  
    77  	// comment attached to no tokens (upcoming token is EOF!)
    78  	/* another comment followed by some final whitespace*/
    79  
    80  	
    81  	`))
    82  
    83  	var prev ast.Node
    84  	var sym protoSymType
    85  	expected := []struct {
    86  		t          int
    87  		line, col  int
    88  		span       int
    89  		v          interface{}
    90  		comments   []string
    91  		trailCount int
    92  	}{
    93  		{t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}},
    94  		{t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"},
    95  		{t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"},
    96  		{t: _NAME, line: 9, col: 1, span: 3, v: "foo"},
    97  		{t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}},
    98  		{t: _RPC, line: 14, col: 17, span: 3, v: "rpc"},
    99  		{t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"},
   100  		{t: '.', line: 15, col: 9, span: 1},
   101  		{t: _NAME, line: 15, col: 10, span: 4, v: "type"},
   102  		{t: '.', line: 16, col: 9, span: 1},
   103  		{t: _NAME, line: 16, col: 10, span: 1, v: "f"},
   104  		{t: '.', line: 16, col: 11, span: 1},
   105  		{t: _NAME, line: 16, col: 12, span: 1, v: "q"},
   106  		{t: '.', line: 16, col: 13, span: 1},
   107  		{t: _NAME, line: 16, col: 14, span: 1, v: "n"},
   108  		{t: _NAME, line: 17, col: 9, span: 4, v: "name"},
   109  		{t: _NAME, line: 18, col: 9, span: 1, v: "f"},
   110  		{t: '.', line: 18, col: 10, span: 1},
   111  		{t: _NAME, line: 18, col: 11, span: 1, v: "q"},
   112  		{t: '.', line: 18, col: 12, span: 1},
   113  		{t: _NAME, line: 18, col: 13, span: 1, v: "n"},
   114  		{t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01},
   115  		{t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12},
   116  		{t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5},
   117  		{t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1},
   118  		{t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)},
   119  		{t: '-', line: 26, col: 9, span: 1, v: nil},
   120  		{t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)},
   121  		{t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234},
   122  		{t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123},
   123  		{t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)},
   124  		{t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)},
   125  		{t: '-', line: 31, col: 9, span: 1, v: nil},
   126  		{t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)},
   127  		{t: '-', line: 32, col: 9, span: 1, v: nil},
   128  		{t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)},
   129  		{t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102},
   130  		{t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1},
   131  		{t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10},
   132  		{t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12},
   133  		{t: '{', line: 38, col: 9, span: 1, v: nil},
   134  		{t: '}', line: 38, col: 11, span: 1, v: nil},
   135  		{t: '+', line: 38, col: 13, span: 1, v: nil},
   136  		{t: '-', line: 38, col: 15, span: 1, v: nil},
   137  		{t: ',', line: 38, col: 17, span: 1, v: nil},
   138  		{t: ';', line: 38, col: 19, span: 1, v: nil},
   139  		{t: '[', line: 40, col: 9, span: 1, v: nil},
   140  		{t: _OPTION, line: 40, col: 10, span: 6, v: "option"},
   141  		{t: '=', line: 40, col: 16, span: 1, v: nil},
   142  		{t: _NAME, line: 40, col: 17, span: 3, v: "foo"},
   143  		{t: ']', line: 40, col: 20, span: 1, v: nil},
   144  		{t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"},
   145  		{t: '=', line: 41, col: 16, span: 1, v: nil},
   146  		{t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"},
   147  		{t: ';', line: 41, col: 26, span: 1, v: nil},
   148  		{t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}},
   149  		{t: _NAME, line: 44, col: 15, span: 3, v: "g12"},
   150  		{t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1},
   151  		{t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234},
   152  		{t: _FLOAT_LIT, line: 46, col: 16, span: 5, v: 0.5678},
   153  		{t: '.', line: 46, col: 22, span: 1, v: nil},
   154  		{t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12},
   155  		{t: _FLOAT_LIT, line: 47, col: 15, span: 19, v: math.Inf(1)},
   156  		{t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"},
   157  		{t: '.', line: 49, col: 62, span: 1, v: nil},
   158  		{t: '.', line: 49, col: 63, span: 1, v: nil},
   159  		{t: '.', line: 49, col: 64, span: 1, v: nil},
   160  		{t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3},
   161  		{t: _FLOAT_LIT, line: 61, col: 9, span: 8, v: 1.23e+20},
   162  		{t: '+', line: 61, col: 17, span: 1, v: nil},
   163  		{t: _INT_LIT, line: 61, col: 18, span: 2, v: uint64(20)},
   164  	}
   165  
   166  	for i, exp := range expected {
   167  		tok := l.Lex(&sym)
   168  		if tok == 0 {
   169  			t.Fatalf("lexer reported EOF but should have returned %v", exp)
   170  		}
   171  		var n ast.Node
   172  		var val interface{}
   173  		switch tok {
   174  		case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _NAME:
   175  			n = sym.id
   176  			val = sym.id.Val
   177  		case _STRING_LIT:
   178  			n = sym.s
   179  			val = sym.s.Val
   180  		case _INT_LIT:
   181  			n = sym.i
   182  			val = sym.i.Val
   183  		case _FLOAT_LIT:
   184  			n = sym.f
   185  			val = sym.f.Val
   186  		case _ERROR:
   187  			val = sym.err
   188  		default:
   189  			n = sym.b
   190  			val = nil
   191  		}
   192  		testutil.Eq(t, exp.t, tok, "case %d: wrong token type (expecting value %v, got %v)", i, exp.v, val)
   193  		testutil.Eq(t, exp.v, val, "case %d: wrong token value", i)
   194  		testutil.Eq(t, exp.line, n.Start().Line, "case %d: wrong line number", i)
   195  		testutil.Eq(t, exp.col, n.Start().Col, "case %d: wrong column number (on line %d)", i, exp.line)
   196  		testutil.Eq(t, exp.line, n.End().Line, "case %d: wrong end line number", i)
   197  		testutil.Eq(t, exp.col+exp.span, n.End().Col, "case %d: wrong end column number", i)
   198  		if prev != nil {
   199  			testutil.Eq(t, exp.trailCount, len(prev.TrailingComments()), "case %d: wrong number of trailing comments", i)
   200  		}
   201  		testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.LeadingComments()), "case %d: wrong number of comments", i)
   202  		for ci := range exp.comments {
   203  			var c ast.Comment
   204  			if ci < exp.trailCount {
   205  				c = prev.TrailingComments()[ci]
   206  			} else {
   207  				c = n.LeadingComments()[ci-exp.trailCount]
   208  			}
   209  			testutil.Eq(t, exp.comments[ci], c.Text, "case %d, comment #%d: unexpected text", i, ci+1)
   210  		}
   211  		prev = n
   212  	}
   213  	if tok := l.Lex(&sym); tok != 0 {
   214  		t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok)
   215  	}
   216  	// Now we check final state of lexer for unattached comments and final whitespace
   217  	// One of the final comments get associated as trailing comment for final token
   218  	testutil.Eq(t, 1, len(prev.TrailingComments()), "last token: wrong number of trailing comments")
   219  	finalComments := l.eof.LeadingComments()
   220  	testutil.Eq(t, 2, len(finalComments), "wrong number of final remaining comments")
   221  	testutil.Eq(t, "// comment attached to no tokens (upcoming token is EOF!)\n", finalComments[0].Text, "incorrect final comment text")
   222  	testutil.Eq(t, "/* another comment followed by some final whitespace*/", finalComments[1].Text, "incorrect final comment text")
   223  	testutil.Eq(t, "\n\n\t\n\t", l.eof.LeadingWhitespace(), "incorrect final whitespace")
   224  }
   225  
   226  func TestLexerErrors(t *testing.T) {
   227  	testCases := []struct {
   228  		str    string
   229  		errMsg string
   230  	}{
   231  		{str: `0xffffffffffffffffffff`, errMsg: "value out of range"},
   232  		{str: `"foobar`, errMsg: "unexpected EOF"},
   233  		{str: `"foobar\J"`, errMsg: "invalid escape sequence"},
   234  		{str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"},
   235  		{str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"},
   236  		{str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"},
   237  		{str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"},
   238  		{str: "'foobar\nbaz'", errMsg: "encountered end-of-line"},
   239  		{str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"},
   240  		{str: `1.543g12`, errMsg: "invalid syntax"},
   241  		{str: `0.1234.5678.`, errMsg: "invalid syntax"},
   242  		{str: `0x987.345aaf`, errMsg: "invalid syntax"},
   243  		{str: `0.987.345`, errMsg: "invalid syntax"},
   244  		{str: `0.987e34e-20`, errMsg: "invalid syntax"},
   245  		{str: `0.987e-345e20`, errMsg: "invalid syntax"},
   246  		{str: `.987to123`, errMsg: "invalid syntax"},
   247  		{str: `0b0111`, errMsg: "invalid syntax"},
   248  		{str: `0o765432`, errMsg: "invalid syntax"},
   249  		{str: `1_000_000`, errMsg: "invalid syntax"},
   250  		{str: `1_000.000_001e6`, errMsg: "invalid syntax"},
   251  		{str: `0X1F_FFP-16`, errMsg: "invalid syntax"},
   252  		{str: `/* foobar`, errMsg: "unexpected EOF"},
   253  		{str: "\x00", errMsg: "invalid control character"},
   254  		{str: "\x03", errMsg: "invalid control character"},
   255  		{str: "\x1B", errMsg: "invalid control character"},
   256  		{str: "\x7F", errMsg: "invalid control character"},
   257  		{str: "#", errMsg: "invalid character"},
   258  		{str: "?", errMsg: "invalid character"},
   259  		{str: "^", errMsg: "invalid character"},
   260  		{str: "\uAAAA", errMsg: "invalid character"},
   261  		{str: "\U0010FFFF", errMsg: "invalid character"},
   262  		{str: "// foo \x00", errMsg: "invalid control character"},
   263  		{str: "/* foo \x00", errMsg: "invalid control character"},
   264  		{str: "09", errMsg: "invalid syntax in octal integer value: 09"},
   265  		{str: "0f", errMsg: "invalid syntax in octal integer value: 0f"},
   266  	}
   267  	for i, tc := range testCases {
   268  		l := newTestLexer(strings.NewReader(tc.str))
   269  		var sym protoSymType
   270  		tok := l.Lex(&sym)
   271  		testutil.Eq(t, _ERROR, tok)
   272  		testutil.Require(t, sym.err != nil)
   273  		testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error())
   274  		t.Logf("case %d: %v", i, sym.err)
   275  	}
   276  }
   277  
   278  func newTestLexer(in io.Reader) *protoLex {
   279  	return newLexer(in, "test.proto", newErrorHandler(nil, nil))
   280  }