github.com/xiaoshude/protoreflect@v1.16.1-0.20220310024924-8c94d7247598/desc/protoparse/lexer_test.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"io"
     5  	"strings"
     6  	"testing"
     7  
     8  	"github.com/xiaoshude/protoreflect/desc/protoparse/ast"
     9  	"github.com/xiaoshude/protoreflect/internal/testutil"
    10  )
    11  
// TestLexer runs a fixed, proto-like input through the lexer one token at a
// time and checks each token's type, value, start/end position, and attached
// comments against a hand-written table. Once the table is exhausted it
// verifies that the lexer reports EOF and that the remaining comments and
// whitespace are found on the lexer's eof node.
//
// The input literal and the table are tightly coupled: every line/col/span
// value below depends on the exact bytes of the literal (including tabs and
// blank lines), so the two must always be edited together.
func TestLexer(t *testing.T) {
	l := newTestLexer(strings.NewReader(`
	// comment

	/*
	 * block comment
	 */ /* inline comment */

	int32  "\032\x16\n\rfoobar\"zap"		'another\tstring\'s\t'
foo

	// another comment
	// more and more...

	service rpc message
	.type
	.f.q.n
	name
	f.q.n

	.01
	.01e12
	.01e+5
	.033e-1

	12345
	-12345
	123.1234
	0.123
	012345
	0x2134abcdef30
	-0543
	-0xff76
	101.0102
	202.0203e1
	304.0304e-10
	3.1234e+12

	{ } + - , ;

	[option=foo]
	syntax = "proto2";

	// some strange cases
	1.543g12 /* trailing line comment */
	000.000
	0.1234.5678.
	12e12

	Random_identifier_with_numbers_0123456789_and_letters...
	// this is a trailing comment
	// that spans multiple lines
	// over two in fact!
	/*
	 * this is a detached comment
	 * with lots of extra words and stuff...
	 */

	// this is an attached leading comment
	foo
	// a trailing comment for last element

	// comment attached to no tokens (upcoming token is EOF!)
	/* another comment followed by some final whitespace*/

	
	`))

	// prev holds the node of the previously lexed token so that trailing
	// comments, which attach to the preceding token, can be checked.
	var prev ast.Node
	var sym protoSymType
	// Positions are relative to the literal above. The table expects 1-based
	// lines and columns (the bare "foo" at the start of a line is col 1, and
	// the literal's first line is the empty remainder after the opening
	// backtick), and a token following a single leading tab to start at col 9.
	expected := []struct {
		// t is the token type Lex is expected to return.
		t          int
		// line and col are the expected start position of the token.
		line, col  int
		// span is the expected end column minus the start column.
		span       int
		// v is the expected token value; it is nil for punctuation tokens.
		v          interface{}
		// comments lists the expected comment texts: first the trailing
		// comments of the previous token (trailCount of them), then the
		// leading comments of this token.
		comments   []string
		// trailCount is how many entries at the front of comments belong to
		// the previous token as trailing comments.
		trailCount int
	}{
		{t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}},
		{t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"},
		{t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"},
		{t: _NAME, line: 9, col: 1, span: 3, v: "foo"},
		{t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}},
		{t: _RPC, line: 14, col: 17, span: 3, v: "rpc"},
		{t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"},
		{t: '.', line: 15, col: 9, span: 1},
		{t: _NAME, line: 15, col: 10, span: 4, v: "type"},
		{t: '.', line: 16, col: 9, span: 1},
		{t: _NAME, line: 16, col: 10, span: 1, v: "f"},
		{t: '.', line: 16, col: 11, span: 1},
		{t: _NAME, line: 16, col: 12, span: 1, v: "q"},
		{t: '.', line: 16, col: 13, span: 1},
		{t: _NAME, line: 16, col: 14, span: 1, v: "n"},
		{t: _NAME, line: 17, col: 9, span: 4, v: "name"},
		{t: _NAME, line: 18, col: 9, span: 1, v: "f"},
		{t: '.', line: 18, col: 10, span: 1},
		{t: _NAME, line: 18, col: 11, span: 1, v: "q"},
		{t: '.', line: 18, col: 12, span: 1},
		{t: _NAME, line: 18, col: 13, span: 1, v: "n"},
		{t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01},
		{t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12},
		{t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5},
		{t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1},
		{t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)},
		{t: '-', line: 26, col: 9, span: 1, v: nil},
		{t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)},
		{t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234},
		{t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123},
		{t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)},
		{t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)},
		{t: '-', line: 31, col: 9, span: 1, v: nil},
		{t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)},
		{t: '-', line: 32, col: 9, span: 1, v: nil},
		{t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)},
		{t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102},
		{t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1},
		{t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10},
		{t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12},
		{t: '{', line: 38, col: 9, span: 1, v: nil},
		{t: '}', line: 38, col: 11, span: 1, v: nil},
		{t: '+', line: 38, col: 13, span: 1, v: nil},
		{t: '-', line: 38, col: 15, span: 1, v: nil},
		{t: ',', line: 38, col: 17, span: 1, v: nil},
		{t: ';', line: 38, col: 19, span: 1, v: nil},
		{t: '[', line: 40, col: 9, span: 1, v: nil},
		{t: _OPTION, line: 40, col: 10, span: 6, v: "option"},
		{t: '=', line: 40, col: 16, span: 1, v: nil},
		{t: _NAME, line: 40, col: 17, span: 3, v: "foo"},
		{t: ']', line: 40, col: 20, span: 1, v: nil},
		{t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"},
		{t: '=', line: 41, col: 16, span: 1, v: nil},
		{t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"},
		{t: ';', line: 41, col: 26, span: 1, v: nil},
		{t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}},
		{t: _NAME, line: 44, col: 14, span: 3, v: "g12"},
		{t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1},
		{t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234},
		{t: _FLOAT_LIT, line: 46, col: 15, span: 5, v: 0.5678},
		{t: '.', line: 46, col: 20, span: 1, v: nil},
		{t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12},
		{t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"},
		{t: '.', line: 49, col: 62, span: 1, v: nil},
		{t: '.', line: 49, col: 63, span: 1, v: nil},
		{t: '.', line: 49, col: 64, span: 1, v: nil},
		{t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3},
	}

	for i, exp := range expected {
		tok := l.Lex(&sym)
		// A zero token is treated as EOF, which must not occur before the
		// table is exhausted.
		if tok == 0 {
			t.Fatalf("lexer reported EOF but should have returned %v", exp)
		}
		// Pick the sym field that carries the node for this token type, and
		// the value to compare against exp.v.
		var n ast.Node
		var val interface{}
		switch tok {
		case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _NAME:
			n = sym.id
			val = sym.id.Val
		case _STRING_LIT:
			n = sym.s
			val = sym.s.Val
		case _INT_LIT:
			n = sym.i
			val = sym.i.Val
		case _FLOAT_LIT:
			n = sym.f
			val = sym.f.Val
		default:
			// Any other token (the punctuation rows in the table) is read
			// from sym.b and has no value.
			n = sym.b
			val = nil
		}
		testutil.Eq(t, exp.t, tok, "case %d: wrong token type (case %v)", i, exp.v)
		testutil.Eq(t, exp.v, val, "case %d: wrong token value", i)
		testutil.Eq(t, exp.line, n.Start().Line, "case %d: wrong line number", i)
		testutil.Eq(t, exp.col, n.Start().Col, "case %d: wrong column number", i)
		// Every expected token sits on a single line, so the end line must
		// equal the start line and the end column is start column plus span.
		testutil.Eq(t, exp.line, n.End().Line, "case %d: wrong end line number", i)
		testutil.Eq(t, exp.col+exp.span, n.End().Col, "case %d: wrong end column number", i)
		// Trailing comments are only checked when the row expects some; they
		// are read from the previous token's node.
		if exp.trailCount > 0 {
			testutil.Eq(t, exp.trailCount, len(prev.TrailingComments()), "case %d: wrong number of trailing comments", i)
		}
		// Whatever remains of exp.comments after the trailing ones must be
		// this token's leading comments.
		testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.LeadingComments()), "case %d: wrong number of comments", i)
		for ci := range exp.comments {
			var c ast.Comment
			if ci < exp.trailCount {
				c = prev.TrailingComments()[ci]
			} else {
				c = n.LeadingComments()[ci-exp.trailCount]
			}
			testutil.Eq(t, exp.comments[ci], c.Text, "case %d, comment #%d: unexpected text", i, ci+1)
		}
		prev = n
	}
	// With the table exhausted, the next call must report EOF (token 0).
	if tok := l.Lex(&sym); tok != 0 {
		t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok)
	}
	// Now we check the final state of the lexer for unattached comments and
	// final whitespace. Exactly one of the comments after the last token is
	// expected to be attached to that token as a trailing comment; the other
	// two, and the whitespace after them, are expected on the eof node.
	testutil.Eq(t, 1, len(prev.TrailingComments()), "last token: wrong number of trailing comments")
	finalComments := l.eof.LeadingComments()
	testutil.Eq(t, 2, len(finalComments), "wrong number of final remaining comments")
	testutil.Eq(t, "// comment attached to no tokens (upcoming token is EOF!)\n", finalComments[0].Text, "incorrect final comment text")
	testutil.Eq(t, "/* another comment followed by some final whitespace*/", finalComments[1].Text, "incorrect final comment text")
	testutil.Eq(t, "\n\n\t\n\t", l.eof.LeadingWhitespace(), "incorrect final whitespace")
}
   216  
   217  func TestLexerErrors(t *testing.T) {
   218  	testCases := []struct {
   219  		str    string
   220  		errMsg string
   221  	}{
   222  		{str: `0xffffffffffffffffffff`, errMsg: "value out of range"},
   223  		{str: `"foobar`, errMsg: "unexpected EOF"},
   224  		{str: `"foobar\J"`, errMsg: "invalid escape sequence"},
   225  		{str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"},
   226  		{str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"},
   227  		{str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"},
   228  		{str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"},
   229  		{str: "'foobar\nbaz'", errMsg: "encountered end-of-line"},
   230  		{str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"},
   231  		{str: `/* foobar`, errMsg: "unexpected EOF"},
   232  	}
   233  	for i, tc := range testCases {
   234  		l := newTestLexer(strings.NewReader(tc.str))
   235  		var sym protoSymType
   236  		tok := l.Lex(&sym)
   237  		testutil.Eq(t, _ERROR, tok)
   238  		testutil.Require(t, sym.err != nil)
   239  		testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error())
   240  	}
   241  }
   242  
   243  func newTestLexer(in io.Reader) *protoLex {
   244  	return newLexer(in, "test.proto", newErrorHandler(nil, nil))
   245  }