github.com/hoveychen/protoreflect@v1.4.7-0.20221103114119-0b4b3385ec76/desc/protoparse/lexer_test.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"strings"
     5  	"testing"
     6  
     7  	"github.com/hoveychen/protoreflect/internal/testutil"
     8  )
     9  
    10  func TestLexer(t *testing.T) {
    11  	l := newTestLexer(strings.NewReader(`
    12  	// comment
    13  
    14  	/*
    15  	 * block comment
    16  	 */ /* inline comment */
    17  
    18  	int32  "\032\x16\n\rfoobar\"zap"		'another\tstring\'s\t'
    19  foo
    20  
    21  	// another comment
    22  	// more and more...
    23  
    24  	service rpc message
    25  	.type
    26  	.f.q.n
    27  	name
    28  	f.q.n
    29  
    30  	.01
    31  	.01e12
    32  	.01e+5
    33  	.033e-1
    34  
    35  	12345
    36  	-12345
    37  	123.1234
    38  	0.123
    39  	012345
    40  	0x2134abcdef30
    41  	-0543
    42  	-0xff76
    43  	101.0102
    44  	202.0203e1
    45  	304.0304e-10
    46  	3.1234e+12
    47  
    48  	{ } + - , ;
    49  
    50  	[option=foo]
    51  	syntax = "proto2";
    52  
    53  	// some strange cases
    54  	1.543g12 /* trailing line comment */
    55  	000.000
    56  	0.1234.5678.
    57  	12e12
    58  
    59  	Random_identifier_with_numbers_0123456789_and_letters...
    60  	// this is a trailing comment
    61  	// that spans multiple lines
    62  	// over two in fact!
    63  	/*
    64  	 * this is a detached comment
    65  	 * with lots of extra words and stuff...
    66  	 */
    67  
    68  	// this is an attached leading comment
    69  	foo
    70  	`))
    71  
    72  	var prev node
    73  	var sym protoSymType
    74  	expected := []struct {
    75  		t          int
    76  		line, col  int
    77  		span       int
    78  		v          interface{}
    79  		comments   []string
    80  		trailCount int
    81  	}{
    82  		{t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}},
    83  		{t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"},
    84  		{t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"},
    85  		{t: _NAME, line: 9, col: 1, span: 3, v: "foo"},
    86  		{t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}},
    87  		{t: _RPC, line: 14, col: 17, span: 3, v: "rpc"},
    88  		{t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"},
    89  		{t: _TYPENAME, line: 15, col: 9, span: 5, v: ".type"},
    90  		{t: _TYPENAME, line: 16, col: 9, span: 6, v: ".f.q.n"},
    91  		{t: _NAME, line: 17, col: 9, span: 4, v: "name"},
    92  		{t: _FQNAME, line: 18, col: 9, span: 5, v: "f.q.n"},
    93  		{t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01},
    94  		{t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12},
    95  		{t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5},
    96  		{t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1},
    97  		{t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)},
    98  		{t: '-', line: 26, col: 9, span: 1, v: nil},
    99  		{t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)},
   100  		{t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234},
   101  		{t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123},
   102  		{t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)},
   103  		{t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)},
   104  		{t: '-', line: 31, col: 9, span: 1, v: nil},
   105  		{t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)},
   106  		{t: '-', line: 32, col: 9, span: 1, v: nil},
   107  		{t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)},
   108  		{t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102},
   109  		{t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1},
   110  		{t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10},
   111  		{t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12},
   112  		{t: '{', line: 38, col: 9, span: 1, v: nil},
   113  		{t: '}', line: 38, col: 11, span: 1, v: nil},
   114  		{t: '+', line: 38, col: 13, span: 1, v: nil},
   115  		{t: '-', line: 38, col: 15, span: 1, v: nil},
   116  		{t: ',', line: 38, col: 17, span: 1, v: nil},
   117  		{t: ';', line: 38, col: 19, span: 1, v: nil},
   118  		{t: '[', line: 40, col: 9, span: 1, v: nil},
   119  		{t: _OPTION, line: 40, col: 10, span: 6, v: "option"},
   120  		{t: '=', line: 40, col: 16, span: 1, v: nil},
   121  		{t: _NAME, line: 40, col: 17, span: 3, v: "foo"},
   122  		{t: ']', line: 40, col: 20, span: 1, v: nil},
   123  		{t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"},
   124  		{t: '=', line: 41, col: 16, span: 1, v: nil},
   125  		{t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"},
   126  		{t: ';', line: 41, col: 26, span: 1, v: nil},
   127  		{t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}},
   128  		{t: _NAME, line: 44, col: 14, span: 3, v: "g12"},
   129  		{t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1},
   130  		{t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234},
   131  		{t: _FLOAT_LIT, line: 46, col: 15, span: 5, v: 0.5678},
   132  		{t: '.', line: 46, col: 20, span: 1, v: nil},
   133  		{t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12},
   134  		{t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"},
   135  		{t: '.', line: 49, col: 62, span: 1, v: nil},
   136  		{t: '.', line: 49, col: 63, span: 1, v: nil},
   137  		{t: '.', line: 49, col: 64, span: 1, v: nil},
   138  		{t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3},
   139  	}
   140  
   141  	for i, exp := range expected {
   142  		tok := l.Lex(&sym)
   143  		if tok == 0 {
   144  			t.Fatalf("lexer reported EOF but should have returned %v", exp)
   145  		}
   146  		var n node
   147  		var val interface{}
   148  		switch tok {
   149  		case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _TYPENAME, _NAME, _FQNAME:
   150  			n = sym.id
   151  			val = sym.id.val
   152  		case _STRING_LIT:
   153  			n = sym.str
   154  			val = sym.str.val
   155  		case _INT_LIT:
   156  			n = sym.ui
   157  			val = sym.ui.val
   158  		case _FLOAT_LIT:
   159  			n = sym.f
   160  			val = sym.f.val
   161  		default:
   162  			n = sym.b
   163  			val = nil
   164  		}
   165  		testutil.Eq(t, exp.t, tok, "case %d: wrong token type (case %v)", i, exp.v)
   166  		testutil.Eq(t, exp.v, val, "case %d: wrong token value", i)
   167  		testutil.Eq(t, exp.line, n.start().Line, "case %d: wrong line number", i)
   168  		testutil.Eq(t, exp.col, n.start().Col, "case %d: wrong column number", i)
   169  		testutil.Eq(t, exp.line, n.end().Line, "case %d: wrong end line number", i)
   170  		testutil.Eq(t, exp.col+exp.span, n.end().Col, "case %d: wrong end column number", i)
   171  		if exp.trailCount > 0 {
   172  			testutil.Eq(t, exp.trailCount, len(prev.trailingComments()), "case %d: wrong number of trailing comments", i)
   173  		}
   174  		testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.leadingComments()), "case %d: wrong number of comments", i)
   175  		for ci := range exp.comments {
   176  			var c comment
   177  			if ci < exp.trailCount {
   178  				c = prev.trailingComments()[ci]
   179  			} else {
   180  				c = n.leadingComments()[ci-exp.trailCount]
   181  			}
   182  			testutil.Eq(t, exp.comments[ci], c.text, "case %d, comment #%d: unexpected text", i, ci+1)
   183  		}
   184  		prev = n
   185  	}
   186  	if tok := l.Lex(&sym); tok != 0 {
   187  		t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok)
   188  	}
   189  }
   190  
   191  func TestLexerErrors(t *testing.T) {
   192  	testCases := []struct {
   193  		str    string
   194  		errMsg string
   195  	}{
   196  		{str: `0xffffffffffffffffffff`, errMsg: "value out of range"},
   197  		{str: `"foobar`, errMsg: "unexpected EOF"},
   198  		{str: `"foobar\J"`, errMsg: "invalid escape sequence"},
   199  		{str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"},
   200  		{str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"},
   201  		{str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"},
   202  		{str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"},
   203  		{str: "'foobar\nbaz'", errMsg: "encountered end-of-line"},
   204  		{str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"},
   205  		{str: `/* foobar`, errMsg: "unexpected EOF"},
   206  	}
   207  	for i, tc := range testCases {
   208  		l := newTestLexer(strings.NewReader(tc.str))
   209  		var sym protoSymType
   210  		tok := l.Lex(&sym)
   211  		testutil.Eq(t, _ERROR, tok)
   212  		testutil.Require(t, sym.err != nil)
   213  		testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error())
   214  	}
   215  }