github.com/syumai/protoreflect@v1.7.1-0.20200810020253-2ac7e3b3a321/desc/protoparse/lexer_test.go (about)

     1  package protoparse
     2  
     3  import (
     4  	"io"
     5  	"strings"
     6  	"testing"
     7  
     8  	"github.com/syumai/protoreflect/internal/testutil"
     9  )
    10  
    11  func TestLexer(t *testing.T) {
    12  	l := newTestLexer(strings.NewReader(`
    13  	// comment
    14  
    15  	/*
    16  	 * block comment
    17  	 */ /* inline comment */
    18  
    19  	int32  "\032\x16\n\rfoobar\"zap"		'another\tstring\'s\t'
    20  foo
    21  
    22  	// another comment
    23  	// more and more...
    24  
    25  	service rpc message
    26  	.type
    27  	.f.q.n
    28  	name
    29  	f.q.n
    30  
    31  	.01
    32  	.01e12
    33  	.01e+5
    34  	.033e-1
    35  
    36  	12345
    37  	-12345
    38  	123.1234
    39  	0.123
    40  	012345
    41  	0x2134abcdef30
    42  	-0543
    43  	-0xff76
    44  	101.0102
    45  	202.0203e1
    46  	304.0304e-10
    47  	3.1234e+12
    48  
    49  	{ } + - , ;
    50  
    51  	[option=foo]
    52  	syntax = "proto2";
    53  
    54  	// some strange cases
    55  	1.543g12 /* trailing line comment */
    56  	000.000
    57  	0.1234.5678.
    58  	12e12
    59  
    60  	Random_identifier_with_numbers_0123456789_and_letters...
    61  	// this is a trailing comment
    62  	// that spans multiple lines
    63  	// over two in fact!
    64  	/*
    65  	 * this is a detached comment
    66  	 * with lots of extra words and stuff...
    67  	 */
    68  
    69  	// this is an attached leading comment
    70  	foo
    71  	`))
    72  
    73  	var prev node
    74  	var sym protoSymType
    75  	expected := []struct {
    76  		t          int
    77  		line, col  int
    78  		span       int
    79  		v          interface{}
    80  		comments   []string
    81  		trailCount int
    82  	}{
    83  		{t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}},
    84  		{t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"},
    85  		{t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"},
    86  		{t: _NAME, line: 9, col: 1, span: 3, v: "foo"},
    87  		{t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}},
    88  		{t: _RPC, line: 14, col: 17, span: 3, v: "rpc"},
    89  		{t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"},
    90  		{t: '.', line: 15, col: 9, span: 1},
    91  		{t: _NAME, line: 15, col: 10, span: 4, v: "type"},
    92  		{t: '.', line: 16, col: 9, span: 1},
    93  		{t: _NAME, line: 16, col: 10, span: 1, v: "f"},
    94  		{t: '.', line: 16, col: 11, span: 1},
    95  		{t: _NAME, line: 16, col: 12, span: 1, v: "q"},
    96  		{t: '.', line: 16, col: 13, span: 1},
    97  		{t: _NAME, line: 16, col: 14, span: 1, v: "n"},
    98  		{t: _NAME, line: 17, col: 9, span: 4, v: "name"},
    99  		{t: _NAME, line: 18, col: 9, span: 1, v: "f"},
   100  		{t: '.', line: 18, col: 10, span: 1},
   101  		{t: _NAME, line: 18, col: 11, span: 1, v: "q"},
   102  		{t: '.', line: 18, col: 12, span: 1},
   103  		{t: _NAME, line: 18, col: 13, span: 1, v: "n"},
   104  		{t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01},
   105  		{t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12},
   106  		{t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5},
   107  		{t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1},
   108  		{t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)},
   109  		{t: '-', line: 26, col: 9, span: 1, v: nil},
   110  		{t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)},
   111  		{t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234},
   112  		{t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123},
   113  		{t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)},
   114  		{t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)},
   115  		{t: '-', line: 31, col: 9, span: 1, v: nil},
   116  		{t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)},
   117  		{t: '-', line: 32, col: 9, span: 1, v: nil},
   118  		{t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)},
   119  		{t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102},
   120  		{t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1},
   121  		{t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10},
   122  		{t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12},
   123  		{t: '{', line: 38, col: 9, span: 1, v: nil},
   124  		{t: '}', line: 38, col: 11, span: 1, v: nil},
   125  		{t: '+', line: 38, col: 13, span: 1, v: nil},
   126  		{t: '-', line: 38, col: 15, span: 1, v: nil},
   127  		{t: ',', line: 38, col: 17, span: 1, v: nil},
   128  		{t: ';', line: 38, col: 19, span: 1, v: nil},
   129  		{t: '[', line: 40, col: 9, span: 1, v: nil},
   130  		{t: _OPTION, line: 40, col: 10, span: 6, v: "option"},
   131  		{t: '=', line: 40, col: 16, span: 1, v: nil},
   132  		{t: _NAME, line: 40, col: 17, span: 3, v: "foo"},
   133  		{t: ']', line: 40, col: 20, span: 1, v: nil},
   134  		{t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"},
   135  		{t: '=', line: 41, col: 16, span: 1, v: nil},
   136  		{t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"},
   137  		{t: ';', line: 41, col: 26, span: 1, v: nil},
   138  		{t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}},
   139  		{t: _NAME, line: 44, col: 14, span: 3, v: "g12"},
   140  		{t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1},
   141  		{t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234},
   142  		{t: _FLOAT_LIT, line: 46, col: 15, span: 5, v: 0.5678},
   143  		{t: '.', line: 46, col: 20, span: 1, v: nil},
   144  		{t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12},
   145  		{t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"},
   146  		{t: '.', line: 49, col: 62, span: 1, v: nil},
   147  		{t: '.', line: 49, col: 63, span: 1, v: nil},
   148  		{t: '.', line: 49, col: 64, span: 1, v: nil},
   149  		{t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3},
   150  	}
   151  
   152  	for i, exp := range expected {
   153  		tok := l.Lex(&sym)
   154  		if tok == 0 {
   155  			t.Fatalf("lexer reported EOF but should have returned %v", exp)
   156  		}
   157  		var n node
   158  		var val interface{}
   159  		switch tok {
   160  		case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _NAME:
   161  			n = sym.id
   162  			val = sym.id.val
   163  		case _STRING_LIT:
   164  			n = sym.s
   165  			val = sym.s.val
   166  		case _INT_LIT:
   167  			n = sym.i
   168  			val = sym.i.val
   169  		case _FLOAT_LIT:
   170  			n = sym.f
   171  			val = sym.f.val
   172  		default:
   173  			n = sym.b
   174  			val = nil
   175  		}
   176  		testutil.Eq(t, exp.t, tok, "case %d: wrong token type (case %v)", i, exp.v)
   177  		testutil.Eq(t, exp.v, val, "case %d: wrong token value", i)
   178  		testutil.Eq(t, exp.line, n.start().Line, "case %d: wrong line number", i)
   179  		testutil.Eq(t, exp.col, n.start().Col, "case %d: wrong column number", i)
   180  		testutil.Eq(t, exp.line, n.end().Line, "case %d: wrong end line number", i)
   181  		testutil.Eq(t, exp.col+exp.span, n.end().Col, "case %d: wrong end column number", i)
   182  		if exp.trailCount > 0 {
   183  			testutil.Eq(t, exp.trailCount, len(prev.trailingComments()), "case %d: wrong number of trailing comments", i)
   184  		}
   185  		testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.leadingComments()), "case %d: wrong number of comments", i)
   186  		for ci := range exp.comments {
   187  			var c comment
   188  			if ci < exp.trailCount {
   189  				c = prev.trailingComments()[ci]
   190  			} else {
   191  				c = n.leadingComments()[ci-exp.trailCount]
   192  			}
   193  			testutil.Eq(t, exp.comments[ci], c.text, "case %d, comment #%d: unexpected text", i, ci+1)
   194  		}
   195  		prev = n
   196  	}
   197  	if tok := l.Lex(&sym); tok != 0 {
   198  		t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok)
   199  	}
   200  }
   201  
   202  func TestLexerErrors(t *testing.T) {
   203  	testCases := []struct {
   204  		str    string
   205  		errMsg string
   206  	}{
   207  		{str: `0xffffffffffffffffffff`, errMsg: "value out of range"},
   208  		{str: `"foobar`, errMsg: "unexpected EOF"},
   209  		{str: `"foobar\J"`, errMsg: "invalid escape sequence"},
   210  		{str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"},
   211  		{str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"},
   212  		{str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"},
   213  		{str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"},
   214  		{str: "'foobar\nbaz'", errMsg: "encountered end-of-line"},
   215  		{str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"},
   216  		{str: `/* foobar`, errMsg: "unexpected EOF"},
   217  	}
   218  	for i, tc := range testCases {
   219  		l := newTestLexer(strings.NewReader(tc.str))
   220  		var sym protoSymType
   221  		tok := l.Lex(&sym)
   222  		testutil.Eq(t, _ERROR, tok)
   223  		testutil.Require(t, sym.err != nil)
   224  		testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error())
   225  	}
   226  }
   227  
   228  func newTestLexer(in io.Reader) *protoLex {
   229  	return newLexer(in, "test.proto", newErrorHandler(nil, nil))
   230  }