github.com/hoveychen/protoreflect@v1.4.7-0.20221103114119-0b4b3385ec76/desc/protoparse/lexer_test.go (about) 1 package protoparse 2 3 import ( 4 "strings" 5 "testing" 6 7 "github.com/hoveychen/protoreflect/internal/testutil" 8 ) 9 10 func TestLexer(t *testing.T) { 11 l := newTestLexer(strings.NewReader(` 12 // comment 13 14 /* 15 * block comment 16 */ /* inline comment */ 17 18 int32 "\032\x16\n\rfoobar\"zap" 'another\tstring\'s\t' 19 foo 20 21 // another comment 22 // more and more... 23 24 service rpc message 25 .type 26 .f.q.n 27 name 28 f.q.n 29 30 .01 31 .01e12 32 .01e+5 33 .033e-1 34 35 12345 36 -12345 37 123.1234 38 0.123 39 012345 40 0x2134abcdef30 41 -0543 42 -0xff76 43 101.0102 44 202.0203e1 45 304.0304e-10 46 3.1234e+12 47 48 { } + - , ; 49 50 [option=foo] 51 syntax = "proto2"; 52 53 // some strange cases 54 1.543g12 /* trailing line comment */ 55 000.000 56 0.1234.5678. 57 12e12 58 59 Random_identifier_with_numbers_0123456789_and_letters... 60 // this is a trailing comment 61 // that spans multiple lines 62 // over two in fact! 63 /* 64 * this is a detached comment 65 * with lots of extra words and stuff... 66 */ 67 68 // this is an attached leading comment 69 foo 70 `)) 71 72 var prev node 73 var sym protoSymType 74 expected := []struct { 75 t int 76 line, col int 77 span int 78 v interface{} 79 comments []string 80 trailCount int 81 }{ 82 {t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}}, 83 {t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"}, 84 {t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"}, 85 {t: _NAME, line: 9, col: 1, span: 3, v: "foo"}, 86 {t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}}, 87 {t: _RPC, line: 14, col: 17, span: 3, v: "rpc"}, 88 {t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"}, 89 {t: _TYPENAME, line: 15, col: 9, span: 5, v: ".type"}, 90 {t: _TYPENAME, line: 16, col: 9, span: 6, v: ".f.q.n"}, 91 {t: _NAME, line: 17, col: 9, span: 4, v: "name"}, 92 {t: _FQNAME, line: 18, col: 9, span: 5, v: "f.q.n"}, 93 {t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01}, 94 {t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12}, 95 {t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5}, 96 {t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1}, 97 {t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)}, 98 {t: '-', line: 26, col: 9, span: 1, v: nil}, 99 {t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)}, 100 {t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234}, 101 {t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123}, 102 {t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)}, 103 {t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)}, 104 {t: '-', line: 31, col: 9, span: 1, v: nil}, 105 {t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)}, 106 {t: '-', line: 32, col: 9, span: 1, v: nil}, 107 {t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)}, 108 {t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102}, 109 {t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1}, 110 {t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10}, 111 {t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12}, 112 {t: '{', line: 38, col: 9, span: 1, v: nil}, 113 {t: '}', line: 38, col: 11, span: 1, v: nil}, 114 {t: '+', line: 38, col: 13, span: 1, v: nil}, 115 {t: '-', line: 38, col: 15, span: 1, v: nil}, 116 {t: ',', line: 38, col: 17, span: 1, v: nil}, 117 {t: ';', line: 38, col: 19, span: 1, v: nil}, 118 {t: '[', line: 40, col: 9, span: 1, v: nil}, 119 {t: _OPTION, line: 40, col: 10, span: 6, v: "option"}, 120 {t: '=', line: 40, col: 16, span: 1, v: nil}, 121 {t: _NAME, line: 40, col: 17, span: 3, v: "foo"}, 122 {t: ']', line: 40, col: 20, span: 1, v: nil}, 123 {t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"}, 124 {t: '=', line: 41, col: 16, span: 1, v: nil}, 125 {t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"}, 126 {t: ';', line: 41, col: 26, span: 1, v: nil}, 127 {t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}}, 128 {t: _NAME, line: 44, col: 14, span: 3, v: "g12"}, 129 {t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1}, 130 {t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234}, 131 {t: _FLOAT_LIT, line: 46, col: 15, span: 5, v: 0.5678}, 132 {t: '.', line: 46, col: 20, span: 1, v: nil}, 133 {t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12}, 134 {t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"}, 135 {t: '.', line: 49, col: 62, span: 1, v: nil}, 136 {t: '.', line: 49, col: 63, span: 1, v: nil}, 137 {t: '.', line: 49, col: 64, span: 1, v: nil}, 138 {t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3}, 139 } 140 141 for i, exp := range expected { 142 tok := l.Lex(&sym) 143 if tok == 0 { 144 t.Fatalf("lexer reported EOF but should have returned %v", exp) 145 } 146 var n node 147 var val interface{} 148 switch tok { 149 case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _TYPENAME, _NAME, _FQNAME: 150 n = sym.id 151 val = sym.id.val 152 case _STRING_LIT: 153 n = sym.str 154 val = sym.str.val 155 case _INT_LIT: 156 n = sym.ui 157 val = sym.ui.val 158 case _FLOAT_LIT: 159 n = sym.f 160 val = sym.f.val 161 default: 162 n = sym.b 163 val = nil 164 } 165 testutil.Eq(t, exp.t, tok, "case %d: wrong token type (case %v)", i, exp.v) 166 testutil.Eq(t, exp.v, val, "case %d: wrong token value", i) 167 testutil.Eq(t, exp.line, n.start().Line, "case %d: wrong line number", i) 168 testutil.Eq(t, exp.col, n.start().Col, "case %d: wrong column number", i) 169 testutil.Eq(t, exp.line, n.end().Line, "case %d: wrong end line number", i) 170 testutil.Eq(t, exp.col+exp.span, n.end().Col, "case %d: wrong end column number", i) 171 if exp.trailCount > 0 { 172 testutil.Eq(t, exp.trailCount, len(prev.trailingComments()), "case %d: wrong number of trailing comments", i) 173 } 174 testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.leadingComments()), "case %d: wrong number of comments", i) 175 for ci := range exp.comments { 176 var c comment 177 if ci < exp.trailCount { 178 c = prev.trailingComments()[ci] 179 } else { 180 c = n.leadingComments()[ci-exp.trailCount] 181 } 182 testutil.Eq(t, exp.comments[ci], c.text, "case %d, comment #%d: unexpected text", i, ci+1) 183 } 184 prev = n 185 } 186 if tok := l.Lex(&sym); tok != 0 { 187 t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok) 188 } 189 } 190 191 func TestLexerErrors(t *testing.T) { 192 testCases := []struct { 193 str string 194 errMsg string 195 }{ 196 {str: `0xffffffffffffffffffff`, errMsg: "value out of range"}, 197 {str: `"foobar`, errMsg: "unexpected EOF"}, 198 {str: `"foobar\J"`, errMsg: "invalid escape sequence"}, 199 {str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"}, 200 {str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"}, 201 {str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"}, 202 {str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"}, 203 {str: "'foobar\nbaz'", errMsg: "encountered end-of-line"}, 204 {str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"}, 205 {str: `/* foobar`, errMsg: "unexpected EOF"}, 206 } 207 for i, tc := range testCases { 208 l := newTestLexer(strings.NewReader(tc.str)) 209 var sym protoSymType 210 tok := l.Lex(&sym) 211 testutil.Eq(t, _ERROR, tok) 212 testutil.Require(t, sym.err != nil) 213 testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error()) 214 } 215 }