github.com/Khushbukela/protoreflect@v1.0.1/desc/protoparse/lexer_test.go (about) 1 package protoparse 2 3 import ( 4 "io" 5 "math" 6 "strings" 7 "testing" 8 9 "github.com/jhump/protoreflect/desc/protoparse/ast" 10 "github.com/jhump/protoreflect/internal/testutil" 11 ) 12 13 func TestLexer(t *testing.T) { 14 l := newTestLexer(strings.NewReader(` 15 // comment 16 17 /* 18 * block comment 19 */ /* inline comment */ 20 21 int32 "\032\x16\n\rfoobar\"zap" 'another\tstring\'s\t' 22 foo 23 24 // another comment 25 // more and more... 26 27 service rpc message 28 .type 29 .f.q.n 30 name 31 f.q.n 32 33 .01 34 .01e12 35 .01e+5 36 .033e-1 37 38 12345 39 -12345 40 123.1234 41 0.123 42 012345 43 0x2134abcdef30 44 -0543 45 -0xff76 46 101.0102 47 202.0203e1 48 304.0304e-10 49 3.1234e+12 50 51 { } + - , ; 52 53 [option=foo] 54 syntax = "proto2"; 55 56 // some strange cases 57 1.543 g12 /* trailing line comment */ 58 000.000 59 0.1234 .5678 . 60 12e12 1.2345e123412341234 61 62 Random_identifier_with_numbers_0123456789_and_letters... 63 // this is a trailing comment 64 // that spans multiple lines 65 // over two in fact! 66 /* 67 * this is a detached comment 68 * with lots of extra words and stuff... 69 */ 70 71 // this is an attached leading comment 72 foo 73 74 1.23e+20+20 75 // a trailing comment for last element 76 77 // comment attached to no tokens (upcoming token is EOF!) 78 /* another comment followed by some final whitespace*/ 79 80 81 `)) 82 83 var prev ast.Node 84 var sym protoSymType 85 expected := []struct { 86 t int 87 line, col int 88 span int 89 v interface{} 90 comments []string 91 trailCount int 92 }{ 93 {t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}}, 94 {t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"}, 95 {t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"}, 96 {t: _NAME, line: 9, col: 1, span: 3, v: "foo"}, 97 {t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}}, 98 {t: _RPC, line: 14, col: 17, span: 3, v: "rpc"}, 99 {t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"}, 100 {t: '.', line: 15, col: 9, span: 1}, 101 {t: _NAME, line: 15, col: 10, span: 4, v: "type"}, 102 {t: '.', line: 16, col: 9, span: 1}, 103 {t: _NAME, line: 16, col: 10, span: 1, v: "f"}, 104 {t: '.', line: 16, col: 11, span: 1}, 105 {t: _NAME, line: 16, col: 12, span: 1, v: "q"}, 106 {t: '.', line: 16, col: 13, span: 1}, 107 {t: _NAME, line: 16, col: 14, span: 1, v: "n"}, 108 {t: _NAME, line: 17, col: 9, span: 4, v: "name"}, 109 {t: _NAME, line: 18, col: 9, span: 1, v: "f"}, 110 {t: '.', line: 18, col: 10, span: 1}, 111 {t: _NAME, line: 18, col: 11, span: 1, v: "q"}, 112 {t: '.', line: 18, col: 12, span: 1}, 113 {t: _NAME, line: 18, col: 13, span: 1, v: "n"}, 114 {t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01}, 115 {t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12}, 116 {t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5}, 117 {t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1}, 118 {t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)}, 119 {t: '-', line: 26, col: 9, span: 1, v: nil}, 120 {t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)}, 121 {t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234}, 122 {t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123}, 123 {t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)}, 124 {t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)}, 125 {t: '-', line: 31, col: 9, span: 1, v: nil}, 126 {t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)}, 127 {t: '-', line: 32, col: 9, span: 1, v: nil}, 128 {t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)}, 129 {t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102}, 130 {t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1}, 131 {t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10}, 132 {t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12}, 133 {t: '{', line: 38, col: 9, span: 1, v: nil}, 134 {t: '}', line: 38, col: 11, span: 1, v: nil}, 135 {t: '+', line: 38, col: 13, span: 1, v: nil}, 136 {t: '-', line: 38, col: 15, span: 1, v: nil}, 137 {t: ',', line: 38, col: 17, span: 1, v: nil}, 138 {t: ';', line: 38, col: 19, span: 1, v: nil}, 139 {t: '[', line: 40, col: 9, span: 1, v: nil}, 140 {t: _OPTION, line: 40, col: 10, span: 6, v: "option"}, 141 {t: '=', line: 40, col: 16, span: 1, v: nil}, 142 {t: _NAME, line: 40, col: 17, span: 3, v: "foo"}, 143 {t: ']', line: 40, col: 20, span: 1, v: nil}, 144 {t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"}, 145 {t: '=', line: 41, col: 16, span: 1, v: nil}, 146 {t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"}, 147 {t: ';', line: 41, col: 26, span: 1, v: nil}, 148 {t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}}, 149 {t: _NAME, line: 44, col: 15, span: 3, v: "g12"}, 150 {t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1}, 151 {t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234}, 152 {t: _FLOAT_LIT, line: 46, col: 16, span: 5, v: 0.5678}, 153 {t: '.', line: 46, col: 22, span: 1, v: nil}, 154 {t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12}, 155 {t: _FLOAT_LIT, line: 47, col: 15, span: 19, v: math.Inf(1)}, 156 {t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"}, 157 {t: '.', line: 49, col: 62, span: 1, v: nil}, 158 {t: '.', line: 49, col: 63, span: 1, v: nil}, 159 {t: '.', line: 49, col: 64, span: 1, v: nil}, 160 {t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3}, 161 {t: _FLOAT_LIT, line: 61, col: 9, span: 8, v: 1.23e+20}, 162 {t: '+', line: 61, col: 17, span: 1, v: nil}, 163 {t: _INT_LIT, line: 61, col: 18, span: 2, v: uint64(20)}, 164 } 165 166 for i, exp := range expected { 167 tok := l.Lex(&sym) 168 if tok == 0 { 169 t.Fatalf("lexer reported EOF but should have returned %v", exp) 170 } 171 var n ast.Node 172 var val interface{} 173 switch tok { 174 case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _NAME: 175 n = sym.id 176 val = sym.id.Val 177 case _STRING_LIT: 178 n = sym.s 179 val = sym.s.Val 180 case _INT_LIT: 181 n = sym.i 182 val = sym.i.Val 183 case _FLOAT_LIT: 184 n = sym.f 185 val = sym.f.Val 186 case _ERROR: 187 val = sym.err 188 default: 189 n = sym.b 190 val = nil 191 } 192 testutil.Eq(t, exp.t, tok, "case %d: wrong token type (expecting value %v, got %v)", i, exp.v, val) 193 testutil.Eq(t, exp.v, val, "case %d: wrong token value", i) 194 testutil.Eq(t, exp.line, n.Start().Line, "case %d: wrong line number", i) 195 testutil.Eq(t, exp.col, n.Start().Col, "case %d: wrong column number (on line %d)", i, exp.line) 196 testutil.Eq(t, exp.line, n.End().Line, "case %d: wrong end line number", i) 197 testutil.Eq(t, exp.col+exp.span, n.End().Col, "case %d: wrong end column number", i) 198 if prev != nil { 199 testutil.Eq(t, exp.trailCount, len(prev.TrailingComments()), "case %d: wrong number of trailing comments", i) 200 } 201 testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.LeadingComments()), "case %d: wrong number of comments", i) 202 for ci := range exp.comments { 203 var c ast.Comment 204 if ci < exp.trailCount { 205 c = prev.TrailingComments()[ci] 206 } else { 207 c = n.LeadingComments()[ci-exp.trailCount] 208 } 209 testutil.Eq(t, exp.comments[ci], c.Text, "case %d, comment #%d: unexpected text", i, ci+1) 210 } 211 prev = n 212 } 213 if tok := l.Lex(&sym); tok != 0 { 214 t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok) 215 } 216 // Now we check final state of lexer for unattached comments and final whitespace 217 // One of the final comments get associated as trailing comment for final token 218 testutil.Eq(t, 1, len(prev.TrailingComments()), "last token: wrong number of trailing comments") 219 finalComments := l.eof.LeadingComments() 220 testutil.Eq(t, 2, len(finalComments), "wrong number of final remaining comments") 221 testutil.Eq(t, "// comment attached to no tokens (upcoming token is EOF!)\n", finalComments[0].Text, "incorrect final comment text") 222 testutil.Eq(t, "/* another comment followed by some final whitespace*/", finalComments[1].Text, "incorrect final comment text") 223 testutil.Eq(t, "\n\n\t\n\t", l.eof.LeadingWhitespace(), "incorrect final whitespace") 224 } 225 226 func TestLexerErrors(t *testing.T) { 227 testCases := []struct { 228 str string 229 errMsg string 230 }{ 231 {str: `0xffffffffffffffffffff`, errMsg: "value out of range"}, 232 {str: `"foobar`, errMsg: "unexpected EOF"}, 233 {str: `"foobar\J"`, errMsg: "invalid escape sequence"}, 234 {str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"}, 235 {str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"}, 236 {str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"}, 237 {str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"}, 238 {str: "'foobar\nbaz'", errMsg: "encountered end-of-line"}, 239 {str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"}, 240 {str: `1.543g12`, errMsg: "invalid syntax"}, 241 {str: `0.1234.5678.`, errMsg: "invalid syntax"}, 242 {str: `0x987.345aaf`, errMsg: "invalid syntax"}, 243 {str: `0.987.345`, errMsg: "invalid syntax"}, 244 {str: `0.987e34e-20`, errMsg: "invalid syntax"}, 245 {str: `0.987e-345e20`, errMsg: "invalid syntax"}, 246 {str: `.987to123`, errMsg: "invalid syntax"}, 247 {str: `0b0111`, errMsg: "invalid syntax"}, 248 {str: `0o765432`, errMsg: "invalid syntax"}, 249 {str: `1_000_000`, errMsg: "invalid syntax"}, 250 {str: `1_000.000_001e6`, errMsg: "invalid syntax"}, 251 {str: `0X1F_FFP-16`, errMsg: "invalid syntax"}, 252 {str: `/* foobar`, errMsg: "unexpected EOF"}, 253 {str: "\x00", errMsg: "invalid control character"}, 254 {str: "\x03", errMsg: "invalid control character"}, 255 {str: "\x1B", errMsg: "invalid control character"}, 256 {str: "\x7F", errMsg: "invalid control character"}, 257 {str: "#", errMsg: "invalid character"}, 258 {str: "?", errMsg: "invalid character"}, 259 {str: "^", errMsg: "invalid character"}, 260 {str: "\uAAAA", errMsg: "invalid character"}, 261 {str: "\U0010FFFF", errMsg: "invalid character"}, 262 {str: "// foo \x00", errMsg: "invalid control character"}, 263 {str: "/* foo \x00", errMsg: "invalid control character"}, 264 {str: "09", errMsg: "invalid syntax in octal integer value: 09"}, 265 {str: "0f", errMsg: "invalid syntax in octal integer value: 0f"}, 266 } 267 for i, tc := range testCases { 268 l := newTestLexer(strings.NewReader(tc.str)) 269 var sym protoSymType 270 tok := l.Lex(&sym) 271 testutil.Eq(t, _ERROR, tok) 272 testutil.Require(t, sym.err != nil) 273 testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error()) 274 t.Logf("case %d: %v", i, sym.err) 275 } 276 } 277 278 func newTestLexer(in io.Reader) *protoLex { 279 return newLexer(in, "test.proto", newErrorHandler(nil, nil)) 280 }