github.com/bakjos/protoreflect@v1.9.2/desc/protoparse/lexer_test.go (about) 1 package protoparse 2 3 import ( 4 "io" 5 "strings" 6 "testing" 7 8 "github.com/bakjos/protoreflect/desc/protoparse/ast" 9 "github.com/bakjos/protoreflect/internal/testutil" 10 ) 11 12 func TestLexer(t *testing.T) { 13 l := newTestLexer(strings.NewReader(` 14 // comment 15 16 /* 17 * block comment 18 */ /* inline comment */ 19 20 int32 "\032\x16\n\rfoobar\"zap" 'another\tstring\'s\t' 21 foo 22 23 // another comment 24 // more and more... 25 26 service rpc message 27 .type 28 .f.q.n 29 name 30 f.q.n 31 32 .01 33 .01e12 34 .01e+5 35 .033e-1 36 37 12345 38 -12345 39 123.1234 40 0.123 41 012345 42 0x2134abcdef30 43 -0543 44 -0xff76 45 101.0102 46 202.0203e1 47 304.0304e-10 48 3.1234e+12 49 50 { } + - , ; 51 52 [option=foo] 53 syntax = "proto2"; 54 55 // some strange cases 56 1.543g12 /* trailing line comment */ 57 000.000 58 0.1234.5678. 59 12e12 60 61 Random_identifier_with_numbers_0123456789_and_letters... 62 // this is a trailing comment 63 // that spans multiple lines 64 // over two in fact! 65 /* 66 * this is a detached comment 67 * with lots of extra words and stuff... 68 */ 69 70 // this is an attached leading comment 71 foo 72 // a trailing comment for last element 73 74 // comment attached to no tokens (upcoming token is EOF!) 75 /* another comment followed by some final whitespace*/ 76 77 78 `)) 79 80 var prev ast.Node 81 var sym protoSymType 82 expected := []struct { 83 t int 84 line, col int 85 span int 86 v interface{} 87 comments []string 88 trailCount int 89 }{ 90 {t: _INT32, line: 8, col: 9, span: 5, v: "int32", comments: []string{"// comment\n", "/*\n\t * block comment\n\t */", "/* inline comment */"}}, 91 {t: _STRING_LIT, line: 8, col: 16, span: 25, v: "\032\x16\n\rfoobar\"zap"}, 92 {t: _STRING_LIT, line: 8, col: 57, span: 22, v: "another\tstring's\t"}, 93 {t: _NAME, line: 9, col: 1, span: 3, v: "foo"}, 94 {t: _SERVICE, line: 14, col: 9, span: 7, v: "service", comments: []string{"// another comment\n", "// more and more...\n"}}, 95 {t: _RPC, line: 14, col: 17, span: 3, v: "rpc"}, 96 {t: _MESSAGE, line: 14, col: 21, span: 7, v: "message"}, 97 {t: '.', line: 15, col: 9, span: 1}, 98 {t: _NAME, line: 15, col: 10, span: 4, v: "type"}, 99 {t: '.', line: 16, col: 9, span: 1}, 100 {t: _NAME, line: 16, col: 10, span: 1, v: "f"}, 101 {t: '.', line: 16, col: 11, span: 1}, 102 {t: _NAME, line: 16, col: 12, span: 1, v: "q"}, 103 {t: '.', line: 16, col: 13, span: 1}, 104 {t: _NAME, line: 16, col: 14, span: 1, v: "n"}, 105 {t: _NAME, line: 17, col: 9, span: 4, v: "name"}, 106 {t: _NAME, line: 18, col: 9, span: 1, v: "f"}, 107 {t: '.', line: 18, col: 10, span: 1}, 108 {t: _NAME, line: 18, col: 11, span: 1, v: "q"}, 109 {t: '.', line: 18, col: 12, span: 1}, 110 {t: _NAME, line: 18, col: 13, span: 1, v: "n"}, 111 {t: _FLOAT_LIT, line: 20, col: 9, span: 3, v: 0.01}, 112 {t: _FLOAT_LIT, line: 21, col: 9, span: 6, v: 0.01e12}, 113 {t: _FLOAT_LIT, line: 22, col: 9, span: 6, v: 0.01e5}, 114 {t: _FLOAT_LIT, line: 23, col: 9, span: 7, v: 0.033e-1}, 115 {t: _INT_LIT, line: 25, col: 9, span: 5, v: uint64(12345)}, 116 {t: '-', line: 26, col: 9, span: 1, v: nil}, 117 {t: _INT_LIT, line: 26, col: 10, span: 5, v: uint64(12345)}, 118 {t: _FLOAT_LIT, line: 27, col: 9, span: 8, v: 123.1234}, 119 {t: _FLOAT_LIT, line: 28, col: 9, span: 5, v: 0.123}, 120 {t: _INT_LIT, line: 29, col: 9, span: 6, v: uint64(012345)}, 121 {t: _INT_LIT, line: 30, col: 9, span: 14, v: uint64(0x2134abcdef30)}, 122 {t: '-', line: 31, col: 9, span: 1, v: nil}, 123 {t: _INT_LIT, line: 31, col: 10, span: 4, v: uint64(0543)}, 124 {t: '-', line: 32, col: 9, span: 1, v: nil}, 125 {t: _INT_LIT, line: 32, col: 10, span: 6, v: uint64(0xff76)}, 126 {t: _FLOAT_LIT, line: 33, col: 9, span: 8, v: 101.0102}, 127 {t: _FLOAT_LIT, line: 34, col: 9, span: 10, v: 202.0203e1}, 128 {t: _FLOAT_LIT, line: 35, col: 9, span: 12, v: 304.0304e-10}, 129 {t: _FLOAT_LIT, line: 36, col: 9, span: 10, v: 3.1234e+12}, 130 {t: '{', line: 38, col: 9, span: 1, v: nil}, 131 {t: '}', line: 38, col: 11, span: 1, v: nil}, 132 {t: '+', line: 38, col: 13, span: 1, v: nil}, 133 {t: '-', line: 38, col: 15, span: 1, v: nil}, 134 {t: ',', line: 38, col: 17, span: 1, v: nil}, 135 {t: ';', line: 38, col: 19, span: 1, v: nil}, 136 {t: '[', line: 40, col: 9, span: 1, v: nil}, 137 {t: _OPTION, line: 40, col: 10, span: 6, v: "option"}, 138 {t: '=', line: 40, col: 16, span: 1, v: nil}, 139 {t: _NAME, line: 40, col: 17, span: 3, v: "foo"}, 140 {t: ']', line: 40, col: 20, span: 1, v: nil}, 141 {t: _SYNTAX, line: 41, col: 9, span: 6, v: "syntax"}, 142 {t: '=', line: 41, col: 16, span: 1, v: nil}, 143 {t: _STRING_LIT, line: 41, col: 18, span: 8, v: "proto2"}, 144 {t: ';', line: 41, col: 26, span: 1, v: nil}, 145 {t: _FLOAT_LIT, line: 44, col: 9, span: 5, v: 1.543, comments: []string{"// some strange cases\n"}}, 146 {t: _NAME, line: 44, col: 14, span: 3, v: "g12"}, 147 {t: _FLOAT_LIT, line: 45, col: 9, span: 7, v: 0.0, comments: []string{"/* trailing line comment */"}, trailCount: 1}, 148 {t: _FLOAT_LIT, line: 46, col: 9, span: 6, v: 0.1234}, 149 {t: _FLOAT_LIT, line: 46, col: 15, span: 5, v: 0.5678}, 150 {t: '.', line: 46, col: 20, span: 1, v: nil}, 151 {t: _FLOAT_LIT, line: 47, col: 9, span: 5, v: 12e12}, 152 {t: _NAME, line: 49, col: 9, span: 53, v: "Random_identifier_with_numbers_0123456789_and_letters"}, 153 {t: '.', line: 49, col: 62, span: 1, v: nil}, 154 {t: '.', line: 49, col: 63, span: 1, v: nil}, 155 {t: '.', line: 49, col: 64, span: 1, v: nil}, 156 {t: _NAME, line: 59, col: 9, span: 3, v: "foo", comments: []string{"// this is a trailing comment\n", "// that spans multiple lines\n", "// over two in fact!\n", "/*\n\t * this is a detached comment\n\t * with lots of extra words and stuff...\n\t */", "// this is an attached leading comment\n"}, trailCount: 3}, 157 } 158 159 for i, exp := range expected { 160 tok := l.Lex(&sym) 161 if tok == 0 { 162 t.Fatalf("lexer reported EOF but should have returned %v", exp) 163 } 164 var n ast.Node 165 var val interface{} 166 switch tok { 167 case _SYNTAX, _OPTION, _INT32, _SERVICE, _RPC, _MESSAGE, _NAME: 168 n = sym.id 169 val = sym.id.Val 170 case _STRING_LIT: 171 n = sym.s 172 val = sym.s.Val 173 case _INT_LIT: 174 n = sym.i 175 val = sym.i.Val 176 case _FLOAT_LIT: 177 n = sym.f 178 val = sym.f.Val 179 default: 180 n = sym.b 181 val = nil 182 } 183 testutil.Eq(t, exp.t, tok, "case %d: wrong token type (case %v)", i, exp.v) 184 testutil.Eq(t, exp.v, val, "case %d: wrong token value", i) 185 testutil.Eq(t, exp.line, n.Start().Line, "case %d: wrong line number", i) 186 testutil.Eq(t, exp.col, n.Start().Col, "case %d: wrong column number", i) 187 testutil.Eq(t, exp.line, n.End().Line, "case %d: wrong end line number", i) 188 testutil.Eq(t, exp.col+exp.span, n.End().Col, "case %d: wrong end column number", i) 189 if exp.trailCount > 0 { 190 testutil.Eq(t, exp.trailCount, len(prev.TrailingComments()), "case %d: wrong number of trailing comments", i) 191 } 192 testutil.Eq(t, len(exp.comments)-exp.trailCount, len(n.LeadingComments()), "case %d: wrong number of comments", i) 193 for ci := range exp.comments { 194 var c ast.Comment 195 if ci < exp.trailCount { 196 c = prev.TrailingComments()[ci] 197 } else { 198 c = n.LeadingComments()[ci-exp.trailCount] 199 } 200 testutil.Eq(t, exp.comments[ci], c.Text, "case %d, comment #%d: unexpected text", i, ci+1) 201 } 202 prev = n 203 } 204 if tok := l.Lex(&sym); tok != 0 { 205 t.Fatalf("lexer reported symbol after what should have been EOF: %d", tok) 206 } 207 // Now we check final state of lexer for unattached comments and final whitespace 208 // One of the final comments get associated as trailing comment for final token 209 testutil.Eq(t, 1, len(prev.TrailingComments()), "last token: wrong number of trailing comments") 210 finalComments := l.eof.LeadingComments() 211 testutil.Eq(t, 2, len(finalComments), "wrong number of final remaining comments") 212 testutil.Eq(t, "// comment attached to no tokens (upcoming token is EOF!)\n", finalComments[0].Text, "incorrect final comment text") 213 testutil.Eq(t, "/* another comment followed by some final whitespace*/", finalComments[1].Text, "incorrect final comment text") 214 testutil.Eq(t, "\n\n\t\n\t", l.eof.LeadingWhitespace(), "incorrect final whitespace") 215 } 216 217 func TestLexerErrors(t *testing.T) { 218 testCases := []struct { 219 str string 220 errMsg string 221 }{ 222 {str: `0xffffffffffffffffffff`, errMsg: "value out of range"}, 223 {str: `"foobar`, errMsg: "unexpected EOF"}, 224 {str: `"foobar\J"`, errMsg: "invalid escape sequence"}, 225 {str: `"foobar\xgfoo"`, errMsg: "invalid hex escape"}, 226 {str: `"foobar\u09gafoo"`, errMsg: "invalid unicode escape"}, 227 {str: `"foobar\U0010005zfoo"`, errMsg: "invalid unicode escape"}, 228 {str: `"foobar\U00110000foo"`, errMsg: "unicode escape is out of range"}, 229 {str: "'foobar\nbaz'", errMsg: "encountered end-of-line"}, 230 {str: "'foobar\000baz'", errMsg: "null character ('\\0') not allowed"}, 231 {str: `/* foobar`, errMsg: "unexpected EOF"}, 232 } 233 for i, tc := range testCases { 234 l := newTestLexer(strings.NewReader(tc.str)) 235 var sym protoSymType 236 tok := l.Lex(&sym) 237 testutil.Eq(t, _ERROR, tok) 238 testutil.Require(t, sym.err != nil) 239 testutil.Require(t, strings.Contains(sym.err.Error(), tc.errMsg), "case %d: expected message to contain %q but does not: %q", i, tc.errMsg, sym.err.Error()) 240 } 241 } 242 243 func newTestLexer(in io.Reader) *protoLex { 244 return newLexer(in, "test.proto", newErrorHandler(nil, nil)) 245 }