github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/yaml/lexer/lexer_test.go (about) 1 package lexer_test 2 3 import ( 4 "sort" 5 "strings" 6 "testing" 7 8 "github.com/bingoohuang/gg/pkg/yaml/lexer" 9 "github.com/bingoohuang/gg/pkg/yaml/token" 10 ) 11 12 func TestTokenize(t *testing.T) { 13 sources := []string{ 14 "null\n", 15 "{}\n", 16 "v: hi\n", 17 "v: \"true\"\n", 18 "v: \"false\"\n", 19 "v: true\n", 20 "v: false\n", 21 "v: 10\n", 22 "v: -10\n", 23 "v: 42\n", 24 "v: 4294967296\n", 25 "v: \"10\"\n", 26 "v: 0.1\n", 27 "v: 0.99\n", 28 "v: -0.1\n", 29 "v: .inf\n", 30 "v: -.inf\n", 31 "v: .nan\n", 32 "v: null\n", 33 "v: \"\"\n", 34 "v:\n- A\n- B\n", 35 "v:\n- A\n- |-\n B\n C\n", 36 "v:\n- A\n- 1\n- B:\n - 2\n - 3\n", 37 "a:\n b: c\n", 38 "a: '-'\n", 39 "123\n", 40 "hello: world\n", 41 "a: null\n", 42 "a: {x: 1}\n", 43 "a: [1, 2]\n", 44 "t2: 2018-01-09T10:40:47Z\nt4: 2098-01-09T10:40:47Z\n", 45 "a: {b: c, d: e}\n", 46 "a: 3s\n", 47 "a: <foo>\n", 48 "a: \"1:1\"\n", 49 "a: \"\\0\"\n", 50 "a: !!binary gIGC\n", 51 "a: !!binary |\n " + strings.Repeat("kJCQ", 17) + "kJ\n CQ\n", 52 "b: 2\na: 1\nd: 4\nc: 3\nsub:\n e: 5\n", 53 "a: 1.2.3.4\n", 54 "a: \"2015-02-24T18:19:39Z\"\n", 55 "a: 'b: c'\n", 56 "a: 'Hello #comment'\n", 57 "a: 100.5\n", 58 "a: bogus\n", 59 } 60 for _, src := range sources { 61 lexer.Tokenize(src).Dump() 62 } 63 } 64 65 type testToken struct { 66 line int 67 column int 68 value string 69 } 70 71 func TestSingleLineToken_ValueLineColumnPosition(t *testing.T) { 72 tests := []struct { 73 name string 74 src string 75 expect map[int]string // Column -> Value map. 76 }{ 77 { 78 name: "single quote, single value array", 79 src: "test: ['test']", 80 expect: map[int]string{ 81 1: "test", 82 5: ":", 83 7: "[", 84 8: "test", 85 14: "]", 86 }, 87 }, 88 { 89 name: "double quote, single value array", 90 src: `test: ["test"]`, 91 expect: map[int]string{ 92 1: "test", 93 5: ":", 94 7: "[", 95 8: "test", 96 14: "]", 97 }, 98 }, 99 { 100 name: "no quotes, single value array", 101 src: "test: [somevalue]", 102 expect: map[int]string{ 103 1: "test", 104 5: ":", 105 7: "[", 106 8: "somevalue", 107 17: "]", 108 }, 109 }, 110 { 111 name: "single quote, multi value array", 112 src: "myarr: ['1','2','3', '444' , '55','66' , '77' ]", 113 expect: map[int]string{ 114 1: "myarr", 115 6: ":", 116 8: "[", 117 9: "1", 118 12: ",", 119 13: "2", 120 16: ",", 121 17: "3", 122 20: ",", 123 22: "444", 124 28: ",", 125 30: "55", 126 34: ",", 127 35: "66", 128 40: ",", 129 43: "77", 130 49: "]", 131 }, 132 }, 133 { 134 name: "double quote, multi value array", 135 src: `myarr: ["1","2","3", "444" , "55","66" , "77" ]`, 136 expect: map[int]string{ 137 1: "myarr", 138 6: ":", 139 8: "[", 140 9: "1", 141 12: ",", 142 13: "2", 143 16: ",", 144 17: "3", 145 20: ",", 146 22: "444", 147 28: ",", 148 30: "55", 149 34: ",", 150 35: "66", 151 40: ",", 152 43: "77", 153 49: "]", 154 }, 155 }, 156 { 157 name: "no quote, multi value array", 158 src: "numbers: [1, 5, 99,100, 3, 7 ]", 159 expect: map[int]string{ 160 1: "numbers", 161 8: ":", 162 10: "[", 163 11: "1", 164 12: ",", 165 14: "5", 166 15: ",", 167 17: "99", 168 19: ",", 169 20: "100", 170 23: ",", 171 25: "3", 172 26: ",", 173 28: "7", 174 30: "]", 175 }, 176 }, 177 { 178 name: "double quotes, nested arrays", 179 src: `Strings: ["1",["2",["3"]]]`, 180 expect: map[int]string{ 181 1: "Strings", 182 8: ":", 183 10: "[", 184 11: "1", 185 14: ",", 186 15: "[", 187 16: "2", 188 19: ",", 189 20: "[", 190 21: "3", 191 24: "]", 192 25: "]", 193 26: "]", 194 }, 195 }, 196 { 197 name: "mixed quotes, nested arrays", 198 src: `Values: [1,['2',"3",4,["5",6]]]`, 199 expect: map[int]string{ 200 1: "Values", 201 7: ":", 202 9: "[", 203 10: "1", 204 11: ",", 205 12: "[", 206 13: "2", 207 16: ",", 208 17: "3", 209 20: ",", 210 21: "4", 211 22: ",", 212 23: "[", 213 24: "5", 214 27: ",", 215 28: "6", 216 29: "]", 217 30: "]", 218 31: "]", 219 }, 220 }, 221 { 222 name: "double quote, empty array", 223 src: `Empty: ["", ""]`, 224 expect: map[int]string{ 225 1: "Empty", 226 6: ":", 227 8: "[", 228 9: "", 229 11: ",", 230 13: "", 231 15: "]", 232 }, 233 }, 234 } 235 236 for _, tc := range tests { 237 t.Run(tc.name, func(t *testing.T) { 238 got := lexer.Tokenize(tc.src) 239 sort.Slice(got, func(i, j int) bool { 240 return got[i].Position.Column < got[j].Position.Column 241 }) 242 var expected []testToken 243 for k, v := range tc.expect { 244 tt := testToken{ 245 line: 1, 246 column: k, 247 value: v, 248 } 249 expected = append(expected, tt) 250 } 251 sort.Slice(expected, func(i, j int) bool { 252 return expected[i].column < expected[j].column 253 }) 254 if len(got) != len(expected) { 255 t.Errorf("Tokenize(%s) token count mismatch, expected:%d got:%d", tc.src, len(expected), len(got)) 256 } 257 for i, tok := range got { 258 if !tokenMatches(tok, expected[i]) { 259 t.Errorf("Tokenize(%s) expected:%+v got line:%d column:%d value:%s", tc.src, expected[i], tok.Position.Line, tok.Position.Column, tok.Value) 260 } 261 } 262 }) 263 } 264 } 265 266 func tokenMatches(t *token.Token, e testToken) bool { 267 return t != nil && t.Position != nil && 268 t.Value == e.value && 269 t.Position.Line == e.line && 270 t.Position.Column == e.column 271 } 272 273 func TestMultiLineToken_ValueLineColumnPosition(t *testing.T) { 274 tests := []struct { 275 name string 276 src string 277 expect []testToken 278 }{ 279 { 280 name: "double quote", 281 src: `one: "1 2 3 4 5" 282 two: "1 2 283 3 4 284 5" 285 three: "1 2 3 4 286 5"`, 287 expect: []testToken{ 288 { 289 line: 1, 290 column: 1, 291 value: "one", 292 }, 293 { 294 line: 1, 295 column: 4, 296 value: ":", 297 }, 298 { 299 line: 1, 300 column: 6, 301 value: "1 2 3 4 5", 302 }, 303 { 304 line: 2, 305 column: 1, 306 value: "two", 307 }, 308 { 309 line: 2, 310 column: 4, 311 value: ":", 312 }, 313 { 314 line: 2, 315 column: 6, 316 value: "1 2 3 4 5", 317 }, 318 { 319 line: 5, 320 column: 1, 321 value: "three", 322 }, 323 { 324 line: 5, 325 column: 6, 326 value: ":", 327 }, 328 { 329 line: 5, 330 column: 8, 331 value: "1 2 3 4 5", 332 }, 333 }, 334 }, 335 { 336 name: "single quote in an array", 337 src: `arr: ['1', 'and 338 two'] 339 last: 'hello'`, 340 expect: []testToken{ 341 { 342 line: 1, 343 column: 1, 344 value: "arr", 345 }, 346 { 347 line: 1, 348 column: 4, 349 value: ":", 350 }, 351 { 352 line: 1, 353 column: 6, 354 value: "[", 355 }, 356 { 357 line: 1, 358 column: 7, 359 value: "1", 360 }, 361 { 362 line: 1, 363 column: 10, 364 value: ",", 365 }, 366 { 367 line: 1, 368 column: 12, 369 value: "and two", 370 }, 371 { 372 line: 2, 373 column: 5, 374 value: "]", 375 }, 376 { 377 line: 3, 378 column: 1, 379 value: "last", 380 }, 381 { 382 line: 3, 383 column: 5, 384 value: ":", 385 }, 386 { 387 line: 3, 388 column: 7, 389 value: "hello", 390 }, 391 }, 392 }, 393 { 394 name: "single quote and double quote", 395 src: `foo: "test 396 397 398 399 400 bar" 401 foo2: 'bar2'`, 402 expect: []testToken{ 403 { 404 line: 1, 405 column: 1, 406 value: "foo", 407 }, 408 { 409 line: 1, 410 column: 4, 411 value: ":", 412 }, 413 { 414 line: 1, 415 column: 6, 416 value: "test bar", 417 }, 418 { 419 line: 7, 420 column: 1, 421 value: "foo2", 422 }, 423 { 424 line: 7, 425 column: 5, 426 value: ":", 427 }, 428 { 429 line: 7, 430 column: 7, 431 value: "bar2", 432 }, 433 }, 434 }, 435 } 436 437 for _, tc := range tests { 438 t.Run(tc.name, func(t *testing.T) { 439 got := lexer.Tokenize(tc.src) 440 sort.Slice(got, func(i, j int) bool { 441 // sort by line, then column 442 if got[i].Position.Line < got[j].Position.Line { 443 return true 444 } else if got[i].Position.Line == got[j].Position.Line { 445 return got[i].Position.Column < got[j].Position.Column 446 } 447 return false 448 }) 449 sort.Slice(tc.expect, func(i, j int) bool { 450 if tc.expect[i].line < tc.expect[j].line { 451 return true 452 } else if tc.expect[i].line == tc.expect[j].line { 453 return tc.expect[i].column < tc.expect[j].column 454 } 455 return false 456 }) 457 if len(got) != len(tc.expect) { 458 t.Errorf("Tokenize() token count mismatch, expected:%d got:%d", len(tc.expect), len(got)) 459 } 460 for i, tok := range got { 461 if !tokenMatches(tok, tc.expect[i]) { 462 t.Errorf("Tokenize() expected:%+v got line:%d column:%d value:%s", tc.expect[i], tok.Position.Line, tok.Position.Column, tok.Value) 463 } 464 } 465 }) 466 } 467 }