github.com/bingoohuang/gg@v0.0.0-20240325092523-45da7dee9335/pkg/yaml/lexer/lexer_test.go (about)

     1  package lexer_test
     2  
     3  import (
     4  	"sort"
     5  	"strings"
     6  	"testing"
     7  
     8  	"github.com/bingoohuang/gg/pkg/yaml/lexer"
     9  	"github.com/bingoohuang/gg/pkg/yaml/token"
    10  )
    11  
    12  func TestTokenize(t *testing.T) {
    13  	sources := []string{
    14  		"null\n",
    15  		"{}\n",
    16  		"v: hi\n",
    17  		"v: \"true\"\n",
    18  		"v: \"false\"\n",
    19  		"v: true\n",
    20  		"v: false\n",
    21  		"v: 10\n",
    22  		"v: -10\n",
    23  		"v: 42\n",
    24  		"v: 4294967296\n",
    25  		"v: \"10\"\n",
    26  		"v: 0.1\n",
    27  		"v: 0.99\n",
    28  		"v: -0.1\n",
    29  		"v: .inf\n",
    30  		"v: -.inf\n",
    31  		"v: .nan\n",
    32  		"v: null\n",
    33  		"v: \"\"\n",
    34  		"v:\n- A\n- B\n",
    35  		"v:\n- A\n- |-\n  B\n  C\n",
    36  		"v:\n- A\n- 1\n- B:\n  - 2\n  - 3\n",
    37  		"a:\n  b: c\n",
    38  		"a: '-'\n",
    39  		"123\n",
    40  		"hello: world\n",
    41  		"a: null\n",
    42  		"a: {x: 1}\n",
    43  		"a: [1, 2]\n",
    44  		"t2: 2018-01-09T10:40:47Z\nt4: 2098-01-09T10:40:47Z\n",
    45  		"a: {b: c, d: e}\n",
    46  		"a: 3s\n",
    47  		"a: <foo>\n",
    48  		"a: \"1:1\"\n",
    49  		"a: \"\\0\"\n",
    50  		"a: !!binary gIGC\n",
    51  		"a: !!binary |\n  " + strings.Repeat("kJCQ", 17) + "kJ\n  CQ\n",
    52  		"b: 2\na: 1\nd: 4\nc: 3\nsub:\n  e: 5\n",
    53  		"a: 1.2.3.4\n",
    54  		"a: \"2015-02-24T18:19:39Z\"\n",
    55  		"a: 'b: c'\n",
    56  		"a: 'Hello #comment'\n",
    57  		"a: 100.5\n",
    58  		"a: bogus\n",
    59  	}
    60  	for _, src := range sources {
    61  		lexer.Tokenize(src).Dump()
    62  	}
    63  }
    64  
    65  type testToken struct {
    66  	line   int
    67  	column int
    68  	value  string
    69  }
    70  
    71  func TestSingleLineToken_ValueLineColumnPosition(t *testing.T) {
    72  	tests := []struct {
    73  		name   string
    74  		src    string
    75  		expect map[int]string // Column -> Value map.
    76  	}{
    77  		{
    78  			name: "single quote, single value array",
    79  			src:  "test: ['test']",
    80  			expect: map[int]string{
    81  				1:  "test",
    82  				5:  ":",
    83  				7:  "[",
    84  				8:  "test",
    85  				14: "]",
    86  			},
    87  		},
    88  		{
    89  			name: "double quote, single value array",
    90  			src:  `test: ["test"]`,
    91  			expect: map[int]string{
    92  				1:  "test",
    93  				5:  ":",
    94  				7:  "[",
    95  				8:  "test",
    96  				14: "]",
    97  			},
    98  		},
    99  		{
   100  			name: "no quotes, single value array",
   101  			src:  "test: [somevalue]",
   102  			expect: map[int]string{
   103  				1:  "test",
   104  				5:  ":",
   105  				7:  "[",
   106  				8:  "somevalue",
   107  				17: "]",
   108  			},
   109  		},
   110  		{
   111  			name: "single quote, multi value array",
   112  			src:  "myarr: ['1','2','3', '444' , '55','66' ,  '77'  ]",
   113  			expect: map[int]string{
   114  				1:  "myarr",
   115  				6:  ":",
   116  				8:  "[",
   117  				9:  "1",
   118  				12: ",",
   119  				13: "2",
   120  				16: ",",
   121  				17: "3",
   122  				20: ",",
   123  				22: "444",
   124  				28: ",",
   125  				30: "55",
   126  				34: ",",
   127  				35: "66",
   128  				40: ",",
   129  				43: "77",
   130  				49: "]",
   131  			},
   132  		},
   133  		{
   134  			name: "double quote, multi value array",
   135  			src:  `myarr: ["1","2","3", "444" , "55","66" ,  "77"  ]`,
   136  			expect: map[int]string{
   137  				1:  "myarr",
   138  				6:  ":",
   139  				8:  "[",
   140  				9:  "1",
   141  				12: ",",
   142  				13: "2",
   143  				16: ",",
   144  				17: "3",
   145  				20: ",",
   146  				22: "444",
   147  				28: ",",
   148  				30: "55",
   149  				34: ",",
   150  				35: "66",
   151  				40: ",",
   152  				43: "77",
   153  				49: "]",
   154  			},
   155  		},
   156  		{
   157  			name: "no quote, multi value array",
   158  			src:  "numbers: [1, 5, 99,100, 3, 7 ]",
   159  			expect: map[int]string{
   160  				1:  "numbers",
   161  				8:  ":",
   162  				10: "[",
   163  				11: "1",
   164  				12: ",",
   165  				14: "5",
   166  				15: ",",
   167  				17: "99",
   168  				19: ",",
   169  				20: "100",
   170  				23: ",",
   171  				25: "3",
   172  				26: ",",
   173  				28: "7",
   174  				30: "]",
   175  			},
   176  		},
   177  		{
   178  			name: "double quotes, nested arrays",
   179  			src:  `Strings: ["1",["2",["3"]]]`,
   180  			expect: map[int]string{
   181  				1:  "Strings",
   182  				8:  ":",
   183  				10: "[",
   184  				11: "1",
   185  				14: ",",
   186  				15: "[",
   187  				16: "2",
   188  				19: ",",
   189  				20: "[",
   190  				21: "3",
   191  				24: "]",
   192  				25: "]",
   193  				26: "]",
   194  			},
   195  		},
   196  		{
   197  			name: "mixed quotes, nested arrays",
   198  			src:  `Values: [1,['2',"3",4,["5",6]]]`,
   199  			expect: map[int]string{
   200  				1:  "Values",
   201  				7:  ":",
   202  				9:  "[",
   203  				10: "1",
   204  				11: ",",
   205  				12: "[",
   206  				13: "2",
   207  				16: ",",
   208  				17: "3",
   209  				20: ",",
   210  				21: "4",
   211  				22: ",",
   212  				23: "[",
   213  				24: "5",
   214  				27: ",",
   215  				28: "6",
   216  				29: "]",
   217  				30: "]",
   218  				31: "]",
   219  			},
   220  		},
   221  		{
   222  			name: "double quote, empty array",
   223  			src:  `Empty: ["", ""]`,
   224  			expect: map[int]string{
   225  				1:  "Empty",
   226  				6:  ":",
   227  				8:  "[",
   228  				9:  "",
   229  				11: ",",
   230  				13: "",
   231  				15: "]",
   232  			},
   233  		},
   234  	}
   235  
   236  	for _, tc := range tests {
   237  		t.Run(tc.name, func(t *testing.T) {
   238  			got := lexer.Tokenize(tc.src)
   239  			sort.Slice(got, func(i, j int) bool {
   240  				return got[i].Position.Column < got[j].Position.Column
   241  			})
   242  			var expected []testToken
   243  			for k, v := range tc.expect {
   244  				tt := testToken{
   245  					line:   1,
   246  					column: k,
   247  					value:  v,
   248  				}
   249  				expected = append(expected, tt)
   250  			}
   251  			sort.Slice(expected, func(i, j int) bool {
   252  				return expected[i].column < expected[j].column
   253  			})
   254  			if len(got) != len(expected) {
   255  				t.Errorf("Tokenize(%s) token count mismatch, expected:%d got:%d", tc.src, len(expected), len(got))
   256  			}
   257  			for i, tok := range got {
   258  				if !tokenMatches(tok, expected[i]) {
   259  					t.Errorf("Tokenize(%s) expected:%+v got line:%d column:%d value:%s", tc.src, expected[i], tok.Position.Line, tok.Position.Column, tok.Value)
   260  				}
   261  			}
   262  		})
   263  	}
   264  }
   265  
   266  func tokenMatches(t *token.Token, e testToken) bool {
   267  	return t != nil && t.Position != nil &&
   268  		t.Value == e.value &&
   269  		t.Position.Line == e.line &&
   270  		t.Position.Column == e.column
   271  }
   272  
   273  func TestMultiLineToken_ValueLineColumnPosition(t *testing.T) {
   274  	tests := []struct {
   275  		name   string
   276  		src    string
   277  		expect []testToken
   278  	}{
   279  		{
   280  			name: "double quote",
   281  			src: `one: "1 2 3 4 5"
   282  two: "1 2
   283  3 4
   284  5"
   285  three: "1 2 3 4
   286  5"`,
   287  			expect: []testToken{
   288  				{
   289  					line:   1,
   290  					column: 1,
   291  					value:  "one",
   292  				},
   293  				{
   294  					line:   1,
   295  					column: 4,
   296  					value:  ":",
   297  				},
   298  				{
   299  					line:   1,
   300  					column: 6,
   301  					value:  "1 2 3 4 5",
   302  				},
   303  				{
   304  					line:   2,
   305  					column: 1,
   306  					value:  "two",
   307  				},
   308  				{
   309  					line:   2,
   310  					column: 4,
   311  					value:  ":",
   312  				},
   313  				{
   314  					line:   2,
   315  					column: 6,
   316  					value:  "1 2 3 4 5",
   317  				},
   318  				{
   319  					line:   5,
   320  					column: 1,
   321  					value:  "three",
   322  				},
   323  				{
   324  					line:   5,
   325  					column: 6,
   326  					value:  ":",
   327  				},
   328  				{
   329  					line:   5,
   330  					column: 8,
   331  					value:  "1 2 3 4 5",
   332  				},
   333  			},
   334  		},
   335  		{
   336  			name: "single quote in an array",
   337  			src: `arr: ['1', 'and
   338  two']
   339  last: 'hello'`,
   340  			expect: []testToken{
   341  				{
   342  					line:   1,
   343  					column: 1,
   344  					value:  "arr",
   345  				},
   346  				{
   347  					line:   1,
   348  					column: 4,
   349  					value:  ":",
   350  				},
   351  				{
   352  					line:   1,
   353  					column: 6,
   354  					value:  "[",
   355  				},
   356  				{
   357  					line:   1,
   358  					column: 7,
   359  					value:  "1",
   360  				},
   361  				{
   362  					line:   1,
   363  					column: 10,
   364  					value:  ",",
   365  				},
   366  				{
   367  					line:   1,
   368  					column: 12,
   369  					value:  "and two",
   370  				},
   371  				{
   372  					line:   2,
   373  					column: 5,
   374  					value:  "]",
   375  				},
   376  				{
   377  					line:   3,
   378  					column: 1,
   379  					value:  "last",
   380  				},
   381  				{
   382  					line:   3,
   383  					column: 5,
   384  					value:  ":",
   385  				},
   386  				{
   387  					line:   3,
   388  					column: 7,
   389  					value:  "hello",
   390  				},
   391  			},
   392  		},
   393  		{
   394  			name: "single quote and double quote",
   395  			src: `foo: "test
   396  
   397  
   398  
   399  
   400  bar"
   401  foo2: 'bar2'`,
   402  			expect: []testToken{
   403  				{
   404  					line:   1,
   405  					column: 1,
   406  					value:  "foo",
   407  				},
   408  				{
   409  					line:   1,
   410  					column: 4,
   411  					value:  ":",
   412  				},
   413  				{
   414  					line:   1,
   415  					column: 6,
   416  					value:  "test     bar",
   417  				},
   418  				{
   419  					line:   7,
   420  					column: 1,
   421  					value:  "foo2",
   422  				},
   423  				{
   424  					line:   7,
   425  					column: 5,
   426  					value:  ":",
   427  				},
   428  				{
   429  					line:   7,
   430  					column: 7,
   431  					value:  "bar2",
   432  				},
   433  			},
   434  		},
   435  	}
   436  
   437  	for _, tc := range tests {
   438  		t.Run(tc.name, func(t *testing.T) {
   439  			got := lexer.Tokenize(tc.src)
   440  			sort.Slice(got, func(i, j int) bool {
   441  				// sort by line, then column
   442  				if got[i].Position.Line < got[j].Position.Line {
   443  					return true
   444  				} else if got[i].Position.Line == got[j].Position.Line {
   445  					return got[i].Position.Column < got[j].Position.Column
   446  				}
   447  				return false
   448  			})
   449  			sort.Slice(tc.expect, func(i, j int) bool {
   450  				if tc.expect[i].line < tc.expect[j].line {
   451  					return true
   452  				} else if tc.expect[i].line == tc.expect[j].line {
   453  					return tc.expect[i].column < tc.expect[j].column
   454  				}
   455  				return false
   456  			})
   457  			if len(got) != len(tc.expect) {
   458  				t.Errorf("Tokenize() token count mismatch, expected:%d got:%d", len(tc.expect), len(got))
   459  			}
   460  			for i, tok := range got {
   461  				if !tokenMatches(tok, tc.expect[i]) {
   462  					t.Errorf("Tokenize() expected:%+v got line:%d column:%d value:%s", tc.expect[i], tok.Position.Line, tok.Position.Column, tok.Value)
   463  				}
   464  			}
   465  		})
   466  	}
   467  }