github.com/kamalshkeir/kencoding@v0.0.2-0.20230409043843-44b609a0475a/json/token_test.go

github.com/kamalshkeir/kencoding@v0.0.2-0.20230409043843-44b609a0475a/json/token_test.go (about)

     1  package json
     2  
     3  import (
     4  	"bytes"
     5  	"reflect"
     6  	"testing"
     7  )
     8  
     9  type token struct {
    10  	delim Delim
    11  	value RawValue
    12  	err   error
    13  	depth int
    14  	index int
    15  	isKey bool
    16  }
    17  
    18  func delim(s string, depth, index int) token {
    19  	return token{
    20  		delim: Delim(s[0]),
    21  		value: RawValue(s),
    22  		depth: depth,
    23  		index: index,
    24  	}
    25  }
    26  
    27  func key(v string, depth, index int) token {
    28  	return token{
    29  		value: RawValue(v),
    30  		depth: depth,
    31  		index: index,
    32  		isKey: true,
    33  	}
    34  }
    35  
    36  func value(v string, depth, index int) token {
    37  	return token{
    38  		value: RawValue(v),
    39  		depth: depth,
    40  		index: index,
    41  	}
    42  }
    43  
    44  func tokenize(t *testing.T, b []byte) (tokens []token) {
    45  	tok := NewTokenizer(b)
    46  
    47  	for tok.Next() {
    48  		end := len(b) - tok.Remaining()
    49  		start := end - len(tok.Value)
    50  		if end > len(b) {
    51  			t.Fatalf("token position too far [%d:%d], len(b) is %d", start, end, len(b))
    52  		}
    53  		if !bytes.Equal(b[start:end], tok.Value) {
    54  			t.Fatalf("token position is wrong [%d:%d]", start, end)
    55  		}
    56  
    57  		tokens = append(tokens, token{
    58  			delim: tok.Delim,
    59  			value: tok.Value,
    60  			err:   tok.Err,
    61  			depth: tok.Depth,
    62  			index: tok.Index,
    63  			isKey: tok.IsKey,
    64  		})
    65  	}
    66  
    67  	if tok.Err != nil {
    68  		t.Fatal(tok.Err)
    69  	}
    70  
    71  	return
    72  }
    73  
    74  func TestTokenizer(t *testing.T) {
    75  	tests := []struct {
    76  		input  []byte
    77  		tokens []token
    78  	}{
    79  		{
    80  			input: []byte(`null`),
    81  			tokens: []token{
    82  				value(`null`, 0, 0),
    83  			},
    84  		},
    85  
    86  		{
    87  			input: []byte(`true`),
    88  			tokens: []token{
    89  				value(`true`, 0, 0),
    90  			},
    91  		},
    92  
    93  		{
    94  			input: []byte(`false`),
    95  			tokens: []token{
    96  				value(`false`, 0, 0),
    97  			},
    98  		},
    99  
   100  		{
   101  			input: []byte(`""`),
   102  			tokens: []token{
   103  				value(`""`, 0, 0),
   104  			},
   105  		},
   106  
   107  		{
   108  			input: []byte(`"Hello World!"`),
   109  			tokens: []token{
   110  				value(`"Hello World!"`, 0, 0),
   111  			},
   112  		},
   113  
   114  		{
   115  			input: []byte(`-0.1234`),
   116  			tokens: []token{
   117  				value(`-0.1234`, 0, 0),
   118  			},
   119  		},
   120  
   121  		{
   122  			input: []byte(` { } `),
   123  			tokens: []token{
   124  				delim(`{`, 0, 0),
   125  				delim(`}`, 0, 0),
   126  			},
   127  		},
   128  
   129  		{
   130  			input: []byte(`{ "answer": 42 }`),
   131  			tokens: []token{
   132  				delim(`{`, 0, 0),
   133  				key(`"answer"`, 1, 0),
   134  				delim(`:`, 1, 0),
   135  				value(`42`, 1, 0),
   136  				delim(`}`, 0, 0),
   137  			},
   138  		},
   139  
   140  		{
   141  			input: []byte(`{ "sub": { "key-A": 1, "key-B": 2, "key-C": 3 } }`),
   142  			tokens: []token{
   143  				delim(`{`, 0, 0),
   144  				key(`"sub"`, 1, 0),
   145  				delim(`:`, 1, 0),
   146  				delim(`{`, 1, 0),
   147  				key(`"key-A"`, 2, 0),
   148  				delim(`:`, 2, 0),
   149  				value(`1`, 2, 0),
   150  				delim(`,`, 2, 0),
   151  				key(`"key-B"`, 2, 1),
   152  				delim(`:`, 2, 1),
   153  				value(`2`, 2, 1),
   154  				delim(`,`, 2, 1),
   155  				key(`"key-C"`, 2, 2),
   156  				delim(`:`, 2, 2),
   157  				value(`3`, 2, 2),
   158  				delim(`}`, 1, 0),
   159  				delim(`}`, 0, 0),
   160  			},
   161  		},
   162  
   163  		{
   164  			input: []byte(` [ ] `),
   165  			tokens: []token{
   166  				delim(`[`, 0, 0),
   167  				delim(`]`, 0, 0),
   168  			},
   169  		},
   170  
   171  		{
   172  			input: []byte(`[1, 2, 3]`),
   173  			tokens: []token{
   174  				delim(`[`, 0, 0),
   175  				value(`1`, 1, 0),
   176  				delim(`,`, 1, 0),
   177  				value(`2`, 1, 1),
   178  				delim(`,`, 1, 1),
   179  				value(`3`, 1, 2),
   180  				delim(`]`, 0, 0),
   181  			},
   182  		},
   183  	}
   184  
   185  	for _, test := range tests {
   186  		t.Run(string(test.input), func(t *testing.T) {
   187  			tokens := tokenize(t, test.input)
   188  
   189  			if !reflect.DeepEqual(tokens, test.tokens) {
   190  				t.Error("tokens mismatch")
   191  				t.Logf("expected: %+v", test.tokens)
   192  				t.Logf("found:    %+v", tokens)
   193  			}
   194  		})
   195  	}
   196  }
   197  
   198  // Regression test for syntax that caused panics in Next.
   199  func TestTokenizer_invalidInput(t *testing.T) {
   200  	tests := []struct {
   201  		scenario string
   202  		payload  []byte
   203  	}{
   204  		{
   205  			scenario: "bare comma",
   206  			payload:  []byte(","),
   207  		},
   208  		{
   209  			scenario: "comma after array",
   210  			payload:  []byte("[],"),
   211  		},
   212  		{
   213  			scenario: "comma after object",
   214  			payload:  []byte("{},"),
   215  		},
   216  	}
   217  
   218  	for _, test := range tests {
   219  		t.Run(test.scenario, func(t *testing.T) {
   220  			tkn := NewTokenizer(test.payload)
   221  
   222  			// This shouldn't panic
   223  			for tkn.Next() {
   224  			}
   225  
   226  			if tkn.Err == nil {
   227  				t.Error("expected Err to be set, got nil")
   228  			}
   229  		})
   230  	}
   231  }
   232  
   233  func BenchmarkTokenizer(b *testing.B) {
   234  	values := []struct {
   235  		scenario string
   236  		payload  []byte
   237  	}{
   238  		{
   239  			scenario: "null",
   240  			payload:  []byte(`null`),
   241  		},
   242  
   243  		{
   244  			scenario: "true",
   245  			payload:  []byte(`true`),
   246  		},
   247  
   248  		{
   249  			scenario: "false",
   250  			payload:  []byte(`false`),
   251  		},
   252  
   253  		{
   254  			scenario: "number",
   255  			payload:  []byte(`-1.23456789`),
   256  		},
   257  
   258  		{
   259  			scenario: "string",
   260  			payload:  []byte(`"1234567890"`),
   261  		},
   262  
   263  		{
   264  			scenario: "object",
   265  			payload: []byte(`{
   266      "timestamp": "2019-01-09T18:59:57.456Z",
   267      "channel": "server",
   268      "type": "track",
   269      "event": "Test",
   270      "userId": "test-user-whatever",
   271      "messageId": "test-message-whatever",
   272      "integrations": {
   273          "whatever": {
   274              "debugMode": false
   275          },
   276          "myIntegration": {
   277              "debugMode": true
   278          }
   279      },
   280      "properties": {
   281          "trait1": 1,
   282          "trait2": "test",
   283          "trait3": true
   284      },
   285      "settings": {
   286          "apiKey": "1234567890",
   287          "debugMode": false,
   288          "directChannels": [
   289              "server",
   290              "client"
   291          ],
   292          "endpoint": "https://somewhere.com/v1/integrations/segment"
   293      }
   294  }`),
   295  		},
   296  	}
   297  
   298  	benchmarks := []struct {
   299  		scenario string
   300  		function func(*testing.B, []byte)
   301  	}{
   302  		{
   303  			scenario: "github.com/kamalshkeir/kencoding/json",
   304  			function: func(b *testing.B, json []byte) {
   305  				t := NewTokenizer(nil)
   306  
   307  				for i := 0; i < b.N; i++ {
   308  					t.Reset(json)
   309  
   310  					for t.Next() {
   311  						// Does nothing other than iterating over each token to measure the
   312  						// CPU and memory footprint.
   313  					}
   314  
   315  					if t.Err != nil {
   316  						b.Error(t.Err)
   317  					}
   318  				}
   319  			},
   320  		},
   321  	}
   322  
   323  	for _, bechmark := range benchmarks {
   324  		b.Run(bechmark.scenario, func(b *testing.B) {
   325  			for _, value := range values {
   326  				b.Run(value.scenario, func(b *testing.B) {
   327  					bechmark.function(b, value.payload)
   328  					b.SetBytes(int64(len(value.payload)))
   329  				})
   330  			}
   331  		})
   332  	}
   333  }