github.com/dgraph-io/simdjson-go@v0.3.0/parse_string_test.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package simdjson
    18  
    19  var tests = []struct {
    20  	name    string
    21  	str     string
    22  	success bool
    23  	want    []byte
    24  }{
    25  	{
    26  		name:    "ascii-1",
    27  		str:     `a`,
    28  		success: true,
    29  		want:    []byte(`a`),
    30  	},
    31  	{
    32  		name:    "ascii-2",
    33  		str:     `ba`,
    34  		success: true,
    35  		want:    []byte(`ba`),
    36  	},
    37  	{
    38  		name:    "ascii-3",
    39  		str:     `cba`,
    40  		success: true,
    41  		want:    []byte(`cba`),
    42  	},
    43  	{
    44  		name:    "ascii-long",
    45  		str:     `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`,
    46  		success: true,
    47  		want:    []byte(`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`),
    48  	},
    49  	{
    50  		name:    "unicode-1",
    51  		str:     `\u1234`,
    52  		success: true,
    53  		want:    []byte{225, 136, 180},
    54  	},
    55  	{
    56  		name:    "unicode-short-by-1",
    57  		str:     `\u123`,
    58  		success: false,
    59  	},
    60  	{
    61  		name:    "unicode-short-by-2",
    62  		str:     `\u12`,
    63  		success: false,
    64  	},
    65  	{
    66  		name:    "unicode-short-by-3",
    67  		str:     `\u1`,
    68  		success: false,
    69  	},
    70  	{
    71  		name:    "unicode-short-by-4",
    72  		str:     `\u`,
    73  		success: false,
    74  	},
    75  	{
    76  		name:    "outside-basic-multilingual-plane",
    77  		str:     `\udbff\u1234`,
    78  		success: true,
    79  		want:    []byte{239, 184, 180},
    80  	},
    81  	{
    82  		name:    "outside-basic-multilingual-plane-short-by-1",
    83  		str:     `\udbff\u123`,
    84  		success: false,
    85  	},
    86  	{
    87  		name:    "outside-basic-multilingual-plane-short-by-2",
    88  		str:     `\udbff\u12`,
    89  		success: false,
    90  	},
    91  	{
    92  		name:    "outside-basic-multilingual-plane-short-by-3",
    93  		str:     `\udbff\u1`,
    94  		success: false,
    95  	},
    96  	{
    97  		name:    "outside-basic-multilingual-plane-short-by-4",
    98  		str:     `\udbff\u`,
    99  		success: false,
   100  	},
   101  	{
   102  		name:    "outside-basic-multilingual-plane-short-by-5",
   103  		str:     `\udbff\`,
   104  		success: false,
   105  	},
   106  	{
   107  		name:    "outside-basic-multilingual-plane-short-by-6",
   108  		str:     `\udbff`,
   109  		success: false,
   110  	},
   111  	{
   112  		name:    "outside-basic-multilingual-plane-short-by-7",
   113  		str:     `\udbf`,
   114  		success: false,
   115  	},
   116  	{
   117  		name:    "outside-basic-multilingual-plane-short-by-8",
   118  		str:     `\udbf`,
   119  		success: false,
   120  	},
   121  	{
   122  		name:    "quote1",
   123  		str:     `a\"b`,
   124  		success: true,
   125  		want:    []byte{97, 34, 98},
   126  	},
   127  	{
   128  		name:    "quote2",
   129  		str:     `a\"b\"c`,
   130  		success: true,
   131  		want:    []byte{97, 34, 98, 34, 99},
   132  	},
   133  	{
   134  		name:    "unicode-1-seq",
   135  		str:     `\u0123`,
   136  		success: true,
   137  		want:    []byte{196, 163},
   138  	},
   139  	{
   140  		name:    "unicode-2-seqs",
   141  		str:     `\u0123\u4567`,
   142  		success: true,
   143  		want:    []byte{196, 163, 228, 149, 167},
   144  	},
   145  	{
   146  		name:    "unicode-3-seqs",
   147  		str:     `\u0123\u4567\u89AB`,
   148  		success: true,
   149  		want:    []byte{196, 163, 228, 149, 167, 232, 166, 171},
   150  	},
   151  	{
   152  		name:    "unicode-4-seqs",
   153  		str:     `\u0123\u4567\u89AB\uCDEF`,
   154  		success: true,
   155  		want:    []byte{196, 163, 228, 149, 167, 232, 166, 171, 236, 183, 175},
   156  	},
   157  	{
   158  		name:    "uni1-end-of-ymm-word",
   159  		str:     `---------9---------9\udbff\u1234`,
   160  		success: true,
   161  		want:    []byte(string(`---------9---------9`) + string([]byte{0xef, 0xb8, 0xb4})),
   162  	},
   163  	{
   164  		name:    "uni1-end-of-ymm-word-pass-one-beyond",
   165  		str:     `---------9---------9-\udbff\u1234`,
   166  		success: true,
   167  		want:    []byte(string(`---------9---------9-`) + string([]byte{0xef, 0xb8, 0xb4})),
   168  	},
   169  	{
   170  		name:    "uni1-end-of-ymm-word-pass-two-beyond",
   171  		str:     `---------9---------9--\udbff\u1234`,
   172  		success: true,
   173  		want:    []byte(string(`---------9---------9--`) + string([]byte{0xef, 0xb8, 0xb4})),
   174  	},
   175  	{
   176  		name:    "uni1-end-of-ymm-word-pass-three-beyond",
   177  		str:     `---------9---------9---\udbff\u1234`,
   178  		success: true,
   179  		want:    []byte(string(`---------9---------9---`) + string([]byte{0xef, 0xb8, 0xb4})),
   180  	},
   181  	{
   182  		name:    "uni1-end-of-ymm-word-fail-one-beyond",
   183  		str:     `---------9---------9-\udbff\u123`,
   184  		success: false,
   185  	},
   186  	{
   187  		name:    "uni1-end-of-ymm-word-pass-two-beyond",
   188  		str:     `---------9---------9--\udbff\u123`,
   189  		success: false,
   190  	},
   191  	{
   192  		name:    "uni1-end-of-ymm-word-fail-three-beyond",
   193  		str:     `---------9---------9---\udbff\u123`,
   194  		success: false,
   195  	},
   196  	{
   197  		name:    "uni1-end-of-ymm-word-single",
   198  		str:     `---------9---------9------\u20ac`,
   199  		success: true,
   200  		want:    []byte(string(`---------9---------9------`) + string([]byte{0xe2, 0x82, 0xac})),
   201  	},
   202  	{
   203  		name:    "uni1-end-of-ymm-word-single-pass-one-beyond",
   204  		str:     `---------9---------9-------\u20ac`,
   205  		success: true,
   206  		want:    []byte(string(`---------9---------9-------`) + string([]byte{0xe2, 0x82, 0xac})),
   207  	},
   208  	{
   209  		name:    "uni1-end-of-ymm-word-single-pass-two-beyond",
   210  		str:     `---------9---------9--------\u20ac`,
   211  		success: true,
   212  		want:    []byte(string(`---------9---------9--------`) + string([]byte{0xe2, 0x82, 0xac})),
   213  	},
   214  	{
   215  		name:    "uni1-end-of-ymm-word-single-pass-three-beyond",
   216  		str:     `---------9---------9---------\u20ac`,
   217  		success: true,
   218  		want:    []byte(string(`---------9---------9---------`) + string([]byte{0xe2, 0x82, 0xac})),
   219  	},
   220  	{
   221  		name:    "uni1-end-of-ymm-word-single-fail-one-beyond",
   222  		str:     `---------9---------9-------\u20a`,
   223  		success: false,
   224  	},
   225  	{
   226  		name:    "uni1-end-of-ymm-word-single-fail-two-beyond",
   227  		str:     `---------9---------9--------\u20a`,
   228  		success: false,
   229  	},
   230  	{
   231  		name:    "uni1-end-of-ymm-word-single-fail-three-beyond",
   232  		str:     `---------9---------9---------\u20a`,
   233  		success: false,
   234  	},
   235  }