github.com/dgraph-io/simdjson-go@v0.3.0/parse_string_test.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2020 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package simdjson 18 19 var tests = []struct { 20 name string 21 str string 22 success bool 23 want []byte 24 }{ 25 { 26 name: "ascii-1", 27 str: `a`, 28 success: true, 29 want: []byte(`a`), 30 }, 31 { 32 name: "ascii-2", 33 str: `ba`, 34 success: true, 35 want: []byte(`ba`), 36 }, 37 { 38 name: "ascii-3", 39 str: `cba`, 40 success: true, 41 want: []byte(`cba`), 42 }, 43 { 44 name: "ascii-long", 45 str: `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`, 46 success: true, 47 want: []byte(`abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`), 48 }, 49 { 50 name: "unicode-1", 51 str: `\u1234`, 52 success: true, 53 want: []byte{225, 136, 180}, 54 }, 55 { 56 name: "unicode-short-by-1", 57 str: `\u123`, 58 success: false, 59 }, 60 { 61 name: "unicode-short-by-2", 62 str: `\u12`, 63 success: false, 64 }, 65 { 66 name: "unicode-short-by-3", 67 str: `\u1`, 68 success: false, 69 }, 70 { 71 name: "unicode-short-by-4", 72 str: `\u`, 73 success: false, 74 }, 75 { 76 name: "outside-basic-multilingual-plane", 77 str: `\udbff\u1234`, 78 success: true, 79 want: []byte{239, 184, 180}, 80 }, 81 { 82 name: "outside-basic-multilingual-plane-short-by-1", 83 str: `\udbff\u123`, 84 success: false, 85 }, 86 { 87 name: "outside-basic-multilingual-plane-short-by-2", 88 str: `\udbff\u12`, 89 success: false, 90 }, 91 { 92 name: "outside-basic-multilingual-plane-short-by-3", 93 str: `\udbff\u1`, 94 success: false, 95 }, 96 { 97 name: "outside-basic-multilingual-plane-short-by-4", 98 str: `\udbff\u`, 99 success: false, 100 }, 101 { 102 name: "outside-basic-multilingual-plane-short-by-5", 103 str: `\udbff\`, 104 success: false, 105 }, 106 { 107 name: "outside-basic-multilingual-plane-short-by-6", 108 str: `\udbff`, 109 success: false, 110 }, 111 { 112 name: "outside-basic-multilingual-plane-short-by-7", 113 str: `\udbf`, 114 success: false, 115 }, 116 { 117 name: "outside-basic-multilingual-plane-short-by-8", 118 str: `\udbf`, 119 success: false, 120 }, 121 { 122 name: "quote1", 123 str: `a\"b`, 124 success: true, 125 want: []byte{97, 34, 98}, 126 }, 127 { 128 name: "quote2", 129 str: `a\"b\"c`, 130 success: true, 131 want: []byte{97, 34, 98, 34, 99}, 132 }, 133 { 134 name: "unicode-1-seq", 135 str: `\u0123`, 136 success: true, 137 want: []byte{196, 163}, 138 }, 139 { 140 name: "unicode-2-seqs", 141 str: `\u0123\u4567`, 142 success: true, 143 want: []byte{196, 163, 228, 149, 167}, 144 }, 145 { 146 name: "unicode-3-seqs", 147 str: `\u0123\u4567\u89AB`, 148 success: true, 149 want: []byte{196, 163, 228, 149, 167, 232, 166, 171}, 150 }, 151 { 152 name: "unicode-4-seqs", 153 str: `\u0123\u4567\u89AB\uCDEF`, 154 success: true, 155 want: []byte{196, 163, 228, 149, 167, 232, 166, 171, 236, 183, 175}, 156 }, 157 { 158 name: "uni1-end-of-ymm-word", 159 str: `---------9---------9\udbff\u1234`, 160 success: true, 161 want: []byte(string(`---------9---------9`) + string([]byte{0xef, 0xb8, 0xb4})), 162 }, 163 { 164 name: "uni1-end-of-ymm-word-pass-one-beyond", 165 str: `---------9---------9-\udbff\u1234`, 166 success: true, 167 want: []byte(string(`---------9---------9-`) + string([]byte{0xef, 0xb8, 0xb4})), 168 }, 169 { 170 name: "uni1-end-of-ymm-word-pass-two-beyond", 171 str: `---------9---------9--\udbff\u1234`, 172 success: true, 173 want: []byte(string(`---------9---------9--`) + string([]byte{0xef, 0xb8, 0xb4})), 174 }, 175 { 176 name: "uni1-end-of-ymm-word-pass-three-beyond", 177 str: `---------9---------9---\udbff\u1234`, 178 success: true, 179 want: []byte(string(`---------9---------9---`) + string([]byte{0xef, 0xb8, 0xb4})), 180 }, 181 { 182 name: "uni1-end-of-ymm-word-fail-one-beyond", 183 str: `---------9---------9-\udbff\u123`, 184 success: false, 185 }, 186 { 187 name: "uni1-end-of-ymm-word-pass-two-beyond", 188 str: `---------9---------9--\udbff\u123`, 189 success: false, 190 }, 191 { 192 name: "uni1-end-of-ymm-word-fail-three-beyond", 193 str: `---------9---------9---\udbff\u123`, 194 success: false, 195 }, 196 { 197 name: "uni1-end-of-ymm-word-single", 198 str: `---------9---------9------\u20ac`, 199 success: true, 200 want: []byte(string(`---------9---------9------`) + string([]byte{0xe2, 0x82, 0xac})), 201 }, 202 { 203 name: "uni1-end-of-ymm-word-single-pass-one-beyond", 204 str: `---------9---------9-------\u20ac`, 205 success: true, 206 want: []byte(string(`---------9---------9-------`) + string([]byte{0xe2, 0x82, 0xac})), 207 }, 208 { 209 name: "uni1-end-of-ymm-word-single-pass-two-beyond", 210 str: `---------9---------9--------\u20ac`, 211 success: true, 212 want: []byte(string(`---------9---------9--------`) + string([]byte{0xe2, 0x82, 0xac})), 213 }, 214 { 215 name: "uni1-end-of-ymm-word-single-pass-three-beyond", 216 str: `---------9---------9---------\u20ac`, 217 success: true, 218 want: []byte(string(`---------9---------9---------`) + string([]byte{0xe2, 0x82, 0xac})), 219 }, 220 { 221 name: "uni1-end-of-ymm-word-single-fail-one-beyond", 222 str: `---------9---------9-------\u20a`, 223 success: false, 224 }, 225 { 226 name: "uni1-end-of-ymm-word-single-fail-two-beyond", 227 str: `---------9---------9--------\u20a`, 228 success: false, 229 }, 230 { 231 name: "uni1-end-of-ymm-word-single-fail-three-beyond", 232 str: `---------9---------9---------\u20a`, 233 success: false, 234 }, 235 }