// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// errSyntax indicates that a value does not have the right syntax for the target type.
var errSyntax = badSyntax{}

// badSyntax is the concrete type of errSyntax. It carries no state, so
// every value of the type compares equal to errSyntax.
type badSyntax struct{}

// Error implements the error interface.
func (badSyntax) Error() string {
	return "invalid syntax"
}

// unhex converts a single hexadecimal digit (0-9, a-f, A-F) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}

// lowerhex maps a nibble value (0-15) to its lowercase hexadecimal digit.
// It must contain exactly 16 characters: the escaping code indexes it with
// values masked by 0xF.
const lowerhex = "0123456789abcdef"

// unquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//  1. value, the decoded Unicode code point or byte value;
//  2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//  3. tail, the remainder of the string after the character; and
//  4. an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
//
// On error, value, multibyte and tail are their zero values.
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
	// Easy cases: anything that is not a backslash escape.
	if len(s) == 0 {
		return 0, false, "", errSyntax
	}
	switch c := s[0]; {
	case c == quote && (quote == '\'' || quote == '"'):
		// An unescaped delimiter may not appear inside the literal.
		return 0, false, "", errSyntax
	case c >= utf8.RuneSelf:
		r, size := utf8.DecodeRuneInString(s)
		return r, true, s[size:], nil
	case c != '\\':
		return rune(s[0]), false, s[1:], nil
	}

	// Hard case: s[0] is a backslash.
	if len(s) <= 1 {
		return 0, false, "", errSyntax
	}
	c := s[1]
	s = s[2:]

	switch c {
	case 'a':
		value = '\a'
	case 'b':
		value = '\b'
	case 'f':
		value = '\f'
	case 'n':
		value = '\n'
	case 'r':
		value = '\r'
	case 't':
		value = '\t'
	case 'v':
		value = '\v'
	case 'x', 'u', 'U':
		// Number of hexadecimal digits that must follow the escape letter.
		n := 0
		switch c {
		case 'x':
			n = 2
		case 'u':
			n = 4
		case 'U':
			n = 8
		}
		if len(s) < n {
			return 0, false, "", errSyntax
		}
		var v rune
		for j := 0; j < n; j++ {
			x, ok := unhex(s[j])
			if !ok {
				return 0, false, "", errSyntax
			}
			v = v<<4 | x
		}
		s = s[n:]
		if c != 'x' {
			// \u and \U must name a valid code point. ValidRune also
			// rejects surrogate halves and the negative values that
			// eight hex digits can produce by overflowing a rune,
			// which a plain "v > utf8.MaxRune" comparison lets through.
			if !utf8.ValidRune(v) {
				return 0, false, "", errSyntax
			}
			multibyte = true
		}
		// For \x the value is a single byte, possibly not valid UTF-8.
		value = v
	case '0', '1', '2', '3', '4', '5', '6', '7':
		v := rune(c) - '0'
		if len(s) < 2 {
			return 0, false, "", errSyntax
		}
		for j := 0; j < 2; j++ { // one digit already; two more
			x := rune(s[j]) - '0'
			if x < 0 || x > 7 {
				return 0, false, "", errSyntax
			}
			v = (v << 3) | x
		}
		s = s[2:]
		if v > 255 {
			return 0, false, "", errSyntax
		}
		value = v
	case '\\':
		value = '\\'
	case '\'', '"':
		// Only the delimiter of the literal being parsed may be escaped.
		if c != quote {
			return 0, false, "", errSyntax
		}
		value = rune(c)
	default:
		return 0, false, "", errSyntax
	}
	return value, multibyte, s, nil
}

// unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; unquote returns the corresponding
// one-character string.)
//
// It returns errSyntax when s is not delimited by a matching pair of
// quote characters or when its contents are not valid for that kind of
// literal.
func unquote(s string) (string, error) {
	n := len(s)
	if n < 2 {
		return "", errSyntax
	}
	quote := s[0]
	if quote != s[n-1] {
		return "", errSyntax
	}
	s = s[1 : n-1]

	if quote == '`' {
		// Raw string: no escapes, no embedded backquote, and carriage
		// returns are dropped.
		if contains(s, '`') {
			return "", errSyntax
		}
		if contains(s, '\r') {
			// -1 because we know there is at least one \r to remove.
			buf := make([]byte, 0, len(s)-1)
			for i := 0; i < len(s); i++ {
				if s[i] != '\r' {
					buf = append(buf, s[i])
				}
			}
			return string(buf), nil
		}
		return s, nil
	}
	if quote != '"' && quote != '\'' {
		return "", errSyntax
	}
	if contains(s, '\n') {
		return "", errSyntax
	}

	// Is it trivial? Avoid allocation.
	if !contains(s, '\\') && !contains(s, quote) {
		switch quote {
		case '"':
			if utf8.ValidString(s) {
				return s, nil
			}
		case '\'':
			r, size := utf8.DecodeRuneInString(s)
			if size == len(s) && (r != utf8.RuneError || size != 1) {
				return s, nil
			}
		}
	}

	var runeTmp [utf8.UTFMax]byte
	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
	for len(s) > 0 {
		c, multibyte, rest, err := unquoteChar(s, quote)
		if err != nil {
			return "", err
		}
		s = rest
		if c < utf8.RuneSelf || !multibyte {
			// ASCII, or a raw byte produced by a \x or octal escape.
			buf = append(buf, byte(c))
		} else {
			size := utf8.EncodeRune(runeTmp[:], c)
			buf = append(buf, runeTmp[:size]...)
		}
		if quote == '\'' && len(s) != 0 {
			// single-quoted must be single character
			return "", errSyntax
		}
	}
	return string(buf), nil
}

// quote returns a double-quoted Go string literal representing s.
// Bytes that are not valid UTF-8 are written as \xNN escapes; every other
// rune is written by appendEscapedRune.
func quote(s string) string {
	buf := make([]byte, 0, 3*len(s)/2)
	const quote = '"'

	buf = append(buf, quote)
	for width := 0; len(s) > 0; s = s[width:] {
		r := rune(s[0])
		width = 1
		if r >= utf8.RuneSelf {
			r, width = utf8.DecodeRuneInString(s)
		}
		if width == 1 && r == utf8.RuneError {
			// Invalid UTF-8: emit the offending byte itself.
			buf = append(buf, `\x`...)
			buf = append(buf, lowerhex[s[0]>>4])
			buf = append(buf, lowerhex[s[0]&0xF])
			continue
		}
		buf = appendEscapedRune(buf, r)
	}
	buf = append(buf, quote)
	return string(buf)
}

// appendEscapedRune appends to buf the representation of r as it should
// appear inside a double-quoted string literal and returns the extended
// buffer. The double quote and backslash are backslash-escaped, runes for
// which isPrint reports true are appended as UTF-8, and everything else
// becomes a named escape (\n, \t, ...), \xNN, \uNNNN or \UNNNNNNNN.
func appendEscapedRune(buf []byte, r rune) []byte {
	const quote = '"'

	var runeTmp [utf8.UTFMax]byte
	if r == rune(quote) || r == '\\' { // always backslashed
		buf = append(buf, '\\')
		buf = append(buf, byte(r))
		return buf
	}
	if isPrint(r) {
		n := utf8.EncodeRune(runeTmp[:], r)
		buf = append(buf, runeTmp[:n]...)
		return buf
	}
	switch r {
	case '\a':
		buf = append(buf, `\a`...)
	case '\b':
		buf = append(buf, `\b`...)
	case '\f':
		buf = append(buf, `\f`...)
	case '\n':
		buf = append(buf, `\n`...)
	case '\r':
		buf = append(buf, `\r`...)
	case '\t':
		buf = append(buf, `\t`...)
	case '\v':
		buf = append(buf, `\v`...)
	default:
		switch {
		case r < ' ' || r == 0x7f:
			buf = append(buf, `\x`...)
			buf = append(buf, lowerhex[byte(r)>>4])
			buf = append(buf, lowerhex[byte(r)&0xF])
		case !utf8.ValidRune(r):
			// Invalid code points are written as the replacement character.
			r = 0xFFFD
			fallthrough
		case r < 0x10000:
			buf = append(buf, `\u`...)
			for s := 12; s >= 0; s -= 4 {
				buf = append(buf, lowerhex[r>>uint(s)&0xF])
			}
		default:
			buf = append(buf, `\U`...)
			for s := 28; s >= 0; s -= 4 {
				buf = append(buf, lowerhex[r>>uint(s)&0xF])
			}
		}
	}
	return buf
}

// isPrint reports whether r is written unescaped by appendEscapedRune.
// This is only used for struct tags, so only ASCII 0x20 through 0x7E and
// Latin-1 0xA1 through 0xFF (except the soft hyphen 0xAD) are treated as
// printable; every other rune reports false.
func isPrint(r rune) bool {
	if r <= 0xFF {
		if 0x20 <= r && r <= 0x7E {
			// All the ASCII is printable from space through DEL-1.
			return true
		}
		if 0xA1 <= r && r <= 0xFF {
			// Similarly for ¡ through ÿ...
			return r != 0xAD // ...except for the bizarre soft hyphen.
		}
		return false
	}

	// TinyGo: Skip all other unicode processing
	return false
}

// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
	return indexByteString(s, c) != -1
}

// indexByteString returns the index of the first instance of the byte c
// in s, or -1 if c is not present.
func indexByteString(s string, c byte) int {
	for i := 0; i < len(s); i++ {
		if s[i] == c {
			return i
		}
	}
	return -1
}