github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "unicode/utf8" 11 ) 12 13 const ( 14 lowerhex = "0123456789abcdef" 15 upperhex = "0123456789ABCDEF" 16 ) 17 18 // contains reports whether the string contains the byte c. 19 func contains(s string, c byte) bool { 20 return index(s, c) != -1 21 } 22 23 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 24 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 25 } 26 27 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 28 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 29 } 30 31 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 32 // Often called with big strings, so preallocate. If there's quoting, 33 // this is conservative but still helps a lot. 34 if cap(buf)-len(buf) < len(s) { 35 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 36 copy(nBuf, buf) 37 buf = nBuf 38 } 39 buf = append(buf, quote) 40 for width := 0; len(s) > 0; s = s[width:] { 41 r := rune(s[0]) 42 width = 1 43 if r >= utf8.RuneSelf { 44 r, width = utf8.DecodeRuneInString(s) 45 } 46 if width == 1 && r == utf8.RuneError { 47 buf = append(buf, `\x`...) 48 buf = append(buf, lowerhex[s[0]>>4]) 49 buf = append(buf, lowerhex[s[0]&0xF]) 50 continue 51 } 52 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 53 } 54 buf = append(buf, quote) 55 return buf 56 } 57 58 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 59 buf = append(buf, quote) 60 if !utf8.ValidRune(r) { 61 r = utf8.RuneError 62 } 63 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 64 buf = append(buf, quote) 65 return buf 66 } 67 68 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 69 if r == rune(quote) || r == '\\' { // always backslashed 70 buf = append(buf, '\\') 71 buf = append(buf, byte(r)) 72 return buf 73 } 74 if ASCIIonly { 75 if r < utf8.RuneSelf && IsPrint(r) { 76 buf = append(buf, byte(r)) 77 return buf 78 } 79 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 80 return utf8.AppendRune(buf, r) 81 } 82 switch r { 83 case '\a': 84 buf = append(buf, `\a`...) 85 case '\b': 86 buf = append(buf, `\b`...) 87 case '\f': 88 buf = append(buf, `\f`...) 89 case '\n': 90 buf = append(buf, `\n`...) 91 case '\r': 92 buf = append(buf, `\r`...) 93 case '\t': 94 buf = append(buf, `\t`...) 95 case '\v': 96 buf = append(buf, `\v`...) 97 default: 98 switch { 99 case r < ' ' || r == 0x7f: 100 buf = append(buf, `\x`...) 101 buf = append(buf, lowerhex[byte(r)>>4]) 102 buf = append(buf, lowerhex[byte(r)&0xF]) 103 case !utf8.ValidRune(r): 104 r = 0xFFFD 105 fallthrough 106 case r < 0x10000: 107 buf = append(buf, `\u`...) 108 for s := 12; s >= 0; s -= 4 { 109 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 110 } 111 default: 112 buf = append(buf, `\U`...) 113 for s := 28; s >= 0; s -= 4 { 114 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 115 } 116 } 117 } 118 return buf 119 } 120 121 // Quote returns a double-quoted Go string literal representing s. The 122 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 123 // control characters and non-printable characters as defined by 124 // IsPrint. 125 func Quote(s string) string { 126 return quoteWith(s, '"', false, false) 127 } 128 129 // AppendQuote appends a double-quoted Go string literal representing s, 130 // as generated by Quote, to dst and returns the extended buffer. 131 func AppendQuote(dst []byte, s string) []byte { 132 return appendQuotedWith(dst, s, '"', false, false) 133 } 134 135 // QuoteToASCII returns a double-quoted Go string literal representing s. 136 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 137 // non-ASCII characters and non-printable characters as defined by IsPrint. 138 func QuoteToASCII(s string) string { 139 return quoteWith(s, '"', true, false) 140 } 141 142 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 143 // as generated by QuoteToASCII, to dst and returns the extended buffer. 144 func AppendQuoteToASCII(dst []byte, s string) []byte { 145 return appendQuotedWith(dst, s, '"', true, false) 146 } 147 148 // QuoteToGraphic returns a double-quoted Go string literal representing s. 149 // The returned string leaves Unicode graphic characters, as defined by 150 // IsGraphic, unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100) 151 // for non-graphic characters. 152 func QuoteToGraphic(s string) string { 153 return quoteWith(s, '"', false, true) 154 } 155 156 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 157 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 158 func AppendQuoteToGraphic(dst []byte, s string) []byte { 159 return appendQuotedWith(dst, s, '"', false, true) 160 } 161 162 // QuoteRune returns a single-quoted Go character literal representing the 163 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 164 // for control characters and non-printable characters as defined by IsPrint. 165 // If r is not a valid Unicode code point, it is interpreted as the Unicode 166 // replacement character U+FFFD. 167 func QuoteRune(r rune) string { 168 return quoteRuneWith(r, '\'', false, false) 169 } 170 171 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 172 // as generated by QuoteRune, to dst and returns the extended buffer. 173 func AppendQuoteRune(dst []byte, r rune) []byte { 174 return appendQuotedRuneWith(dst, r, '\'', false, false) 175 } 176 177 // QuoteRuneToASCII returns a single-quoted Go character literal representing 178 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 179 // \u0100) for non-ASCII characters and non-printable characters as defined 180 // by IsPrint. 181 // If r is not a valid Unicode code point, it is interpreted as the Unicode 182 // replacement character U+FFFD. 183 func QuoteRuneToASCII(r rune) string { 184 return quoteRuneWith(r, '\'', true, false) 185 } 186 187 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 188 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 189 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 190 return appendQuotedRuneWith(dst, r, '\'', true, false) 191 } 192 193 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 194 // the rune. If the rune is not a Unicode graphic character, 195 // as defined by IsGraphic, the returned string will use a Go escape sequence 196 // (\t, \n, \xFF, \u0100). 197 // If r is not a valid Unicode code point, it is interpreted as the Unicode 198 // replacement character U+FFFD. 199 func QuoteRuneToGraphic(r rune) string { 200 return quoteRuneWith(r, '\'', false, true) 201 } 202 203 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 204 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 205 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 206 return appendQuotedRuneWith(dst, r, '\'', false, true) 207 } 208 209 // CanBackquote reports whether the string s can be represented 210 // unchanged as a single-line backquoted string without control 211 // characters other than tab. 212 func CanBackquote(s string) bool { 213 for len(s) > 0 { 214 r, wid := utf8.DecodeRuneInString(s) 215 s = s[wid:] 216 if wid > 1 { 217 if r == '\ufeff' { 218 return false // BOMs are invisible and should not be quoted. 219 } 220 continue // All other multibyte runes are correctly encoded and assumed printable. 221 } 222 if r == utf8.RuneError { 223 return false 224 } 225 if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' { 226 return false 227 } 228 } 229 return true 230 } 231 232 func unhex(b byte) (v rune, ok bool) { 233 c := rune(b) 234 switch { 235 case '0' <= c && c <= '9': 236 return c - '0', true 237 case 'a' <= c && c <= 'f': 238 return c - 'a' + 10, true 239 case 'A' <= c && c <= 'F': 240 return c - 'A' + 10, true 241 } 242 return 243 } 244 245 // UnquoteChar decodes the first character or byte in the escaped string 246 // or character literal represented by the string s. 247 // It returns four values: 248 // 249 // 1. value, the decoded Unicode code point or byte value; 250 // 2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 251 // 3. tail, the remainder of the string after the character; and 252 // 4. an error that will be nil if the character is syntactically valid. 253 // 254 // The second argument, quote, specifies the type of literal being parsed 255 // and therefore which escaped quote character is permitted. 256 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 257 // If set to a double quote, it permits \" and disallows unescaped ". 258 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 259 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 260 // easy cases 261 if len(s) == 0 { 262 err = ErrSyntax 263 return 264 } 265 switch c := s[0]; { 266 case c == quote && (quote == '\'' || quote == '"'): 267 err = ErrSyntax 268 return 269 case c >= utf8.RuneSelf: 270 r, size := utf8.DecodeRuneInString(s) 271 return r, true, s[size:], nil 272 case c != '\\': 273 return rune(s[0]), false, s[1:], nil 274 } 275 276 // hard case: c is backslash 277 if len(s) <= 1 { 278 err = ErrSyntax 279 return 280 } 281 c := s[1] 282 s = s[2:] 283 284 switch c { 285 case 'a': 286 value = '\a' 287 case 'b': 288 value = '\b' 289 case 'f': 290 value = '\f' 291 case 'n': 292 value = '\n' 293 case 'r': 294 value = '\r' 295 case 't': 296 value = '\t' 297 case 'v': 298 value = '\v' 299 case 'x', 'u', 'U': 300 n := 0 301 switch c { 302 case 'x': 303 n = 2 304 case 'u': 305 n = 4 306 case 'U': 307 n = 8 308 } 309 var v rune 310 if len(s) < n { 311 err = ErrSyntax 312 return 313 } 314 for j := 0; j < n; j++ { 315 x, ok := unhex(s[j]) 316 if !ok { 317 err = ErrSyntax 318 return 319 } 320 v = v<<4 | x 321 } 322 s = s[n:] 323 if c == 'x' { 324 // single-byte string, possibly not UTF-8 325 value = v 326 break 327 } 328 if !utf8.ValidRune(v) { 329 err = ErrSyntax 330 return 331 } 332 value = v 333 multibyte = true 334 case '0', '1', '2', '3', '4', '5', '6', '7': 335 v := rune(c) - '0' 336 if len(s) < 2 { 337 err = ErrSyntax 338 return 339 } 340 for j := 0; j < 2; j++ { // one digit already; two more 341 x := rune(s[j]) - '0' 342 if x < 0 || x > 7 { 343 err = ErrSyntax 344 return 345 } 346 v = (v << 3) | x 347 } 348 s = s[2:] 349 if v > 255 { 350 err = ErrSyntax 351 return 352 } 353 value = v 354 case '\\': 355 value = '\\' 356 case '\'', '"': 357 if c != quote { 358 err = ErrSyntax 359 return 360 } 361 value = rune(c) 362 default: 363 err = ErrSyntax 364 return 365 } 366 tail = s 367 return 368 } 369 370 // QuotedPrefix returns the quoted string (as understood by Unquote) at the prefix of s. 371 // If s does not start with a valid quoted string, QuotedPrefix returns an error. 372 func QuotedPrefix(s string) (string, error) { 373 out, _, err := unquote(s, false) 374 return out, err 375 } 376 377 // Unquote interprets s as a single-quoted, double-quoted, 378 // or backquoted Go string literal, returning the string value 379 // that s quotes. (If s is single-quoted, it would be a Go 380 // character literal; Unquote returns the corresponding 381 // one-character string.) 382 func Unquote(s string) (string, error) { 383 out, rem, err := unquote(s, true) 384 if len(rem) > 0 { 385 return "", ErrSyntax 386 } 387 return out, err 388 } 389 390 // unquote parses a quoted string at the start of the input, 391 // returning the parsed prefix, the remaining suffix, and any parse errors. 392 // If unescape is true, the parsed prefix is unescaped, 393 // otherwise the input prefix is provided verbatim. 394 func unquote(in string, unescape bool) (out, rem string, err error) { 395 // Determine the quote form and optimistically find the terminating quote. 396 if len(in) < 2 { 397 return "", in, ErrSyntax 398 } 399 quote := in[0] 400 end := index(in[1:], quote) 401 if end < 0 { 402 return "", in, ErrSyntax 403 } 404 end += 2 // position after terminating quote; may be wrong if escape sequences are present 405 406 switch quote { 407 case '`': 408 switch { 409 case !unescape: 410 out = in[:end] // include quotes 411 case !contains(in[:end], '\r'): 412 out = in[len("`") : end-len("`")] // exclude quotes 413 default: 414 // Carriage return characters ('\r') inside raw string literals 415 // are discarded from the raw string value. 416 buf := make([]byte, 0, end-len("`")-len("\r")-len("`")) 417 for i := len("`"); i < end-len("`"); i++ { 418 if in[i] != '\r' { 419 buf = append(buf, in[i]) 420 } 421 } 422 out = string(buf) 423 } 424 // NOTE: Prior implementations did not verify that raw strings consist 425 // of valid UTF-8 characters and we continue to not verify it as such. 426 // The Go specification does not explicitly require valid UTF-8, 427 // but only mention that it is implicitly valid for Go source code 428 // (which must be valid UTF-8). 429 return out, in[end:], nil 430 case '"', '\'': 431 // Handle quoted strings without any escape sequences. 432 if !contains(in[:end], '\\') && !contains(in[:end], '\n') { 433 var valid bool 434 switch quote { 435 case '"': 436 valid = utf8.ValidString(in[len(`"`) : end-len(`"`)]) 437 case '\'': 438 r, n := utf8.DecodeRuneInString(in[len("'") : end-len("'")]) 439 valid = len("'")+n+len("'") == end && (r != utf8.RuneError || n != 1) 440 } 441 if valid { 442 out = in[:end] 443 if unescape { 444 out = out[1 : end-1] // exclude quotes 445 } 446 return out, in[end:], nil 447 } 448 } 449 450 // Handle quoted strings with escape sequences. 451 var buf []byte 452 in0 := in 453 in = in[1:] // skip starting quote 454 if unescape { 455 buf = make([]byte, 0, 3*end/2) // try to avoid more allocations 456 } 457 for len(in) > 0 && in[0] != quote { 458 // Process the next character, 459 // rejecting any unescaped newline characters which are invalid. 460 r, multibyte, rem, err := UnquoteChar(in, quote) 461 if in[0] == '\n' || err != nil { 462 return "", in0, ErrSyntax 463 } 464 in = rem 465 466 // Append the character if unescaping the input. 467 if unescape { 468 if r < utf8.RuneSelf || !multibyte { 469 buf = append(buf, byte(r)) 470 } else { 471 buf = utf8.AppendRune(buf, r) 472 } 473 } 474 475 // Single quoted strings must be a single character. 476 if quote == '\'' { 477 break 478 } 479 } 480 481 // Verify that the string ends with a terminating quote. 482 if !(len(in) > 0 && in[0] == quote) { 483 return "", in0, ErrSyntax 484 } 485 in = in[1:] // skip terminating quote 486 487 if unescape { 488 return string(buf), in, nil 489 } 490 return in0[:len(in0)-len(in)], in, nil 491 default: 492 return "", in, ErrSyntax 493 } 494 } 495 496 // bsearch is semantically the same as [slices.BinarySearch] (without NaN checks) 497 // We copied this function because we can not import "slices" here. 498 func bsearch[S ~[]E, E ~uint16 | ~uint32](s S, v E) (int, bool) { 499 n := len(s) 500 i, j := 0, n 501 for i < j { 502 h := i + (j-i)>>1 503 if s[h] < v { 504 i = h + 1 505 } else { 506 j = h 507 } 508 } 509 return i, i < n && s[i] == v 510 } 511 512 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 513 // to give the same answer. It allows this package not to depend on unicode, 514 // and therefore not pull in all the Unicode tables. If the linker were better 515 // at tossing unused tables, we could get rid of this implementation. 516 // That would be nice. 517 518 // IsPrint reports whether the rune is defined as printable by Go, with 519 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 520 // symbols and ASCII space. 521 func IsPrint(r rune) bool { 522 // Fast check for Latin-1 523 if r <= 0xFF { 524 if 0x20 <= r && r <= 0x7E { 525 // All the ASCII is printable from space through DEL-1. 526 return true 527 } 528 if 0xA1 <= r && r <= 0xFF { 529 // Similarly for ¡ through ÿ... 530 return r != 0xAD // ...except for the bizarre soft hyphen. 531 } 532 return false 533 } 534 535 // Same algorithm, either on uint16 or uint32 value. 536 // First, find first i such that isPrint[i] >= x. 537 // This is the index of either the start or end of a pair that might span x. 538 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 539 // If we find x in a range, make sure x is not in isNotPrint list. 540 541 if 0 <= r && r < 1<<16 { 542 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 543 i, _ := bsearch(isPrint, rr) 544 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 545 return false 546 } 547 _, found := bsearch(isNotPrint, rr) 548 return !found 549 } 550 551 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 552 i, _ := bsearch(isPrint, rr) 553 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 554 return false 555 } 556 if r >= 0x20000 { 557 return true 558 } 559 r -= 0x10000 560 _, found := bsearch(isNotPrint, uint16(r)) 561 return !found 562 } 563 564 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 565 // characters include letters, marks, numbers, punctuation, symbols, and 566 // spaces, from categories L, M, N, P, S, and Zs. 567 func IsGraphic(r rune) bool { 568 if IsPrint(r) { 569 return true 570 } 571 return isInGraphicList(r) 572 } 573 574 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 575 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 576 // Should be called only if IsPrint fails. 577 func isInGraphicList(r rune) bool { 578 // We know r must fit in 16 bits - see makeisprint.go. 579 if r > 0xFFFF { 580 return false 581 } 582 _, found := bsearch(isGraphic, uint16(r)) 583 return found 584 }