github.com/gidoBOSSftw5731/go/src@v0.0.0-20210226122457-d24b0edbf019/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "unicode/utf8" 11 ) 12 13 const ( 14 lowerhex = "0123456789abcdef" 15 upperhex = "0123456789ABCDEF" 16 ) 17 18 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 19 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 20 } 21 22 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 23 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 24 } 25 26 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 27 // Often called with big strings, so preallocate. If there's quoting, 28 // this is conservative but still helps a lot. 29 if cap(buf)-len(buf) < len(s) { 30 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 31 copy(nBuf, buf) 32 buf = nBuf 33 } 34 buf = append(buf, quote) 35 for width := 0; len(s) > 0; s = s[width:] { 36 r := rune(s[0]) 37 width = 1 38 if r >= utf8.RuneSelf { 39 r, width = utf8.DecodeRuneInString(s) 40 } 41 if width == 1 && r == utf8.RuneError { 42 buf = append(buf, `\x`...) 43 buf = append(buf, lowerhex[s[0]>>4]) 44 buf = append(buf, lowerhex[s[0]&0xF]) 45 continue 46 } 47 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 48 } 49 buf = append(buf, quote) 50 return buf 51 } 52 53 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 54 buf = append(buf, quote) 55 if !utf8.ValidRune(r) { 56 r = utf8.RuneError 57 } 58 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 59 buf = append(buf, quote) 60 return buf 61 } 62 63 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 64 var runeTmp [utf8.UTFMax]byte 65 if r == rune(quote) || r == '\\' { // always backslashed 66 buf = append(buf, '\\') 67 buf = append(buf, byte(r)) 68 return buf 69 } 70 if ASCIIonly { 71 if r < utf8.RuneSelf && IsPrint(r) { 72 buf = append(buf, byte(r)) 73 return buf 74 } 75 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 76 n := utf8.EncodeRune(runeTmp[:], r) 77 buf = append(buf, runeTmp[:n]...) 78 return buf 79 } 80 switch r { 81 case '\a': 82 buf = append(buf, `\a`...) 83 case '\b': 84 buf = append(buf, `\b`...) 85 case '\f': 86 buf = append(buf, `\f`...) 87 case '\n': 88 buf = append(buf, `\n`...) 89 case '\r': 90 buf = append(buf, `\r`...) 91 case '\t': 92 buf = append(buf, `\t`...) 93 case '\v': 94 buf = append(buf, `\v`...) 95 default: 96 switch { 97 case r < ' ': 98 buf = append(buf, `\x`...) 99 buf = append(buf, lowerhex[byte(r)>>4]) 100 buf = append(buf, lowerhex[byte(r)&0xF]) 101 case r > utf8.MaxRune: 102 r = 0xFFFD 103 fallthrough 104 case r < 0x10000: 105 buf = append(buf, `\u`...) 106 for s := 12; s >= 0; s -= 4 { 107 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 108 } 109 default: 110 buf = append(buf, `\U`...) 111 for s := 28; s >= 0; s -= 4 { 112 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 113 } 114 } 115 } 116 return buf 117 } 118 119 // Quote returns a double-quoted Go string literal representing s. The 120 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 121 // control characters and non-printable characters as defined by 122 // IsPrint. 123 func Quote(s string) string { 124 return quoteWith(s, '"', false, false) 125 } 126 127 // AppendQuote appends a double-quoted Go string literal representing s, 128 // as generated by Quote, to dst and returns the extended buffer. 129 func AppendQuote(dst []byte, s string) []byte { 130 return appendQuotedWith(dst, s, '"', false, false) 131 } 132 133 // QuoteToASCII returns a double-quoted Go string literal representing s. 134 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 135 // non-ASCII characters and non-printable characters as defined by IsPrint. 136 func QuoteToASCII(s string) string { 137 return quoteWith(s, '"', true, false) 138 } 139 140 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 141 // as generated by QuoteToASCII, to dst and returns the extended buffer. 142 func AppendQuoteToASCII(dst []byte, s string) []byte { 143 return appendQuotedWith(dst, s, '"', true, false) 144 } 145 146 // QuoteToGraphic returns a double-quoted Go string literal representing s. 147 // The returned string leaves Unicode graphic characters, as defined by 148 // IsGraphic, unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100) 149 // for non-graphic characters. 150 func QuoteToGraphic(s string) string { 151 return quoteWith(s, '"', false, true) 152 } 153 154 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 155 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 156 func AppendQuoteToGraphic(dst []byte, s string) []byte { 157 return appendQuotedWith(dst, s, '"', false, true) 158 } 159 160 // QuoteRune returns a single-quoted Go character literal representing the 161 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 162 // for control characters and non-printable characters as defined by IsPrint. 163 func QuoteRune(r rune) string { 164 return quoteRuneWith(r, '\'', false, false) 165 } 166 167 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 168 // as generated by QuoteRune, to dst and returns the extended buffer. 169 func AppendQuoteRune(dst []byte, r rune) []byte { 170 return appendQuotedRuneWith(dst, r, '\'', false, false) 171 } 172 173 // QuoteRuneToASCII returns a single-quoted Go character literal representing 174 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 175 // \u0100) for non-ASCII characters and non-printable characters as defined 176 // by IsPrint. 177 func QuoteRuneToASCII(r rune) string { 178 return quoteRuneWith(r, '\'', true, false) 179 } 180 181 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 182 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 183 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 184 return appendQuotedRuneWith(dst, r, '\'', true, false) 185 } 186 187 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 188 // the rune. If the rune is not a Unicode graphic character, 189 // as defined by IsGraphic, the returned string will use a Go escape sequence 190 // (\t, \n, \xFF, \u0100). 191 func QuoteRuneToGraphic(r rune) string { 192 return quoteRuneWith(r, '\'', false, true) 193 } 194 195 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 196 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 197 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 198 return appendQuotedRuneWith(dst, r, '\'', false, true) 199 } 200 201 // CanBackquote reports whether the string s can be represented 202 // unchanged as a single-line backquoted string without control 203 // characters other than tab. 204 func CanBackquote(s string) bool { 205 for len(s) > 0 { 206 r, wid := utf8.DecodeRuneInString(s) 207 s = s[wid:] 208 if wid > 1 { 209 if r == '\ufeff' { 210 return false // BOMs are invisible and should not be quoted. 211 } 212 continue // All other multibyte runes are correctly encoded and assumed printable. 213 } 214 if r == utf8.RuneError { 215 return false 216 } 217 if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' { 218 return false 219 } 220 } 221 return true 222 } 223 224 func unhex(b byte) (v rune, ok bool) { 225 c := rune(b) 226 switch { 227 case '0' <= c && c <= '9': 228 return c - '0', true 229 case 'a' <= c && c <= 'f': 230 return c - 'a' + 10, true 231 case 'A' <= c && c <= 'F': 232 return c - 'A' + 10, true 233 } 234 return 235 } 236 237 // UnquoteChar decodes the first character or byte in the escaped string 238 // or character literal represented by the string s. 239 // It returns four values: 240 // 241 // 1) value, the decoded Unicode code point or byte value; 242 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 243 // 3) tail, the remainder of the string after the character; and 244 // 4) an error that will be nil if the character is syntactically valid. 245 // 246 // The second argument, quote, specifies the type of literal being parsed 247 // and therefore which escaped quote character is permitted. 248 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 249 // If set to a double quote, it permits \" and disallows unescaped ". 250 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 251 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 252 // easy cases 253 if len(s) == 0 { 254 err = ErrSyntax 255 return 256 } 257 switch c := s[0]; { 258 case c == quote && (quote == '\'' || quote == '"'): 259 err = ErrSyntax 260 return 261 case c >= utf8.RuneSelf: 262 r, size := utf8.DecodeRuneInString(s) 263 return r, true, s[size:], nil 264 case c != '\\': 265 return rune(s[0]), false, s[1:], nil 266 } 267 268 // hard case: c is backslash 269 if len(s) <= 1 { 270 err = ErrSyntax 271 return 272 } 273 c := s[1] 274 s = s[2:] 275 276 switch c { 277 case 'a': 278 value = '\a' 279 case 'b': 280 value = '\b' 281 case 'f': 282 value = '\f' 283 case 'n': 284 value = '\n' 285 case 'r': 286 value = '\r' 287 case 't': 288 value = '\t' 289 case 'v': 290 value = '\v' 291 case 'x', 'u', 'U': 292 n := 0 293 switch c { 294 case 'x': 295 n = 2 296 case 'u': 297 n = 4 298 case 'U': 299 n = 8 300 } 301 var v rune 302 if len(s) < n { 303 err = ErrSyntax 304 return 305 } 306 for j := 0; j < n; j++ { 307 x, ok := unhex(s[j]) 308 if !ok { 309 err = ErrSyntax 310 return 311 } 312 v = v<<4 | x 313 } 314 s = s[n:] 315 if c == 'x' { 316 // single-byte string, possibly not UTF-8 317 value = v 318 break 319 } 320 if v > utf8.MaxRune { 321 err = ErrSyntax 322 return 323 } 324 value = v 325 multibyte = true 326 case '0', '1', '2', '3', '4', '5', '6', '7': 327 v := rune(c) - '0' 328 if len(s) < 2 { 329 err = ErrSyntax 330 return 331 } 332 for j := 0; j < 2; j++ { // one digit already; two more 333 x := rune(s[j]) - '0' 334 if x < 0 || x > 7 { 335 err = ErrSyntax 336 return 337 } 338 v = (v << 3) | x 339 } 340 s = s[2:] 341 if v > 255 { 342 err = ErrSyntax 343 return 344 } 345 value = v 346 case '\\': 347 value = '\\' 348 case '\'', '"': 349 if c != quote { 350 err = ErrSyntax 351 return 352 } 353 value = rune(c) 354 default: 355 err = ErrSyntax 356 return 357 } 358 tail = s 359 return 360 } 361 362 // Unquote interprets s as a single-quoted, double-quoted, 363 // or backquoted Go string literal, returning the string value 364 // that s quotes. (If s is single-quoted, it would be a Go 365 // character literal; Unquote returns the corresponding 366 // one-character string.) 367 func Unquote(s string) (string, error) { 368 n := len(s) 369 if n < 2 { 370 return "", ErrSyntax 371 } 372 quote := s[0] 373 if quote != s[n-1] { 374 return "", ErrSyntax 375 } 376 s = s[1 : n-1] 377 378 if quote == '`' { 379 if contains(s, '`') { 380 return "", ErrSyntax 381 } 382 if contains(s, '\r') { 383 // -1 because we know there is at least one \r to remove. 384 buf := make([]byte, 0, len(s)-1) 385 for i := 0; i < len(s); i++ { 386 if s[i] != '\r' { 387 buf = append(buf, s[i]) 388 } 389 } 390 return string(buf), nil 391 } 392 return s, nil 393 } 394 if quote != '"' && quote != '\'' { 395 return "", ErrSyntax 396 } 397 if contains(s, '\n') { 398 return "", ErrSyntax 399 } 400 401 // Is it trivial? Avoid allocation. 402 if !contains(s, '\\') && !contains(s, quote) { 403 switch quote { 404 case '"': 405 if utf8.ValidString(s) { 406 return s, nil 407 } 408 case '\'': 409 r, size := utf8.DecodeRuneInString(s) 410 if size == len(s) && (r != utf8.RuneError || size != 1) { 411 return s, nil 412 } 413 } 414 } 415 416 var runeTmp [utf8.UTFMax]byte 417 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 418 for len(s) > 0 { 419 c, multibyte, ss, err := UnquoteChar(s, quote) 420 if err != nil { 421 return "", err 422 } 423 s = ss 424 if c < utf8.RuneSelf || !multibyte { 425 buf = append(buf, byte(c)) 426 } else { 427 n := utf8.EncodeRune(runeTmp[:], c) 428 buf = append(buf, runeTmp[:n]...) 429 } 430 if quote == '\'' && len(s) != 0 { 431 // single-quoted must be single character 432 return "", ErrSyntax 433 } 434 } 435 return string(buf), nil 436 } 437 438 // bsearch16 returns the smallest i such that a[i] >= x. 439 // If there is no such i, bsearch16 returns len(a). 440 func bsearch16(a []uint16, x uint16) int { 441 i, j := 0, len(a) 442 for i < j { 443 h := i + (j-i)>>1 444 if a[h] < x { 445 i = h + 1 446 } else { 447 j = h 448 } 449 } 450 return i 451 } 452 453 // bsearch32 returns the smallest i such that a[i] >= x. 454 // If there is no such i, bsearch32 returns len(a). 455 func bsearch32(a []uint32, x uint32) int { 456 i, j := 0, len(a) 457 for i < j { 458 h := i + (j-i)>>1 459 if a[h] < x { 460 i = h + 1 461 } else { 462 j = h 463 } 464 } 465 return i 466 } 467 468 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 469 // to give the same answer. It allows this package not to depend on unicode, 470 // and therefore not pull in all the Unicode tables. If the linker were better 471 // at tossing unused tables, we could get rid of this implementation. 472 // That would be nice. 473 474 // IsPrint reports whether the rune is defined as printable by Go, with 475 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 476 // symbols and ASCII space. 477 func IsPrint(r rune) bool { 478 // Fast check for Latin-1 479 if r <= 0xFF { 480 if 0x20 <= r && r <= 0x7E { 481 // All the ASCII is printable from space through DEL-1. 482 return true 483 } 484 if 0xA1 <= r && r <= 0xFF { 485 // Similarly for ¡ through ÿ... 486 return r != 0xAD // ...except for the bizarre soft hyphen. 487 } 488 return false 489 } 490 491 // Same algorithm, either on uint16 or uint32 value. 492 // First, find first i such that isPrint[i] >= x. 493 // This is the index of either the start or end of a pair that might span x. 494 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 495 // If we find x in a range, make sure x is not in isNotPrint list. 496 497 if 0 <= r && r < 1<<16 { 498 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 499 i := bsearch16(isPrint, rr) 500 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 501 return false 502 } 503 j := bsearch16(isNotPrint, rr) 504 return j >= len(isNotPrint) || isNotPrint[j] != rr 505 } 506 507 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 508 i := bsearch32(isPrint, rr) 509 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 510 return false 511 } 512 if r >= 0x20000 { 513 return true 514 } 515 r -= 0x10000 516 j := bsearch16(isNotPrint, uint16(r)) 517 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 518 } 519 520 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 521 // characters include letters, marks, numbers, punctuation, symbols, and 522 // spaces, from categories L, M, N, P, S, and Zs. 523 func IsGraphic(r rune) bool { 524 if IsPrint(r) { 525 return true 526 } 527 return isInGraphicList(r) 528 } 529 530 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 531 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 532 // Should be called only if IsPrint fails. 533 func isInGraphicList(r rune) bool { 534 // We know r must fit in 16 bits - see makeisprint.go. 535 if r > 0xFFFF { 536 return false 537 } 538 rr := uint16(r) 539 i := bsearch16(isGraphic, rr) 540 return i < len(isGraphic) && rr == isGraphic[i] 541 }