github.com/s1s1ty/go@v0.0.0-20180207192209-104445e3140f/src/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import "unicode/utf8" 10 11 const lowerhex = "0123456789abcdef" 12 13 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 14 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 15 } 16 17 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 18 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 19 } 20 21 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 22 buf = append(buf, quote) 23 for width := 0; len(s) > 0; s = s[width:] { 24 r := rune(s[0]) 25 width = 1 26 if r >= utf8.RuneSelf { 27 r, width = utf8.DecodeRuneInString(s) 28 } 29 if width == 1 && r == utf8.RuneError { 30 buf = append(buf, `\x`...) 
31 buf = append(buf, lowerhex[s[0]>>4]) 32 buf = append(buf, lowerhex[s[0]&0xF]) 33 continue 34 } 35 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 36 } 37 buf = append(buf, quote) 38 return buf 39 } 40 41 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 42 buf = append(buf, quote) 43 if !utf8.ValidRune(r) { 44 r = utf8.RuneError 45 } 46 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 47 buf = append(buf, quote) 48 return buf 49 } 50 51 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 52 var runeTmp [utf8.UTFMax]byte 53 if r == rune(quote) || r == '\\' { // always backslashed 54 buf = append(buf, '\\') 55 buf = append(buf, byte(r)) 56 return buf 57 } 58 if ASCIIonly { 59 if r < utf8.RuneSelf && IsPrint(r) { 60 buf = append(buf, byte(r)) 61 return buf 62 } 63 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 64 n := utf8.EncodeRune(runeTmp[:], r) 65 buf = append(buf, runeTmp[:n]...) 66 return buf 67 } 68 switch r { 69 case '\a': 70 buf = append(buf, `\a`...) 71 case '\b': 72 buf = append(buf, `\b`...) 73 case '\f': 74 buf = append(buf, `\f`...) 75 case '\n': 76 buf = append(buf, `\n`...) 77 case '\r': 78 buf = append(buf, `\r`...) 79 case '\t': 80 buf = append(buf, `\t`...) 81 case '\v': 82 buf = append(buf, `\v`...) 83 default: 84 switch { 85 case r < ' ': 86 buf = append(buf, `\x`...) 87 buf = append(buf, lowerhex[byte(r)>>4]) 88 buf = append(buf, lowerhex[byte(r)&0xF]) 89 case r > utf8.MaxRune: 90 r = 0xFFFD 91 fallthrough 92 case r < 0x10000: 93 buf = append(buf, `\u`...) 94 for s := 12; s >= 0; s -= 4 { 95 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 96 } 97 default: 98 buf = append(buf, `\U`...) 99 for s := 28; s >= 0; s -= 4 { 100 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 101 } 102 } 103 } 104 return buf 105 } 106 107 // Quote returns a double-quoted Go string literal representing s. 
The 108 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 109 // control characters and non-printable characters as defined by 110 // IsPrint. 111 func Quote(s string) string { 112 return quoteWith(s, '"', false, false) 113 } 114 115 // AppendQuote appends a double-quoted Go string literal representing s, 116 // as generated by Quote, to dst and returns the extended buffer. 117 func AppendQuote(dst []byte, s string) []byte { 118 return appendQuotedWith(dst, s, '"', false, false) 119 } 120 121 // QuoteToASCII returns a double-quoted Go string literal representing s. 122 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 123 // non-ASCII characters and non-printable characters as defined by IsPrint. 124 func QuoteToASCII(s string) string { 125 return quoteWith(s, '"', true, false) 126 } 127 128 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 129 // as generated by QuoteToASCII, to dst and returns the extended buffer. 130 func AppendQuoteToASCII(dst []byte, s string) []byte { 131 return appendQuotedWith(dst, s, '"', true, false) 132 } 133 134 // QuoteToGraphic returns a double-quoted Go string literal representing s. 135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 136 // non-ASCII characters and non-printable characters as defined by IsGraphic. 137 func QuoteToGraphic(s string) string { 138 return quoteWith(s, '"', false, true) 139 } 140 141 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 142 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 143 func AppendQuoteToGraphic(dst []byte, s string) []byte { 144 return appendQuotedWith(dst, s, '"', false, true) 145 } 146 147 // QuoteRune returns a single-quoted Go character literal representing the 148 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 149 // for control characters and non-printable characters as defined by IsPrint. 
150 func QuoteRune(r rune) string { 151 return quoteRuneWith(r, '\'', false, false) 152 } 153 154 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 155 // as generated by QuoteRune, to dst and returns the extended buffer. 156 func AppendQuoteRune(dst []byte, r rune) []byte { 157 return appendQuotedRuneWith(dst, r, '\'', false, false) 158 } 159 160 // QuoteRuneToASCII returns a single-quoted Go character literal representing 161 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 162 // \u0100) for non-ASCII characters and non-printable characters as defined 163 // by IsPrint. 164 func QuoteRuneToASCII(r rune) string { 165 return quoteRuneWith(r, '\'', true, false) 166 } 167 168 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 169 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 170 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 171 return appendQuotedRuneWith(dst, r, '\'', true, false) 172 } 173 174 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 175 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 176 // \u0100) for non-ASCII characters and non-printable characters as defined 177 // by IsGraphic. 178 func QuoteRuneToGraphic(r rune) string { 179 return quoteRuneWith(r, '\'', false, true) 180 } 181 182 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 183 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 184 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 185 return appendQuotedRuneWith(dst, r, '\'', false, true) 186 } 187 188 // CanBackquote reports whether the string s can be represented 189 // unchanged as a single-line backquoted string without control 190 // characters other than tab. 
func CanBackquote(s string) bool {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size
		switch {
		case size > 1:
			// A well-formed multibyte rune is assumed printable, except for
			// the BOM, which is invisible and should not be quoted.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A single byte that does not decode: not valid UTF-8.
			return false
		case r == '`', r == '\u007F':
			// The delimiter itself and DEL cannot appear in a raw string.
			return false
		case r < ' ' && r != '\t':
			// Tab is the only control character allowed.
			return false
		}
	}
	return true
}

// unhex converts the hexadecimal digit b (either case) to its value.
// ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		return rune(b - '0'), true
	case b >= 'a' && b <= 'f':
		return rune(b-'a') + 10, true
	case b >= 'A' && b <= 'F':
		return rune(b-'A') + 10, true
	}
	return 0, false
}

// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//	1) value, the decoded Unicode code point or byte value;
//	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//	3) tail, the remainder of the string after the character; and
//	4) an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
238 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 239 // easy cases 240 switch c := s[0]; { 241 case c == quote && (quote == '\'' || quote == '"'): 242 err = ErrSyntax 243 return 244 case c >= utf8.RuneSelf: 245 r, size := utf8.DecodeRuneInString(s) 246 return r, true, s[size:], nil 247 case c != '\\': 248 return rune(s[0]), false, s[1:], nil 249 } 250 251 // hard case: c is backslash 252 if len(s) <= 1 { 253 err = ErrSyntax 254 return 255 } 256 c := s[1] 257 s = s[2:] 258 259 switch c { 260 case 'a': 261 value = '\a' 262 case 'b': 263 value = '\b' 264 case 'f': 265 value = '\f' 266 case 'n': 267 value = '\n' 268 case 'r': 269 value = '\r' 270 case 't': 271 value = '\t' 272 case 'v': 273 value = '\v' 274 case 'x', 'u', 'U': 275 n := 0 276 switch c { 277 case 'x': 278 n = 2 279 case 'u': 280 n = 4 281 case 'U': 282 n = 8 283 } 284 var v rune 285 if len(s) < n { 286 err = ErrSyntax 287 return 288 } 289 for j := 0; j < n; j++ { 290 x, ok := unhex(s[j]) 291 if !ok { 292 err = ErrSyntax 293 return 294 } 295 v = v<<4 | x 296 } 297 s = s[n:] 298 if c == 'x' { 299 // single-byte string, possibly not UTF-8 300 value = v 301 break 302 } 303 if v > utf8.MaxRune { 304 err = ErrSyntax 305 return 306 } 307 value = v 308 multibyte = true 309 case '0', '1', '2', '3', '4', '5', '6', '7': 310 v := rune(c) - '0' 311 if len(s) < 2 { 312 err = ErrSyntax 313 return 314 } 315 for j := 0; j < 2; j++ { // one digit already; two more 316 x := rune(s[j]) - '0' 317 if x < 0 || x > 7 { 318 err = ErrSyntax 319 return 320 } 321 v = (v << 3) | x 322 } 323 s = s[2:] 324 if v > 255 { 325 err = ErrSyntax 326 return 327 } 328 value = v 329 case '\\': 330 value = '\\' 331 case '\'', '"': 332 if c != quote { 333 err = ErrSyntax 334 return 335 } 336 value = rune(c) 337 default: 338 err = ErrSyntax 339 return 340 } 341 tail = s 342 return 343 } 344 345 // Unquote interprets s as a single-quoted, double-quoted, 346 // or backquoted Go string literal, 
returning the string value 347 // that s quotes. (If s is single-quoted, it would be a Go 348 // character literal; Unquote returns the corresponding 349 // one-character string.) 350 func Unquote(s string) (string, error) { 351 n := len(s) 352 if n < 2 { 353 return "", ErrSyntax 354 } 355 quote := s[0] 356 if quote != s[n-1] { 357 return "", ErrSyntax 358 } 359 s = s[1 : n-1] 360 361 if quote == '`' { 362 if contains(s, '`') { 363 return "", ErrSyntax 364 } 365 if contains(s, '\r') { 366 // -1 because we know there is at least one \r to remove. 367 buf := make([]byte, 0, len(s)-1) 368 for i := 0; i < len(s); i++ { 369 if s[i] != '\r' { 370 buf = append(buf, s[i]) 371 } 372 } 373 return string(buf), nil 374 } 375 return s, nil 376 } 377 if quote != '"' && quote != '\'' { 378 return "", ErrSyntax 379 } 380 if contains(s, '\n') { 381 return "", ErrSyntax 382 } 383 384 // Is it trivial? Avoid allocation. 385 if !contains(s, '\\') && !contains(s, quote) { 386 switch quote { 387 case '"': 388 return s, nil 389 case '\'': 390 r, size := utf8.DecodeRuneInString(s) 391 if size == len(s) && (r != utf8.RuneError || size != 1) { 392 return s, nil 393 } 394 } 395 } 396 397 var runeTmp [utf8.UTFMax]byte 398 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 399 for len(s) > 0 { 400 c, multibyte, ss, err := UnquoteChar(s, quote) 401 if err != nil { 402 return "", err 403 } 404 s = ss 405 if c < utf8.RuneSelf || !multibyte { 406 buf = append(buf, byte(c)) 407 } else { 408 n := utf8.EncodeRune(runeTmp[:], c) 409 buf = append(buf, runeTmp[:n]...) 410 } 411 if quote == '\'' && len(s) != 0 { 412 // single-quoted must be single character 413 return "", ErrSyntax 414 } 415 } 416 return string(buf), nil 417 } 418 419 // contains reports whether the string contains the byte c. 
420 func contains(s string, c byte) bool { 421 for i := 0; i < len(s); i++ { 422 if s[i] == c { 423 return true 424 } 425 } 426 return false 427 } 428 429 // bsearch16 returns the smallest i such that a[i] >= x. 430 // If there is no such i, bsearch16 returns len(a). 431 func bsearch16(a []uint16, x uint16) int { 432 i, j := 0, len(a) 433 for i < j { 434 h := i + (j-i)/2 435 if a[h] < x { 436 i = h + 1 437 } else { 438 j = h 439 } 440 } 441 return i 442 } 443 444 // bsearch32 returns the smallest i such that a[i] >= x. 445 // If there is no such i, bsearch32 returns len(a). 446 func bsearch32(a []uint32, x uint32) int { 447 i, j := 0, len(a) 448 for i < j { 449 h := i + (j-i)/2 450 if a[h] < x { 451 i = h + 1 452 } else { 453 j = h 454 } 455 } 456 return i 457 } 458 459 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 460 // to give the same answer. It allows this package not to depend on unicode, 461 // and therefore not pull in all the Unicode tables. If the linker were better 462 // at tossing unused tables, we could get rid of this implementation. 463 // That would be nice. 464 465 // IsPrint reports whether the rune is defined as printable by Go, with 466 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 467 // symbols and ASCII space. 468 func IsPrint(r rune) bool { 469 // Fast check for Latin-1 470 if r <= 0xFF { 471 if 0x20 <= r && r <= 0x7E { 472 // All the ASCII is printable from space through DEL-1. 473 return true 474 } 475 if 0xA1 <= r && r <= 0xFF { 476 // Similarly for ¡ through ÿ... 477 return r != 0xAD // ...except for the bizarre soft hyphen. 478 } 479 return false 480 } 481 482 // Same algorithm, either on uint16 or uint32 value. 483 // First, find first i such that isPrint[i] >= x. 484 // This is the index of either the start or end of a pair that might span x. 485 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 
486 // If we find x in a range, make sure x is not in isNotPrint list. 487 488 if 0 <= r && r < 1<<16 { 489 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 490 i := bsearch16(isPrint, rr) 491 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 492 return false 493 } 494 j := bsearch16(isNotPrint, rr) 495 return j >= len(isNotPrint) || isNotPrint[j] != rr 496 } 497 498 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 499 i := bsearch32(isPrint, rr) 500 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 501 return false 502 } 503 if r >= 0x20000 { 504 return true 505 } 506 r -= 0x10000 507 j := bsearch16(isNotPrint, uint16(r)) 508 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 509 } 510 511 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 512 // characters include letters, marks, numbers, punctuation, symbols, and 513 // spaces, from categories L, M, N, P, S, and Zs. 514 func IsGraphic(r rune) bool { 515 if IsPrint(r) { 516 return true 517 } 518 return isInGraphicList(r) 519 } 520 521 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 522 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 523 // Should be called only if IsPrint fails. 524 func isInGraphicList(r rune) bool { 525 // We know r must fit in 16 bits - see makeisprint.go. 526 if r > 0xFFFF { 527 return false 528 } 529 rr := uint16(r) 530 i := bsearch16(isGraphic, rr) 531 return i < len(isGraphic) && rr == isGraphic[i] 532 }