github.com/mh-cbon/go@v0.0.0-20160603070303-9e112a3fe4c0/src/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import "unicode/utf8" 10 11 const lowerhex = "0123456789abcdef" 12 13 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 14 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 15 } 16 17 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 18 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 19 } 20 21 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 22 buf = append(buf, quote) 23 for width := 0; len(s) > 0; s = s[width:] { 24 r := rune(s[0]) 25 width = 1 26 if r >= utf8.RuneSelf { 27 r, width = utf8.DecodeRuneInString(s) 28 } 29 if width == 1 && r == utf8.RuneError { 30 buf = append(buf, `\x`...) 
31 buf = append(buf, lowerhex[s[0]>>4]) 32 buf = append(buf, lowerhex[s[0]&0xF]) 33 continue 34 } 35 buf = appendEscapedRune(buf, r, width, quote, ASCIIonly, graphicOnly) 36 } 37 buf = append(buf, quote) 38 return buf 39 } 40 41 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 42 buf = append(buf, quote) 43 if !utf8.ValidRune(r) { 44 r = utf8.RuneError 45 } 46 buf = appendEscapedRune(buf, r, utf8.RuneLen(r), quote, ASCIIonly, graphicOnly) 47 buf = append(buf, quote) 48 return buf 49 } 50 51 func appendEscapedRune(buf []byte, r rune, width int, quote byte, ASCIIonly, graphicOnly bool) []byte { 52 var runeTmp [utf8.UTFMax]byte 53 if r == rune(quote) || r == '\\' { // always backslashed 54 buf = append(buf, '\\') 55 buf = append(buf, byte(r)) 56 return buf 57 } 58 if ASCIIonly { 59 if r < utf8.RuneSelf && IsPrint(r) { 60 buf = append(buf, byte(r)) 61 return buf 62 } 63 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 64 n := utf8.EncodeRune(runeTmp[:], r) 65 buf = append(buf, runeTmp[:n]...) 66 return buf 67 } 68 switch r { 69 case '\a': 70 buf = append(buf, `\a`...) 71 case '\b': 72 buf = append(buf, `\b`...) 73 case '\f': 74 buf = append(buf, `\f`...) 75 case '\n': 76 buf = append(buf, `\n`...) 77 case '\r': 78 buf = append(buf, `\r`...) 79 case '\t': 80 buf = append(buf, `\t`...) 81 case '\v': 82 buf = append(buf, `\v`...) 83 default: 84 switch { 85 case r < ' ': 86 buf = append(buf, `\x`...) 87 buf = append(buf, lowerhex[byte(r)>>4]) 88 buf = append(buf, lowerhex[byte(r)&0xF]) 89 case r > utf8.MaxRune: 90 r = 0xFFFD 91 fallthrough 92 case r < 0x10000: 93 buf = append(buf, `\u`...) 94 for s := 12; s >= 0; s -= 4 { 95 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 96 } 97 default: 98 buf = append(buf, `\U`...) 99 for s := 28; s >= 0; s -= 4 { 100 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 101 } 102 } 103 } 104 return buf 105 } 106 107 // Quote returns a double-quoted Go string literal representing s. 
The 108 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 109 // control characters and non-printable characters as defined by 110 // IsPrint. 111 func Quote(s string) string { 112 return quoteWith(s, '"', false, false) 113 } 114 115 // AppendQuote appends a double-quoted Go string literal representing s, 116 // as generated by Quote, to dst and returns the extended buffer. 117 func AppendQuote(dst []byte, s string) []byte { 118 return appendQuotedWith(dst, s, '"', false, false) 119 } 120 121 // QuoteToASCII returns a double-quoted Go string literal representing s. 122 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 123 // non-ASCII characters and non-printable characters as defined by IsPrint. 124 func QuoteToASCII(s string) string { 125 return quoteWith(s, '"', true, false) 126 } 127 128 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 129 // as generated by QuoteToASCII, to dst and returns the extended buffer. 130 func AppendQuoteToASCII(dst []byte, s string) []byte { 131 return appendQuotedWith(dst, s, '"', true, false) 132 } 133 134 // QuoteToGraphic returns a double-quoted Go string literal representing s. 135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 136 // non-ASCII characters and non-printable characters as defined by IsGraphic. 137 func QuoteToGraphic(s string) string { 138 return quoteWith(s, '"', false, true) 139 } 140 141 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 142 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 143 func AppendQuoteToGraphic(dst []byte, s string) []byte { 144 return appendQuotedWith(dst, s, '"', false, true) 145 } 146 147 // QuoteRune returns a single-quoted Go character literal representing the 148 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 149 // for control characters and non-printable characters as defined by IsPrint. 
150 func QuoteRune(r rune) string { 151 return quoteRuneWith(r, '\'', false, false) 152 } 153 154 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 155 // as generated by QuoteRune, to dst and returns the extended buffer. 156 func AppendQuoteRune(dst []byte, r rune) []byte { 157 return appendQuotedRuneWith(dst, r, '\'', false, false) 158 } 159 160 // QuoteRuneToASCII returns a single-quoted Go character literal representing 161 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 162 // \u0100) for non-ASCII characters and non-printable characters as defined 163 // by IsPrint. 164 func QuoteRuneToASCII(r rune) string { 165 return quoteRuneWith(r, '\'', true, false) 166 } 167 168 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 169 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 170 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 171 return appendQuotedRuneWith(dst, r, '\'', true, false) 172 } 173 174 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 175 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 176 // \u0100) for non-ASCII characters and non-printable characters as defined 177 // by IsGraphic. 178 func QuoteRuneToGraphic(r rune) string { 179 return quoteRuneWith(r, '\'', false, true) 180 } 181 182 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 183 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 184 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 185 return appendQuotedRuneWith(dst, r, '\'', false, true) 186 } 187 188 // CanBackquote reports whether the string s can be represented 189 // unchanged as a single-line backquoted string without control 190 // characters other than tab. 
func CanBackquote(s string) bool {
	for i := 0; i < len(s); {
		r, size := utf8.DecodeRuneInString(s[i:])
		i += size
		switch {
		case size > 1:
			// Multibyte runes are correctly encoded and assumed printable,
			// except the BOM, which is invisible and should not be quoted.
			if r == '\ufeff' {
				return false
			}
		case r == utf8.RuneError:
			// A one-byte RuneError means the byte is not valid UTF-8.
			return false
		case r == '`', r == '\u007F', r < ' ' && r != '\t':
			// Backquote, DEL, and control characters other than tab.
			return false
		}
	}
	return true
}

// unhex converts the hexadecimal digit b (either case) to its numeric value.
// ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	switch {
	case b >= '0' && b <= '9':
		v = rune(b - '0')
	case b >= 'a' && b <= 'f':
		v = rune(b-'a') + 10
	case b >= 'A' && b <= 'F':
		v = rune(b-'A') + 10
	default:
		return 0, false
	}
	return v, true
}

// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//	1) value, the decoded Unicode code point or byte value;
//	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//	3) tail, the remainder of the string after the character; and
//	4) an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
238 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 239 // easy cases 240 switch c := s[0]; { 241 case c == quote && (quote == '\'' || quote == '"'): 242 err = ErrSyntax 243 return 244 case c >= utf8.RuneSelf: 245 r, size := utf8.DecodeRuneInString(s) 246 return r, true, s[size:], nil 247 case c != '\\': 248 return rune(s[0]), false, s[1:], nil 249 } 250 251 // hard case: c is backslash 252 if len(s) <= 1 { 253 err = ErrSyntax 254 return 255 } 256 c := s[1] 257 s = s[2:] 258 259 switch c { 260 case 'a': 261 value = '\a' 262 case 'b': 263 value = '\b' 264 case 'f': 265 value = '\f' 266 case 'n': 267 value = '\n' 268 case 'r': 269 value = '\r' 270 case 't': 271 value = '\t' 272 case 'v': 273 value = '\v' 274 case 'x', 'u', 'U': 275 n := 0 276 switch c { 277 case 'x': 278 n = 2 279 case 'u': 280 n = 4 281 case 'U': 282 n = 8 283 } 284 var v rune 285 if len(s) < n { 286 err = ErrSyntax 287 return 288 } 289 for j := 0; j < n; j++ { 290 x, ok := unhex(s[j]) 291 if !ok { 292 err = ErrSyntax 293 return 294 } 295 v = v<<4 | x 296 } 297 s = s[n:] 298 if c == 'x' { 299 // single-byte string, possibly not UTF-8 300 value = v 301 break 302 } 303 if v > utf8.MaxRune { 304 err = ErrSyntax 305 return 306 } 307 value = v 308 multibyte = true 309 case '0', '1', '2', '3', '4', '5', '6', '7': 310 v := rune(c) - '0' 311 if len(s) < 2 { 312 err = ErrSyntax 313 return 314 } 315 for j := 0; j < 2; j++ { // one digit already; two more 316 x := rune(s[j]) - '0' 317 if x < 0 || x > 7 { 318 err = ErrSyntax 319 return 320 } 321 v = (v << 3) | x 322 } 323 s = s[2:] 324 if v > 255 { 325 err = ErrSyntax 326 return 327 } 328 value = v 329 case '\\': 330 value = '\\' 331 case '\'', '"': 332 if c != quote { 333 err = ErrSyntax 334 return 335 } 336 value = rune(c) 337 default: 338 err = ErrSyntax 339 return 340 } 341 tail = s 342 return 343 } 344 345 // Unquote interprets s as a single-quoted, double-quoted, 346 // or backquoted Go string literal, 
returning the string value 347 // that s quotes. (If s is single-quoted, it would be a Go 348 // character literal; Unquote returns the corresponding 349 // one-character string.) 350 func Unquote(s string) (string, error) { 351 n := len(s) 352 if n < 2 { 353 return "", ErrSyntax 354 } 355 quote := s[0] 356 if quote != s[n-1] { 357 return "", ErrSyntax 358 } 359 s = s[1 : n-1] 360 361 if quote == '`' { 362 if contains(s, '`') { 363 return "", ErrSyntax 364 } 365 return s, nil 366 } 367 if quote != '"' && quote != '\'' { 368 return "", ErrSyntax 369 } 370 if contains(s, '\n') { 371 return "", ErrSyntax 372 } 373 374 // Is it trivial? Avoid allocation. 375 if !contains(s, '\\') && !contains(s, quote) { 376 switch quote { 377 case '"': 378 return s, nil 379 case '\'': 380 r, size := utf8.DecodeRuneInString(s) 381 if size == len(s) && (r != utf8.RuneError || size != 1) { 382 return s, nil 383 } 384 } 385 } 386 387 var runeTmp [utf8.UTFMax]byte 388 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 389 for len(s) > 0 { 390 c, multibyte, ss, err := UnquoteChar(s, quote) 391 if err != nil { 392 return "", err 393 } 394 s = ss 395 if c < utf8.RuneSelf || !multibyte { 396 buf = append(buf, byte(c)) 397 } else { 398 n := utf8.EncodeRune(runeTmp[:], c) 399 buf = append(buf, runeTmp[:n]...) 400 } 401 if quote == '\'' && len(s) != 0 { 402 // single-quoted must be single character 403 return "", ErrSyntax 404 } 405 } 406 return string(buf), nil 407 } 408 409 // contains reports whether the string contains the byte c. 410 func contains(s string, c byte) bool { 411 for i := 0; i < len(s); i++ { 412 if s[i] == c { 413 return true 414 } 415 } 416 return false 417 } 418 419 // bsearch16 returns the smallest i such that a[i] >= x. 420 // If there is no such i, bsearch16 returns len(a). 
421 func bsearch16(a []uint16, x uint16) int { 422 i, j := 0, len(a) 423 for i < j { 424 h := i + (j-i)/2 425 if a[h] < x { 426 i = h + 1 427 } else { 428 j = h 429 } 430 } 431 return i 432 } 433 434 // bsearch32 returns the smallest i such that a[i] >= x. 435 // If there is no such i, bsearch32 returns len(a). 436 func bsearch32(a []uint32, x uint32) int { 437 i, j := 0, len(a) 438 for i < j { 439 h := i + (j-i)/2 440 if a[h] < x { 441 i = h + 1 442 } else { 443 j = h 444 } 445 } 446 return i 447 } 448 449 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 450 // to give the same answer. It allows this package not to depend on unicode, 451 // and therefore not pull in all the Unicode tables. If the linker were better 452 // at tossing unused tables, we could get rid of this implementation. 453 // That would be nice. 454 455 // IsPrint reports whether the rune is defined as printable by Go, with 456 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 457 // symbols and ASCII space. 458 func IsPrint(r rune) bool { 459 // Fast check for Latin-1 460 if r <= 0xFF { 461 if 0x20 <= r && r <= 0x7E { 462 // All the ASCII is printable from space through DEL-1. 463 return true 464 } 465 if 0xA1 <= r && r <= 0xFF { 466 // Similarly for ¡ through ÿ... 467 return r != 0xAD // ...except for the bizarre soft hyphen. 468 } 469 return false 470 } 471 472 // Same algorithm, either on uint16 or uint32 value. 473 // First, find first i such that isPrint[i] >= x. 474 // This is the index of either the start or end of a pair that might span x. 475 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 476 // If we find x in a range, make sure x is not in isNotPrint list. 
477 478 if 0 <= r && r < 1<<16 { 479 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 480 i := bsearch16(isPrint, rr) 481 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 482 return false 483 } 484 j := bsearch16(isNotPrint, rr) 485 return j >= len(isNotPrint) || isNotPrint[j] != rr 486 } 487 488 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 489 i := bsearch32(isPrint, rr) 490 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 491 return false 492 } 493 if r >= 0x20000 { 494 return true 495 } 496 r -= 0x10000 497 j := bsearch16(isNotPrint, uint16(r)) 498 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 499 } 500 501 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 502 // characters include letters, marks, numbers, punctuation, symbols, and 503 // spaces, from categories L, M, N, P, S, and Zs. 504 func IsGraphic(r rune) bool { 505 if IsPrint(r) { 506 return true 507 } 508 return isInGraphicList(r) 509 } 510 511 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 512 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 513 // Should be called only if IsPrint fails. 514 func isInGraphicList(r rune) bool { 515 // We know r must fit in 16 bits - see makeisprint.go. 516 if r > 0xFFFF { 517 return false 518 } 519 rr := uint16(r) 520 i := bsearch16(isGraphic, rr) 521 return i < len(isGraphic) && rr == isGraphic[i] 522 }