github.com/fjballest/golang@v0.0.0-20151209143359-e4c5fe594ca8/src/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "unicode/utf8" 11 ) 12 13 const lowerhex = "0123456789abcdef" 14 15 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 16 var runeTmp [utf8.UTFMax]byte 17 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 18 buf = append(buf, quote) 19 for width := 0; len(s) > 0; s = s[width:] { 20 r := rune(s[0]) 21 width = 1 22 if r >= utf8.RuneSelf { 23 r, width = utf8.DecodeRuneInString(s) 24 } 25 if width == 1 && r == utf8.RuneError { 26 buf = append(buf, `\x`...) 27 buf = append(buf, lowerhex[s[0]>>4]) 28 buf = append(buf, lowerhex[s[0]&0xF]) 29 continue 30 } 31 if r == rune(quote) || r == '\\' { // always backslashed 32 buf = append(buf, '\\') 33 buf = append(buf, byte(r)) 34 continue 35 } 36 if ASCIIonly { 37 if r < utf8.RuneSelf && IsPrint(r) { 38 buf = append(buf, byte(r)) 39 continue 40 } 41 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 42 n := utf8.EncodeRune(runeTmp[:], r) 43 buf = append(buf, runeTmp[:n]...) 44 continue 45 } 46 switch r { 47 case '\a': 48 buf = append(buf, `\a`...) 49 case '\b': 50 buf = append(buf, `\b`...) 51 case '\f': 52 buf = append(buf, `\f`...) 53 case '\n': 54 buf = append(buf, `\n`...) 55 case '\r': 56 buf = append(buf, `\r`...) 57 case '\t': 58 buf = append(buf, `\t`...) 59 case '\v': 60 buf = append(buf, `\v`...) 61 default: 62 switch { 63 case r < ' ': 64 buf = append(buf, `\x`...) 65 buf = append(buf, lowerhex[s[0]>>4]) 66 buf = append(buf, lowerhex[s[0]&0xF]) 67 case r > utf8.MaxRune: 68 r = 0xFFFD 69 fallthrough 70 case r < 0x10000: 71 buf = append(buf, `\u`...) 72 for s := 12; s >= 0; s -= 4 { 73 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 74 } 75 default: 76 buf = append(buf, `\U`...) 77 for s := 28; s >= 0; s -= 4 { 78 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 79 } 80 } 81 } 82 } 83 buf = append(buf, quote) 84 return string(buf) 85 86 } 87 88 // Quote returns a double-quoted Go string literal representing s. The 89 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 90 // control characters and non-printable characters as defined by 91 // IsPrint. 92 func Quote(s string) string { 93 return quoteWith(s, '"', false, false) 94 } 95 96 // AppendQuote appends a double-quoted Go string literal representing s, 97 // as generated by Quote, to dst and returns the extended buffer. 98 func AppendQuote(dst []byte, s string) []byte { 99 return append(dst, Quote(s)...) 100 } 101 102 // QuoteToASCII returns a double-quoted Go string literal representing s. 103 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 104 // non-ASCII characters and non-printable characters as defined by IsPrint. 105 func QuoteToASCII(s string) string { 106 return quoteWith(s, '"', true, false) 107 } 108 109 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 110 // as generated by QuoteToASCII, to dst and returns the extended buffer. 111 func AppendQuoteToASCII(dst []byte, s string) []byte { 112 return append(dst, QuoteToASCII(s)...) 113 } 114 115 // QuoteToGraphic returns a double-quoted Go string literal representing s. 116 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 117 // non-ASCII characters and non-printable characters as defined by IsGraphic. 118 func QuoteToGraphic(s string) string { 119 return quoteWith(s, '"', false, true) 120 } 121 122 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 123 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 124 func AppendQuoteToGraphic(dst []byte, s string) []byte { 125 return append(dst, QuoteToGraphic(s)...) 126 } 127 128 // QuoteRune returns a single-quoted Go character literal representing the 129 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 130 // for control characters and non-printable characters as defined by IsPrint. 131 func QuoteRune(r rune) string { 132 // TODO: avoid the allocation here. 133 return quoteWith(string(r), '\'', false, false) 134 } 135 136 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 137 // as generated by QuoteRune, to dst and returns the extended buffer. 138 func AppendQuoteRune(dst []byte, r rune) []byte { 139 return append(dst, QuoteRune(r)...) 140 } 141 142 // QuoteRuneToASCII returns a single-quoted Go character literal representing 143 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 144 // \u0100) for non-ASCII characters and non-printable characters as defined 145 // by IsPrint. 146 func QuoteRuneToASCII(r rune) string { 147 // TODO: avoid the allocation here. 148 return quoteWith(string(r), '\'', true, false) 149 } 150 151 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 152 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 153 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 154 return append(dst, QuoteRuneToASCII(r)...) 155 } 156 157 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 158 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 159 // \u0100) for non-ASCII characters and non-printable characters as defined 160 // by IsGraphic. 161 func QuoteRuneToGraphic(r rune) string { 162 // TODO: avoid the allocation here. 163 return quoteWith(string(r), '\'', false, true) 164 } 165 166 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 167 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 168 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 169 return append(dst, QuoteRuneToGraphic(r)...) 170 } 171 172 // CanBackquote reports whether the string s can be represented 173 // unchanged as a single-line backquoted string without control 174 // characters other than tab. 175 func CanBackquote(s string) bool { 176 for len(s) > 0 { 177 r, wid := utf8.DecodeRuneInString(s) 178 s = s[wid:] 179 if wid > 1 { 180 if r == '\ufeff' { 181 return false // BOMs are invisible and should not be quoted. 182 } 183 continue // All other multibyte runes are correctly encoded and assumed printable. 184 } 185 if r == utf8.RuneError { 186 return false 187 } 188 if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' { 189 return false 190 } 191 } 192 return true 193 } 194 195 func unhex(b byte) (v rune, ok bool) { 196 c := rune(b) 197 switch { 198 case '0' <= c && c <= '9': 199 return c - '0', true 200 case 'a' <= c && c <= 'f': 201 return c - 'a' + 10, true 202 case 'A' <= c && c <= 'F': 203 return c - 'A' + 10, true 204 } 205 return 206 } 207 208 // UnquoteChar decodes the first character or byte in the escaped string 209 // or character literal represented by the string s. 210 // It returns four values: 211 // 212 // 1) value, the decoded Unicode code point or byte value; 213 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 214 // 3) tail, the remainder of the string after the character; and 215 // 4) an error that will be nil if the character is syntactically valid. 216 // 217 // The second argument, quote, specifies the type of literal being parsed 218 // and therefore which escaped quote character is permitted. 219 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 220 // If set to a double quote, it permits \" and disallows unescaped ". 221 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 222 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 223 // easy cases 224 switch c := s[0]; { 225 case c == quote && (quote == '\'' || quote == '"'): 226 err = ErrSyntax 227 return 228 case c >= utf8.RuneSelf: 229 r, size := utf8.DecodeRuneInString(s) 230 return r, true, s[size:], nil 231 case c != '\\': 232 return rune(s[0]), false, s[1:], nil 233 } 234 235 // hard case: c is backslash 236 if len(s) <= 1 { 237 err = ErrSyntax 238 return 239 } 240 c := s[1] 241 s = s[2:] 242 243 switch c { 244 case 'a': 245 value = '\a' 246 case 'b': 247 value = '\b' 248 case 'f': 249 value = '\f' 250 case 'n': 251 value = '\n' 252 case 'r': 253 value = '\r' 254 case 't': 255 value = '\t' 256 case 'v': 257 value = '\v' 258 case 'x', 'u', 'U': 259 n := 0 260 switch c { 261 case 'x': 262 n = 2 263 case 'u': 264 n = 4 265 case 'U': 266 n = 8 267 } 268 var v rune 269 if len(s) < n { 270 err = ErrSyntax 271 return 272 } 273 for j := 0; j < n; j++ { 274 x, ok := unhex(s[j]) 275 if !ok { 276 err = ErrSyntax 277 return 278 } 279 v = v<<4 | x 280 } 281 s = s[n:] 282 if c == 'x' { 283 // single-byte string, possibly not UTF-8 284 value = v 285 break 286 } 287 if v > utf8.MaxRune { 288 err = ErrSyntax 289 return 290 } 291 value = v 292 multibyte = true 293 case '0', '1', '2', '3', '4', '5', '6', '7': 294 v := rune(c) - '0' 295 if len(s) < 2 { 296 err = ErrSyntax 297 return 298 } 299 for j := 0; j < 2; j++ { // one digit already; two more 300 x := rune(s[j]) - '0' 301 if x < 0 || x > 7 { 302 err = ErrSyntax 303 return 304 } 305 v = (v << 3) | x 306 } 307 s = s[2:] 308 if v > 255 { 309 err = ErrSyntax 310 return 311 } 312 value = v 313 case '\\': 314 value = '\\' 315 case '\'', '"': 316 if c != quote { 317 err = ErrSyntax 318 return 319 } 320 value = rune(c) 321 default: 322 err = ErrSyntax 323 return 324 } 325 tail = s 326 return 327 } 328 329 // Unquote interprets s as a single-quoted, double-quoted, 330 // or backquoted Go string literal, returning the string value 331 // that s quotes. (If s is single-quoted, it would be a Go 332 // character literal; Unquote returns the corresponding 333 // one-character string.) 334 func Unquote(s string) (t string, err error) { 335 n := len(s) 336 if n < 2 { 337 return "", ErrSyntax 338 } 339 quote := s[0] 340 if quote != s[n-1] { 341 return "", ErrSyntax 342 } 343 s = s[1 : n-1] 344 345 if quote == '`' { 346 if contains(s, '`') { 347 return "", ErrSyntax 348 } 349 return s, nil 350 } 351 if quote != '"' && quote != '\'' { 352 return "", ErrSyntax 353 } 354 if contains(s, '\n') { 355 return "", ErrSyntax 356 } 357 358 // Is it trivial? Avoid allocation. 359 if !contains(s, '\\') && !contains(s, quote) { 360 switch quote { 361 case '"': 362 return s, nil 363 case '\'': 364 r, size := utf8.DecodeRuneInString(s) 365 if size == len(s) && (r != utf8.RuneError || size != 1) { 366 return s, nil 367 } 368 } 369 } 370 371 var runeTmp [utf8.UTFMax]byte 372 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 373 for len(s) > 0 { 374 c, multibyte, ss, err := UnquoteChar(s, quote) 375 if err != nil { 376 return "", err 377 } 378 s = ss 379 if c < utf8.RuneSelf || !multibyte { 380 buf = append(buf, byte(c)) 381 } else { 382 n := utf8.EncodeRune(runeTmp[:], c) 383 buf = append(buf, runeTmp[:n]...) 384 } 385 if quote == '\'' && len(s) != 0 { 386 // single-quoted must be single character 387 return "", ErrSyntax 388 } 389 } 390 return string(buf), nil 391 } 392 393 // contains reports whether the string contains the byte c. 394 func contains(s string, c byte) bool { 395 for i := 0; i < len(s); i++ { 396 if s[i] == c { 397 return true 398 } 399 } 400 return false 401 } 402 403 // bsearch16 returns the smallest i such that a[i] >= x. 404 // If there is no such i, bsearch16 returns len(a). 405 func bsearch16(a []uint16, x uint16) int { 406 i, j := 0, len(a) 407 for i < j { 408 h := i + (j-i)/2 409 if a[h] < x { 410 i = h + 1 411 } else { 412 j = h 413 } 414 } 415 return i 416 } 417 418 // bsearch32 returns the smallest i such that a[i] >= x. 419 // If there is no such i, bsearch32 returns len(a). 420 func bsearch32(a []uint32, x uint32) int { 421 i, j := 0, len(a) 422 for i < j { 423 h := i + (j-i)/2 424 if a[h] < x { 425 i = h + 1 426 } else { 427 j = h 428 } 429 } 430 return i 431 } 432 433 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 434 // to give the same answer. It allows this package not to depend on unicode, 435 // and therefore not pull in all the Unicode tables. If the linker were better 436 // at tossing unused tables, we could get rid of this implementation. 437 // That would be nice. 438 439 // IsPrint reports whether the rune is defined as printable by Go, with 440 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 441 // symbols and ASCII space. 442 func IsPrint(r rune) bool { 443 // Fast check for Latin-1 444 if r <= 0xFF { 445 if 0x20 <= r && r <= 0x7E { 446 // All the ASCII is printable from space through DEL-1. 447 return true 448 } 449 if 0xA1 <= r && r <= 0xFF { 450 // Similarly for ¡ through ÿ... 451 return r != 0xAD // ...except for the bizarre soft hyphen. 452 } 453 return false 454 } 455 456 // Same algorithm, either on uint16 or uint32 value. 457 // First, find first i such that isPrint[i] >= x. 458 // This is the index of either the start or end of a pair that might span x. 459 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 460 // If we find x in a range, make sure x is not in isNotPrint list. 461 462 if 0 <= r && r < 1<<16 { 463 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 464 i := bsearch16(isPrint, rr) 465 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 466 return false 467 } 468 j := bsearch16(isNotPrint, rr) 469 return j >= len(isNotPrint) || isNotPrint[j] != rr 470 } 471 472 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 473 i := bsearch32(isPrint, rr) 474 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 475 return false 476 } 477 if r >= 0x20000 { 478 return true 479 } 480 r -= 0x10000 481 j := bsearch16(isNotPrint, uint16(r)) 482 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 483 } 484 485 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 486 // characters include letters, marks, numbers, punctuation, symbols, and 487 // spaces, from categories L, M, N, P, S, and Zs. 488 func IsGraphic(r rune) bool { 489 if IsPrint(r) { 490 return true 491 } 492 return isInGraphicList(r) 493 } 494 495 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 496 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 497 // Should be called only if IsPrint fails. 498 func isInGraphicList(r rune) bool { 499 // We know r must fit in 16 bits - see makeisprint.go. 500 if r > 0xFFFF { 501 return false 502 } 503 rr := uint16(r) 504 i := bsearch16(isGraphic, rr) 505 return i < len(isGraphic) && rr == isGraphic[i] 506 }