github.com/megatontech/mynoteforgo@v0.0.0-20200507084910-5d0c6ea6e890/源码/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "internal/bytealg" 11 "unicode/utf8" 12 ) 13 14 const lowerhex = "0123456789abcdef" 15 16 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 17 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 18 } 19 20 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 21 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 22 } 23 24 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 25 buf = append(buf, quote) 26 for width := 0; len(s) > 0; s = s[width:] { 27 r := rune(s[0]) 28 width = 1 29 if r >= utf8.RuneSelf { 30 r, width = utf8.DecodeRuneInString(s) 31 } 32 if width == 1 && r == utf8.RuneError { 33 buf = append(buf, `\x`...) 
34 buf = append(buf, lowerhex[s[0]>>4]) 35 buf = append(buf, lowerhex[s[0]&0xF]) 36 continue 37 } 38 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 39 } 40 buf = append(buf, quote) 41 return buf 42 } 43 44 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 45 buf = append(buf, quote) 46 if !utf8.ValidRune(r) { 47 r = utf8.RuneError 48 } 49 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 50 buf = append(buf, quote) 51 return buf 52 } 53 54 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 55 var runeTmp [utf8.UTFMax]byte 56 if r == rune(quote) || r == '\\' { // always backslashed 57 buf = append(buf, '\\') 58 buf = append(buf, byte(r)) 59 return buf 60 } 61 if ASCIIonly { 62 if r < utf8.RuneSelf && IsPrint(r) { 63 buf = append(buf, byte(r)) 64 return buf 65 } 66 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 67 n := utf8.EncodeRune(runeTmp[:], r) 68 buf = append(buf, runeTmp[:n]...) 69 return buf 70 } 71 switch r { 72 case '\a': 73 buf = append(buf, `\a`...) 74 case '\b': 75 buf = append(buf, `\b`...) 76 case '\f': 77 buf = append(buf, `\f`...) 78 case '\n': 79 buf = append(buf, `\n`...) 80 case '\r': 81 buf = append(buf, `\r`...) 82 case '\t': 83 buf = append(buf, `\t`...) 84 case '\v': 85 buf = append(buf, `\v`...) 86 default: 87 switch { 88 case r < ' ': 89 buf = append(buf, `\x`...) 90 buf = append(buf, lowerhex[byte(r)>>4]) 91 buf = append(buf, lowerhex[byte(r)&0xF]) 92 case r > utf8.MaxRune: 93 r = 0xFFFD 94 fallthrough 95 case r < 0x10000: 96 buf = append(buf, `\u`...) 97 for s := 12; s >= 0; s -= 4 { 98 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 99 } 100 default: 101 buf = append(buf, `\U`...) 102 for s := 28; s >= 0; s -= 4 { 103 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 104 } 105 } 106 } 107 return buf 108 } 109 110 // Quote returns a double-quoted Go string literal representing s. 
The 111 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 112 // control characters and non-printable characters as defined by 113 // IsPrint. 114 func Quote(s string) string { 115 return quoteWith(s, '"', false, false) 116 } 117 118 // AppendQuote appends a double-quoted Go string literal representing s, 119 // as generated by Quote, to dst and returns the extended buffer. 120 func AppendQuote(dst []byte, s string) []byte { 121 return appendQuotedWith(dst, s, '"', false, false) 122 } 123 124 // QuoteToASCII returns a double-quoted Go string literal representing s. 125 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 126 // non-ASCII characters and non-printable characters as defined by IsPrint. 127 func QuoteToASCII(s string) string { 128 return quoteWith(s, '"', true, false) 129 } 130 131 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 132 // as generated by QuoteToASCII, to dst and returns the extended buffer. 133 func AppendQuoteToASCII(dst []byte, s string) []byte { 134 return appendQuotedWith(dst, s, '"', true, false) 135 } 136 137 // QuoteToGraphic returns a double-quoted Go string literal representing s. 138 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 139 // non-ASCII characters and non-printable characters as defined by IsGraphic. 140 func QuoteToGraphic(s string) string { 141 return quoteWith(s, '"', false, true) 142 } 143 144 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 145 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 146 func AppendQuoteToGraphic(dst []byte, s string) []byte { 147 return appendQuotedWith(dst, s, '"', false, true) 148 } 149 150 // QuoteRune returns a single-quoted Go character literal representing the 151 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 152 // for control characters and non-printable characters as defined by IsPrint. 
153 func QuoteRune(r rune) string { 154 return quoteRuneWith(r, '\'', false, false) 155 } 156 157 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 158 // as generated by QuoteRune, to dst and returns the extended buffer. 159 func AppendQuoteRune(dst []byte, r rune) []byte { 160 return appendQuotedRuneWith(dst, r, '\'', false, false) 161 } 162 163 // QuoteRuneToASCII returns a single-quoted Go character literal representing 164 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 165 // \u0100) for non-ASCII characters and non-printable characters as defined 166 // by IsPrint. 167 func QuoteRuneToASCII(r rune) string { 168 return quoteRuneWith(r, '\'', true, false) 169 } 170 171 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 172 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 173 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 174 return appendQuotedRuneWith(dst, r, '\'', true, false) 175 } 176 177 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 178 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 179 // \u0100) for non-ASCII characters and non-printable characters as defined 180 // by IsGraphic. 181 func QuoteRuneToGraphic(r rune) string { 182 return quoteRuneWith(r, '\'', false, true) 183 } 184 185 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 186 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 187 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 188 return appendQuotedRuneWith(dst, r, '\'', false, true) 189 } 190 191 // CanBackquote reports whether the string s can be represented 192 // unchanged as a single-line backquoted string without control 193 // characters other than tab. 
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// Single-byte result: either ASCII or an invalid UTF-8 byte.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}

// unhex converts the ASCII hexadecimal digit b (either case) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}

// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//	1) value, the decoded Unicode code point or byte value;
//	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//	3) tail, the remainder of the string after the character; and
//	4) an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
241 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 242 // easy cases 243 if len(s) == 0 { 244 err = ErrSyntax 245 return 246 } 247 switch c := s[0]; { 248 case c == quote && (quote == '\'' || quote == '"'): 249 err = ErrSyntax 250 return 251 case c >= utf8.RuneSelf: 252 r, size := utf8.DecodeRuneInString(s) 253 return r, true, s[size:], nil 254 case c != '\\': 255 return rune(s[0]), false, s[1:], nil 256 } 257 258 // hard case: c is backslash 259 if len(s) <= 1 { 260 err = ErrSyntax 261 return 262 } 263 c := s[1] 264 s = s[2:] 265 266 switch c { 267 case 'a': 268 value = '\a' 269 case 'b': 270 value = '\b' 271 case 'f': 272 value = '\f' 273 case 'n': 274 value = '\n' 275 case 'r': 276 value = '\r' 277 case 't': 278 value = '\t' 279 case 'v': 280 value = '\v' 281 case 'x', 'u', 'U': 282 n := 0 283 switch c { 284 case 'x': 285 n = 2 286 case 'u': 287 n = 4 288 case 'U': 289 n = 8 290 } 291 var v rune 292 if len(s) < n { 293 err = ErrSyntax 294 return 295 } 296 for j := 0; j < n; j++ { 297 x, ok := unhex(s[j]) 298 if !ok { 299 err = ErrSyntax 300 return 301 } 302 v = v<<4 | x 303 } 304 s = s[n:] 305 if c == 'x' { 306 // single-byte string, possibly not UTF-8 307 value = v 308 break 309 } 310 if v > utf8.MaxRune { 311 err = ErrSyntax 312 return 313 } 314 value = v 315 multibyte = true 316 case '0', '1', '2', '3', '4', '5', '6', '7': 317 v := rune(c) - '0' 318 if len(s) < 2 { 319 err = ErrSyntax 320 return 321 } 322 for j := 0; j < 2; j++ { // one digit already; two more 323 x := rune(s[j]) - '0' 324 if x < 0 || x > 7 { 325 err = ErrSyntax 326 return 327 } 328 v = (v << 3) | x 329 } 330 s = s[2:] 331 if v > 255 { 332 err = ErrSyntax 333 return 334 } 335 value = v 336 case '\\': 337 value = '\\' 338 case '\'', '"': 339 if c != quote { 340 err = ErrSyntax 341 return 342 } 343 value = rune(c) 344 default: 345 err = ErrSyntax 346 return 347 } 348 tail = s 349 return 350 } 351 352 // Unquote interprets s as a single-quoted, 
double-quoted, 353 // or backquoted Go string literal, returning the string value 354 // that s quotes. (If s is single-quoted, it would be a Go 355 // character literal; Unquote returns the corresponding 356 // one-character string.) 357 func Unquote(s string) (string, error) { 358 n := len(s) 359 if n < 2 { 360 return "", ErrSyntax 361 } 362 quote := s[0] 363 if quote != s[n-1] { 364 return "", ErrSyntax 365 } 366 s = s[1 : n-1] 367 368 if quote == '`' { 369 if contains(s, '`') { 370 return "", ErrSyntax 371 } 372 if contains(s, '\r') { 373 // -1 because we know there is at least one \r to remove. 374 buf := make([]byte, 0, len(s)-1) 375 for i := 0; i < len(s); i++ { 376 if s[i] != '\r' { 377 buf = append(buf, s[i]) 378 } 379 } 380 return string(buf), nil 381 } 382 return s, nil 383 } 384 if quote != '"' && quote != '\'' { 385 return "", ErrSyntax 386 } 387 if contains(s, '\n') { 388 return "", ErrSyntax 389 } 390 391 // Is it trivial? Avoid allocation. 392 if !contains(s, '\\') && !contains(s, quote) { 393 switch quote { 394 case '"': 395 if utf8.ValidString(s) { 396 return s, nil 397 } 398 case '\'': 399 r, size := utf8.DecodeRuneInString(s) 400 if size == len(s) && (r != utf8.RuneError || size != 1) { 401 return s, nil 402 } 403 } 404 } 405 406 var runeTmp [utf8.UTFMax]byte 407 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 408 for len(s) > 0 { 409 c, multibyte, ss, err := UnquoteChar(s, quote) 410 if err != nil { 411 return "", err 412 } 413 s = ss 414 if c < utf8.RuneSelf || !multibyte { 415 buf = append(buf, byte(c)) 416 } else { 417 n := utf8.EncodeRune(runeTmp[:], c) 418 buf = append(buf, runeTmp[:n]...) 419 } 420 if quote == '\'' && len(s) != 0 { 421 // single-quoted must be single character 422 return "", ErrSyntax 423 } 424 } 425 return string(buf), nil 426 } 427 428 // contains reports whether the string contains the byte c. 
429 func contains(s string, c byte) bool { 430 return bytealg.IndexByteString(s, c) != -1 431 } 432 433 // bsearch16 returns the smallest i such that a[i] >= x. 434 // If there is no such i, bsearch16 returns len(a). 435 func bsearch16(a []uint16, x uint16) int { 436 i, j := 0, len(a) 437 for i < j { 438 h := i + (j-i)/2 439 if a[h] < x { 440 i = h + 1 441 } else { 442 j = h 443 } 444 } 445 return i 446 } 447 448 // bsearch32 returns the smallest i such that a[i] >= x. 449 // If there is no such i, bsearch32 returns len(a). 450 func bsearch32(a []uint32, x uint32) int { 451 i, j := 0, len(a) 452 for i < j { 453 h := i + (j-i)/2 454 if a[h] < x { 455 i = h + 1 456 } else { 457 j = h 458 } 459 } 460 return i 461 } 462 463 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 464 // to give the same answer. It allows this package not to depend on unicode, 465 // and therefore not pull in all the Unicode tables. If the linker were better 466 // at tossing unused tables, we could get rid of this implementation. 467 // That would be nice. 468 469 // IsPrint reports whether the rune is defined as printable by Go, with 470 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 471 // symbols and ASCII space. 472 func IsPrint(r rune) bool { 473 // Fast check for Latin-1 474 if r <= 0xFF { 475 if 0x20 <= r && r <= 0x7E { 476 // All the ASCII is printable from space through DEL-1. 477 return true 478 } 479 if 0xA1 <= r && r <= 0xFF { 480 // Similarly for ¡ through ÿ... 481 return r != 0xAD // ...except for the bizarre soft hyphen. 482 } 483 return false 484 } 485 486 // Same algorithm, either on uint16 or uint32 value. 487 // First, find first i such that isPrint[i] >= x. 488 // This is the index of either the start or end of a pair that might span x. 489 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 490 // If we find x in a range, make sure x is not in isNotPrint list. 
491 492 if 0 <= r && r < 1<<16 { 493 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 494 i := bsearch16(isPrint, rr) 495 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 496 return false 497 } 498 j := bsearch16(isNotPrint, rr) 499 return j >= len(isNotPrint) || isNotPrint[j] != rr 500 } 501 502 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 503 i := bsearch32(isPrint, rr) 504 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 505 return false 506 } 507 if r >= 0x20000 { 508 return true 509 } 510 r -= 0x10000 511 j := bsearch16(isNotPrint, uint16(r)) 512 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 513 } 514 515 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 516 // characters include letters, marks, numbers, punctuation, symbols, and 517 // spaces, from categories L, M, N, P, S, and Zs. 518 func IsGraphic(r rune) bool { 519 if IsPrint(r) { 520 return true 521 } 522 return isInGraphicList(r) 523 } 524 525 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 526 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 527 // Should be called only if IsPrint fails. 528 func isInGraphicList(r rune) bool { 529 // We know r must fit in 16 bits - see makeisprint.go. 530 if r > 0xFFFF { 531 return false 532 } 533 rr := uint16(r) 534 i := bsearch16(isGraphic, rr) 535 return i < len(isGraphic) && rr == isGraphic[i] 536 }