github.com/remobjects/goldbaselibrary@v0.0.0-20230924164425-d458680a936b/Source/Gold/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "internal/bytealg" 11 "unicode/utf8" 12 ) 13 14 const ( 15 lowerhex = "0123456789abcdef" 16 upperhex = "0123456789ABCDEF" 17 ) 18 19 func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string { 20 return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly)) 21 } 22 23 func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string { 24 return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly)) 25 } 26 27 func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte { 28 // Often called with big strings, so preallocate. If there's quoting, 29 // this is conservative but still helps a lot. 30 if cap(buf)-len(buf) < len(s) { 31 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 32 copy(nBuf, buf) 33 buf = nBuf 34 } 35 buf = append(buf, quote) 36 for width := 0; len(s) > 0; s = s[width:] { 37 r := rune(s[0]) 38 width = 1 39 if r >= utf8.RuneSelf { 40 r, width = utf8.DecodeRuneInString(s) 41 } 42 if width == 1 && r == utf8.RuneError { 43 buf = append(buf, `\x`...) 
44 buf = append(buf, lowerhex[s[0]>>4]) 45 buf = append(buf, lowerhex[s[0]&0xF]) 46 continue 47 } 48 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 49 } 50 buf = append(buf, quote) 51 return buf 52 } 53 54 func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 55 buf = append(buf, quote) 56 if !utf8.ValidRune(r) { 57 r = utf8.RuneError 58 } 59 buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly) 60 buf = append(buf, quote) 61 return buf 62 } 63 64 func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte { 65 var runeTmp [utf8.UTFMax]byte 66 if r == rune(quote) || r == '\\' { // always backslashed 67 buf = append(buf, '\\') 68 buf = append(buf, byte(r)) 69 return buf 70 } 71 if ASCIIonly { 72 if r < utf8.RuneSelf && IsPrint(r) { 73 buf = append(buf, byte(r)) 74 return buf 75 } 76 } else if IsPrint(r) || graphicOnly && isInGraphicList(r) { 77 n := utf8.EncodeRune(runeTmp[:], r) 78 buf = append(buf, runeTmp[:n]...) 79 return buf 80 } 81 switch r { 82 case '\a': 83 buf = append(buf, `\a`...) 84 case '\b': 85 buf = append(buf, `\b`...) 86 case '\f': 87 buf = append(buf, `\f`...) 88 case '\n': 89 buf = append(buf, `\n`...) 90 case '\r': 91 buf = append(buf, `\r`...) 92 case '\t': 93 buf = append(buf, `\t`...) 94 case '\v': 95 buf = append(buf, `\v`...) 96 default: 97 switch { 98 case r < ' ': 99 buf = append(buf, `\x`...) 100 buf = append(buf, lowerhex[byte(r)>>4]) 101 buf = append(buf, lowerhex[byte(r)&0xF]) 102 case r > utf8.MaxRune: 103 r = 0xFFFD 104 fallthrough 105 case r < 0x10000: 106 buf = append(buf, `\u`...) 107 for s := 12; s >= 0; s -= 4 { 108 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 109 } 110 default: 111 buf = append(buf, `\U`...) 112 for s := 28; s >= 0; s -= 4 { 113 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 114 } 115 } 116 } 117 return buf 118 } 119 120 // Quote returns a double-quoted Go string literal representing s. 
The 121 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 122 // control characters and non-printable characters as defined by 123 // IsPrint. 124 func Quote(s string) string { 125 return quoteWith(s, '"', false, false) 126 } 127 128 // AppendQuote appends a double-quoted Go string literal representing s, 129 // as generated by Quote, to dst and returns the extended buffer. 130 func AppendQuote(dst []byte, s string) []byte { 131 return appendQuotedWith(dst, s, '"', false, false) 132 } 133 134 // QuoteToASCII returns a double-quoted Go string literal representing s. 135 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 136 // non-ASCII characters and non-printable characters as defined by IsPrint. 137 func QuoteToASCII(s string) string { 138 return quoteWith(s, '"', true, false) 139 } 140 141 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 142 // as generated by QuoteToASCII, to dst and returns the extended buffer. 143 func AppendQuoteToASCII(dst []byte, s string) []byte { 144 return appendQuotedWith(dst, s, '"', true, false) 145 } 146 147 // QuoteToGraphic returns a double-quoted Go string literal representing s. 148 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 149 // non-ASCII characters and non-printable characters as defined by IsGraphic. 150 func QuoteToGraphic(s string) string { 151 return quoteWith(s, '"', false, true) 152 } 153 154 // AppendQuoteToGraphic appends a double-quoted Go string literal representing s, 155 // as generated by QuoteToGraphic, to dst and returns the extended buffer. 156 func AppendQuoteToGraphic(dst []byte, s string) []byte { 157 return appendQuotedWith(dst, s, '"', false, true) 158 } 159 160 // QuoteRune returns a single-quoted Go character literal representing the 161 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 162 // for control characters and non-printable characters as defined by IsPrint. 
163 func QuoteRune(r rune) string { 164 return quoteRuneWith(r, '\'', false, false) 165 } 166 167 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 168 // as generated by QuoteRune, to dst and returns the extended buffer. 169 func AppendQuoteRune(dst []byte, r rune) []byte { 170 return appendQuotedRuneWith(dst, r, '\'', false, false) 171 } 172 173 // QuoteRuneToASCII returns a single-quoted Go character literal representing 174 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 175 // \u0100) for non-ASCII characters and non-printable characters as defined 176 // by IsPrint. 177 func QuoteRuneToASCII(r rune) string { 178 return quoteRuneWith(r, '\'', true, false) 179 } 180 181 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 182 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 183 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 184 return appendQuotedRuneWith(dst, r, '\'', true, false) 185 } 186 187 // QuoteRuneToGraphic returns a single-quoted Go character literal representing 188 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 189 // \u0100) for non-ASCII characters and non-printable characters as defined 190 // by IsGraphic. 191 func QuoteRuneToGraphic(r rune) string { 192 return quoteRuneWith(r, '\'', false, true) 193 } 194 195 // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune, 196 // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer. 197 func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte { 198 return appendQuotedRuneWith(dst, r, '\'', false, true) 199 } 200 201 // CanBackquote reports whether the string s can be represented 202 // unchanged as a single-line backquoted string without control 203 // characters other than tab. 
func CanBackquote(s string) bool {
	for len(s) > 0 {
		r, wid := utf8.DecodeRuneInString(s)
		s = s[wid:]
		if wid > 1 {
			if r == '\ufeff' {
				return false // BOMs are invisible and should not be quoted.
			}
			continue // All other multibyte runes are correctly encoded and assumed printable.
		}
		// Single-byte rune from here on: either ASCII or an invalid byte.
		if r == utf8.RuneError {
			return false
		}
		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
			return false
		}
	}
	return true
}

// unhex converts the ASCII hexadecimal digit b (either case) to its numeric
// value. ok is false, and v is zero, when b is not a hexadecimal digit.
func unhex(b byte) (v rune, ok bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}

// UnquoteChar decodes the first character or byte in the escaped string
// or character literal represented by the string s.
// It returns four values:
//
//	1) value, the decoded Unicode code point or byte value;
//	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
//	3) tail, the remainder of the string after the character; and
//	4) an error that will be nil if the character is syntactically valid.
//
// The second argument, quote, specifies the type of literal being parsed
// and therefore which escaped quote character is permitted.
// If set to a single quote, it permits the sequence \' and disallows unescaped '.
// If set to a double quote, it permits \" and disallows unescaped ".
// If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
251 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 252 // easy cases 253 if len(s) == 0 { 254 err = ErrSyntax 255 return 256 } 257 switch c := s[0]; { 258 case c == quote && (quote == '\'' || quote == '"'): 259 err = ErrSyntax 260 return 261 case c >= utf8.RuneSelf: 262 r, size := utf8.DecodeRuneInString(s) 263 return r, true, s[size:], nil 264 case c != '\\': 265 return rune(s[0]), false, s[1:], nil 266 } 267 268 // hard case: c is backslash 269 if len(s) <= 1 { 270 err = ErrSyntax 271 return 272 } 273 c := s[1] 274 s = s[2:] 275 276 switch c { 277 case 'a': 278 value = '\a' 279 case 'b': 280 value = '\b' 281 case 'f': 282 value = '\f' 283 case 'n': 284 value = '\n' 285 case 'r': 286 value = '\r' 287 case 't': 288 value = '\t' 289 case 'v': 290 value = '\v' 291 case 'x', 'u', 'U': 292 n := 0 293 switch c { 294 case 'x': 295 n = 2 296 case 'u': 297 n = 4 298 case 'U': 299 n = 8 300 } 301 var v rune 302 if len(s) < n { 303 err = ErrSyntax 304 return 305 } 306 for j := 0; j < n; j++ { 307 x, ok := unhex(s[j]) 308 if !ok { 309 err = ErrSyntax 310 return 311 } 312 v = v<<4 | x 313 } 314 s = s[n:] 315 if c == 'x' { 316 // single-byte string, possibly not UTF-8 317 value = v 318 break 319 } 320 if v > utf8.MaxRune { 321 err = ErrSyntax 322 return 323 } 324 value = v 325 multibyte = true 326 case '0', '1', '2', '3', '4', '5', '6', '7': 327 v := rune(c) - '0' 328 if len(s) < 2 { 329 err = ErrSyntax 330 return 331 } 332 for j := 0; j < 2; j++ { // one digit already; two more 333 x := rune(s[j]) - '0' 334 if x < 0 || x > 7 { 335 err = ErrSyntax 336 return 337 } 338 v = (v << 3) | x 339 } 340 s = s[2:] 341 if v > 255 { 342 err = ErrSyntax 343 return 344 } 345 value = v 346 case '\\': 347 value = '\\' 348 case '\'', '"': 349 if c != quote { 350 err = ErrSyntax 351 return 352 } 353 value = rune(c) 354 default: 355 err = ErrSyntax 356 return 357 } 358 tail = s 359 return 360 } 361 362 // Unquote interprets s as a single-quoted, 
double-quoted, 363 // or backquoted Go string literal, returning the string value 364 // that s quotes. (If s is single-quoted, it would be a Go 365 // character literal; Unquote returns the corresponding 366 // one-character string.) 367 func Unquote(s string) (string, error) { 368 n := len(s) 369 if n < 2 { 370 return "", ErrSyntax 371 } 372 quote := s[0] 373 if quote != s[n-1] { 374 return "", ErrSyntax 375 } 376 s = s[1 : n-1] 377 378 if quote == '`' { 379 if contains(s, '`') { 380 return "", ErrSyntax 381 } 382 if contains(s, '\r') { 383 // -1 because we know there is at least one \r to remove. 384 buf := make([]byte, 0, len(s)-1) 385 for i := 0; i < len(s); i++ { 386 if s[i] != '\r' { 387 buf = append(buf, s[i]) 388 } 389 } 390 return string(buf), nil 391 } 392 return s, nil 393 } 394 if quote != '"' && quote != '\'' { 395 return "", ErrSyntax 396 } 397 if contains(s, '\n') { 398 return "", ErrSyntax 399 } 400 401 // Is it trivial? Avoid allocation. 402 if !contains(s, '\\') && !contains(s, quote) { 403 switch quote { 404 case '"': 405 if utf8.ValidString(s) { 406 return s, nil 407 } 408 case '\'': 409 r, size := utf8.DecodeRuneInString(s) 410 if size == len(s) && (r != utf8.RuneError || size != 1) { 411 return s, nil 412 } 413 } 414 } 415 416 var runeTmp [utf8.UTFMax]byte 417 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 418 for len(s) > 0 { 419 c, multibyte, ss, err := UnquoteChar(s, quote) 420 if err != nil { 421 return "", err 422 } 423 s = ss 424 if c < utf8.RuneSelf || !multibyte { 425 buf = append(buf, byte(c)) 426 } else { 427 n := utf8.EncodeRune(runeTmp[:], c) 428 buf = append(buf, runeTmp[:n]...) 429 } 430 if quote == '\'' && len(s) != 0 { 431 // single-quoted must be single character 432 return "", ErrSyntax 433 } 434 } 435 return string(buf), nil 436 } 437 438 // contains reports whether the string contains the byte c. 
439 func contains(s string, c byte) bool { 440 return bytealg.IndexByteString(s, c) != -1 441 } 442 443 // bsearch16 returns the smallest i such that a[i] >= x. 444 // If there is no such i, bsearch16 returns len(a). 445 func bsearch16(a []uint16, x uint16) int { 446 i, j := 0, len(a) 447 for i < j { 448 h := i + (j-i)/2 449 if a[h] < x { 450 i = h + 1 451 } else { 452 j = h 453 } 454 } 455 return i 456 } 457 458 // bsearch32 returns the smallest i such that a[i] >= x. 459 // If there is no such i, bsearch32 returns len(a). 460 func bsearch32(a []uint32, x uint32) int { 461 i, j := 0, len(a) 462 for i < j { 463 h := i + (j-i)/2 464 if a[h] < x { 465 i = h + 1 466 } else { 467 j = h 468 } 469 } 470 return i 471 } 472 473 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 474 // to give the same answer. It allows this package not to depend on unicode, 475 // and therefore not pull in all the Unicode tables. If the linker were better 476 // at tossing unused tables, we could get rid of this implementation. 477 // That would be nice. 478 479 // IsPrint reports whether the rune is defined as printable by Go, with 480 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 481 // symbols and ASCII space. 482 func IsPrint(r rune) bool { 483 // Fast check for Latin-1 484 if r <= 0xFF { 485 if 0x20 <= r && r <= 0x7E { 486 // All the ASCII is printable from space through DEL-1. 487 return true 488 } 489 if 0xA1 <= r && r <= 0xFF { 490 // Similarly for ¡ through ÿ... 491 return r != 0xAD // ...except for the bizarre soft hyphen. 492 } 493 return false 494 } 495 496 // Same algorithm, either on uint16 or uint32 value. 497 // First, find first i such that isPrint[i] >= x. 498 // This is the index of either the start or end of a pair that might span x. 499 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 500 // If we find x in a range, make sure x is not in isNotPrint list. 
501 502 if 0 <= r && r < 1<<16 { 503 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 504 i := bsearch16(isPrint, rr) 505 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 506 return false 507 } 508 j := bsearch16(isNotPrint, rr) 509 return j >= len(isNotPrint) || isNotPrint[j] != rr 510 } 511 512 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 513 i := bsearch32(isPrint, rr) 514 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 515 return false 516 } 517 if r >= 0x20000 { 518 return true 519 } 520 r -= 0x10000 521 j := bsearch16(isNotPrint, uint16(r)) 522 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 523 } 524 525 // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such 526 // characters include letters, marks, numbers, punctuation, symbols, and 527 // spaces, from categories L, M, N, P, S, and Zs. 528 func IsGraphic(r rune) bool { 529 if IsPrint(r) { 530 return true 531 } 532 return isInGraphicList(r) 533 } 534 535 // isInGraphicList reports whether the rune is in the isGraphic list. This separation 536 // from IsGraphic allows quoteWith to avoid two calls to IsPrint. 537 // Should be called only if IsPrint fails. 538 func isInGraphicList(r rune) bool { 539 // We know r must fit in 16 bits - see makeisprint.go. 540 if r > 0xFFFF { 541 return false 542 } 543 rr := uint16(r) 544 i := bsearch16(isGraphic, rr) 545 return i < len(isGraphic) && rr == isGraphic[i] 546 }