github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate go run makeisprint.go -output isprint.go 6 7 package strconv 8 9 import ( 10 "unicode/utf8" 11 ) 12 13 const lowerhex = "0123456789abcdef" 14 15 func quoteWith(s string, quote byte, ASCIIonly bool) string { 16 var runeTmp [utf8.UTFMax]byte 17 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 18 buf = append(buf, quote) 19 for width := 0; len(s) > 0; s = s[width:] { 20 r := rune(s[0]) 21 width = 1 22 if r >= utf8.RuneSelf { 23 r, width = utf8.DecodeRuneInString(s) 24 } 25 if width == 1 && r == utf8.RuneError { 26 buf = append(buf, `\x`...) 27 buf = append(buf, lowerhex[s[0]>>4]) 28 buf = append(buf, lowerhex[s[0]&0xF]) 29 continue 30 } 31 if r == rune(quote) || r == '\\' { // always backslashed 32 buf = append(buf, '\\') 33 buf = append(buf, byte(r)) 34 continue 35 } 36 if ASCIIonly { 37 if r < utf8.RuneSelf && IsPrint(r) { 38 buf = append(buf, byte(r)) 39 continue 40 } 41 } else if IsPrint(r) { 42 n := utf8.EncodeRune(runeTmp[:], r) 43 buf = append(buf, runeTmp[:n]...) 44 continue 45 } 46 switch r { 47 case '\a': 48 buf = append(buf, `\a`...) 49 case '\b': 50 buf = append(buf, `\b`...) 51 case '\f': 52 buf = append(buf, `\f`...) 53 case '\n': 54 buf = append(buf, `\n`...) 55 case '\r': 56 buf = append(buf, `\r`...) 57 case '\t': 58 buf = append(buf, `\t`...) 59 case '\v': 60 buf = append(buf, `\v`...) 61 default: 62 switch { 63 case r < ' ': 64 buf = append(buf, `\x`...) 65 buf = append(buf, lowerhex[s[0]>>4]) 66 buf = append(buf, lowerhex[s[0]&0xF]) 67 case r > utf8.MaxRune: 68 r = 0xFFFD 69 fallthrough 70 case r < 0x10000: 71 buf = append(buf, `\u`...) 72 for s := 12; s >= 0; s -= 4 { 73 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 74 } 75 default: 76 buf = append(buf, `\U`...) 77 for s := 28; s >= 0; s -= 4 { 78 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 79 } 80 } 81 } 82 } 83 buf = append(buf, quote) 84 return string(buf) 85 86 } 87 88 // Quote returns a double-quoted Go string literal representing s. The 89 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 90 // control characters and non-printable characters as defined by 91 // IsPrint. 92 func Quote(s string) string { 93 return quoteWith(s, '"', false) 94 } 95 96 // AppendQuote appends a double-quoted Go string literal representing s, 97 // as generated by Quote, to dst and returns the extended buffer. 98 func AppendQuote(dst []byte, s string) []byte { 99 return append(dst, Quote(s)...) 100 } 101 102 // QuoteToASCII returns a double-quoted Go string literal representing s. 103 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 104 // non-ASCII characters and non-printable characters as defined by IsPrint. 105 func QuoteToASCII(s string) string { 106 return quoteWith(s, '"', true) 107 } 108 109 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 110 // as generated by QuoteToASCII, to dst and returns the extended buffer. 111 func AppendQuoteToASCII(dst []byte, s string) []byte { 112 return append(dst, QuoteToASCII(s)...) 113 } 114 115 // QuoteRune returns a single-quoted Go character literal representing the 116 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 117 // for control characters and non-printable characters as defined by IsPrint. 118 func QuoteRune(r rune) string { 119 // TODO: avoid the allocation here. 120 return quoteWith(string(r), '\'', false) 121 } 122 123 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 124 // as generated by QuoteRune, to dst and returns the extended buffer. 125 func AppendQuoteRune(dst []byte, r rune) []byte { 126 return append(dst, QuoteRune(r)...) 127 } 128 129 // QuoteRuneToASCII returns a single-quoted Go character literal representing 130 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 131 // \u0100) for non-ASCII characters and non-printable characters as defined 132 // by IsPrint. 133 func QuoteRuneToASCII(r rune) string { 134 // TODO: avoid the allocation here. 135 return quoteWith(string(r), '\'', true) 136 } 137 138 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 139 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 140 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 141 return append(dst, QuoteRuneToASCII(r)...) 142 } 143 144 // CanBackquote reports whether the string s can be represented 145 // unchanged as a single-line backquoted string without control 146 // characters other than tab. 147 func CanBackquote(s string) bool { 148 for len(s) > 0 { 149 r, wid := utf8.DecodeRuneInString(s) 150 s = s[wid:] 151 if wid > 1 { 152 if r == '\ufeff' { 153 return false // BOMs are invisible and should not be quoted. 154 } 155 continue // All other multibyte runes are correctly encoded and assumed printable. 156 } 157 if r == utf8.RuneError { 158 return false 159 } 160 if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' { 161 return false 162 } 163 } 164 return true 165 } 166 167 func unhex(b byte) (v rune, ok bool) { 168 c := rune(b) 169 switch { 170 case '0' <= c && c <= '9': 171 return c - '0', true 172 case 'a' <= c && c <= 'f': 173 return c - 'a' + 10, true 174 case 'A' <= c && c <= 'F': 175 return c - 'A' + 10, true 176 } 177 return 178 } 179 180 // UnquoteChar decodes the first character or byte in the escaped string 181 // or character literal represented by the string s. 182 // It returns four values: 183 // 184 // 1) value, the decoded Unicode code point or byte value; 185 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 186 // 3) tail, the remainder of the string after the character; and 187 // 4) an error that will be nil if the character is syntactically valid. 188 // 189 // The second argument, quote, specifies the type of literal being parsed 190 // and therefore which escaped quote character is permitted. 191 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 192 // If set to a double quote, it permits \" and disallows unescaped ". 193 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 194 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 195 // easy cases 196 switch c := s[0]; { 197 case c == quote && (quote == '\'' || quote == '"'): 198 err = ErrSyntax 199 return 200 case c >= utf8.RuneSelf: 201 r, size := utf8.DecodeRuneInString(s) 202 return r, true, s[size:], nil 203 case c != '\\': 204 return rune(s[0]), false, s[1:], nil 205 } 206 207 // hard case: c is backslash 208 if len(s) <= 1 { 209 err = ErrSyntax 210 return 211 } 212 c := s[1] 213 s = s[2:] 214 215 switch c { 216 case 'a': 217 value = '\a' 218 case 'b': 219 value = '\b' 220 case 'f': 221 value = '\f' 222 case 'n': 223 value = '\n' 224 case 'r': 225 value = '\r' 226 case 't': 227 value = '\t' 228 case 'v': 229 value = '\v' 230 case 'x', 'u', 'U': 231 n := 0 232 switch c { 233 case 'x': 234 n = 2 235 case 'u': 236 n = 4 237 case 'U': 238 n = 8 239 } 240 var v rune 241 if len(s) < n { 242 err = ErrSyntax 243 return 244 } 245 for j := 0; j < n; j++ { 246 x, ok := unhex(s[j]) 247 if !ok { 248 err = ErrSyntax 249 return 250 } 251 v = v<<4 | x 252 } 253 s = s[n:] 254 if c == 'x' { 255 // single-byte string, possibly not UTF-8 256 value = v 257 break 258 } 259 if v > utf8.MaxRune { 260 err = ErrSyntax 261 return 262 } 263 value = v 264 multibyte = true 265 case '0', '1', '2', '3', '4', '5', '6', '7': 266 v := rune(c) - '0' 267 if len(s) < 2 { 268 err = ErrSyntax 269 return 270 } 271 for j := 0; j < 2; j++ { // one digit already; two more 272 x := rune(s[j]) - '0' 273 if x < 0 || x > 7 { 274 err = ErrSyntax 275 return 276 } 277 v = (v << 3) | x 278 } 279 s = s[2:] 280 if v > 255 { 281 err = ErrSyntax 282 return 283 } 284 value = v 285 case '\\': 286 value = '\\' 287 case '\'', '"': 288 if c != quote { 289 err = ErrSyntax 290 return 291 } 292 value = rune(c) 293 default: 294 err = ErrSyntax 295 return 296 } 297 tail = s 298 return 299 } 300 301 // Unquote interprets s as a single-quoted, double-quoted, 302 // or backquoted Go string literal, returning the string value 303 // that s quotes. (If s is single-quoted, it would be a Go 304 // character literal; Unquote returns the corresponding 305 // one-character string.) 306 func Unquote(s string) (t string, err error) { 307 n := len(s) 308 if n < 2 { 309 return "", ErrSyntax 310 } 311 quote := s[0] 312 if quote != s[n-1] { 313 return "", ErrSyntax 314 } 315 s = s[1 : n-1] 316 317 if quote == '`' { 318 if contains(s, '`') { 319 return "", ErrSyntax 320 } 321 return s, nil 322 } 323 if quote != '"' && quote != '\'' { 324 return "", ErrSyntax 325 } 326 if contains(s, '\n') { 327 return "", ErrSyntax 328 } 329 330 // Is it trivial? Avoid allocation. 331 if !contains(s, '\\') && !contains(s, quote) { 332 switch quote { 333 case '"': 334 return s, nil 335 case '\'': 336 r, size := utf8.DecodeRuneInString(s) 337 if size == len(s) && (r != utf8.RuneError || size != 1) { 338 return s, nil 339 } 340 } 341 } 342 343 var runeTmp [utf8.UTFMax]byte 344 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 345 for len(s) > 0 { 346 c, multibyte, ss, err := UnquoteChar(s, quote) 347 if err != nil { 348 return "", err 349 } 350 s = ss 351 if c < utf8.RuneSelf || !multibyte { 352 buf = append(buf, byte(c)) 353 } else { 354 n := utf8.EncodeRune(runeTmp[:], c) 355 buf = append(buf, runeTmp[:n]...) 356 } 357 if quote == '\'' && len(s) != 0 { 358 // single-quoted must be single character 359 return "", ErrSyntax 360 } 361 } 362 return string(buf), nil 363 } 364 365 // contains reports whether the string contains the byte c. 366 func contains(s string, c byte) bool { 367 for i := 0; i < len(s); i++ { 368 if s[i] == c { 369 return true 370 } 371 } 372 return false 373 } 374 375 // bsearch16 returns the smallest i such that a[i] >= x. 376 // If there is no such i, bsearch16 returns len(a). 377 func bsearch16(a []uint16, x uint16) int { 378 i, j := 0, len(a) 379 for i < j { 380 h := i + (j-i)/2 381 if a[h] < x { 382 i = h + 1 383 } else { 384 j = h 385 } 386 } 387 return i 388 } 389 390 // bsearch32 returns the smallest i such that a[i] >= x. 391 // If there is no such i, bsearch32 returns len(a). 392 func bsearch32(a []uint32, x uint32) int { 393 i, j := 0, len(a) 394 for i < j { 395 h := i + (j-i)/2 396 if a[h] < x { 397 i = h + 1 398 } else { 399 j = h 400 } 401 } 402 return i 403 } 404 405 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 406 // to give the same answer. It allows this package not to depend on unicode, 407 // and therefore not pull in all the Unicode tables. If the linker were better 408 // at tossing unused tables, we could get rid of this implementation. 409 // That would be nice. 410 411 // IsPrint reports whether the rune is defined as printable by Go, with 412 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 413 // symbols and ASCII space. 414 func IsPrint(r rune) bool { 415 // Fast check for Latin-1 416 if r <= 0xFF { 417 if 0x20 <= r && r <= 0x7E { 418 // All the ASCII is printable from space through DEL-1. 419 return true 420 } 421 if 0xA1 <= r && r <= 0xFF { 422 // Similarly for ¡ through ÿ... 423 return r != 0xAD // ...except for the bizarre soft hyphen. 424 } 425 return false 426 } 427 428 // Same algorithm, either on uint16 or uint32 value. 429 // First, find first i such that isPrint[i] >= x. 430 // This is the index of either the start or end of a pair that might span x. 431 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 432 // If we find x in a range, make sure x is not in isNotPrint list. 433 434 if 0 <= r && r < 1<<16 { 435 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 436 i := bsearch16(isPrint, rr) 437 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 438 return false 439 } 440 j := bsearch16(isNotPrint, rr) 441 return j >= len(isNotPrint) || isNotPrint[j] != rr 442 } 443 444 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 445 i := bsearch32(isPrint, rr) 446 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 447 return false 448 } 449 if r >= 0x20000 { 450 return true 451 } 452 r -= 0x10000 453 j := bsearch16(isNotPrint, uint16(r)) 454 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 455 }