github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/strconv/quote.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strconv 6 7 import ( 8 "unicode/utf8" 9 ) 10 11 const lowerhex = "0123456789abcdef" 12 13 func quoteWith(s string, quote byte, ASCIIonly bool) string { 14 var runeTmp [utf8.UTFMax]byte 15 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 16 buf = append(buf, quote) 17 for width := 0; len(s) > 0; s = s[width:] { 18 r := rune(s[0]) 19 width = 1 20 if r >= utf8.RuneSelf { 21 r, width = utf8.DecodeRuneInString(s) 22 } 23 if width == 1 && r == utf8.RuneError { 24 buf = append(buf, `\x`...) 25 buf = append(buf, lowerhex[s[0]>>4]) 26 buf = append(buf, lowerhex[s[0]&0xF]) 27 continue 28 } 29 if r == rune(quote) || r == '\\' { // always backslashed 30 buf = append(buf, '\\') 31 buf = append(buf, byte(r)) 32 continue 33 } 34 if ASCIIonly { 35 if r < utf8.RuneSelf && IsPrint(r) { 36 buf = append(buf, byte(r)) 37 continue 38 } 39 } else if IsPrint(r) { 40 n := utf8.EncodeRune(runeTmp[:], r) 41 buf = append(buf, runeTmp[:n]...) 42 continue 43 } 44 switch r { 45 case '\a': 46 buf = append(buf, `\a`...) 47 case '\b': 48 buf = append(buf, `\b`...) 49 case '\f': 50 buf = append(buf, `\f`...) 51 case '\n': 52 buf = append(buf, `\n`...) 53 case '\r': 54 buf = append(buf, `\r`...) 55 case '\t': 56 buf = append(buf, `\t`...) 57 case '\v': 58 buf = append(buf, `\v`...) 59 default: 60 switch { 61 case r < ' ': 62 buf = append(buf, `\x`...) 63 buf = append(buf, lowerhex[s[0]>>4]) 64 buf = append(buf, lowerhex[s[0]&0xF]) 65 case r > utf8.MaxRune: 66 r = 0xFFFD 67 fallthrough 68 case r < 0x10000: 69 buf = append(buf, `\u`...) 70 for s := 12; s >= 0; s -= 4 { 71 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 72 } 73 default: 74 buf = append(buf, `\U`...) 75 for s := 28; s >= 0; s -= 4 { 76 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 77 } 78 } 79 } 80 } 81 buf = append(buf, quote) 82 return string(buf) 83 84 } 85 86 // Quote returns a double-quoted Go string literal representing s. The 87 // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 88 // control characters and non-printable characters as defined by 89 // IsPrint. 90 func Quote(s string) string { 91 return quoteWith(s, '"', false) 92 } 93 94 // AppendQuote appends a double-quoted Go string literal representing s, 95 // as generated by Quote, to dst and returns the extended buffer. 96 func AppendQuote(dst []byte, s string) []byte { 97 return append(dst, Quote(s)...) 98 } 99 100 // QuoteToASCII returns a double-quoted Go string literal representing s. 101 // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for 102 // non-ASCII characters and non-printable characters as defined by IsPrint. 103 func QuoteToASCII(s string) string { 104 return quoteWith(s, '"', true) 105 } 106 107 // AppendQuoteToASCII appends a double-quoted Go string literal representing s, 108 // as generated by QuoteToASCII, to dst and returns the extended buffer. 109 func AppendQuoteToASCII(dst []byte, s string) []byte { 110 return append(dst, QuoteToASCII(s)...) 111 } 112 113 // QuoteRune returns a single-quoted Go character literal representing the 114 // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) 115 // for control characters and non-printable characters as defined by IsPrint. 116 func QuoteRune(r rune) string { 117 // TODO: avoid the allocation here. 118 return quoteWith(string(r), '\'', false) 119 } 120 121 // AppendQuoteRune appends a single-quoted Go character literal representing the rune, 122 // as generated by QuoteRune, to dst and returns the extended buffer. 123 func AppendQuoteRune(dst []byte, r rune) []byte { 124 return append(dst, QuoteRune(r)...) 125 } 126 127 // QuoteRuneToASCII returns a single-quoted Go character literal representing 128 // the rune. The returned string uses Go escape sequences (\t, \n, \xFF, 129 // \u0100) for non-ASCII characters and non-printable characters as defined 130 // by IsPrint. 131 func QuoteRuneToASCII(r rune) string { 132 // TODO: avoid the allocation here. 133 return quoteWith(string(r), '\'', true) 134 } 135 136 // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune, 137 // as generated by QuoteRuneToASCII, to dst and returns the extended buffer. 138 func AppendQuoteRuneToASCII(dst []byte, r rune) []byte { 139 return append(dst, QuoteRuneToASCII(r)...) 140 } 141 142 // CanBackquote reports whether the string s can be represented 143 // unchanged as a single-line backquoted string without control 144 // characters other than space and tab. 145 func CanBackquote(s string) bool { 146 for i := 0; i < len(s); i++ { 147 if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' { 148 return false 149 } 150 } 151 return true 152 } 153 154 func unhex(b byte) (v rune, ok bool) { 155 c := rune(b) 156 switch { 157 case '0' <= c && c <= '9': 158 return c - '0', true 159 case 'a' <= c && c <= 'f': 160 return c - 'a' + 10, true 161 case 'A' <= c && c <= 'F': 162 return c - 'A' + 10, true 163 } 164 return 165 } 166 167 // UnquoteChar decodes the first character or byte in the escaped string 168 // or character literal represented by the string s. 169 // It returns four values: 170 // 171 // 1) value, the decoded Unicode code point or byte value; 172 // 2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation; 173 // 3) tail, the remainder of the string after the character; and 174 // 4) an error that will be nil if the character is syntactically valid. 175 // 176 // The second argument, quote, specifies the type of literal being parsed 177 // and therefore which escaped quote character is permitted. 178 // If set to a single quote, it permits the sequence \' and disallows unescaped '. 179 // If set to a double quote, it permits \" and disallows unescaped ". 180 // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped. 181 func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) { 182 // easy cases 183 switch c := s[0]; { 184 case c == quote && (quote == '\'' || quote == '"'): 185 err = ErrSyntax 186 return 187 case c >= utf8.RuneSelf: 188 r, size := utf8.DecodeRuneInString(s) 189 return r, true, s[size:], nil 190 case c != '\\': 191 return rune(s[0]), false, s[1:], nil 192 } 193 194 // hard case: c is backslash 195 if len(s) <= 1 { 196 err = ErrSyntax 197 return 198 } 199 c := s[1] 200 s = s[2:] 201 202 switch c { 203 case 'a': 204 value = '\a' 205 case 'b': 206 value = '\b' 207 case 'f': 208 value = '\f' 209 case 'n': 210 value = '\n' 211 case 'r': 212 value = '\r' 213 case 't': 214 value = '\t' 215 case 'v': 216 value = '\v' 217 case 'x', 'u', 'U': 218 n := 0 219 switch c { 220 case 'x': 221 n = 2 222 case 'u': 223 n = 4 224 case 'U': 225 n = 8 226 } 227 var v rune 228 if len(s) < n { 229 err = ErrSyntax 230 return 231 } 232 for j := 0; j < n; j++ { 233 x, ok := unhex(s[j]) 234 if !ok { 235 err = ErrSyntax 236 return 237 } 238 v = v<<4 | x 239 } 240 s = s[n:] 241 if c == 'x' { 242 // single-byte string, possibly not UTF-8 243 value = v 244 break 245 } 246 if v > utf8.MaxRune { 247 err = ErrSyntax 248 return 249 } 250 value = v 251 multibyte = true 252 case '0', '1', '2', '3', '4', '5', '6', '7': 253 v := rune(c) - '0' 254 if len(s) < 2 { 255 err = ErrSyntax 256 return 257 } 258 for j := 0; j < 2; j++ { // one digit already; two more 259 x := rune(s[j]) - '0' 260 if x < 0 || x > 7 { 261 err = ErrSyntax 262 return 263 } 264 v = (v << 3) | x 265 } 266 s = s[2:] 267 if v > 255 { 268 err = ErrSyntax 269 return 270 } 271 value = v 272 case '\\': 273 value = '\\' 274 case '\'', '"': 275 if c != quote { 276 err = ErrSyntax 277 return 278 } 279 value = rune(c) 280 default: 281 err = ErrSyntax 282 return 283 } 284 tail = s 285 return 286 } 287 288 // Unquote interprets s as a single-quoted, double-quoted, 289 // or backquoted Go string literal, returning the string value 290 // that s quotes. (If s is single-quoted, it would be a Go 291 // character literal; Unquote returns the corresponding 292 // one-character string.) 293 func Unquote(s string) (t string, err error) { 294 n := len(s) 295 if n < 2 { 296 return "", ErrSyntax 297 } 298 quote := s[0] 299 if quote != s[n-1] { 300 return "", ErrSyntax 301 } 302 s = s[1 : n-1] 303 304 if quote == '`' { 305 if contains(s, '`') { 306 return "", ErrSyntax 307 } 308 return s, nil 309 } 310 if quote != '"' && quote != '\'' { 311 return "", ErrSyntax 312 } 313 if contains(s, '\n') { 314 return "", ErrSyntax 315 } 316 317 // Is it trivial? Avoid allocation. 318 if !contains(s, '\\') && !contains(s, quote) { 319 switch quote { 320 case '"': 321 return s, nil 322 case '\'': 323 r, size := utf8.DecodeRuneInString(s) 324 if size == len(s) && (r != utf8.RuneError || size != 1) { 325 return s, nil 326 } 327 } 328 } 329 330 var runeTmp [utf8.UTFMax]byte 331 buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations. 332 for len(s) > 0 { 333 c, multibyte, ss, err := UnquoteChar(s, quote) 334 if err != nil { 335 return "", err 336 } 337 s = ss 338 if c < utf8.RuneSelf || !multibyte { 339 buf = append(buf, byte(c)) 340 } else { 341 n := utf8.EncodeRune(runeTmp[:], c) 342 buf = append(buf, runeTmp[:n]...) 343 } 344 if quote == '\'' && len(s) != 0 { 345 // single-quoted must be single character 346 return "", ErrSyntax 347 } 348 } 349 return string(buf), nil 350 } 351 352 // contains reports whether the string contains the byte c. 353 func contains(s string, c byte) bool { 354 for i := 0; i < len(s); i++ { 355 if s[i] == c { 356 return true 357 } 358 } 359 return false 360 } 361 362 // bsearch16 returns the smallest i such that a[i] >= x. 363 // If there is no such i, bsearch16 returns len(a). 364 func bsearch16(a []uint16, x uint16) int { 365 i, j := 0, len(a) 366 for i < j { 367 h := i + (j-i)/2 368 if a[h] < x { 369 i = h + 1 370 } else { 371 j = h 372 } 373 } 374 return i 375 } 376 377 // bsearch32 returns the smallest i such that a[i] >= x. 378 // If there is no such i, bsearch32 returns len(a). 379 func bsearch32(a []uint32, x uint32) int { 380 i, j := 0, len(a) 381 for i < j { 382 h := i + (j-i)/2 383 if a[h] < x { 384 i = h + 1 385 } else { 386 j = h 387 } 388 } 389 return i 390 } 391 392 // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests 393 // to give the same answer. It allows this package not to depend on unicode, 394 // and therefore not pull in all the Unicode tables. If the linker were better 395 // at tossing unused tables, we could get rid of this implementation. 396 // That would be nice. 397 398 // IsPrint reports whether the rune is defined as printable by Go, with 399 // the same definition as unicode.IsPrint: letters, numbers, punctuation, 400 // symbols and ASCII space. 401 func IsPrint(r rune) bool { 402 // Fast check for Latin-1 403 if r <= 0xFF { 404 if 0x20 <= r && r <= 0x7E { 405 // All the ASCII is printable from space through DEL-1. 406 return true 407 } 408 if 0xA1 <= r && r <= 0xFF { 409 // Similarly for ¡ through ÿ... 410 return r != 0xAD // ...except for the bizarre soft hyphen. 411 } 412 return false 413 } 414 415 // Same algorithm, either on uint16 or uint32 value. 416 // First, find first i such that isPrint[i] >= x. 417 // This is the index of either the start or end of a pair that might span x. 418 // The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]). 419 // If we find x in a range, make sure x is not in isNotPrint list. 420 421 if 0 <= r && r < 1<<16 { 422 rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16 423 i := bsearch16(isPrint, rr) 424 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 425 return false 426 } 427 j := bsearch16(isNotPrint, rr) 428 return j >= len(isNotPrint) || isNotPrint[j] != rr 429 } 430 431 rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32 432 i := bsearch32(isPrint, rr) 433 if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr { 434 return false 435 } 436 if r >= 0x20000 { 437 return true 438 } 439 r -= 0x10000 440 j := bsearch16(isNotPrint, uint16(r)) 441 return j >= len(isNotPrint) || isNotPrint[j] != uint16(r) 442 }