github.com/spotify/syslog-redirector-golang@v0.0.0-20140320174030-4859f03d829a/src/pkg/strconv/quote.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  import (
     8  	"unicode/utf8"
     9  )
    10  
// lowerhex maps a 4-bit value (0-15) to its lowercase hexadecimal digit.
const lowerhex = "0123456789abcdef"
    12  
    13  func quoteWith(s string, quote byte, ASCIIonly bool) string {
    14  	var runeTmp [utf8.UTFMax]byte
    15  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
    16  	buf = append(buf, quote)
    17  	for width := 0; len(s) > 0; s = s[width:] {
    18  		r := rune(s[0])
    19  		width = 1
    20  		if r >= utf8.RuneSelf {
    21  			r, width = utf8.DecodeRuneInString(s)
    22  		}
    23  		if width == 1 && r == utf8.RuneError {
    24  			buf = append(buf, `\x`...)
    25  			buf = append(buf, lowerhex[s[0]>>4])
    26  			buf = append(buf, lowerhex[s[0]&0xF])
    27  			continue
    28  		}
    29  		if r == rune(quote) || r == '\\' { // always backslashed
    30  			buf = append(buf, '\\')
    31  			buf = append(buf, byte(r))
    32  			continue
    33  		}
    34  		if ASCIIonly {
    35  			if r < utf8.RuneSelf && IsPrint(r) {
    36  				buf = append(buf, byte(r))
    37  				continue
    38  			}
    39  		} else if IsPrint(r) {
    40  			n := utf8.EncodeRune(runeTmp[:], r)
    41  			buf = append(buf, runeTmp[:n]...)
    42  			continue
    43  		}
    44  		switch r {
    45  		case '\a':
    46  			buf = append(buf, `\a`...)
    47  		case '\b':
    48  			buf = append(buf, `\b`...)
    49  		case '\f':
    50  			buf = append(buf, `\f`...)
    51  		case '\n':
    52  			buf = append(buf, `\n`...)
    53  		case '\r':
    54  			buf = append(buf, `\r`...)
    55  		case '\t':
    56  			buf = append(buf, `\t`...)
    57  		case '\v':
    58  			buf = append(buf, `\v`...)
    59  		default:
    60  			switch {
    61  			case r < ' ':
    62  				buf = append(buf, `\x`...)
    63  				buf = append(buf, lowerhex[s[0]>>4])
    64  				buf = append(buf, lowerhex[s[0]&0xF])
    65  			case r > utf8.MaxRune:
    66  				r = 0xFFFD
    67  				fallthrough
    68  			case r < 0x10000:
    69  				buf = append(buf, `\u`...)
    70  				for s := 12; s >= 0; s -= 4 {
    71  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    72  				}
    73  			default:
    74  				buf = append(buf, `\U`...)
    75  				for s := 28; s >= 0; s -= 4 {
    76  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    77  				}
    78  			}
    79  		}
    80  	}
    81  	buf = append(buf, quote)
    82  	return string(buf)
    83  
    84  }
    85  
    86  // Quote returns a double-quoted Go string literal representing s.  The
    87  // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
    88  // control characters and non-printable characters as defined by
    89  // IsPrint.
    90  func Quote(s string) string {
    91  	return quoteWith(s, '"', false)
    92  }
    93  
    94  // AppendQuote appends a double-quoted Go string literal representing s,
    95  // as generated by Quote, to dst and returns the extended buffer.
    96  func AppendQuote(dst []byte, s string) []byte {
    97  	return append(dst, Quote(s)...)
    98  }
    99  
   100  // QuoteToASCII returns a double-quoted Go string literal representing s.
   101  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   102  // non-ASCII characters and non-printable characters as defined by IsPrint.
   103  func QuoteToASCII(s string) string {
   104  	return quoteWith(s, '"', true)
   105  }
   106  
   107  // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
   108  // as generated by QuoteToASCII, to dst and returns the extended buffer.
   109  func AppendQuoteToASCII(dst []byte, s string) []byte {
   110  	return append(dst, QuoteToASCII(s)...)
   111  }
   112  
   113  // QuoteRune returns a single-quoted Go character literal representing the
   114  // rune.  The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
   115  // for control characters and non-printable characters as defined by IsPrint.
   116  func QuoteRune(r rune) string {
   117  	// TODO: avoid the allocation here.
   118  	return quoteWith(string(r), '\'', false)
   119  }
   120  
   121  // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
   122  // as generated by QuoteRune, to dst and returns the extended buffer.
   123  func AppendQuoteRune(dst []byte, r rune) []byte {
   124  	return append(dst, QuoteRune(r)...)
   125  }
   126  
   127  // QuoteRuneToASCII returns a single-quoted Go character literal representing
   128  // the rune.  The returned string uses Go escape sequences (\t, \n, \xFF,
   129  // \u0100) for non-ASCII characters and non-printable characters as defined
   130  // by IsPrint.
   131  func QuoteRuneToASCII(r rune) string {
   132  	// TODO: avoid the allocation here.
   133  	return quoteWith(string(r), '\'', true)
   134  }
   135  
   136  // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
   137  // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
   138  func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
   139  	return append(dst, QuoteRuneToASCII(r)...)
   140  }
   141  
   142  // CanBackquote reports whether the string s can be represented
   143  // unchanged as a single-line backquoted string without control
   144  // characters other than space and tab.
   145  func CanBackquote(s string) bool {
   146  	for i := 0; i < len(s); i++ {
   147  		if (s[i] < ' ' && s[i] != '\t') || s[i] == '`' {
   148  			return false
   149  		}
   150  	}
   151  	return true
   152  }
   153  
   154  func unhex(b byte) (v rune, ok bool) {
   155  	c := rune(b)
   156  	switch {
   157  	case '0' <= c && c <= '9':
   158  		return c - '0', true
   159  	case 'a' <= c && c <= 'f':
   160  		return c - 'a' + 10, true
   161  	case 'A' <= c && c <= 'F':
   162  		return c - 'A' + 10, true
   163  	}
   164  	return
   165  }
   166  
   167  // UnquoteChar decodes the first character or byte in the escaped string
   168  // or character literal represented by the string s.
   169  // It returns four values:
   170  //
   171  //	1) value, the decoded Unicode code point or byte value;
   172  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   173  //	3) tail, the remainder of the string after the character; and
   174  //	4) an error that will be nil if the character is syntactically valid.
   175  //
   176  // The second argument, quote, specifies the type of literal being parsed
   177  // and therefore which escaped quote character is permitted.
   178  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
   179  // If set to a double quote, it permits \" and disallows unescaped ".
   180  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   181  func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
   182  	// easy cases
   183  	switch c := s[0]; {
   184  	case c == quote && (quote == '\'' || quote == '"'):
   185  		err = ErrSyntax
   186  		return
   187  	case c >= utf8.RuneSelf:
   188  		r, size := utf8.DecodeRuneInString(s)
   189  		return r, true, s[size:], nil
   190  	case c != '\\':
   191  		return rune(s[0]), false, s[1:], nil
   192  	}
   193  
   194  	// hard case: c is backslash
   195  	if len(s) <= 1 {
   196  		err = ErrSyntax
   197  		return
   198  	}
   199  	c := s[1]
   200  	s = s[2:]
   201  
   202  	switch c {
   203  	case 'a':
   204  		value = '\a'
   205  	case 'b':
   206  		value = '\b'
   207  	case 'f':
   208  		value = '\f'
   209  	case 'n':
   210  		value = '\n'
   211  	case 'r':
   212  		value = '\r'
   213  	case 't':
   214  		value = '\t'
   215  	case 'v':
   216  		value = '\v'
   217  	case 'x', 'u', 'U':
   218  		n := 0
   219  		switch c {
   220  		case 'x':
   221  			n = 2
   222  		case 'u':
   223  			n = 4
   224  		case 'U':
   225  			n = 8
   226  		}
   227  		var v rune
   228  		if len(s) < n {
   229  			err = ErrSyntax
   230  			return
   231  		}
   232  		for j := 0; j < n; j++ {
   233  			x, ok := unhex(s[j])
   234  			if !ok {
   235  				err = ErrSyntax
   236  				return
   237  			}
   238  			v = v<<4 | x
   239  		}
   240  		s = s[n:]
   241  		if c == 'x' {
   242  			// single-byte string, possibly not UTF-8
   243  			value = v
   244  			break
   245  		}
   246  		if v > utf8.MaxRune {
   247  			err = ErrSyntax
   248  			return
   249  		}
   250  		value = v
   251  		multibyte = true
   252  	case '0', '1', '2', '3', '4', '5', '6', '7':
   253  		v := rune(c) - '0'
   254  		if len(s) < 2 {
   255  			err = ErrSyntax
   256  			return
   257  		}
   258  		for j := 0; j < 2; j++ { // one digit already; two more
   259  			x := rune(s[j]) - '0'
   260  			if x < 0 || x > 7 {
   261  				err = ErrSyntax
   262  				return
   263  			}
   264  			v = (v << 3) | x
   265  		}
   266  		s = s[2:]
   267  		if v > 255 {
   268  			err = ErrSyntax
   269  			return
   270  		}
   271  		value = v
   272  	case '\\':
   273  		value = '\\'
   274  	case '\'', '"':
   275  		if c != quote {
   276  			err = ErrSyntax
   277  			return
   278  		}
   279  		value = rune(c)
   280  	default:
   281  		err = ErrSyntax
   282  		return
   283  	}
   284  	tail = s
   285  	return
   286  }
   287  
   288  // Unquote interprets s as a single-quoted, double-quoted,
   289  // or backquoted Go string literal, returning the string value
   290  // that s quotes.  (If s is single-quoted, it would be a Go
   291  // character literal; Unquote returns the corresponding
   292  // one-character string.)
   293  func Unquote(s string) (t string, err error) {
   294  	n := len(s)
   295  	if n < 2 {
   296  		return "", ErrSyntax
   297  	}
   298  	quote := s[0]
   299  	if quote != s[n-1] {
   300  		return "", ErrSyntax
   301  	}
   302  	s = s[1 : n-1]
   303  
   304  	if quote == '`' {
   305  		if contains(s, '`') {
   306  			return "", ErrSyntax
   307  		}
   308  		return s, nil
   309  	}
   310  	if quote != '"' && quote != '\'' {
   311  		return "", ErrSyntax
   312  	}
   313  	if contains(s, '\n') {
   314  		return "", ErrSyntax
   315  	}
   316  
   317  	// Is it trivial?  Avoid allocation.
   318  	if !contains(s, '\\') && !contains(s, quote) {
   319  		switch quote {
   320  		case '"':
   321  			return s, nil
   322  		case '\'':
   323  			r, size := utf8.DecodeRuneInString(s)
   324  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   325  				return s, nil
   326  			}
   327  		}
   328  	}
   329  
   330  	var runeTmp [utf8.UTFMax]byte
   331  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   332  	for len(s) > 0 {
   333  		c, multibyte, ss, err := UnquoteChar(s, quote)
   334  		if err != nil {
   335  			return "", err
   336  		}
   337  		s = ss
   338  		if c < utf8.RuneSelf || !multibyte {
   339  			buf = append(buf, byte(c))
   340  		} else {
   341  			n := utf8.EncodeRune(runeTmp[:], c)
   342  			buf = append(buf, runeTmp[:n]...)
   343  		}
   344  		if quote == '\'' && len(s) != 0 {
   345  			// single-quoted must be single character
   346  			return "", ErrSyntax
   347  		}
   348  	}
   349  	return string(buf), nil
   350  }
   351  
   352  // contains reports whether the string contains the byte c.
   353  func contains(s string, c byte) bool {
   354  	for i := 0; i < len(s); i++ {
   355  		if s[i] == c {
   356  			return true
   357  		}
   358  	}
   359  	return false
   360  }
   361  
   362  // bsearch16 returns the smallest i such that a[i] >= x.
   363  // If there is no such i, bsearch16 returns len(a).
   364  func bsearch16(a []uint16, x uint16) int {
   365  	i, j := 0, len(a)
   366  	for i < j {
   367  		h := i + (j-i)/2
   368  		if a[h] < x {
   369  			i = h + 1
   370  		} else {
   371  			j = h
   372  		}
   373  	}
   374  	return i
   375  }
   376  
   377  // bsearch32 returns the smallest i such that a[i] >= x.
   378  // If there is no such i, bsearch32 returns len(a).
   379  func bsearch32(a []uint32, x uint32) int {
   380  	i, j := 0, len(a)
   381  	for i < j {
   382  		h := i + (j-i)/2
   383  		if a[h] < x {
   384  			i = h + 1
   385  		} else {
   386  			j = h
   387  		}
   388  	}
   389  	return i
   390  }
   391  
   392  // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
   393  // to give the same answer. It allows this package not to depend on unicode,
   394  // and therefore not pull in all the Unicode tables. If the linker were better
   395  // at tossing unused tables, we could get rid of this implementation.
   396  // That would be nice.
   397  
   398  // IsPrint reports whether the rune is defined as printable by Go, with
   399  // the same definition as unicode.IsPrint: letters, numbers, punctuation,
   400  // symbols and ASCII space.
   401  func IsPrint(r rune) bool {
   402  	// Fast check for Latin-1
   403  	if r <= 0xFF {
   404  		if 0x20 <= r && r <= 0x7E {
   405  			// All the ASCII is printable from space through DEL-1.
   406  			return true
   407  		}
   408  		if 0xA1 <= r && r <= 0xFF {
   409  			// Similarly for ¡ through ÿ...
   410  			return r != 0xAD // ...except for the bizarre soft hyphen.
   411  		}
   412  		return false
   413  	}
   414  
   415  	// Same algorithm, either on uint16 or uint32 value.
   416  	// First, find first i such that isPrint[i] >= x.
   417  	// This is the index of either the start or end of a pair that might span x.
   418  	// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
   419  	// If we find x in a range, make sure x is not in isNotPrint list.
   420  
   421  	if 0 <= r && r < 1<<16 {
   422  		rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
   423  		i := bsearch16(isPrint, rr)
   424  		if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   425  			return false
   426  		}
   427  		j := bsearch16(isNotPrint, rr)
   428  		return j >= len(isNotPrint) || isNotPrint[j] != rr
   429  	}
   430  
   431  	rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
   432  	i := bsearch32(isPrint, rr)
   433  	if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   434  		return false
   435  	}
   436  	if r >= 0x20000 {
   437  		return true
   438  	}
   439  	r -= 0x10000
   440  	j := bsearch16(isNotPrint, uint16(r))
   441  	return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
   442  }