github.com/letsencrypt/go@v0.0.0-20160714163537-4054769a31f6/src/strconv/quote.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run makeisprint.go -output isprint.go
     6  
     7  package strconv
     8  
     9  import "unicode/utf8"
    10  
    11  const lowerhex = "0123456789abcdef"
    12  
    13  func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
    14  	return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
    15  }
    16  
    17  func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
    18  	return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
    19  }
    20  
    21  func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
    22  	buf = append(buf, quote)
    23  	for width := 0; len(s) > 0; s = s[width:] {
    24  		r := rune(s[0])
    25  		width = 1
    26  		if r >= utf8.RuneSelf {
    27  			r, width = utf8.DecodeRuneInString(s)
    28  		}
    29  		if width == 1 && r == utf8.RuneError {
    30  			buf = append(buf, `\x`...)
    31  			buf = append(buf, lowerhex[s[0]>>4])
    32  			buf = append(buf, lowerhex[s[0]&0xF])
    33  			continue
    34  		}
    35  		buf = appendEscapedRune(buf, r, width, quote, ASCIIonly, graphicOnly)
    36  	}
    37  	buf = append(buf, quote)
    38  	return buf
    39  }
    40  
    41  func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
    42  	buf = append(buf, quote)
    43  	if !utf8.ValidRune(r) {
    44  		r = utf8.RuneError
    45  	}
    46  	buf = appendEscapedRune(buf, r, utf8.RuneLen(r), quote, ASCIIonly, graphicOnly)
    47  	buf = append(buf, quote)
    48  	return buf
    49  }
    50  
    51  func appendEscapedRune(buf []byte, r rune, width int, quote byte, ASCIIonly, graphicOnly bool) []byte {
    52  	var runeTmp [utf8.UTFMax]byte
    53  	if r == rune(quote) || r == '\\' { // always backslashed
    54  		buf = append(buf, '\\')
    55  		buf = append(buf, byte(r))
    56  		return buf
    57  	}
    58  	if ASCIIonly {
    59  		if r < utf8.RuneSelf && IsPrint(r) {
    60  			buf = append(buf, byte(r))
    61  			return buf
    62  		}
    63  	} else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
    64  		n := utf8.EncodeRune(runeTmp[:], r)
    65  		buf = append(buf, runeTmp[:n]...)
    66  		return buf
    67  	}
    68  	switch r {
    69  	case '\a':
    70  		buf = append(buf, `\a`...)
    71  	case '\b':
    72  		buf = append(buf, `\b`...)
    73  	case '\f':
    74  		buf = append(buf, `\f`...)
    75  	case '\n':
    76  		buf = append(buf, `\n`...)
    77  	case '\r':
    78  		buf = append(buf, `\r`...)
    79  	case '\t':
    80  		buf = append(buf, `\t`...)
    81  	case '\v':
    82  		buf = append(buf, `\v`...)
    83  	default:
    84  		switch {
    85  		case r < ' ':
    86  			buf = append(buf, `\x`...)
    87  			buf = append(buf, lowerhex[byte(r)>>4])
    88  			buf = append(buf, lowerhex[byte(r)&0xF])
    89  		case r > utf8.MaxRune:
    90  			r = 0xFFFD
    91  			fallthrough
    92  		case r < 0x10000:
    93  			buf = append(buf, `\u`...)
    94  			for s := 12; s >= 0; s -= 4 {
    95  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
    96  			}
    97  		default:
    98  			buf = append(buf, `\U`...)
    99  			for s := 28; s >= 0; s -= 4 {
   100  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   101  			}
   102  		}
   103  	}
   104  	return buf
   105  }
   106  
   107  // Quote returns a double-quoted Go string literal representing s. The
   108  // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   109  // control characters and non-printable characters as defined by
   110  // IsPrint.
   111  func Quote(s string) string {
   112  	return quoteWith(s, '"', false, false)
   113  }
   114  
   115  // AppendQuote appends a double-quoted Go string literal representing s,
   116  // as generated by Quote, to dst and returns the extended buffer.
   117  func AppendQuote(dst []byte, s string) []byte {
   118  	return appendQuotedWith(dst, s, '"', false, false)
   119  }
   120  
   121  // QuoteToASCII returns a double-quoted Go string literal representing s.
   122  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   123  // non-ASCII characters and non-printable characters as defined by IsPrint.
   124  func QuoteToASCII(s string) string {
   125  	return quoteWith(s, '"', true, false)
   126  }
   127  
   128  // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
   129  // as generated by QuoteToASCII, to dst and returns the extended buffer.
   130  func AppendQuoteToASCII(dst []byte, s string) []byte {
   131  	return appendQuotedWith(dst, s, '"', true, false)
   132  }
   133  
   134  // QuoteToGraphic returns a double-quoted Go string literal representing s.
   135  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   136  // non-ASCII characters and non-printable characters as defined by IsGraphic.
   137  func QuoteToGraphic(s string) string {
   138  	return quoteWith(s, '"', false, true)
   139  }
   140  
   141  // AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
   142  // as generated by QuoteToGraphic, to dst and returns the extended buffer.
   143  func AppendQuoteToGraphic(dst []byte, s string) []byte {
   144  	return appendQuotedWith(dst, s, '"', false, true)
   145  }
   146  
   147  // QuoteRune returns a single-quoted Go character literal representing the
   148  // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
   149  // for control characters and non-printable characters as defined by IsPrint.
   150  func QuoteRune(r rune) string {
   151  	return quoteRuneWith(r, '\'', false, false)
   152  }
   153  
   154  // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
   155  // as generated by QuoteRune, to dst and returns the extended buffer.
   156  func AppendQuoteRune(dst []byte, r rune) []byte {
   157  	return appendQuotedRuneWith(dst, r, '\'', false, false)
   158  }
   159  
   160  // QuoteRuneToASCII returns a single-quoted Go character literal representing
   161  // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
   162  // \u0100) for non-ASCII characters and non-printable characters as defined
   163  // by IsPrint.
   164  func QuoteRuneToASCII(r rune) string {
   165  	return quoteRuneWith(r, '\'', true, false)
   166  }
   167  
   168  // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
   169  // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
   170  func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
   171  	return appendQuotedRuneWith(dst, r, '\'', true, false)
   172  }
   173  
   174  // QuoteRuneToGraphic returns a single-quoted Go character literal representing
   175  // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
   176  // \u0100) for non-ASCII characters and non-printable characters as defined
   177  // by IsGraphic.
   178  func QuoteRuneToGraphic(r rune) string {
   179  	return quoteRuneWith(r, '\'', false, true)
   180  }
   181  
   182  // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
   183  // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
   184  func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
   185  	return appendQuotedRuneWith(dst, r, '\'', false, true)
   186  }
   187  
   188  // CanBackquote reports whether the string s can be represented
   189  // unchanged as a single-line backquoted string without control
   190  // characters other than tab.
   191  func CanBackquote(s string) bool {
   192  	for len(s) > 0 {
   193  		r, wid := utf8.DecodeRuneInString(s)
   194  		s = s[wid:]
   195  		if wid > 1 {
   196  			if r == '\ufeff' {
   197  				return false // BOMs are invisible and should not be quoted.
   198  			}
   199  			continue // All other multibyte runes are correctly encoded and assumed printable.
   200  		}
   201  		if r == utf8.RuneError {
   202  			return false
   203  		}
   204  		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
   205  			return false
   206  		}
   207  	}
   208  	return true
   209  }
   210  
   211  func unhex(b byte) (v rune, ok bool) {
   212  	c := rune(b)
   213  	switch {
   214  	case '0' <= c && c <= '9':
   215  		return c - '0', true
   216  	case 'a' <= c && c <= 'f':
   217  		return c - 'a' + 10, true
   218  	case 'A' <= c && c <= 'F':
   219  		return c - 'A' + 10, true
   220  	}
   221  	return
   222  }
   223  
   224  // UnquoteChar decodes the first character or byte in the escaped string
   225  // or character literal represented by the string s.
   226  // It returns four values:
   227  //
   228  //	1) value, the decoded Unicode code point or byte value;
   229  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   230  //	3) tail, the remainder of the string after the character; and
   231  //	4) an error that will be nil if the character is syntactically valid.
   232  //
   233  // The second argument, quote, specifies the type of literal being parsed
   234  // and therefore which escaped quote character is permitted.
   235  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
   236  // If set to a double quote, it permits \" and disallows unescaped ".
   237  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   238  func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
   239  	// easy cases
   240  	switch c := s[0]; {
   241  	case c == quote && (quote == '\'' || quote == '"'):
   242  		err = ErrSyntax
   243  		return
   244  	case c >= utf8.RuneSelf:
   245  		r, size := utf8.DecodeRuneInString(s)
   246  		return r, true, s[size:], nil
   247  	case c != '\\':
   248  		return rune(s[0]), false, s[1:], nil
   249  	}
   250  
   251  	// hard case: c is backslash
   252  	if len(s) <= 1 {
   253  		err = ErrSyntax
   254  		return
   255  	}
   256  	c := s[1]
   257  	s = s[2:]
   258  
   259  	switch c {
   260  	case 'a':
   261  		value = '\a'
   262  	case 'b':
   263  		value = '\b'
   264  	case 'f':
   265  		value = '\f'
   266  	case 'n':
   267  		value = '\n'
   268  	case 'r':
   269  		value = '\r'
   270  	case 't':
   271  		value = '\t'
   272  	case 'v':
   273  		value = '\v'
   274  	case 'x', 'u', 'U':
   275  		n := 0
   276  		switch c {
   277  		case 'x':
   278  			n = 2
   279  		case 'u':
   280  			n = 4
   281  		case 'U':
   282  			n = 8
   283  		}
   284  		var v rune
   285  		if len(s) < n {
   286  			err = ErrSyntax
   287  			return
   288  		}
   289  		for j := 0; j < n; j++ {
   290  			x, ok := unhex(s[j])
   291  			if !ok {
   292  				err = ErrSyntax
   293  				return
   294  			}
   295  			v = v<<4 | x
   296  		}
   297  		s = s[n:]
   298  		if c == 'x' {
   299  			// single-byte string, possibly not UTF-8
   300  			value = v
   301  			break
   302  		}
   303  		if v > utf8.MaxRune {
   304  			err = ErrSyntax
   305  			return
   306  		}
   307  		value = v
   308  		multibyte = true
   309  	case '0', '1', '2', '3', '4', '5', '6', '7':
   310  		v := rune(c) - '0'
   311  		if len(s) < 2 {
   312  			err = ErrSyntax
   313  			return
   314  		}
   315  		for j := 0; j < 2; j++ { // one digit already; two more
   316  			x := rune(s[j]) - '0'
   317  			if x < 0 || x > 7 {
   318  				err = ErrSyntax
   319  				return
   320  			}
   321  			v = (v << 3) | x
   322  		}
   323  		s = s[2:]
   324  		if v > 255 {
   325  			err = ErrSyntax
   326  			return
   327  		}
   328  		value = v
   329  	case '\\':
   330  		value = '\\'
   331  	case '\'', '"':
   332  		if c != quote {
   333  			err = ErrSyntax
   334  			return
   335  		}
   336  		value = rune(c)
   337  	default:
   338  		err = ErrSyntax
   339  		return
   340  	}
   341  	tail = s
   342  	return
   343  }
   344  
   345  // Unquote interprets s as a single-quoted, double-quoted,
   346  // or backquoted Go string literal, returning the string value
   347  // that s quotes.  (If s is single-quoted, it would be a Go
   348  // character literal; Unquote returns the corresponding
   349  // one-character string.)
   350  func Unquote(s string) (string, error) {
   351  	n := len(s)
   352  	if n < 2 {
   353  		return "", ErrSyntax
   354  	}
   355  	quote := s[0]
   356  	if quote != s[n-1] {
   357  		return "", ErrSyntax
   358  	}
   359  	s = s[1 : n-1]
   360  
   361  	if quote == '`' {
   362  		if contains(s, '`') {
   363  			return "", ErrSyntax
   364  		}
   365  		return s, nil
   366  	}
   367  	if quote != '"' && quote != '\'' {
   368  		return "", ErrSyntax
   369  	}
   370  	if contains(s, '\n') {
   371  		return "", ErrSyntax
   372  	}
   373  
   374  	// Is it trivial?  Avoid allocation.
   375  	if !contains(s, '\\') && !contains(s, quote) {
   376  		switch quote {
   377  		case '"':
   378  			return s, nil
   379  		case '\'':
   380  			r, size := utf8.DecodeRuneInString(s)
   381  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   382  				return s, nil
   383  			}
   384  		}
   385  	}
   386  
   387  	var runeTmp [utf8.UTFMax]byte
   388  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   389  	for len(s) > 0 {
   390  		c, multibyte, ss, err := UnquoteChar(s, quote)
   391  		if err != nil {
   392  			return "", err
   393  		}
   394  		s = ss
   395  		if c < utf8.RuneSelf || !multibyte {
   396  			buf = append(buf, byte(c))
   397  		} else {
   398  			n := utf8.EncodeRune(runeTmp[:], c)
   399  			buf = append(buf, runeTmp[:n]...)
   400  		}
   401  		if quote == '\'' && len(s) != 0 {
   402  			// single-quoted must be single character
   403  			return "", ErrSyntax
   404  		}
   405  	}
   406  	return string(buf), nil
   407  }
   408  
   409  // contains reports whether the string contains the byte c.
   410  func contains(s string, c byte) bool {
   411  	for i := 0; i < len(s); i++ {
   412  		if s[i] == c {
   413  			return true
   414  		}
   415  	}
   416  	return false
   417  }
   418  
   419  // bsearch16 returns the smallest i such that a[i] >= x.
   420  // If there is no such i, bsearch16 returns len(a).
   421  func bsearch16(a []uint16, x uint16) int {
   422  	i, j := 0, len(a)
   423  	for i < j {
   424  		h := i + (j-i)/2
   425  		if a[h] < x {
   426  			i = h + 1
   427  		} else {
   428  			j = h
   429  		}
   430  	}
   431  	return i
   432  }
   433  
   434  // bsearch32 returns the smallest i such that a[i] >= x.
   435  // If there is no such i, bsearch32 returns len(a).
   436  func bsearch32(a []uint32, x uint32) int {
   437  	i, j := 0, len(a)
   438  	for i < j {
   439  		h := i + (j-i)/2
   440  		if a[h] < x {
   441  			i = h + 1
   442  		} else {
   443  			j = h
   444  		}
   445  	}
   446  	return i
   447  }
   448  
   449  // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
   450  // to give the same answer. It allows this package not to depend on unicode,
   451  // and therefore not pull in all the Unicode tables. If the linker were better
   452  // at tossing unused tables, we could get rid of this implementation.
   453  // That would be nice.
   454  
   455  // IsPrint reports whether the rune is defined as printable by Go, with
   456  // the same definition as unicode.IsPrint: letters, numbers, punctuation,
   457  // symbols and ASCII space.
   458  func IsPrint(r rune) bool {
   459  	// Fast check for Latin-1
   460  	if r <= 0xFF {
   461  		if 0x20 <= r && r <= 0x7E {
   462  			// All the ASCII is printable from space through DEL-1.
   463  			return true
   464  		}
   465  		if 0xA1 <= r && r <= 0xFF {
   466  			// Similarly for ¡ through ÿ...
   467  			return r != 0xAD // ...except for the bizarre soft hyphen.
   468  		}
   469  		return false
   470  	}
   471  
   472  	// Same algorithm, either on uint16 or uint32 value.
   473  	// First, find first i such that isPrint[i] >= x.
   474  	// This is the index of either the start or end of a pair that might span x.
   475  	// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
   476  	// If we find x in a range, make sure x is not in isNotPrint list.
   477  
   478  	if 0 <= r && r < 1<<16 {
   479  		rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
   480  		i := bsearch16(isPrint, rr)
   481  		if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   482  			return false
   483  		}
   484  		j := bsearch16(isNotPrint, rr)
   485  		return j >= len(isNotPrint) || isNotPrint[j] != rr
   486  	}
   487  
   488  	rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
   489  	i := bsearch32(isPrint, rr)
   490  	if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   491  		return false
   492  	}
   493  	if r >= 0x20000 {
   494  		return true
   495  	}
   496  	r -= 0x10000
   497  	j := bsearch16(isNotPrint, uint16(r))
   498  	return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
   499  }
   500  
   501  // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
   502  // characters include letters, marks, numbers, punctuation, symbols, and
   503  // spaces, from categories L, M, N, P, S, and Zs.
   504  func IsGraphic(r rune) bool {
   505  	if IsPrint(r) {
   506  		return true
   507  	}
   508  	return isInGraphicList(r)
   509  }
   510  
   511  // isInGraphicList reports whether the rune is in the isGraphic list. This separation
   512  // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
   513  // Should be called only if IsPrint fails.
   514  func isInGraphicList(r rune) bool {
   515  	// We know r must fit in 16 bits - see makeisprint.go.
   516  	if r > 0xFFFF {
   517  		return false
   518  	}
   519  	rr := uint16(r)
   520  	i := bsearch16(isGraphic, rr)
   521  	return i < len(isGraphic) && rr == isGraphic[i]
   522  }