github.com/gidoBOSSftw5731/go/src@v0.0.0-20210226122457-d24b0edbf019/strconv/quote.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run makeisprint.go -output isprint.go
     6  
     7  package strconv
     8  
     9  import (
    10  	"unicode/utf8"
    11  )
    12  
// Hexadecimal digit alphabets. Indexing either string with a 4-bit value
// (0-15) yields the corresponding hex digit.
const (
	lowerhex = "0123456789abcdef" // used in this file to spell \x, \u and \U escapes
	upperhex = "0123456789ABCDEF" // not referenced in this file; presumably used elsewhere in the package
)
    17  
    18  func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
    19  	return string(appendQuotedWith(make([]byte, 0, 3*len(s)/2), s, quote, ASCIIonly, graphicOnly))
    20  }
    21  
    22  func quoteRuneWith(r rune, quote byte, ASCIIonly, graphicOnly bool) string {
    23  	return string(appendQuotedRuneWith(nil, r, quote, ASCIIonly, graphicOnly))
    24  }
    25  
    26  func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly bool) []byte {
    27  	// Often called with big strings, so preallocate. If there's quoting,
    28  	// this is conservative but still helps a lot.
    29  	if cap(buf)-len(buf) < len(s) {
    30  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
    31  		copy(nBuf, buf)
    32  		buf = nBuf
    33  	}
    34  	buf = append(buf, quote)
    35  	for width := 0; len(s) > 0; s = s[width:] {
    36  		r := rune(s[0])
    37  		width = 1
    38  		if r >= utf8.RuneSelf {
    39  			r, width = utf8.DecodeRuneInString(s)
    40  		}
    41  		if width == 1 && r == utf8.RuneError {
    42  			buf = append(buf, `\x`...)
    43  			buf = append(buf, lowerhex[s[0]>>4])
    44  			buf = append(buf, lowerhex[s[0]&0xF])
    45  			continue
    46  		}
    47  		buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
    48  	}
    49  	buf = append(buf, quote)
    50  	return buf
    51  }
    52  
    53  func appendQuotedRuneWith(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
    54  	buf = append(buf, quote)
    55  	if !utf8.ValidRune(r) {
    56  		r = utf8.RuneError
    57  	}
    58  	buf = appendEscapedRune(buf, r, quote, ASCIIonly, graphicOnly)
    59  	buf = append(buf, quote)
    60  	return buf
    61  }
    62  
    63  func appendEscapedRune(buf []byte, r rune, quote byte, ASCIIonly, graphicOnly bool) []byte {
    64  	var runeTmp [utf8.UTFMax]byte
    65  	if r == rune(quote) || r == '\\' { // always backslashed
    66  		buf = append(buf, '\\')
    67  		buf = append(buf, byte(r))
    68  		return buf
    69  	}
    70  	if ASCIIonly {
    71  		if r < utf8.RuneSelf && IsPrint(r) {
    72  			buf = append(buf, byte(r))
    73  			return buf
    74  		}
    75  	} else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
    76  		n := utf8.EncodeRune(runeTmp[:], r)
    77  		buf = append(buf, runeTmp[:n]...)
    78  		return buf
    79  	}
    80  	switch r {
    81  	case '\a':
    82  		buf = append(buf, `\a`...)
    83  	case '\b':
    84  		buf = append(buf, `\b`...)
    85  	case '\f':
    86  		buf = append(buf, `\f`...)
    87  	case '\n':
    88  		buf = append(buf, `\n`...)
    89  	case '\r':
    90  		buf = append(buf, `\r`...)
    91  	case '\t':
    92  		buf = append(buf, `\t`...)
    93  	case '\v':
    94  		buf = append(buf, `\v`...)
    95  	default:
    96  		switch {
    97  		case r < ' ':
    98  			buf = append(buf, `\x`...)
    99  			buf = append(buf, lowerhex[byte(r)>>4])
   100  			buf = append(buf, lowerhex[byte(r)&0xF])
   101  		case r > utf8.MaxRune:
   102  			r = 0xFFFD
   103  			fallthrough
   104  		case r < 0x10000:
   105  			buf = append(buf, `\u`...)
   106  			for s := 12; s >= 0; s -= 4 {
   107  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   108  			}
   109  		default:
   110  			buf = append(buf, `\U`...)
   111  			for s := 28; s >= 0; s -= 4 {
   112  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   113  			}
   114  		}
   115  	}
   116  	return buf
   117  }
   118  
// Quote returns a double-quoted Go string literal representing s. The
// returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// control characters and non-printable characters as defined by
// IsPrint. The double quote and the backslash are always
// backslash-escaped, and bytes of s that are not valid UTF-8 are
// written as \x escapes.
func Quote(s string) string {
	return quoteWith(s, '"', false, false)
}
   126  
// AppendQuote appends a double-quoted Go string literal representing s,
// as generated by Quote, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result; the
// result may use a newly allocated backing array.
func AppendQuote(dst []byte, s string) []byte {
	return appendQuotedWith(dst, s, '"', false, false)
}
   132  
// QuoteToASCII returns a double-quoted Go string literal representing s.
// The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
// non-ASCII characters and non-printable characters as defined by IsPrint.
// Only printable ASCII is copied through unchanged; every non-ASCII rune
// is written as a \u or \U escape.
func QuoteToASCII(s string) string {
	return quoteWith(s, '"', true, false)
}
   139  
// AppendQuoteToASCII appends a double-quoted Go string literal representing s,
// as generated by QuoteToASCII, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result; the
// result may use a newly allocated backing array.
func AppendQuoteToASCII(dst []byte, s string) []byte {
	return appendQuotedWith(dst, s, '"', true, false)
}
   145  
// QuoteToGraphic returns a double-quoted Go string literal representing s.
// The returned string leaves Unicode graphic characters, as defined by
// IsGraphic, unchanged and uses Go escape sequences (\t, \n, \xFF, \u0100)
// for non-graphic characters. The double quote and the backslash are
// still backslash-escaped.
func QuoteToGraphic(s string) string {
	return quoteWith(s, '"', false, true)
}
   153  
// AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
// as generated by QuoteToGraphic, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result; the
// result may use a newly allocated backing array.
func AppendQuoteToGraphic(dst []byte, s string) []byte {
	return appendQuotedWith(dst, s, '"', false, true)
}
   159  
// QuoteRune returns a single-quoted Go character literal representing the
// rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
// for control characters and non-printable characters as defined by IsPrint.
// If r is not a valid Unicode code point, it is replaced by
// utf8.RuneError (U+FFFD) before quoting.
func QuoteRune(r rune) string {
	return quoteRuneWith(r, '\'', false, false)
}
   166  
// AppendQuoteRune appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRune, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result.
func AppendQuoteRune(dst []byte, r rune) []byte {
	return appendQuotedRuneWith(dst, r, '\'', false, false)
}
   172  
// QuoteRuneToASCII returns a single-quoted Go character literal representing
// the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
// \u0100) for non-ASCII characters and non-printable characters as defined
// by IsPrint.
// If r is not a valid Unicode code point, it is replaced by
// utf8.RuneError (U+FFFD) before quoting.
func QuoteRuneToASCII(r rune) string {
	return quoteRuneWith(r, '\'', true, false)
}
   180  
// AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result.
func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
	return appendQuotedRuneWith(dst, r, '\'', true, false)
}
   186  
// QuoteRuneToGraphic returns a single-quoted Go character literal representing
// the rune. If the rune is not a Unicode graphic character,
// as defined by IsGraphic, the returned string will use a Go escape sequence
// (\t, \n, \xFF, \u0100).
// If r is not a valid Unicode code point, it is replaced by
// utf8.RuneError (U+FFFD) before quoting.
func QuoteRuneToGraphic(r rune) string {
	return quoteRuneWith(r, '\'', false, true)
}
   194  
// AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
// as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
// The existing contents of dst are kept as a prefix of the result.
func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
	return appendQuotedRuneWith(dst, r, '\'', false, true)
}
   200  
   201  // CanBackquote reports whether the string s can be represented
   202  // unchanged as a single-line backquoted string without control
   203  // characters other than tab.
   204  func CanBackquote(s string) bool {
   205  	for len(s) > 0 {
   206  		r, wid := utf8.DecodeRuneInString(s)
   207  		s = s[wid:]
   208  		if wid > 1 {
   209  			if r == '\ufeff' {
   210  				return false // BOMs are invisible and should not be quoted.
   211  			}
   212  			continue // All other multibyte runes are correctly encoded and assumed printable.
   213  		}
   214  		if r == utf8.RuneError {
   215  			return false
   216  		}
   217  		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
   218  			return false
   219  		}
   220  	}
   221  	return true
   222  }
   223  
   224  func unhex(b byte) (v rune, ok bool) {
   225  	c := rune(b)
   226  	switch {
   227  	case '0' <= c && c <= '9':
   228  		return c - '0', true
   229  	case 'a' <= c && c <= 'f':
   230  		return c - 'a' + 10, true
   231  	case 'A' <= c && c <= 'F':
   232  		return c - 'A' + 10, true
   233  	}
   234  	return
   235  }
   236  
   237  // UnquoteChar decodes the first character or byte in the escaped string
   238  // or character literal represented by the string s.
   239  // It returns four values:
   240  //
   241  //	1) value, the decoded Unicode code point or byte value;
   242  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   243  //	3) tail, the remainder of the string after the character; and
   244  //	4) an error that will be nil if the character is syntactically valid.
   245  //
   246  // The second argument, quote, specifies the type of literal being parsed
   247  // and therefore which escaped quote character is permitted.
   248  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
   249  // If set to a double quote, it permits \" and disallows unescaped ".
   250  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   251  func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
   252  	// easy cases
   253  	if len(s) == 0 {
   254  		err = ErrSyntax
   255  		return
   256  	}
   257  	switch c := s[0]; {
   258  	case c == quote && (quote == '\'' || quote == '"'):
   259  		err = ErrSyntax
   260  		return
   261  	case c >= utf8.RuneSelf:
   262  		r, size := utf8.DecodeRuneInString(s)
   263  		return r, true, s[size:], nil
   264  	case c != '\\':
   265  		return rune(s[0]), false, s[1:], nil
   266  	}
   267  
   268  	// hard case: c is backslash
   269  	if len(s) <= 1 {
   270  		err = ErrSyntax
   271  		return
   272  	}
   273  	c := s[1]
   274  	s = s[2:]
   275  
   276  	switch c {
   277  	case 'a':
   278  		value = '\a'
   279  	case 'b':
   280  		value = '\b'
   281  	case 'f':
   282  		value = '\f'
   283  	case 'n':
   284  		value = '\n'
   285  	case 'r':
   286  		value = '\r'
   287  	case 't':
   288  		value = '\t'
   289  	case 'v':
   290  		value = '\v'
   291  	case 'x', 'u', 'U':
   292  		n := 0
   293  		switch c {
   294  		case 'x':
   295  			n = 2
   296  		case 'u':
   297  			n = 4
   298  		case 'U':
   299  			n = 8
   300  		}
   301  		var v rune
   302  		if len(s) < n {
   303  			err = ErrSyntax
   304  			return
   305  		}
   306  		for j := 0; j < n; j++ {
   307  			x, ok := unhex(s[j])
   308  			if !ok {
   309  				err = ErrSyntax
   310  				return
   311  			}
   312  			v = v<<4 | x
   313  		}
   314  		s = s[n:]
   315  		if c == 'x' {
   316  			// single-byte string, possibly not UTF-8
   317  			value = v
   318  			break
   319  		}
   320  		if v > utf8.MaxRune {
   321  			err = ErrSyntax
   322  			return
   323  		}
   324  		value = v
   325  		multibyte = true
   326  	case '0', '1', '2', '3', '4', '5', '6', '7':
   327  		v := rune(c) - '0'
   328  		if len(s) < 2 {
   329  			err = ErrSyntax
   330  			return
   331  		}
   332  		for j := 0; j < 2; j++ { // one digit already; two more
   333  			x := rune(s[j]) - '0'
   334  			if x < 0 || x > 7 {
   335  				err = ErrSyntax
   336  				return
   337  			}
   338  			v = (v << 3) | x
   339  		}
   340  		s = s[2:]
   341  		if v > 255 {
   342  			err = ErrSyntax
   343  			return
   344  		}
   345  		value = v
   346  	case '\\':
   347  		value = '\\'
   348  	case '\'', '"':
   349  		if c != quote {
   350  			err = ErrSyntax
   351  			return
   352  		}
   353  		value = rune(c)
   354  	default:
   355  		err = ErrSyntax
   356  		return
   357  	}
   358  	tail = s
   359  	return
   360  }
   361  
   362  // Unquote interprets s as a single-quoted, double-quoted,
   363  // or backquoted Go string literal, returning the string value
   364  // that s quotes.  (If s is single-quoted, it would be a Go
   365  // character literal; Unquote returns the corresponding
   366  // one-character string.)
   367  func Unquote(s string) (string, error) {
   368  	n := len(s)
   369  	if n < 2 {
   370  		return "", ErrSyntax
   371  	}
   372  	quote := s[0]
   373  	if quote != s[n-1] {
   374  		return "", ErrSyntax
   375  	}
   376  	s = s[1 : n-1]
   377  
   378  	if quote == '`' {
   379  		if contains(s, '`') {
   380  			return "", ErrSyntax
   381  		}
   382  		if contains(s, '\r') {
   383  			// -1 because we know there is at least one \r to remove.
   384  			buf := make([]byte, 0, len(s)-1)
   385  			for i := 0; i < len(s); i++ {
   386  				if s[i] != '\r' {
   387  					buf = append(buf, s[i])
   388  				}
   389  			}
   390  			return string(buf), nil
   391  		}
   392  		return s, nil
   393  	}
   394  	if quote != '"' && quote != '\'' {
   395  		return "", ErrSyntax
   396  	}
   397  	if contains(s, '\n') {
   398  		return "", ErrSyntax
   399  	}
   400  
   401  	// Is it trivial? Avoid allocation.
   402  	if !contains(s, '\\') && !contains(s, quote) {
   403  		switch quote {
   404  		case '"':
   405  			if utf8.ValidString(s) {
   406  				return s, nil
   407  			}
   408  		case '\'':
   409  			r, size := utf8.DecodeRuneInString(s)
   410  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   411  				return s, nil
   412  			}
   413  		}
   414  	}
   415  
   416  	var runeTmp [utf8.UTFMax]byte
   417  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   418  	for len(s) > 0 {
   419  		c, multibyte, ss, err := UnquoteChar(s, quote)
   420  		if err != nil {
   421  			return "", err
   422  		}
   423  		s = ss
   424  		if c < utf8.RuneSelf || !multibyte {
   425  			buf = append(buf, byte(c))
   426  		} else {
   427  			n := utf8.EncodeRune(runeTmp[:], c)
   428  			buf = append(buf, runeTmp[:n]...)
   429  		}
   430  		if quote == '\'' && len(s) != 0 {
   431  			// single-quoted must be single character
   432  			return "", ErrSyntax
   433  		}
   434  	}
   435  	return string(buf), nil
   436  }
   437  
   438  // bsearch16 returns the smallest i such that a[i] >= x.
   439  // If there is no such i, bsearch16 returns len(a).
   440  func bsearch16(a []uint16, x uint16) int {
   441  	i, j := 0, len(a)
   442  	for i < j {
   443  		h := i + (j-i)>>1
   444  		if a[h] < x {
   445  			i = h + 1
   446  		} else {
   447  			j = h
   448  		}
   449  	}
   450  	return i
   451  }
   452  
   453  // bsearch32 returns the smallest i such that a[i] >= x.
   454  // If there is no such i, bsearch32 returns len(a).
   455  func bsearch32(a []uint32, x uint32) int {
   456  	i, j := 0, len(a)
   457  	for i < j {
   458  		h := i + (j-i)>>1
   459  		if a[h] < x {
   460  			i = h + 1
   461  		} else {
   462  			j = h
   463  		}
   464  	}
   465  	return i
   466  }
   467  
   468  // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
   469  // to give the same answer. It allows this package not to depend on unicode,
   470  // and therefore not pull in all the Unicode tables. If the linker were better
   471  // at tossing unused tables, we could get rid of this implementation.
   472  // That would be nice.
   473  
   474  // IsPrint reports whether the rune is defined as printable by Go, with
   475  // the same definition as unicode.IsPrint: letters, numbers, punctuation,
   476  // symbols and ASCII space.
   477  func IsPrint(r rune) bool {
   478  	// Fast check for Latin-1
   479  	if r <= 0xFF {
   480  		if 0x20 <= r && r <= 0x7E {
   481  			// All the ASCII is printable from space through DEL-1.
   482  			return true
   483  		}
   484  		if 0xA1 <= r && r <= 0xFF {
   485  			// Similarly for ¡ through ÿ...
   486  			return r != 0xAD // ...except for the bizarre soft hyphen.
   487  		}
   488  		return false
   489  	}
   490  
   491  	// Same algorithm, either on uint16 or uint32 value.
   492  	// First, find first i such that isPrint[i] >= x.
   493  	// This is the index of either the start or end of a pair that might span x.
   494  	// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
   495  	// If we find x in a range, make sure x is not in isNotPrint list.
   496  
   497  	if 0 <= r && r < 1<<16 {
   498  		rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
   499  		i := bsearch16(isPrint, rr)
   500  		if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   501  			return false
   502  		}
   503  		j := bsearch16(isNotPrint, rr)
   504  		return j >= len(isNotPrint) || isNotPrint[j] != rr
   505  	}
   506  
   507  	rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
   508  	i := bsearch32(isPrint, rr)
   509  	if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   510  		return false
   511  	}
   512  	if r >= 0x20000 {
   513  		return true
   514  	}
   515  	r -= 0x10000
   516  	j := bsearch16(isNotPrint, uint16(r))
   517  	return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
   518  }
   519  
   520  // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
   521  // characters include letters, marks, numbers, punctuation, symbols, and
   522  // spaces, from categories L, M, N, P, S, and Zs.
   523  func IsGraphic(r rune) bool {
   524  	if IsPrint(r) {
   525  		return true
   526  	}
   527  	return isInGraphicList(r)
   528  }
   529  
   530  // isInGraphicList reports whether the rune is in the isGraphic list. This separation
   531  // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
   532  // Should be called only if IsPrint fails.
   533  func isInGraphicList(r rune) bool {
   534  	// We know r must fit in 16 bits - see makeisprint.go.
   535  	if r > 0xFFFF {
   536  		return false
   537  	}
   538  	rr := uint16(r)
   539  	i := bsearch16(isGraphic, rr)
   540  	return i < len(isGraphic) && rr == isGraphic[i]
   541  }