github.com/fjballest/golang@v0.0.0-20151209143359-e4c5fe594ca8/src/strconv/quote.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run makeisprint.go -output isprint.go
     6  
     7  package strconv
     8  
     9  import (
    10  	"unicode/utf8"
    11  )
    12  
    13  const lowerhex = "0123456789abcdef"
    14  
    15  func quoteWith(s string, quote byte, ASCIIonly, graphicOnly bool) string {
    16  	var runeTmp [utf8.UTFMax]byte
    17  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
    18  	buf = append(buf, quote)
    19  	for width := 0; len(s) > 0; s = s[width:] {
    20  		r := rune(s[0])
    21  		width = 1
    22  		if r >= utf8.RuneSelf {
    23  			r, width = utf8.DecodeRuneInString(s)
    24  		}
    25  		if width == 1 && r == utf8.RuneError {
    26  			buf = append(buf, `\x`...)
    27  			buf = append(buf, lowerhex[s[0]>>4])
    28  			buf = append(buf, lowerhex[s[0]&0xF])
    29  			continue
    30  		}
    31  		if r == rune(quote) || r == '\\' { // always backslashed
    32  			buf = append(buf, '\\')
    33  			buf = append(buf, byte(r))
    34  			continue
    35  		}
    36  		if ASCIIonly {
    37  			if r < utf8.RuneSelf && IsPrint(r) {
    38  				buf = append(buf, byte(r))
    39  				continue
    40  			}
    41  		} else if IsPrint(r) || graphicOnly && isInGraphicList(r) {
    42  			n := utf8.EncodeRune(runeTmp[:], r)
    43  			buf = append(buf, runeTmp[:n]...)
    44  			continue
    45  		}
    46  		switch r {
    47  		case '\a':
    48  			buf = append(buf, `\a`...)
    49  		case '\b':
    50  			buf = append(buf, `\b`...)
    51  		case '\f':
    52  			buf = append(buf, `\f`...)
    53  		case '\n':
    54  			buf = append(buf, `\n`...)
    55  		case '\r':
    56  			buf = append(buf, `\r`...)
    57  		case '\t':
    58  			buf = append(buf, `\t`...)
    59  		case '\v':
    60  			buf = append(buf, `\v`...)
    61  		default:
    62  			switch {
    63  			case r < ' ':
    64  				buf = append(buf, `\x`...)
    65  				buf = append(buf, lowerhex[s[0]>>4])
    66  				buf = append(buf, lowerhex[s[0]&0xF])
    67  			case r > utf8.MaxRune:
    68  				r = 0xFFFD
    69  				fallthrough
    70  			case r < 0x10000:
    71  				buf = append(buf, `\u`...)
    72  				for s := 12; s >= 0; s -= 4 {
    73  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    74  				}
    75  			default:
    76  				buf = append(buf, `\U`...)
    77  				for s := 28; s >= 0; s -= 4 {
    78  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    79  				}
    80  			}
    81  		}
    82  	}
    83  	buf = append(buf, quote)
    84  	return string(buf)
    85  
    86  }
    87  
    88  // Quote returns a double-quoted Go string literal representing s.  The
    89  // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
    90  // control characters and non-printable characters as defined by
    91  // IsPrint.
    92  func Quote(s string) string {
    93  	return quoteWith(s, '"', false, false)
    94  }
    95  
    96  // AppendQuote appends a double-quoted Go string literal representing s,
    97  // as generated by Quote, to dst and returns the extended buffer.
    98  func AppendQuote(dst []byte, s string) []byte {
    99  	return append(dst, Quote(s)...)
   100  }
   101  
   102  // QuoteToASCII returns a double-quoted Go string literal representing s.
   103  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   104  // non-ASCII characters and non-printable characters as defined by IsPrint.
   105  func QuoteToASCII(s string) string {
   106  	return quoteWith(s, '"', true, false)
   107  }
   108  
   109  // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
   110  // as generated by QuoteToASCII, to dst and returns the extended buffer.
   111  func AppendQuoteToASCII(dst []byte, s string) []byte {
   112  	return append(dst, QuoteToASCII(s)...)
   113  }
   114  
   115  // QuoteToGraphic returns a double-quoted Go string literal representing s.
   116  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   117  // non-ASCII characters and non-printable characters as defined by IsGraphic.
   118  func QuoteToGraphic(s string) string {
   119  	return quoteWith(s, '"', false, true)
   120  }
   121  
   122  // AppendQuoteToGraphic appends a double-quoted Go string literal representing s,
   123  // as generated by QuoteToGraphic, to dst and returns the extended buffer.
   124  func AppendQuoteToGraphic(dst []byte, s string) []byte {
   125  	return append(dst, QuoteToGraphic(s)...)
   126  }
   127  
   128  // QuoteRune returns a single-quoted Go character literal representing the
   129  // rune. The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
   130  // for control characters and non-printable characters as defined by IsPrint.
   131  func QuoteRune(r rune) string {
   132  	// TODO: avoid the allocation here.
   133  	return quoteWith(string(r), '\'', false, false)
   134  }
   135  
   136  // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
   137  // as generated by QuoteRune, to dst and returns the extended buffer.
   138  func AppendQuoteRune(dst []byte, r rune) []byte {
   139  	return append(dst, QuoteRune(r)...)
   140  }
   141  
   142  // QuoteRuneToASCII returns a single-quoted Go character literal representing
   143  // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
   144  // \u0100) for non-ASCII characters and non-printable characters as defined
   145  // by IsPrint.
   146  func QuoteRuneToASCII(r rune) string {
   147  	// TODO: avoid the allocation here.
   148  	return quoteWith(string(r), '\'', true, false)
   149  }
   150  
   151  // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
   152  // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
   153  func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
   154  	return append(dst, QuoteRuneToASCII(r)...)
   155  }
   156  
   157  // QuoteRuneToGraphic returns a single-quoted Go character literal representing
   158  // the rune. The returned string uses Go escape sequences (\t, \n, \xFF,
   159  // \u0100) for non-ASCII characters and non-printable characters as defined
   160  // by IsGraphic.
   161  func QuoteRuneToGraphic(r rune) string {
   162  	// TODO: avoid the allocation here.
   163  	return quoteWith(string(r), '\'', false, true)
   164  }
   165  
   166  // AppendQuoteRuneToGraphic appends a single-quoted Go character literal representing the rune,
   167  // as generated by QuoteRuneToGraphic, to dst and returns the extended buffer.
   168  func AppendQuoteRuneToGraphic(dst []byte, r rune) []byte {
   169  	return append(dst, QuoteRuneToGraphic(r)...)
   170  }
   171  
   172  // CanBackquote reports whether the string s can be represented
   173  // unchanged as a single-line backquoted string without control
   174  // characters other than tab.
   175  func CanBackquote(s string) bool {
   176  	for len(s) > 0 {
   177  		r, wid := utf8.DecodeRuneInString(s)
   178  		s = s[wid:]
   179  		if wid > 1 {
   180  			if r == '\ufeff' {
   181  				return false // BOMs are invisible and should not be quoted.
   182  			}
   183  			continue // All other multibyte runes are correctly encoded and assumed printable.
   184  		}
   185  		if r == utf8.RuneError {
   186  			return false
   187  		}
   188  		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
   189  			return false
   190  		}
   191  	}
   192  	return true
   193  }
   194  
   195  func unhex(b byte) (v rune, ok bool) {
   196  	c := rune(b)
   197  	switch {
   198  	case '0' <= c && c <= '9':
   199  		return c - '0', true
   200  	case 'a' <= c && c <= 'f':
   201  		return c - 'a' + 10, true
   202  	case 'A' <= c && c <= 'F':
   203  		return c - 'A' + 10, true
   204  	}
   205  	return
   206  }
   207  
   208  // UnquoteChar decodes the first character or byte in the escaped string
   209  // or character literal represented by the string s.
   210  // It returns four values:
   211  //
   212  //	1) value, the decoded Unicode code point or byte value;
   213  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   214  //	3) tail, the remainder of the string after the character; and
   215  //	4) an error that will be nil if the character is syntactically valid.
   216  //
   217  // The second argument, quote, specifies the type of literal being parsed
   218  // and therefore which escaped quote character is permitted.
   219  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
   220  // If set to a double quote, it permits \" and disallows unescaped ".
   221  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   222  func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
   223  	// easy cases
   224  	switch c := s[0]; {
   225  	case c == quote && (quote == '\'' || quote == '"'):
   226  		err = ErrSyntax
   227  		return
   228  	case c >= utf8.RuneSelf:
   229  		r, size := utf8.DecodeRuneInString(s)
   230  		return r, true, s[size:], nil
   231  	case c != '\\':
   232  		return rune(s[0]), false, s[1:], nil
   233  	}
   234  
   235  	// hard case: c is backslash
   236  	if len(s) <= 1 {
   237  		err = ErrSyntax
   238  		return
   239  	}
   240  	c := s[1]
   241  	s = s[2:]
   242  
   243  	switch c {
   244  	case 'a':
   245  		value = '\a'
   246  	case 'b':
   247  		value = '\b'
   248  	case 'f':
   249  		value = '\f'
   250  	case 'n':
   251  		value = '\n'
   252  	case 'r':
   253  		value = '\r'
   254  	case 't':
   255  		value = '\t'
   256  	case 'v':
   257  		value = '\v'
   258  	case 'x', 'u', 'U':
   259  		n := 0
   260  		switch c {
   261  		case 'x':
   262  			n = 2
   263  		case 'u':
   264  			n = 4
   265  		case 'U':
   266  			n = 8
   267  		}
   268  		var v rune
   269  		if len(s) < n {
   270  			err = ErrSyntax
   271  			return
   272  		}
   273  		for j := 0; j < n; j++ {
   274  			x, ok := unhex(s[j])
   275  			if !ok {
   276  				err = ErrSyntax
   277  				return
   278  			}
   279  			v = v<<4 | x
   280  		}
   281  		s = s[n:]
   282  		if c == 'x' {
   283  			// single-byte string, possibly not UTF-8
   284  			value = v
   285  			break
   286  		}
   287  		if v > utf8.MaxRune {
   288  			err = ErrSyntax
   289  			return
   290  		}
   291  		value = v
   292  		multibyte = true
   293  	case '0', '1', '2', '3', '4', '5', '6', '7':
   294  		v := rune(c) - '0'
   295  		if len(s) < 2 {
   296  			err = ErrSyntax
   297  			return
   298  		}
   299  		for j := 0; j < 2; j++ { // one digit already; two more
   300  			x := rune(s[j]) - '0'
   301  			if x < 0 || x > 7 {
   302  				err = ErrSyntax
   303  				return
   304  			}
   305  			v = (v << 3) | x
   306  		}
   307  		s = s[2:]
   308  		if v > 255 {
   309  			err = ErrSyntax
   310  			return
   311  		}
   312  		value = v
   313  	case '\\':
   314  		value = '\\'
   315  	case '\'', '"':
   316  		if c != quote {
   317  			err = ErrSyntax
   318  			return
   319  		}
   320  		value = rune(c)
   321  	default:
   322  		err = ErrSyntax
   323  		return
   324  	}
   325  	tail = s
   326  	return
   327  }
   328  
   329  // Unquote interprets s as a single-quoted, double-quoted,
   330  // or backquoted Go string literal, returning the string value
   331  // that s quotes.  (If s is single-quoted, it would be a Go
   332  // character literal; Unquote returns the corresponding
   333  // one-character string.)
   334  func Unquote(s string) (t string, err error) {
   335  	n := len(s)
   336  	if n < 2 {
   337  		return "", ErrSyntax
   338  	}
   339  	quote := s[0]
   340  	if quote != s[n-1] {
   341  		return "", ErrSyntax
   342  	}
   343  	s = s[1 : n-1]
   344  
   345  	if quote == '`' {
   346  		if contains(s, '`') {
   347  			return "", ErrSyntax
   348  		}
   349  		return s, nil
   350  	}
   351  	if quote != '"' && quote != '\'' {
   352  		return "", ErrSyntax
   353  	}
   354  	if contains(s, '\n') {
   355  		return "", ErrSyntax
   356  	}
   357  
   358  	// Is it trivial?  Avoid allocation.
   359  	if !contains(s, '\\') && !contains(s, quote) {
   360  		switch quote {
   361  		case '"':
   362  			return s, nil
   363  		case '\'':
   364  			r, size := utf8.DecodeRuneInString(s)
   365  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   366  				return s, nil
   367  			}
   368  		}
   369  	}
   370  
   371  	var runeTmp [utf8.UTFMax]byte
   372  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   373  	for len(s) > 0 {
   374  		c, multibyte, ss, err := UnquoteChar(s, quote)
   375  		if err != nil {
   376  			return "", err
   377  		}
   378  		s = ss
   379  		if c < utf8.RuneSelf || !multibyte {
   380  			buf = append(buf, byte(c))
   381  		} else {
   382  			n := utf8.EncodeRune(runeTmp[:], c)
   383  			buf = append(buf, runeTmp[:n]...)
   384  		}
   385  		if quote == '\'' && len(s) != 0 {
   386  			// single-quoted must be single character
   387  			return "", ErrSyntax
   388  		}
   389  	}
   390  	return string(buf), nil
   391  }
   392  
   393  // contains reports whether the string contains the byte c.
   394  func contains(s string, c byte) bool {
   395  	for i := 0; i < len(s); i++ {
   396  		if s[i] == c {
   397  			return true
   398  		}
   399  	}
   400  	return false
   401  }
   402  
   403  // bsearch16 returns the smallest i such that a[i] >= x.
   404  // If there is no such i, bsearch16 returns len(a).
   405  func bsearch16(a []uint16, x uint16) int {
   406  	i, j := 0, len(a)
   407  	for i < j {
   408  		h := i + (j-i)/2
   409  		if a[h] < x {
   410  			i = h + 1
   411  		} else {
   412  			j = h
   413  		}
   414  	}
   415  	return i
   416  }
   417  
   418  // bsearch32 returns the smallest i such that a[i] >= x.
   419  // If there is no such i, bsearch32 returns len(a).
   420  func bsearch32(a []uint32, x uint32) int {
   421  	i, j := 0, len(a)
   422  	for i < j {
   423  		h := i + (j-i)/2
   424  		if a[h] < x {
   425  			i = h + 1
   426  		} else {
   427  			j = h
   428  		}
   429  	}
   430  	return i
   431  }
   432  
   433  // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
   434  // to give the same answer. It allows this package not to depend on unicode,
   435  // and therefore not pull in all the Unicode tables. If the linker were better
   436  // at tossing unused tables, we could get rid of this implementation.
   437  // That would be nice.
   438  
   439  // IsPrint reports whether the rune is defined as printable by Go, with
   440  // the same definition as unicode.IsPrint: letters, numbers, punctuation,
   441  // symbols and ASCII space.
   442  func IsPrint(r rune) bool {
   443  	// Fast check for Latin-1
   444  	if r <= 0xFF {
   445  		if 0x20 <= r && r <= 0x7E {
   446  			// All the ASCII is printable from space through DEL-1.
   447  			return true
   448  		}
   449  		if 0xA1 <= r && r <= 0xFF {
   450  			// Similarly for ¡ through ÿ...
   451  			return r != 0xAD // ...except for the bizarre soft hyphen.
   452  		}
   453  		return false
   454  	}
   455  
   456  	// Same algorithm, either on uint16 or uint32 value.
   457  	// First, find first i such that isPrint[i] >= x.
   458  	// This is the index of either the start or end of a pair that might span x.
   459  	// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
   460  	// If we find x in a range, make sure x is not in isNotPrint list.
   461  
   462  	if 0 <= r && r < 1<<16 {
   463  		rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
   464  		i := bsearch16(isPrint, rr)
   465  		if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   466  			return false
   467  		}
   468  		j := bsearch16(isNotPrint, rr)
   469  		return j >= len(isNotPrint) || isNotPrint[j] != rr
   470  	}
   471  
   472  	rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
   473  	i := bsearch32(isPrint, rr)
   474  	if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   475  		return false
   476  	}
   477  	if r >= 0x20000 {
   478  		return true
   479  	}
   480  	r -= 0x10000
   481  	j := bsearch16(isNotPrint, uint16(r))
   482  	return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
   483  }
   484  
   485  // IsGraphic reports whether the rune is defined as a Graphic by Unicode. Such
   486  // characters include letters, marks, numbers, punctuation, symbols, and
   487  // spaces, from categories L, M, N, P, S, and Zs.
   488  func IsGraphic(r rune) bool {
   489  	if IsPrint(r) {
   490  		return true
   491  	}
   492  	return isInGraphicList(r)
   493  }
   494  
   495  // isInGraphicList reports whether the rune is in the isGraphic list. This separation
   496  // from IsGraphic allows quoteWith to avoid two calls to IsPrint.
   497  // Should be called only if IsPrint fails.
   498  func isInGraphicList(r rune) bool {
   499  	// We know r must fit in 16 bits - see makeisprint.go.
   500  	if r > 0xFFFF {
   501  		return false
   502  	}
   503  	rr := uint16(r)
   504  	i := bsearch16(isGraphic, rr)
   505  	return i < len(isGraphic) && rr == isGraphic[i]
   506  }