github.com/yanyiwu/go@v0.0.0-20150106053140-03d6637dbb7f/src/strconv/quote.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate go run makeisprint.go -output isprint.go
     6  
     7  package strconv
     8  
     9  import (
    10  	"unicode/utf8"
    11  )
    12  
    13  const lowerhex = "0123456789abcdef"
    14  
    15  func quoteWith(s string, quote byte, ASCIIonly bool) string {
    16  	var runeTmp [utf8.UTFMax]byte
    17  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
    18  	buf = append(buf, quote)
    19  	for width := 0; len(s) > 0; s = s[width:] {
    20  		r := rune(s[0])
    21  		width = 1
    22  		if r >= utf8.RuneSelf {
    23  			r, width = utf8.DecodeRuneInString(s)
    24  		}
    25  		if width == 1 && r == utf8.RuneError {
    26  			buf = append(buf, `\x`...)
    27  			buf = append(buf, lowerhex[s[0]>>4])
    28  			buf = append(buf, lowerhex[s[0]&0xF])
    29  			continue
    30  		}
    31  		if r == rune(quote) || r == '\\' { // always backslashed
    32  			buf = append(buf, '\\')
    33  			buf = append(buf, byte(r))
    34  			continue
    35  		}
    36  		if ASCIIonly {
    37  			if r < utf8.RuneSelf && IsPrint(r) {
    38  				buf = append(buf, byte(r))
    39  				continue
    40  			}
    41  		} else if IsPrint(r) {
    42  			n := utf8.EncodeRune(runeTmp[:], r)
    43  			buf = append(buf, runeTmp[:n]...)
    44  			continue
    45  		}
    46  		switch r {
    47  		case '\a':
    48  			buf = append(buf, `\a`...)
    49  		case '\b':
    50  			buf = append(buf, `\b`...)
    51  		case '\f':
    52  			buf = append(buf, `\f`...)
    53  		case '\n':
    54  			buf = append(buf, `\n`...)
    55  		case '\r':
    56  			buf = append(buf, `\r`...)
    57  		case '\t':
    58  			buf = append(buf, `\t`...)
    59  		case '\v':
    60  			buf = append(buf, `\v`...)
    61  		default:
    62  			switch {
    63  			case r < ' ':
    64  				buf = append(buf, `\x`...)
    65  				buf = append(buf, lowerhex[s[0]>>4])
    66  				buf = append(buf, lowerhex[s[0]&0xF])
    67  			case r > utf8.MaxRune:
    68  				r = 0xFFFD
    69  				fallthrough
    70  			case r < 0x10000:
    71  				buf = append(buf, `\u`...)
    72  				for s := 12; s >= 0; s -= 4 {
    73  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    74  				}
    75  			default:
    76  				buf = append(buf, `\U`...)
    77  				for s := 28; s >= 0; s -= 4 {
    78  					buf = append(buf, lowerhex[r>>uint(s)&0xF])
    79  				}
    80  			}
    81  		}
    82  	}
    83  	buf = append(buf, quote)
    84  	return string(buf)
    85  
    86  }
    87  
    88  // Quote returns a double-quoted Go string literal representing s.  The
    89  // returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
    90  // control characters and non-printable characters as defined by
    91  // IsPrint.
    92  func Quote(s string) string {
    93  	return quoteWith(s, '"', false)
    94  }
    95  
    96  // AppendQuote appends a double-quoted Go string literal representing s,
    97  // as generated by Quote, to dst and returns the extended buffer.
    98  func AppendQuote(dst []byte, s string) []byte {
    99  	return append(dst, Quote(s)...)
   100  }
   101  
   102  // QuoteToASCII returns a double-quoted Go string literal representing s.
   103  // The returned string uses Go escape sequences (\t, \n, \xFF, \u0100) for
   104  // non-ASCII characters and non-printable characters as defined by IsPrint.
   105  func QuoteToASCII(s string) string {
   106  	return quoteWith(s, '"', true)
   107  }
   108  
   109  // AppendQuoteToASCII appends a double-quoted Go string literal representing s,
   110  // as generated by QuoteToASCII, to dst and returns the extended buffer.
   111  func AppendQuoteToASCII(dst []byte, s string) []byte {
   112  	return append(dst, QuoteToASCII(s)...)
   113  }
   114  
   115  // QuoteRune returns a single-quoted Go character literal representing the
   116  // rune.  The returned string uses Go escape sequences (\t, \n, \xFF, \u0100)
   117  // for control characters and non-printable characters as defined by IsPrint.
   118  func QuoteRune(r rune) string {
   119  	// TODO: avoid the allocation here.
   120  	return quoteWith(string(r), '\'', false)
   121  }
   122  
   123  // AppendQuoteRune appends a single-quoted Go character literal representing the rune,
   124  // as generated by QuoteRune, to dst and returns the extended buffer.
   125  func AppendQuoteRune(dst []byte, r rune) []byte {
   126  	return append(dst, QuoteRune(r)...)
   127  }
   128  
   129  // QuoteRuneToASCII returns a single-quoted Go character literal representing
   130  // the rune.  The returned string uses Go escape sequences (\t, \n, \xFF,
   131  // \u0100) for non-ASCII characters and non-printable characters as defined
   132  // by IsPrint.
   133  func QuoteRuneToASCII(r rune) string {
   134  	// TODO: avoid the allocation here.
   135  	return quoteWith(string(r), '\'', true)
   136  }
   137  
   138  // AppendQuoteRuneToASCII appends a single-quoted Go character literal representing the rune,
   139  // as generated by QuoteRuneToASCII, to dst and returns the extended buffer.
   140  func AppendQuoteRuneToASCII(dst []byte, r rune) []byte {
   141  	return append(dst, QuoteRuneToASCII(r)...)
   142  }
   143  
   144  // CanBackquote reports whether the string s can be represented
   145  // unchanged as a single-line backquoted string without control
   146  // characters other than tab.
   147  func CanBackquote(s string) bool {
   148  	for len(s) > 0 {
   149  		r, wid := utf8.DecodeRuneInString(s)
   150  		s = s[wid:]
   151  		if wid > 1 {
   152  			if r == '\ufeff' {
   153  				return false // BOMs are invisible and should not be quoted.
   154  			}
   155  			continue // All other multibyte runes are correctly encoded and assumed printable.
   156  		}
   157  		if r == utf8.RuneError {
   158  			return false
   159  		}
   160  		if (r < ' ' && r != '\t') || r == '`' || r == '\u007F' {
   161  			return false
   162  		}
   163  	}
   164  	return true
   165  }
   166  
   167  func unhex(b byte) (v rune, ok bool) {
   168  	c := rune(b)
   169  	switch {
   170  	case '0' <= c && c <= '9':
   171  		return c - '0', true
   172  	case 'a' <= c && c <= 'f':
   173  		return c - 'a' + 10, true
   174  	case 'A' <= c && c <= 'F':
   175  		return c - 'A' + 10, true
   176  	}
   177  	return
   178  }
   179  
   180  // UnquoteChar decodes the first character or byte in the escaped string
   181  // or character literal represented by the string s.
   182  // It returns four values:
   183  //
   184  //	1) value, the decoded Unicode code point or byte value;
   185  //	2) multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
   186  //	3) tail, the remainder of the string after the character; and
   187  //	4) an error that will be nil if the character is syntactically valid.
   188  //
   189  // The second argument, quote, specifies the type of literal being parsed
   190  // and therefore which escaped quote character is permitted.
   191  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
   192  // If set to a double quote, it permits \" and disallows unescaped ".
   193  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
   194  func UnquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
   195  	// easy cases
   196  	switch c := s[0]; {
   197  	case c == quote && (quote == '\'' || quote == '"'):
   198  		err = ErrSyntax
   199  		return
   200  	case c >= utf8.RuneSelf:
   201  		r, size := utf8.DecodeRuneInString(s)
   202  		return r, true, s[size:], nil
   203  	case c != '\\':
   204  		return rune(s[0]), false, s[1:], nil
   205  	}
   206  
   207  	// hard case: c is backslash
   208  	if len(s) <= 1 {
   209  		err = ErrSyntax
   210  		return
   211  	}
   212  	c := s[1]
   213  	s = s[2:]
   214  
   215  	switch c {
   216  	case 'a':
   217  		value = '\a'
   218  	case 'b':
   219  		value = '\b'
   220  	case 'f':
   221  		value = '\f'
   222  	case 'n':
   223  		value = '\n'
   224  	case 'r':
   225  		value = '\r'
   226  	case 't':
   227  		value = '\t'
   228  	case 'v':
   229  		value = '\v'
   230  	case 'x', 'u', 'U':
   231  		n := 0
   232  		switch c {
   233  		case 'x':
   234  			n = 2
   235  		case 'u':
   236  			n = 4
   237  		case 'U':
   238  			n = 8
   239  		}
   240  		var v rune
   241  		if len(s) < n {
   242  			err = ErrSyntax
   243  			return
   244  		}
   245  		for j := 0; j < n; j++ {
   246  			x, ok := unhex(s[j])
   247  			if !ok {
   248  				err = ErrSyntax
   249  				return
   250  			}
   251  			v = v<<4 | x
   252  		}
   253  		s = s[n:]
   254  		if c == 'x' {
   255  			// single-byte string, possibly not UTF-8
   256  			value = v
   257  			break
   258  		}
   259  		if v > utf8.MaxRune {
   260  			err = ErrSyntax
   261  			return
   262  		}
   263  		value = v
   264  		multibyte = true
   265  	case '0', '1', '2', '3', '4', '5', '6', '7':
   266  		v := rune(c) - '0'
   267  		if len(s) < 2 {
   268  			err = ErrSyntax
   269  			return
   270  		}
   271  		for j := 0; j < 2; j++ { // one digit already; two more
   272  			x := rune(s[j]) - '0'
   273  			if x < 0 || x > 7 {
   274  				err = ErrSyntax
   275  				return
   276  			}
   277  			v = (v << 3) | x
   278  		}
   279  		s = s[2:]
   280  		if v > 255 {
   281  			err = ErrSyntax
   282  			return
   283  		}
   284  		value = v
   285  	case '\\':
   286  		value = '\\'
   287  	case '\'', '"':
   288  		if c != quote {
   289  			err = ErrSyntax
   290  			return
   291  		}
   292  		value = rune(c)
   293  	default:
   294  		err = ErrSyntax
   295  		return
   296  	}
   297  	tail = s
   298  	return
   299  }
   300  
   301  // Unquote interprets s as a single-quoted, double-quoted,
   302  // or backquoted Go string literal, returning the string value
   303  // that s quotes.  (If s is single-quoted, it would be a Go
   304  // character literal; Unquote returns the corresponding
   305  // one-character string.)
   306  func Unquote(s string) (t string, err error) {
   307  	n := len(s)
   308  	if n < 2 {
   309  		return "", ErrSyntax
   310  	}
   311  	quote := s[0]
   312  	if quote != s[n-1] {
   313  		return "", ErrSyntax
   314  	}
   315  	s = s[1 : n-1]
   316  
   317  	if quote == '`' {
   318  		if contains(s, '`') {
   319  			return "", ErrSyntax
   320  		}
   321  		return s, nil
   322  	}
   323  	if quote != '"' && quote != '\'' {
   324  		return "", ErrSyntax
   325  	}
   326  	if contains(s, '\n') {
   327  		return "", ErrSyntax
   328  	}
   329  
   330  	// Is it trivial?  Avoid allocation.
   331  	if !contains(s, '\\') && !contains(s, quote) {
   332  		switch quote {
   333  		case '"':
   334  			return s, nil
   335  		case '\'':
   336  			r, size := utf8.DecodeRuneInString(s)
   337  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   338  				return s, nil
   339  			}
   340  		}
   341  	}
   342  
   343  	var runeTmp [utf8.UTFMax]byte
   344  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   345  	for len(s) > 0 {
   346  		c, multibyte, ss, err := UnquoteChar(s, quote)
   347  		if err != nil {
   348  			return "", err
   349  		}
   350  		s = ss
   351  		if c < utf8.RuneSelf || !multibyte {
   352  			buf = append(buf, byte(c))
   353  		} else {
   354  			n := utf8.EncodeRune(runeTmp[:], c)
   355  			buf = append(buf, runeTmp[:n]...)
   356  		}
   357  		if quote == '\'' && len(s) != 0 {
   358  			// single-quoted must be single character
   359  			return "", ErrSyntax
   360  		}
   361  	}
   362  	return string(buf), nil
   363  }
   364  
   365  // contains reports whether the string contains the byte c.
   366  func contains(s string, c byte) bool {
   367  	for i := 0; i < len(s); i++ {
   368  		if s[i] == c {
   369  			return true
   370  		}
   371  	}
   372  	return false
   373  }
   374  
   375  // bsearch16 returns the smallest i such that a[i] >= x.
   376  // If there is no such i, bsearch16 returns len(a).
   377  func bsearch16(a []uint16, x uint16) int {
   378  	i, j := 0, len(a)
   379  	for i < j {
   380  		h := i + (j-i)/2
   381  		if a[h] < x {
   382  			i = h + 1
   383  		} else {
   384  			j = h
   385  		}
   386  	}
   387  	return i
   388  }
   389  
   390  // bsearch32 returns the smallest i such that a[i] >= x.
   391  // If there is no such i, bsearch32 returns len(a).
   392  func bsearch32(a []uint32, x uint32) int {
   393  	i, j := 0, len(a)
   394  	for i < j {
   395  		h := i + (j-i)/2
   396  		if a[h] < x {
   397  			i = h + 1
   398  		} else {
   399  			j = h
   400  		}
   401  	}
   402  	return i
   403  }
   404  
   405  // TODO: IsPrint is a local implementation of unicode.IsPrint, verified by the tests
   406  // to give the same answer. It allows this package not to depend on unicode,
   407  // and therefore not pull in all the Unicode tables. If the linker were better
   408  // at tossing unused tables, we could get rid of this implementation.
   409  // That would be nice.
   410  
   411  // IsPrint reports whether the rune is defined as printable by Go, with
   412  // the same definition as unicode.IsPrint: letters, numbers, punctuation,
   413  // symbols and ASCII space.
   414  func IsPrint(r rune) bool {
   415  	// Fast check for Latin-1
   416  	if r <= 0xFF {
   417  		if 0x20 <= r && r <= 0x7E {
   418  			// All the ASCII is printable from space through DEL-1.
   419  			return true
   420  		}
   421  		if 0xA1 <= r && r <= 0xFF {
   422  			// Similarly for ¡ through ÿ...
   423  			return r != 0xAD // ...except for the bizarre soft hyphen.
   424  		}
   425  		return false
   426  	}
   427  
   428  	// Same algorithm, either on uint16 or uint32 value.
   429  	// First, find first i such that isPrint[i] >= x.
   430  	// This is the index of either the start or end of a pair that might span x.
   431  	// The start is even (isPrint[i&^1]) and the end is odd (isPrint[i|1]).
   432  	// If we find x in a range, make sure x is not in isNotPrint list.
   433  
   434  	if 0 <= r && r < 1<<16 {
   435  		rr, isPrint, isNotPrint := uint16(r), isPrint16, isNotPrint16
   436  		i := bsearch16(isPrint, rr)
   437  		if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   438  			return false
   439  		}
   440  		j := bsearch16(isNotPrint, rr)
   441  		return j >= len(isNotPrint) || isNotPrint[j] != rr
   442  	}
   443  
   444  	rr, isPrint, isNotPrint := uint32(r), isPrint32, isNotPrint32
   445  	i := bsearch32(isPrint, rr)
   446  	if i >= len(isPrint) || rr < isPrint[i&^1] || isPrint[i|1] < rr {
   447  		return false
   448  	}
   449  	if r >= 0x20000 {
   450  		return true
   451  	}
   452  	r -= 0x10000
   453  	j := bsearch16(isNotPrint, uint16(r))
   454  	return j >= len(isNotPrint) || isNotPrint[j] != uint16(r)
   455  }