github.com/tinygo-org/tinygo@v0.31.3-0.20240404173401-90b0bf646c27/src/reflect/strconv.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package reflect
     6  
     7  import (
     8  	"unicode/utf8"
     9  )
    10  
    11  // errSyntax indicates that a value does not have the right syntax for the target type.
    12  var errSyntax = badSyntax{}
    13  
    14  type badSyntax struct{}
    15  
    16  func (badSyntax) Error() string {
    17  	return "invalid syntax"
    18  }
    19  
    20  func unhex(b byte) (v rune, ok bool) {
    21  	c := rune(b)
    22  	switch {
    23  	case '0' <= c && c <= '9':
    24  		return c - '0', true
    25  	case 'a' <= c && c <= 'f':
    26  		return c - 'a' + 10, true
    27  	case 'A' <= c && c <= 'F':
    28  		return c - 'A' + 10, true
    29  	}
    30  	return
    31  }
    32  
    33  const (
    34  	lowerhex = "0123456789abcef"
    35  )
    36  
    37  // unquoteChar decodes the first character or byte in the escaped string
    38  // or character literal represented by the string s.
    39  // It returns four values:
    40  //
    41  //  1. value, the decoded Unicode code point or byte value;
    42  //  2. multibyte, a boolean indicating whether the decoded character requires a multibyte UTF-8 representation;
    43  //  3. tail, the remainder of the string after the character; and
    44  //  4. an error that will be nil if the character is syntactically valid.
    45  //
    46  // The second argument, quote, specifies the type of literal being parsed
    47  // and therefore which escaped quote character is permitted.
    48  // If set to a single quote, it permits the sequence \' and disallows unescaped '.
    49  // If set to a double quote, it permits \" and disallows unescaped ".
    50  // If set to zero, it does not permit either escape and allows both quote characters to appear unescaped.
    51  func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
    52  	// easy cases
    53  	if len(s) == 0 {
    54  		err = errSyntax
    55  		return
    56  	}
    57  	switch c := s[0]; {
    58  	case c == quote && (quote == '\'' || quote == '"'):
    59  		err = errSyntax
    60  		return
    61  	case c >= utf8.RuneSelf:
    62  		r, size := utf8.DecodeRuneInString(s)
    63  		return r, true, s[size:], nil
    64  	case c != '\\':
    65  		return rune(s[0]), false, s[1:], nil
    66  	}
    67  
    68  	// hard case: c is backslash
    69  	if len(s) <= 1 {
    70  		err = errSyntax
    71  		return
    72  	}
    73  	c := s[1]
    74  	s = s[2:]
    75  
    76  	switch c {
    77  	case 'a':
    78  		value = '\a'
    79  	case 'b':
    80  		value = '\b'
    81  	case 'f':
    82  		value = '\f'
    83  	case 'n':
    84  		value = '\n'
    85  	case 'r':
    86  		value = '\r'
    87  	case 't':
    88  		value = '\t'
    89  	case 'v':
    90  		value = '\v'
    91  	case 'x', 'u', 'U':
    92  		n := 0
    93  		switch c {
    94  		case 'x':
    95  			n = 2
    96  		case 'u':
    97  			n = 4
    98  		case 'U':
    99  			n = 8
   100  		}
   101  		var v rune
   102  		if len(s) < n {
   103  			err = errSyntax
   104  			return
   105  		}
   106  		for j := 0; j < n; j++ {
   107  			x, ok := unhex(s[j])
   108  			if !ok {
   109  				err = errSyntax
   110  				return
   111  			}
   112  			v = v<<4 | x
   113  		}
   114  		s = s[n:]
   115  		if c == 'x' {
   116  			// single-byte string, possibly not UTF-8
   117  			value = v
   118  			break
   119  		}
   120  		if v > utf8.MaxRune {
   121  			err = errSyntax
   122  			return
   123  		}
   124  		value = v
   125  		multibyte = true
   126  	case '0', '1', '2', '3', '4', '5', '6', '7':
   127  		v := rune(c) - '0'
   128  		if len(s) < 2 {
   129  			err = errSyntax
   130  			return
   131  		}
   132  		for j := 0; j < 2; j++ { // one digit already; two more
   133  			x := rune(s[j]) - '0'
   134  			if x < 0 || x > 7 {
   135  				err = errSyntax
   136  				return
   137  			}
   138  			v = (v << 3) | x
   139  		}
   140  		s = s[2:]
   141  		if v > 255 {
   142  			err = errSyntax
   143  			return
   144  		}
   145  		value = v
   146  	case '\\':
   147  		value = '\\'
   148  	case '\'', '"':
   149  		if c != quote {
   150  			err = errSyntax
   151  			return
   152  		}
   153  		value = rune(c)
   154  	default:
   155  		err = errSyntax
   156  		return
   157  	}
   158  	tail = s
   159  	return
   160  }
   161  
   162  // unquote interprets s as a single-quoted, double-quoted,
   163  // or backquoted Go string literal, returning the string value
   164  // that s quotes.  (If s is single-quoted, it would be a Go
   165  // character literal; unquote returns the corresponding
   166  // one-character string.)
   167  func unquote(s string) (string, error) {
   168  	n := len(s)
   169  	if n < 2 {
   170  		return "", errSyntax
   171  	}
   172  	quote := s[0]
   173  	if quote != s[n-1] {
   174  		return "", errSyntax
   175  	}
   176  	s = s[1 : n-1]
   177  
   178  	if quote == '`' {
   179  		if contains(s, '`') {
   180  			return "", errSyntax
   181  		}
   182  		if contains(s, '\r') {
   183  			// -1 because we know there is at least one \r to remove.
   184  			buf := make([]byte, 0, len(s)-1)
   185  			for i := 0; i < len(s); i++ {
   186  				if s[i] != '\r' {
   187  					buf = append(buf, s[i])
   188  				}
   189  			}
   190  			return string(buf), nil
   191  		}
   192  		return s, nil
   193  	}
   194  	if quote != '"' && quote != '\'' {
   195  		return "", errSyntax
   196  	}
   197  	if contains(s, '\n') {
   198  		return "", errSyntax
   199  	}
   200  
   201  	// Is it trivial? Avoid allocation.
   202  	if !contains(s, '\\') && !contains(s, quote) {
   203  		switch quote {
   204  		case '"':
   205  			if utf8.ValidString(s) {
   206  				return s, nil
   207  			}
   208  		case '\'':
   209  			r, size := utf8.DecodeRuneInString(s)
   210  			if size == len(s) && (r != utf8.RuneError || size != 1) {
   211  				return s, nil
   212  			}
   213  		}
   214  	}
   215  
   216  	var runeTmp [utf8.UTFMax]byte
   217  	buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
   218  	for len(s) > 0 {
   219  		c, multibyte, ss, err := unquoteChar(s, quote)
   220  		if err != nil {
   221  			return "", err
   222  		}
   223  		s = ss
   224  		if c < utf8.RuneSelf || !multibyte {
   225  			buf = append(buf, byte(c))
   226  		} else {
   227  			n := utf8.EncodeRune(runeTmp[:], c)
   228  			buf = append(buf, runeTmp[:n]...)
   229  		}
   230  		if quote == '\'' && len(s) != 0 {
   231  			// single-quoted must be single character
   232  			return "", errSyntax
   233  		}
   234  	}
   235  	return string(buf), nil
   236  }
   237  
   238  func quote(s string) string {
   239  	buf := make([]byte, 0, 3*len(s)/2)
   240  	const quote = '"'
   241  
   242  	buf = append(buf, quote)
   243  	for width := 0; len(s) > 0; s = s[width:] {
   244  		r := rune(s[0])
   245  		width = 1
   246  		if r >= utf8.RuneSelf {
   247  			r, width = utf8.DecodeRuneInString(s)
   248  		}
   249  		if width == 1 && r == utf8.RuneError {
   250  			buf = append(buf, `\x`...)
   251  			buf = append(buf, lowerhex[s[0]>>4])
   252  			buf = append(buf, lowerhex[s[0]&0xF])
   253  			continue
   254  		}
   255  		buf = appendEscapedRune(buf, r)
   256  	}
   257  	buf = append(buf, quote)
   258  	return string(buf)
   259  }
   260  
   261  func appendEscapedRune(buf []byte, r rune) []byte {
   262  
   263  	const quote = '"'
   264  
   265  	var runeTmp [utf8.UTFMax]byte
   266  	if r == rune(quote) || r == '\\' { // always backslashed
   267  		buf = append(buf, '\\')
   268  		buf = append(buf, byte(r))
   269  		return buf
   270  	}
   271  	if isPrint(r) {
   272  		n := utf8.EncodeRune(runeTmp[:], r)
   273  		buf = append(buf, runeTmp[:n]...)
   274  		return buf
   275  	}
   276  	switch r {
   277  	case '\a':
   278  		buf = append(buf, `\a`...)
   279  	case '\b':
   280  		buf = append(buf, `\b`...)
   281  	case '\f':
   282  		buf = append(buf, `\f`...)
   283  	case '\n':
   284  		buf = append(buf, `\n`...)
   285  	case '\r':
   286  		buf = append(buf, `\r`...)
   287  	case '\t':
   288  		buf = append(buf, `\t`...)
   289  	case '\v':
   290  		buf = append(buf, `\v`...)
   291  	default:
   292  		switch {
   293  		case r < ' ' || r == 0x7f:
   294  			buf = append(buf, `\x`...)
   295  			buf = append(buf, lowerhex[byte(r)>>4])
   296  			buf = append(buf, lowerhex[byte(r)&0xF])
   297  		case !utf8.ValidRune(r):
   298  			r = 0xFFFD
   299  			fallthrough
   300  		case r < 0x10000:
   301  			buf = append(buf, `\u`...)
   302  			for s := 12; s >= 0; s -= 4 {
   303  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   304  			}
   305  		default:
   306  			buf = append(buf, `\U`...)
   307  			for s := 28; s >= 0; s -= 4 {
   308  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   309  			}
   310  		}
   311  	}
   312  	return buf
   313  }
   314  
   315  // This is only used for struct tags.  Assume
   316  func isPrint(r rune) bool {
   317  	if r <= 0xFF {
   318  		if 0x20 <= r && r <= 0x7E {
   319  			// All the ASCII is printable from space through DEL-1.
   320  			return true
   321  		}
   322  		if 0xA1 <= r && r <= 0xFF {
   323  			// Similarly for ¡ through ÿ...
   324  			return r != 0xAD // ...except for the bizarre soft hyphen.
   325  		}
   326  		return false
   327  	}
   328  
   329  	// TinyGo: Skip all other unicode processing
   330  	return false
   331  }
   332  
   333  // contains reports whether the string contains the byte c.
   334  func contains(s string, c byte) bool {
   335  	return indexByteString(s, c) != -1
   336  }
   337  
   338  // Index finds the index of the first instance of the specified byte in the string.
   339  // If the byte is not found, this returns -1.
   340  func indexByteString(s string, c byte) int {
   341  	for i := 0; i < len(s); i++ {
   342  		if s[i] == c {
   343  			return i
   344  		}
   345  	}
   346  	return -1
   347  }