github.com/iEvan-lhr/exciting-tool@v0.0.0-20230504054234-8e983f73cdd2/atoi.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package tools
     6  
     7  import (
     8  	"errors"
     9  )
    10  
    11  // ErrRange indicates that a value is out of range for the target type.
    12  var ErrRange = errors.New("value out of range")
    13  
    14  // ErrSyntax indicates that a value does not have the right syntax for the target type.
    15  var ErrSyntax = errors.New("invalid syntax")
    16  
    17  // A NumError records a failed conversion.
    18  type NumError struct {
    19  	Func *String // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex)
    20  	Num  *String // the input
    21  	Err  error   // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
    22  }
    23  
    24  func (e *NumError) Error() string {
    25  	Quote(e.Num)
    26  	return "strconv." + e.Func.string() + ": " + "parsing " + e.Num.string() + ": " + e.Err.Error()
    27  }
    28  
    29  func (e *NumError) Unwrap() error { return e.Err }
    30  
    31  func syntaxError(fn, str *String) *NumError {
    32  	return &NumError{fn, str, ErrSyntax}
    33  }
    34  
    35  func rangeError(fn, str *String) *NumError {
    36  	return &NumError{fn, str, ErrRange}
    37  }
    38  
    39  func baseError(fn, str *String, base int) *NumError {
    40  	errorf := Strings("invalid base ")
    41  	errorf.appendAny(base)
    42  	return &NumError{fn, str, errors.New(errorf.string())}
    43  }
    44  
    45  func bitSizeError(fn, str *String, bitSize int) *NumError {
    46  	errorf := Strings("invalid bit size ")
    47  	errorf.appendAny(bitSize)
    48  	return &NumError{fn, str, errors.New(errorf.string())}
    49  }
    50  
    51  const intSize = 32 << (^uint(0) >> 63)
    52  
    53  // IntSize is the size in bits of an int or uint value.
    54  const IntSize = intSize
    55  
    56  const maxUint64 = 1<<64 - 1
    57  
    58  // ParseUint is like ParseInt but for unsigned numbers.
    59  //
    60  // A sign prefix is not permitted.
    61  func ParseUint(s []byte, base int, bitSize int) (uint64, error) {
    62  	const fnParseUint = "ParseUint"
    63  
    64  	if s == nil || len(s) == 0 {
    65  		return 0, syntaxError(Strings(fnParseUint), BytesString(s))
    66  	}
    67  
    68  	base0 := base == 0
    69  
    70  	s0 := s
    71  	switch {
    72  	case 2 <= base && base <= 36:
    73  		// valid base; nothing to do
    74  
    75  	case base == 0:
    76  		// Look for octal, hex prefix.
    77  		base = 10
    78  		if s[0] == '0' {
    79  			switch {
    80  			case len(s) >= 3 && lower(s[1]) == 'b':
    81  				base = 2
    82  				s = s[2:]
    83  			case len(s) >= 3 && lower(s[1]) == 'o':
    84  				base = 8
    85  				s = s[2:]
    86  			case len(s) >= 3 && lower(s[1]) == 'x':
    87  				base = 16
    88  				s = s[2:]
    89  			default:
    90  				base = 8
    91  				s = s[1:]
    92  			}
    93  		}
    94  
    95  	default:
    96  		return 0, baseError(Strings(fnParseUint), BytesString(s0), base)
    97  	}
    98  
    99  	if bitSize == 0 {
   100  		bitSize = IntSize
   101  	} else if bitSize < 0 || bitSize > 64 {
   102  		return 0, bitSizeError(Strings(fnParseUint), BytesString(s0), bitSize)
   103  	}
   104  
   105  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
   106  	// Use compile-time constants for common cases.
   107  	var cutoff uint64
   108  	switch base {
   109  	case 10:
   110  		cutoff = maxUint64/10 + 1
   111  	case 16:
   112  		cutoff = maxUint64/16 + 1
   113  	default:
   114  		cutoff = maxUint64/uint64(base) + 1
   115  	}
   116  
   117  	maxVal := uint64(1)<<uint(bitSize) - 1
   118  
   119  	underscores := false
   120  	var n uint64
   121  	for _, c := range s {
   122  		var d byte
   123  		switch {
   124  		case c == '_' && base0:
   125  			underscores = true
   126  			continue
   127  		case '0' <= c && c <= '9':
   128  			d = c - '0'
   129  		case 'a' <= lower(c) && lower(c) <= 'z':
   130  			d = lower(c) - 'a' + 10
   131  		default:
   132  			return 0, syntaxError(Strings(fnParseUint), BytesString(s0))
   133  		}
   134  
   135  		if d >= byte(base) {
   136  			return 0, syntaxError(Strings(fnParseUint), BytesString(s0))
   137  		}
   138  
   139  		if n >= cutoff {
   140  			// n*base overflows
   141  			return maxVal, rangeError(Strings(fnParseUint), BytesString(s0))
   142  		}
   143  		n *= uint64(base)
   144  
   145  		n1 := n + uint64(d)
   146  		if n1 < n || n1 > maxVal {
   147  			// n+d overflows
   148  			return maxVal, rangeError(Strings(fnParseUint), BytesString(s0))
   149  		}
   150  		n = n1
   151  	}
   152  
   153  	if underscores && !underscoreOK(s0) {
   154  		return 0, syntaxError(Strings(fnParseUint), BytesString(s0))
   155  	}
   156  
   157  	return n, nil
   158  }
   159  
   160  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   161  // bit size (0 to 64) and returns the corresponding value i.
   162  //
   163  // The string may begin with a leading sign: "+" or "-".
   164  //
   165  // If the base argument is 0, the true base is implied by the string's
   166  // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o",
   167  // 16 for "0x", and 10 otherwise. Also, for argument base 0 only,
   168  // underscore characters are permitted as defined by the Go syntax for
   169  // integer literals.
   170  //
   171  // The bitSize argument specifies the integer type
   172  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   173  // correspond to int, int8, int16, int32, and int64.
   174  // If bitSize is below 0 or above 64, an error is returned.
   175  //
   176  // The errors that ParseInt returns have concrete type *NumError
   177  // and include err.Num = s. If s is empty or contains invalid
   178  // digits, err.Err = ErrSyntax and the returned value is 0;
   179  // if the value corresponding to s cannot be represented by a
   180  // signed integer of the given size, err.Err = ErrRange and the
   181  // returned value is the maximum magnitude integer of the
   182  // appropriate bitSize and sign.
   183  func ParseInt(s []byte, base int, bitSize int) (i int64, err error) {
   184  	const fnParseInt = "ParseInt"
   185  
   186  	if s == nil || len(s) == 0 {
   187  		return 0, syntaxError(Strings(fnParseInt), BytesString(s))
   188  	}
   189  
   190  	// Pick off leading sign.
   191  	s0 := s
   192  	neg := false
   193  	if s[0] == '+' {
   194  		s = s[1:]
   195  	} else if s[0] == '-' {
   196  		neg = true
   197  		s = s[1:]
   198  	}
   199  
   200  	// Convert unsigned and check range.
   201  	var un uint64
   202  	un, err = ParseUint(s, base, bitSize)
   203  	if err != nil && err.(*NumError).Err != ErrRange {
   204  		err.(*NumError).Func = Strings(fnParseInt)
   205  		err.(*NumError).Num = BytesString(s0)
   206  		return 0, err
   207  	}
   208  
   209  	if bitSize == 0 {
   210  		bitSize = IntSize
   211  	}
   212  
   213  	cutoff := uint64(1 << uint(bitSize-1))
   214  	if !neg && un >= cutoff {
   215  		return int64(cutoff - 1), rangeError(Strings(fnParseInt), BytesString(s0))
   216  	}
   217  	if neg && un > cutoff {
   218  		return -int64(cutoff), rangeError(Strings(fnParseInt), BytesString(s0))
   219  	}
   220  	n := int64(un)
   221  	if neg {
   222  		n = -n
   223  	}
   224  	return n, nil
   225  }
   226  
   227  // Atoi 方法在实际使用中的效率近似接近strconv包中的方法,但如果是对同一个string对象进行多次转换 推荐使用strconv包中的方法
   228  func (s *String) Atoi() (int, error) {
   229  	const fnAtoi = "Atoi"
   230  	sLen := s.Len()
   231  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   232  		intSize == 64 && (0 < sLen && sLen < 19) {
   233  		// Fast path for small integers that fit int type.
   234  		s0 := s
   235  		if s.buf[0] == '-' || s.buf[0] == '+' {
   236  			s.buf = s.buf[1:]
   237  			if s.Len() < 1 {
   238  				return 0, &NumError{Strings(fnAtoi), s0, ErrSyntax}
   239  			}
   240  		}
   241  
   242  		n := 0
   243  		for _, ch := range s.buf {
   244  			ch -= '0'
   245  			if ch > 9 {
   246  				return 0, &NumError{Strings(fnAtoi), s0, ErrSyntax}
   247  			}
   248  			n = n*10 + int(ch)
   249  		}
   250  		if s0.buf[0] == '-' {
   251  			n = -n
   252  		}
   253  		return n, nil
   254  	}
   255  
   256  	// Slow path for invalid, big, or underscored integers.
   257  	i64, err := ParseInt(s.buf, 10, 0)
   258  	if nerr, ok := err.(*NumError); ok {
   259  		nerr.Func = Strings(fnAtoi)
   260  	}
   261  	return int(i64), err
   262  }
   263  
   264  // underscoreOK reports whether the underscores in s are allowed.
   265  // Checking them in this one function lets all the parsers skip over them simply.
   266  // Underscore must appear only between digits or between a base prefix and a digit.
   267  func underscoreOK(s []byte) bool {
   268  	// saw tracks the last character (class) we saw:
   269  	// ^ for beginning of number,
   270  	// 0 for a digit or base prefix,
   271  	// _ for an underscore,
   272  	// ! for none of the above.
   273  	saw := '^'
   274  	i := 0
   275  
   276  	// Optional sign.
   277  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   278  		s = s[1:]
   279  	}
   280  
   281  	// Optional base prefix.
   282  	hex := false
   283  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   284  		i = 2
   285  		saw = '0' // base prefix counts as a digit for "underscore as digit separator"
   286  		hex = lower(s[1]) == 'x'
   287  	}
   288  
   289  	// Number proper.
   290  	for ; i < len(s); i++ {
   291  		// Digits are always okay.
   292  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   293  			saw = '0'
   294  			continue
   295  		}
   296  		// Underscore must follow digit.
   297  		if s[i] == '_' {
   298  			if saw != '0' {
   299  				return false
   300  			}
   301  			saw = '_'
   302  			continue
   303  		}
   304  		// Underscore must also be followed by digit.
   305  		if saw == '_' {
   306  			return false
   307  		}
   308  		// Saw non-digit, non-underscore.
   309  		saw = '!'
   310  	}
   311  	return saw != '_'
   312  }