github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/strconv/atoi.go

github.com/x04/go/src@v0.0.0-20200202162449-3d481ceb3525/strconv/atoi.go (about)

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  import "github.com/x04/go/src/errors"
     8  
     9  // lower(c) is a lower-case letter if and only if
    10  // c is either that lower-case letter or the equivalent upper-case letter.
    11  // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'.
    12  // Note that lower of non-letters can produce other non-letters.
    13  func lower(c byte) byte {
    14  	return c | ('x' - 'X')
    15  }
    16  
    17  // ErrRange indicates that a value is out of range for the target type.
    18  var ErrRange = errors.New("value out of range")
    19  
    20  // ErrSyntax indicates that a value does not have the right syntax for the target type.
    21  var ErrSyntax = errors.New("invalid syntax")
    22  
    23  // A NumError records a failed conversion.
    24  type NumError struct {
    25  	Func	string	// the failing function (ParseBool, ParseInt, ParseUint, ParseFloat)
    26  	Num	string	// the input
    27  	Err	error	// the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.)
    28  }
    29  
    30  func (e *NumError) Error() string {
    31  	return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error()
    32  }
    33  
    34  func (e *NumError) Unwrap() error	{ return e.Err }
    35  
    36  func syntaxError(fn, str string) *NumError {
    37  	return &NumError{fn, str, ErrSyntax}
    38  }
    39  
    40  func rangeError(fn, str string) *NumError {
    41  	return &NumError{fn, str, ErrRange}
    42  }
    43  
    44  func baseError(fn, str string, base int) *NumError {
    45  	return &NumError{fn, str, errors.New("invalid base " + Itoa(base))}
    46  }
    47  
    48  func bitSizeError(fn, str string, bitSize int) *NumError {
    49  	return &NumError{fn, str, errors.New("invalid bit size " + Itoa(bitSize))}
    50  }
    51  
    52  const intSize = 32 << (^uint(0) >> 63)
    53  
    54  // IntSize is the size in bits of an int or uint value.
    55  const IntSize = intSize
    56  
    57  const maxUint64 = 1<<64 - 1
    58  
    59  // ParseUint is like ParseInt but for unsigned numbers.
    60  func ParseUint(s string, base int, bitSize int) (uint64, error) {
    61  	const fnParseUint = "ParseUint"
    62  
    63  	if s == "" {
    64  		return 0, syntaxError(fnParseUint, s)
    65  	}
    66  
    67  	base0 := base == 0
    68  
    69  	s0 := s
    70  	switch {
    71  	case 2 <= base && base <= 36:
    72  		// valid base; nothing to do
    73  
    74  	case base == 0:
    75  		// Look for octal, hex prefix.
    76  		base = 10
    77  		if s[0] == '0' {
    78  			switch {
    79  			case len(s) >= 3 && lower(s[1]) == 'b':
    80  				base = 2
    81  				s = s[2:]
    82  			case len(s) >= 3 && lower(s[1]) == 'o':
    83  				base = 8
    84  				s = s[2:]
    85  			case len(s) >= 3 && lower(s[1]) == 'x':
    86  				base = 16
    87  				s = s[2:]
    88  			default:
    89  				base = 8
    90  				s = s[1:]
    91  			}
    92  		}
    93  
    94  	default:
    95  		return 0, baseError(fnParseUint, s0, base)
    96  	}
    97  
    98  	if bitSize == 0 {
    99  		bitSize = int(IntSize)
   100  	} else if bitSize < 0 || bitSize > 64 {
   101  		return 0, bitSizeError(fnParseUint, s0, bitSize)
   102  	}
   103  
   104  	// Cutoff is the smallest number such that cutoff*base > maxUint64.
   105  	// Use compile-time constants for common cases.
   106  	var cutoff uint64
   107  	switch base {
   108  	case 10:
   109  		cutoff = maxUint64/10 + 1
   110  	case 16:
   111  		cutoff = maxUint64/16 + 1
   112  	default:
   113  		cutoff = maxUint64/uint64(base) + 1
   114  	}
   115  
   116  	maxVal := uint64(1)<<uint(bitSize) - 1
   117  
   118  	underscores := false
   119  	var n uint64
   120  	for _, c := range []byte(s) {
   121  		var d byte
   122  		switch {
   123  		case c == '_' && base0:
   124  			underscores = true
   125  			continue
   126  		case '0' <= c && c <= '9':
   127  			d = c - '0'
   128  		case 'a' <= lower(c) && lower(c) <= 'z':
   129  			d = lower(c) - 'a' + 10
   130  		default:
   131  			return 0, syntaxError(fnParseUint, s0)
   132  		}
   133  
   134  		if d >= byte(base) {
   135  			return 0, syntaxError(fnParseUint, s0)
   136  		}
   137  
   138  		if n >= cutoff {
   139  			// n*base overflows
   140  			return maxVal, rangeError(fnParseUint, s0)
   141  		}
   142  		n *= uint64(base)
   143  
   144  		n1 := n + uint64(d)
   145  		if n1 < n || n1 > maxVal {
   146  			// n+v overflows
   147  			return maxVal, rangeError(fnParseUint, s0)
   148  		}
   149  		n = n1
   150  	}
   151  
   152  	if underscores && !underscoreOK(s0) {
   153  		return 0, syntaxError(fnParseUint, s0)
   154  	}
   155  
   156  	return n, nil
   157  }
   158  
   159  // ParseInt interprets a string s in the given base (0, 2 to 36) and
   160  // bit size (0 to 64) and returns the corresponding value i.
   161  //
   162  // If the base argument is 0, the true base is implied by the string's
   163  // prefix: 2 for "0b", 8 for "0" or "0o", 16 for "0x", and 10 otherwise.
   164  // Also, for argument base 0 only, underscore characters are permitted
   165  // as defined by the Go syntax for integer literals.
   166  //
   167  // The bitSize argument specifies the integer type
   168  // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64
   169  // correspond to int, int8, int16, int32, and int64.
   170  // If bitSize is below 0 or above 64, an error is returned.
   171  //
   172  // The errors that ParseInt returns have concrete type *NumError
   173  // and include err.Num = s. If s is empty or contains invalid
   174  // digits, err.Err = ErrSyntax and the returned value is 0;
   175  // if the value corresponding to s cannot be represented by a
   176  // signed integer of the given size, err.Err = ErrRange and the
   177  // returned value is the maximum magnitude integer of the
   178  // appropriate bitSize and sign.
   179  func ParseInt(s string, base int, bitSize int) (i int64, err error) {
   180  	const fnParseInt = "ParseInt"
   181  
   182  	if s == "" {
   183  		return 0, syntaxError(fnParseInt, s)
   184  	}
   185  
   186  	// Pick off leading sign.
   187  	s0 := s
   188  	neg := false
   189  	if s[0] == '+' {
   190  		s = s[1:]
   191  	} else if s[0] == '-' {
   192  		neg = true
   193  		s = s[1:]
   194  	}
   195  
   196  	// Convert unsigned and check range.
   197  	var un uint64
   198  	un, err = ParseUint(s, base, bitSize)
   199  	if err != nil && err.(*NumError).Err != ErrRange {
   200  		err.(*NumError).Func = fnParseInt
   201  		err.(*NumError).Num = s0
   202  		return 0, err
   203  	}
   204  
   205  	if bitSize == 0 {
   206  		bitSize = int(IntSize)
   207  	}
   208  
   209  	cutoff := uint64(1 << uint(bitSize-1))
   210  	if !neg && un >= cutoff {
   211  		return int64(cutoff - 1), rangeError(fnParseInt, s0)
   212  	}
   213  	if neg && un > cutoff {
   214  		return -int64(cutoff), rangeError(fnParseInt, s0)
   215  	}
   216  	n := int64(un)
   217  	if neg {
   218  		n = -n
   219  	}
   220  	return n, nil
   221  }
   222  
   223  // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int.
   224  func Atoi(s string) (int, error) {
   225  	const fnAtoi = "Atoi"
   226  
   227  	sLen := len(s)
   228  	if intSize == 32 && (0 < sLen && sLen < 10) ||
   229  		intSize == 64 && (0 < sLen && sLen < 19) {
   230  		// Fast path for small integers that fit int type.
   231  		s0 := s
   232  		if s[0] == '-' || s[0] == '+' {
   233  			s = s[1:]
   234  			if len(s) < 1 {
   235  				return 0, &NumError{fnAtoi, s0, ErrSyntax}
   236  			}
   237  		}
   238  
   239  		n := 0
   240  		for _, ch := range []byte(s) {
   241  			ch -= '0'
   242  			if ch > 9 {
   243  				return 0, &NumError{fnAtoi, s0, ErrSyntax}
   244  			}
   245  			n = n*10 + int(ch)
   246  		}
   247  		if s0[0] == '-' {
   248  			n = -n
   249  		}
   250  		return n, nil
   251  	}
   252  
   253  	// Slow path for invalid, big, or underscored integers.
   254  	i64, err := ParseInt(s, 10, 0)
   255  	if nerr, ok := err.(*NumError); ok {
   256  		nerr.Func = fnAtoi
   257  	}
   258  	return int(i64), err
   259  }
   260  
   261  // underscoreOK reports whether the underscores in s are allowed.
   262  // Checking them in this one function lets all the parsers skip over them simply.
   263  // Underscore must appear only between digits or between a base prefix and a digit.
   264  func underscoreOK(s string) bool {
   265  	// saw tracks the last character (class) we saw:
   266  	// ^ for beginning of number,
   267  	// 0 for a digit or base prefix,
   268  	// _ for an underscore,
   269  	// ! for none of the above.
   270  	saw := '^'
   271  	i := 0
   272  
   273  	// Optional sign.
   274  	if len(s) >= 1 && (s[0] == '-' || s[0] == '+') {
   275  		s = s[1:]
   276  	}
   277  
   278  	// Optional base prefix.
   279  	hex := false
   280  	if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') {
   281  		i = 2
   282  		saw = '0'	// base prefix counts as a digit for "underscore as digit separator"
   283  		hex = lower(s[1]) == 'x'
   284  	}
   285  
   286  	// Number proper.
   287  	for ; i < len(s); i++ {
   288  		// Digits are always okay.
   289  		if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' {
   290  			saw = '0'
   291  			continue
   292  		}
   293  		// Underscore must follow digit.
   294  		if s[i] == '_' {
   295  			if saw != '0' {
   296  				return false
   297  			}
   298  			saw = '_'
   299  			continue
   300  		}
   301  		// Underscore must also be followed by digit.
   302  		if saw == '_' {
   303  			return false
   304  		}
   305  		// Saw non-digit, non-underscore.
   306  		saw = '!'
   307  	}
   308  	return saw != '_'
   309  }