github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/strconv/atoi.go (about) 1 // Copyright 2009 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package strconv 6 7 import "errors" 8 9 // lower(c) is a lower-case letter if and only if 10 // c is either that lower-case letter or the equivalent upper-case letter. 11 // Instead of writing c == 'x' || c == 'X' one can write lower(c) == 'x'. 12 // Note that lower of non-letters can produce other non-letters. 13 func lower(c byte) byte { 14 return c | ('x' - 'X') 15 } 16 17 // ErrRange indicates that a value is out of range for the target type. 18 var ErrRange = errors.New("value out of range") 19 20 // ErrSyntax indicates that a value does not have the right syntax for the target type. 21 var ErrSyntax = errors.New("invalid syntax") 22 23 // A NumError records a failed conversion. 24 type NumError struct { 25 Func string // the failing function (ParseBool, ParseInt, ParseUint, ParseFloat, ParseComplex) 26 Num string // the input 27 Err error // the reason the conversion failed (e.g. ErrRange, ErrSyntax, etc.) 28 } 29 30 func (e *NumError) Error() string { 31 return "strconv." + e.Func + ": " + "parsing " + Quote(e.Num) + ": " + e.Err.Error() 32 } 33 34 func (e *NumError) Unwrap() error { return e.Err } 35 36 // cloneString returns a string copy of x. 37 // 38 // All ParseXXX functions allow the input string to escape to the error value. 39 // This hurts strconv.ParseXXX(string(b)) calls where b is []byte since 40 // the conversion from []byte must allocate a string on the heap. 41 // If we assume errors are infrequent, then we can avoid escaping the input 42 // back to the output by copying it first. This allows the compiler to call 43 // strconv.ParseXXX without a heap allocation for most []byte to string 44 // conversions, since it can now prove that the string cannot escape Parse. 45 // 46 // TODO: Use strings.Clone instead? However, we cannot depend on "strings" 47 // since it incurs a transitive dependency on "unicode". 48 // Either move strings.Clone to an internal/bytealg or make the 49 // "strings" to "unicode" dependency lighter (see https://go.dev/issue/54098). 50 func cloneString(x string) string { return string([]byte(x)) } 51 52 func syntaxError(fn, str string) *NumError { 53 return &NumError{fn, cloneString(str), ErrSyntax} 54 } 55 56 func rangeError(fn, str string) *NumError { 57 return &NumError{fn, cloneString(str), ErrRange} 58 } 59 60 func baseError(fn, str string, base int) *NumError { 61 return &NumError{fn, cloneString(str), errors.New("invalid base " + Itoa(base))} 62 } 63 64 func bitSizeError(fn, str string, bitSize int) *NumError { 65 return &NumError{fn, cloneString(str), errors.New("invalid bit size " + Itoa(bitSize))} 66 } 67 68 const intSize = 32 << (^uint(0) >> 63) 69 70 // IntSize is the size in bits of an int or uint value. 71 const IntSize = intSize 72 73 const maxUint64 = 1<<64 - 1 74 75 // ParseUint is like ParseInt but for unsigned numbers. 76 // 77 // A sign prefix is not permitted. 78 func ParseUint(s string, base int, bitSize int) (uint64, error) { 79 const fnParseUint = "ParseUint" 80 81 if s == "" { 82 return 0, syntaxError(fnParseUint, s) 83 } 84 85 base0 := base == 0 86 87 s0 := s 88 switch { 89 case 2 <= base && base <= 36: 90 // valid base; nothing to do 91 92 case base == 0: 93 // Look for octal, hex prefix. 94 base = 10 95 if s[0] == '0' { 96 switch { 97 case len(s) >= 3 && lower(s[1]) == 'b': 98 base = 2 99 s = s[2:] 100 case len(s) >= 3 && lower(s[1]) == 'o': 101 base = 8 102 s = s[2:] 103 case len(s) >= 3 && lower(s[1]) == 'x': 104 base = 16 105 s = s[2:] 106 default: 107 base = 8 108 s = s[1:] 109 } 110 } 111 112 default: 113 return 0, baseError(fnParseUint, s0, base) 114 } 115 116 if bitSize == 0 { 117 bitSize = IntSize 118 } else if bitSize < 0 || bitSize > 64 { 119 return 0, bitSizeError(fnParseUint, s0, bitSize) 120 } 121 122 // Cutoff is the smallest number such that cutoff*base > maxUint64. 123 // Use compile-time constants for common cases. 124 var cutoff uint64 125 switch base { 126 case 10: 127 cutoff = maxUint64/10 + 1 128 case 16: 129 cutoff = maxUint64/16 + 1 130 default: 131 cutoff = maxUint64/uint64(base) + 1 132 } 133 134 maxVal := uint64(1)<<uint(bitSize) - 1 135 136 underscores := false 137 var n uint64 138 for _, c := range []byte(s) { 139 var d byte 140 switch { 141 case c == '_' && base0: 142 underscores = true 143 continue 144 case '0' <= c && c <= '9': 145 d = c - '0' 146 case 'a' <= lower(c) && lower(c) <= 'z': 147 d = lower(c) - 'a' + 10 148 default: 149 return 0, syntaxError(fnParseUint, s0) 150 } 151 152 if d >= byte(base) { 153 return 0, syntaxError(fnParseUint, s0) 154 } 155 156 if n >= cutoff { 157 // n*base overflows 158 return maxVal, rangeError(fnParseUint, s0) 159 } 160 n *= uint64(base) 161 162 n1 := n + uint64(d) 163 if n1 < n || n1 > maxVal { 164 // n+d overflows 165 return maxVal, rangeError(fnParseUint, s0) 166 } 167 n = n1 168 } 169 170 if underscores && !underscoreOK(s0) { 171 return 0, syntaxError(fnParseUint, s0) 172 } 173 174 return n, nil 175 } 176 177 // ParseInt interprets a string s in the given base (0, 2 to 36) and 178 // bit size (0 to 64) and returns the corresponding value i. 179 // 180 // The string may begin with a leading sign: "+" or "-". 181 // 182 // If the base argument is 0, the true base is implied by the string's 183 // prefix following the sign (if present): 2 for "0b", 8 for "0" or "0o", 184 // 16 for "0x", and 10 otherwise. Also, for argument base 0 only, 185 // underscore characters are permitted as defined by the Go syntax for 186 // [integer literals]. 187 // 188 // The bitSize argument specifies the integer type 189 // that the result must fit into. Bit sizes 0, 8, 16, 32, and 64 190 // correspond to int, int8, int16, int32, and int64. 191 // If bitSize is below 0 or above 64, an error is returned. 192 // 193 // The errors that ParseInt returns have concrete type *NumError 194 // and include err.Num = s. If s is empty or contains invalid 195 // digits, err.Err = ErrSyntax and the returned value is 0; 196 // if the value corresponding to s cannot be represented by a 197 // signed integer of the given size, err.Err = ErrRange and the 198 // returned value is the maximum magnitude integer of the 199 // appropriate bitSize and sign. 200 // 201 // [integer literals]: https://go.dev/ref/spec#Integer_literals 202 func ParseInt(s string, base int, bitSize int) (i int64, err error) { 203 const fnParseInt = "ParseInt" 204 205 if s == "" { 206 return 0, syntaxError(fnParseInt, s) 207 } 208 209 // Pick off leading sign. 210 s0 := s 211 neg := false 212 if s[0] == '+' { 213 s = s[1:] 214 } else if s[0] == '-' { 215 neg = true 216 s = s[1:] 217 } 218 219 // Convert unsigned and check range. 220 var un uint64 221 un, err = ParseUint(s, base, bitSize) 222 if err != nil && err.(*NumError).Err != ErrRange { 223 err.(*NumError).Func = fnParseInt 224 err.(*NumError).Num = cloneString(s0) 225 return 0, err 226 } 227 228 if bitSize == 0 { 229 bitSize = IntSize 230 } 231 232 cutoff := uint64(1 << uint(bitSize-1)) 233 if !neg && un >= cutoff { 234 return int64(cutoff - 1), rangeError(fnParseInt, s0) 235 } 236 if neg && un > cutoff { 237 return -int64(cutoff), rangeError(fnParseInt, s0) 238 } 239 n := int64(un) 240 if neg { 241 n = -n 242 } 243 return n, nil 244 } 245 246 // Atoi is equivalent to ParseInt(s, 10, 0), converted to type int. 247 func Atoi(s string) (int, error) { 248 const fnAtoi = "Atoi" 249 250 sLen := len(s) 251 if intSize == 32 && (0 < sLen && sLen < 10) || 252 intSize == 64 && (0 < sLen && sLen < 19) { 253 // Fast path for small integers that fit int type. 254 s0 := s 255 if s[0] == '-' || s[0] == '+' { 256 s = s[1:] 257 if len(s) < 1 { 258 return 0, syntaxError(fnAtoi, s0) 259 } 260 } 261 262 n := 0 263 for _, ch := range []byte(s) { 264 ch -= '0' 265 if ch > 9 { 266 return 0, syntaxError(fnAtoi, s0) 267 } 268 n = n*10 + int(ch) 269 } 270 if s0[0] == '-' { 271 n = -n 272 } 273 return n, nil 274 } 275 276 // Slow path for invalid, big, or underscored integers. 277 i64, err := ParseInt(s, 10, 0) 278 if nerr, ok := err.(*NumError); ok { 279 nerr.Func = fnAtoi 280 } 281 return int(i64), err 282 } 283 284 // underscoreOK reports whether the underscores in s are allowed. 285 // Checking them in this one function lets all the parsers skip over them simply. 286 // Underscore must appear only between digits or between a base prefix and a digit. 287 func underscoreOK(s string) bool { 288 // saw tracks the last character (class) we saw: 289 // ^ for beginning of number, 290 // 0 for a digit or base prefix, 291 // _ for an underscore, 292 // ! for none of the above. 293 saw := '^' 294 i := 0 295 296 // Optional sign. 297 if len(s) >= 1 && (s[0] == '-' || s[0] == '+') { 298 s = s[1:] 299 } 300 301 // Optional base prefix. 302 hex := false 303 if len(s) >= 2 && s[0] == '0' && (lower(s[1]) == 'b' || lower(s[1]) == 'o' || lower(s[1]) == 'x') { 304 i = 2 305 saw = '0' // base prefix counts as a digit for "underscore as digit separator" 306 hex = lower(s[1]) == 'x' 307 } 308 309 // Number proper. 310 for ; i < len(s); i++ { 311 // Digits are always okay. 312 if '0' <= s[i] && s[i] <= '9' || hex && 'a' <= lower(s[i]) && lower(s[i]) <= 'f' { 313 saw = '0' 314 continue 315 } 316 // Underscore must follow digit. 317 if s[i] == '_' { 318 if saw != '0' { 319 return false 320 } 321 saw = '_' 322 continue 323 } 324 // Underscore must also be followed by digit. 325 if saw == '_' { 326 return false 327 } 328 // Saw non-digit, non-underscore. 329 saw = '!' 330 } 331 return saw != '_' 332 }