// github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/parse_number.go (about)

/*
 * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package simdjson

import (
	"errors"
	"math"
	"reflect"
	"strconv"
	"unsafe"
)

// Classification flags stored per byte in isNumberRune. Each flag is a
// distinct bit so several can be combined on one byte and accumulated with
// bitwise OR while scanning a number.
const (
	// isPartOfNumberFlag marks every byte that may appear inside a number:
	// the digits, '.', '+', '-', 'e' and 'E'.
	isPartOfNumberFlag = 1 << iota
	// isFloatOnlyFlag marks '.', 'e' and 'E'. parseNumber does not attempt
	// integer parsing once one of these has been seen.
	isFloatOnlyFlag
	// isMinusFlag marks '-'.
	isMinusFlag
	// isEOVFlag marks bytes that terminate the scan in parseNumber
	// (presumably "end of value"): ',', '}', ']', ':', space, tab, CR and LF.
	isEOVFlag
	// isDigitFlag marks the ASCII digits '0' through '9'.
	isDigitFlag
	// isMustHaveDigitNext marks '.' and '-', which parseNumber only accepts
	// when the following byte is a digit.
	isMustHaveDigitNext
)

// isNumberRune maps every possible byte value to its classification flags.
// Bytes that are not listed keep the zero value, which parseNumber treats as
// an invalid character and rejects the whole number.
var isNumberRune = [256]uint8{
	'0':  isPartOfNumberFlag | isDigitFlag,
	'1':  isPartOfNumberFlag | isDigitFlag,
	'2':  isPartOfNumberFlag | isDigitFlag,
	'3':  isPartOfNumberFlag | isDigitFlag,
	'4':  isPartOfNumberFlag | isDigitFlag,
	'5':  isPartOfNumberFlag | isDigitFlag,
	'6':  isPartOfNumberFlag | isDigitFlag,
	'7':  isPartOfNumberFlag | isDigitFlag,
	'8':  isPartOfNumberFlag | isDigitFlag,
	'9':  isPartOfNumberFlag | isDigitFlag,
	'.':  isPartOfNumberFlag | isFloatOnlyFlag | isMustHaveDigitNext,
	'+':  isPartOfNumberFlag,
	'-':  isPartOfNumberFlag | isMinusFlag | isMustHaveDigitNext,
	'e':  isPartOfNumberFlag | isFloatOnlyFlag,
	'E':  isPartOfNumberFlag | isFloatOnlyFlag,
	',':  isEOVFlag,
	'}':  isEOVFlag,
	']':  isEOVFlag,
	' ':  isEOVFlag,
	'\t': isEOVFlag,
	'\r': isEOVFlag,
	'\n': isEOVFlag,
	':':  isEOVFlag,
}

// parseNumber will parse the number starting in the buffer.
// Any non-number characters at the end will be ignored.
64 // Returns TagEnd if no valid value found be found. 65 func parseNumber(buf []byte) (id, val uint64) { 66 pos := 0 67 found := uint8(0) 68 for i, v := range buf { 69 t := isNumberRune[v] 70 if t == 0 { 71 //fmt.Println("aborting on", string(v), "in", string(buf[:i])) 72 return 0, 0 73 } 74 if t == isEOVFlag { 75 break 76 } 77 if t&isMustHaveDigitNext > 0 { 78 // A period and minus must be followed by a digit 79 if len(buf) < i+2 || isNumberRune[buf[i+1]]&isDigitFlag == 0 { 80 return 0, 0 81 } 82 } 83 found |= t 84 pos = i + 1 85 } 86 if pos == 0 { 87 return 0, 0 88 } 89 const maxIntLen = 20 90 floatTag := uint64(TagFloat) << JSONTAGOFFSET 91 92 // Only try integers if we didn't find any float exclusive and it can fit in an integer. 93 if found&isFloatOnlyFlag == 0 && pos <= maxIntLen { 94 if found&isMinusFlag == 0 { 95 if pos > 1 && buf[0] == '0' { 96 // Integers cannot have a leading zero. 97 return 0, 0 98 } 99 } else { 100 if pos > 2 && buf[1] == '0' { 101 // Integers cannot have a leading zero after minus. 102 return 0, 0 103 } 104 } 105 i64, err := strconv.ParseInt(unsafeBytesToString(buf[:pos]), 10, 64) 106 if err == nil { 107 return uint64(TagInteger) << JSONTAGOFFSET, uint64(i64) 108 } 109 if errors.Is(err, strconv.ErrRange) { 110 floatTag |= uint64(FloatOverflowedInteger) 111 } 112 113 if found&isMinusFlag == 0 { 114 u64, err := strconv.ParseUint(unsafeBytesToString(buf[:pos]), 10, 64) 115 if err == nil { 116 return uint64(TagUint) << JSONTAGOFFSET, u64 117 } 118 if errors.Is(err, strconv.ErrRange) { 119 floatTag |= uint64(FloatOverflowedInteger) 120 } 121 } 122 } else if found&isFloatOnlyFlag == 0 { 123 floatTag |= uint64(FloatOverflowedInteger) 124 } 125 126 if pos > 1 && buf[0] == '0' && isNumberRune[buf[1]]&isFloatOnlyFlag == 0 { 127 // Float can only have have a leading 0 when followed by a period. 
128 return 0, 0 129 } 130 f64, err := strconv.ParseFloat(unsafeBytesToString(buf[:pos]), 64) 131 if err == nil { 132 return floatTag, math.Float64bits(f64) 133 } 134 return 0, 0 135 } 136 137 // unsafeBytesToString should only be used when we have control of b. 138 func unsafeBytesToString(b []byte) (s string) { 139 var length = len(b) 140 141 if length == 0 { 142 return "" 143 } 144 145 stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&s)) 146 stringHeader.Data = uintptr(unsafe.Pointer(&b[0])) 147 stringHeader.Len = length 148 return s 149 }