github.com/minio/simdjson-go@v0.4.6-0.20231116094823-04d21cddf993/parse_number.go (about)

     1  /*
     2   * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package simdjson
    18  
    19  import (
    20  	"errors"
    21  	"math"
    22  	"reflect"
    23  	"strconv"
    24  	"unsafe"
    25  )
    26  
    27  const (
    28  	isPartOfNumberFlag = 1 << iota
    29  	isFloatOnlyFlag
    30  	isMinusFlag
    31  	isEOVFlag
    32  	isDigitFlag
    33  	isMustHaveDigitNext
    34  )
    35  
    36  var isNumberRune = [256]uint8{
    37  	'0':  isPartOfNumberFlag | isDigitFlag,
    38  	'1':  isPartOfNumberFlag | isDigitFlag,
    39  	'2':  isPartOfNumberFlag | isDigitFlag,
    40  	'3':  isPartOfNumberFlag | isDigitFlag,
    41  	'4':  isPartOfNumberFlag | isDigitFlag,
    42  	'5':  isPartOfNumberFlag | isDigitFlag,
    43  	'6':  isPartOfNumberFlag | isDigitFlag,
    44  	'7':  isPartOfNumberFlag | isDigitFlag,
    45  	'8':  isPartOfNumberFlag | isDigitFlag,
    46  	'9':  isPartOfNumberFlag | isDigitFlag,
    47  	'.':  isPartOfNumberFlag | isFloatOnlyFlag | isMustHaveDigitNext,
    48  	'+':  isPartOfNumberFlag,
    49  	'-':  isPartOfNumberFlag | isMinusFlag | isMustHaveDigitNext,
    50  	'e':  isPartOfNumberFlag | isFloatOnlyFlag,
    51  	'E':  isPartOfNumberFlag | isFloatOnlyFlag,
    52  	',':  isEOVFlag,
    53  	'}':  isEOVFlag,
    54  	']':  isEOVFlag,
    55  	' ':  isEOVFlag,
    56  	'\t': isEOVFlag,
    57  	'\r': isEOVFlag,
    58  	'\n': isEOVFlag,
    59  	':':  isEOVFlag,
    60  }
    61  
    62  // parseNumber will parse the number starting in the buffer.
    63  // Any non-number characters at the end will be ignored.
    64  // Returns TagEnd if no valid value found be found.
    65  func parseNumber(buf []byte) (id, val uint64) {
    66  	pos := 0
    67  	found := uint8(0)
    68  	for i, v := range buf {
    69  		t := isNumberRune[v]
    70  		if t == 0 {
    71  			//fmt.Println("aborting on", string(v), "in", string(buf[:i]))
    72  			return 0, 0
    73  		}
    74  		if t == isEOVFlag {
    75  			break
    76  		}
    77  		if t&isMustHaveDigitNext > 0 {
    78  			// A period and minus must be followed by a digit
    79  			if len(buf) < i+2 || isNumberRune[buf[i+1]]&isDigitFlag == 0 {
    80  				return 0, 0
    81  			}
    82  		}
    83  		found |= t
    84  		pos = i + 1
    85  	}
    86  	if pos == 0 {
    87  		return 0, 0
    88  	}
    89  	const maxIntLen = 20
    90  	floatTag := uint64(TagFloat) << JSONTAGOFFSET
    91  
    92  	// Only try integers if we didn't find any float exclusive and it can fit in an integer.
    93  	if found&isFloatOnlyFlag == 0 && pos <= maxIntLen {
    94  		if found&isMinusFlag == 0 {
    95  			if pos > 1 && buf[0] == '0' {
    96  				// Integers cannot have a leading zero.
    97  				return 0, 0
    98  			}
    99  		} else {
   100  			if pos > 2 && buf[1] == '0' {
   101  				// Integers cannot have a leading zero after minus.
   102  				return 0, 0
   103  			}
   104  		}
   105  		i64, err := strconv.ParseInt(unsafeBytesToString(buf[:pos]), 10, 64)
   106  		if err == nil {
   107  			return uint64(TagInteger) << JSONTAGOFFSET, uint64(i64)
   108  		}
   109  		if errors.Is(err, strconv.ErrRange) {
   110  			floatTag |= uint64(FloatOverflowedInteger)
   111  		}
   112  
   113  		if found&isMinusFlag == 0 {
   114  			u64, err := strconv.ParseUint(unsafeBytesToString(buf[:pos]), 10, 64)
   115  			if err == nil {
   116  				return uint64(TagUint) << JSONTAGOFFSET, u64
   117  			}
   118  			if errors.Is(err, strconv.ErrRange) {
   119  				floatTag |= uint64(FloatOverflowedInteger)
   120  			}
   121  		}
   122  	} else if found&isFloatOnlyFlag == 0 {
   123  		floatTag |= uint64(FloatOverflowedInteger)
   124  	}
   125  
   126  	if pos > 1 && buf[0] == '0' && isNumberRune[buf[1]]&isFloatOnlyFlag == 0 {
   127  		// Float can only have have a leading 0 when followed by a period.
   128  		return 0, 0
   129  	}
   130  	f64, err := strconv.ParseFloat(unsafeBytesToString(buf[:pos]), 64)
   131  	if err == nil {
   132  		return floatTag, math.Float64bits(f64)
   133  	}
   134  	return 0, 0
   135  }
   136  
   137  // unsafeBytesToString should only be used when we have control of b.
   138  func unsafeBytesToString(b []byte) (s string) {
   139  	var length = len(b)
   140  
   141  	if length == 0 {
   142  		return ""
   143  	}
   144  
   145  	stringHeader := (*reflect.StringHeader)(unsafe.Pointer(&s))
   146  	stringHeader.Data = uintptr(unsafe.Pointer(&b[0]))
   147  	stringHeader.Len = length
   148  	return s
   149  }