github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/atof_native.h (about)

     1  /*
     2   * Copyright 2021 ByteDance Inc.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  #pragma once
    18  
    19  #include "native.h"
    20  
    21  /* decimical shift witout overflow, e.g. 9 << 61 overflow */
    22  #define MAX_SHIFT 60
    23  
    24  /* Decimal represent the integer or float
    25   * example 1: 1.1   {"11", 2, 1, 0}
    26   * example 2: -0.1  {"1", 1, 0, 1}
    27   * example 3: 999   {"999", 3, 3, 0}
    28   */
    29  typedef struct Decimal {
    30      char*  d;
    31      size_t cap;
    32      int    nd;
    33      int    dp;
    34      int    neg;
    35      int    trunc;
    36  } Decimal;
    37  
    38  /* decimal power of ten to binary power of two.
    39   * For example: POW_TAB[1]: 10 ** 1 ~ 2 ** 3
    40   */
    41  static const int POW_TAB[9] = {1, 3, 6, 9, 13, 16, 19, 23, 26};
    42  
    43  /* Left shift information for decimal.
    44   * For example, {2, "625"}.  That means that it will add 2 digits to the new decimal
    45   * when the prefix of decimal is from "625" to "999", and 1 digit from "0" to "624".
    46   */
    47  typedef struct lshift_cheat  {
    48      int   delta;                             // number of added digits when left shift
    49      const char  cutoff[100];                 // minus one digit if under the half(cutoff).
    50  } lshift_cheat;
    51  
    52  /* Look up for the decimal shift information by binary shift bits.
    53   * idx is shift bits for binary.
    54   * value is the shift information for decimal.
    55   * For example, idx is 4, the value is {2, "625"}.
    56   * That means the binary shift 4 bits left, will cause add 2 digits to the decimal
    57   * if the prefix of decimal is under "625".
    58   */
    59  const static lshift_cheat LSHIFT_TAB[61];
    60  
    61  static always_inline void decimal_init(Decimal *d, char *dbuf, size_t cap) {
    62      d->d = dbuf;
    63      d->cap = cap;
    64      for (int i = 0; i < d->cap; ++i) {
    65          d->d[i] = 0;
    66      }
    67      d->dp    = 0;
    68      d->nd    = 0;
    69      d->neg   = 0;
    70      d->trunc = 0;
    71  }
    72  
    73  static always_inline void decimal_set(Decimal *d, const char *s, ssize_t len, char *dbuf, ssize_t cap) {
    74      int i = 0;
    75  
    76      decimal_init(d, dbuf, cap);
    77      if (s[i] == '-') {
    78          i++;
    79          d->neg = 1;
    80      }
    81  
    82      int saw_dot = 0;
    83      for (; i < len; i++) {
    84          if ('0' <= s[i] && s[i] <= '9') {
    85              if (s[i] == '0' && d->nd == 0) { // ignore leading zeros
    86                  d->dp--;
    87                  continue;
    88              }
    89              if (d->nd < d->cap) {
    90                  d->d[d->nd] = s[i];
    91                  d->nd++;
    92              } else if (s[i] != '0') {
    93                  /* truncat the remaining digits */
    94                  d->trunc = 1;
    95              }
    96          } else if (s[i] == '.') {
    97              saw_dot = 1;
    98              d->dp = d->nd;
    99          } else {
   100              break;
   101          }
   102      }
   103  
   104      /* integer */
   105      if (saw_dot == 0) {
   106          d->dp = d->nd;
   107      }
   108  
   109      /* exponent */
   110      if (i < len && (s[i] == 'e' || s[i] == 'E')) {
   111          int exp = 0;
   112          int esgn = 1;
   113  
   114          i++;
   115          if (s[i] == '+') {
   116              i++;
   117          } else if (s[i] == '-') {
   118              i++;
   119              esgn = -1;
   120          }
   121  
   122          for (; i < len && ('0' <= s[i] && s[i] <= '9') && exp < 10000; i++) {
   123                  exp = exp * 10 + (s[i] - '0');
   124          }
   125          d->dp += exp * esgn;
   126      }
   127  
   128      return;
   129  }
   130  
   131  /* trim trailing zeros from number */
   132  static always_inline void trim(Decimal *d) {
   133      while (d->nd > 0 && d->d[d->nd - 1] == '0') {
   134          d->nd--;
   135      }
   136      if (d->nd == 0) {
   137          d->dp = 0;
   138      }
   139  }
   140  
   141  /* Binary shift right (/ 2) by k bits.  k <= maxShift to avoid overflow */
   142  static always_inline void right_shift(Decimal *d, uint32_t k) {
   143      int      r = 0; // read pointer
   144      int      w = 0; // write pointer
   145      uint64_t n = 0;
   146  
   147      /* Pick up enough leading digits to cover first shift */
   148      for (; n >> k == 0; r++) {
   149          if (r >= d->nd) {
   150              if (n == 0) {
   151                  d->nd = 0; // no digits for this num
   152                  return;
   153              }
   154              /* until n has enough bits for right shift */
   155              while (n >> k == 0) {
   156                  n *= 10;
   157                  r++;
   158              }
   159              break;
   160          }
   161          n = n * 10 + d->d[r] - '0'; // read the value from d.d
   162      }
   163      d->dp -= r - 1; // point shift left
   164  
   165      uint64_t mask = (1ull << k) - 1;
   166      uint64_t dig = 0;
   167  
   168      /* Pick up a digit, put down a digit */
   169      for (; r < d->nd; r++) {
   170          dig = n >> k;
   171          n &= mask;
   172          d->d[w++] = (char)(dig + '0');
   173          n = n * 10 + d->d[r] - '0';
   174      }
   175  
   176      /* Put down extra digits */
   177      while (n > 0) {
   178          dig = n >> k;
   179          n &= mask;
   180          if (w < d->cap) {
   181              d->d[w] = (char)(dig + '0');
   182              w++;
   183          } else if (dig > 0) {
   184              /* truncated */
   185              d->trunc = 1;
   186          }
   187          n *= 10;
   188      }
   189  
   190      d->nd = w;
   191      trim(d);
   192  }
   193  
   194  /* Compare the leading prefix, if b is lexicographically less, return 0 */
   195  static always_inline bool prefix_is_less(const char *b, const char *s, uint64_t bn) {
   196      int i = 0;
   197      for (; i < bn; i++) {
   198          if (s[i] == '\0') {
   199              return false;
   200          }
   201          if (b[i] != s[i]) {
   202              return b[i] < s[i];
   203          }
   204      }
   205      return s[i] != '\0';
   206  }
   207  
   208  /* Binary shift left (* 2) by k bits.  k <= maxShift to avoid overflow */
   209  static always_inline void left_shift(Decimal *d, uint32_t k) {
   210      int delta = LSHIFT_TAB[k].delta;
   211  
   212      if (prefix_is_less(d->d, LSHIFT_TAB[k].cutoff, d->nd)){
   213          delta--;
   214      }
   215  
   216      int r = d->nd;         // read index
   217      int w = d->nd + delta; // write index
   218      uint64_t n = 0;
   219      uint64_t quo = 0;
   220      uint64_t rem = 0;
   221  
   222      /* Pick up a digit, put down a digit */
   223      for (r--; r >= 0; r--) {
   224          n += (uint64_t)(d->d[r] - '0') << k;
   225          quo = n / 10;
   226          rem = n - 10 * quo;
   227          w--;
   228          if (w < d->cap) {
   229              d->d[w] = (char)(rem + '0');
   230          } else if (rem != 0) {
   231              /* truncated */
   232              d->trunc = 1;
   233          }
   234          n = quo;
   235      }
   236  
   237      /* Put down extra digits */
   238      while (n > 0) {
   239          quo = n / 10;
   240          rem = n - 10 * quo;
   241          w--;
   242          if (w < d->cap) {
   243              d->d[w] = (char)(rem + '0');
   244          } else if (rem != 0) {
   245              /* truncated */
   246              d->trunc = 1;
   247          }
   248          n = quo;
   249      }
   250  
   251      d->nd += delta;
   252      if (d->nd >= d->cap) {
   253          d->nd = d->cap;
   254      }
   255      d->dp += delta;
   256      trim(d);
   257  }
   258  
   259  static always_inline void decimal_shift(Decimal *d, int k) {
   260      if (d->nd == 0 || k == 0) {
   261          return;
   262      }
   263  
   264      if (k > 0) {
   265          while (k > MAX_SHIFT) {
   266              left_shift(d, MAX_SHIFT);
   267              k -= MAX_SHIFT;
   268          }
   269          if (k) {
   270              left_shift(d, k);
   271          }
   272      }
   273  
   274      if (k < 0) {
   275          while (k < -MAX_SHIFT) {
   276              right_shift(d, MAX_SHIFT);
   277              k += MAX_SHIFT;
   278          }
   279          if (k) {
   280              right_shift(d, -k);
   281          }
   282      }
   283  
   284  }
   285  
   286  static always_inline int should_roundup(Decimal *d, int nd) {
   287      if (nd < 0 || nd >= d->nd) {
   288          return 0;
   289      }
   290  
   291      /* Exactly halfway - round to even */
   292      if (d->d[nd] == '5' && nd+1 == d->nd) {
   293          if (d->trunc) {
   294              return 1;
   295          }
   296          return nd > 0 && (d->d[nd-1]-'0')%2 != 0;
   297      }
   298  
   299      /* not halfway - round to the nearest */
   300      return d->d[nd] >= '5';
   301  }
   302  
   303  /* Extract integer part, rounded appropriately */
   304  static always_inline uint64_t rounded_integer(Decimal *d) {
   305      if (d->dp > 20) { // overflow
   306          return 0xFFFFFFFFFFFFFFFF; //64 bits
   307      }
   308  
   309      int i = 0;
   310      uint64_t n = 0;
   311      for (i = 0; i < d->dp && i < d->nd; i++) {
   312          n = n * 10 + (d->d[i] - '0');
   313      }
   314      for (; i < d->dp; i++) {
   315          n *= 10;
   316      }
   317      if (should_roundup(d, d->dp)) {
   318          n++;
   319      }
   320      return n;
   321  }
   322  
   323  static always_inline int decimal_to_f64(Decimal *d, double *val) {
   324      int exp2 = 0;
   325      uint64_t mant = 0;
   326      uint64_t bits = 0;
   327  
   328      /* d is zero */
   329      if (d->nd == 0) {
   330          mant = 0;
   331          exp2 = -1023;
   332          goto out;
   333      }
   334  
   335      /* Overflow, return inf/INF */
   336      if (d->dp > 310) {
   337          goto overflow;
   338      }
   339      /* Underflow, return zero */
   340      if (d->dp < -330) {
   341          mant = 0;
   342          exp2 = -1023;
   343          goto out;
   344      }
   345  
   346      /* Scale by powers of two until in range [0.5, 1.0) */
   347      int n = 0;
   348      while (d->dp > 0) { // d >= 1
   349          if (d->dp >= 9) {
   350              n = 27;
   351          } else {
   352              n = POW_TAB[d->dp];
   353          }
   354          decimal_shift(d, -n); // shift right
   355          exp2 += n;
   356      }
   357      while ((d->dp < 0) || ((d->dp == 0) && (d->d[0] < '5'))) { // d < 0.5
   358          if (-d->dp >= 9) {
   359              n = 27;
   360          } else {
   361              n = POW_TAB[-d->dp];
   362          }
   363          decimal_shift(d, n); // shift left
   364          exp2 -= n;
   365      }
   366  
   367      /* Our range is [0.5,1) but floating point range is [1,2) */
   368      exp2 --;
   369  
   370      /* Minimum exp2 for doulbe is -1022.
   371       * If the exponent is smaller, move it up and
   372       * adjust d accordingly.
   373       */
   374      if (exp2 < -1022) {
   375          n = -1022 - exp2;
   376          decimal_shift(d, -n); // shift right
   377          exp2 += n;
   378      }
   379  
   380      /* Exp2 too large */
   381      if ((exp2 + 1023) >= 0x7FF) {
   382          goto overflow;
   383      }
   384  
   385      /* Extract 53 bits. */
   386      decimal_shift(d, 53);  // shift left
   387      mant = rounded_integer(d);
   388  
   389      /* Rounding might have added a bit; shift down. */
   390      if (mant == (2ull << 52)) { // mant has 54 bits
   391          mant >>= 1;
   392          exp2 ++;
   393          if ((exp2 + 1023) >= 0x7FF) {
   394              goto overflow;
   395          }
   396      }
   397  
   398      /* Denormalized? */
   399      if ((mant & (1ull << 52)) == 0) {
   400          exp2 = -1023;
   401      }
   402      goto out;
   403  
   404  overflow:
   405      /* ±INF/inf */
   406      mant = 0;
   407      exp2 = 0x7FF - 1023;
   408  
   409  out:
   410      /* Assemble bits. */
   411      bits = mant & 0x000FFFFFFFFFFFFF;
   412      bits |= (uint64_t)((exp2 + 1023) & 0x7FF) << 52;
   413      if (d->neg) {
   414          bits |= 1ull << 63;
   415      }
   416      *(uint64_t*)val = bits;
   417      return 0;
   418  }
   419  
   420  static always_inline double atof_native_1(const char *sp, ssize_t nb, char* dbuf, ssize_t cap) {
   421      Decimal d;
   422      double val = 0;
   423      decimal_set(&d, sp, nb, dbuf, cap);
   424      decimal_to_f64(&d, &val);
   425      return val;
   426  }
   427  
   428  #undef MAX_SHIFT
   429  
   430  const static lshift_cheat LSHIFT_TAB[61] = {
   431      // Leading digits of 1/2^i = 5^i.
   432      // 5^23 is not an exact 64-bit floating point number,
   433      // so have to use bc for the math.
   434      // Go up to 60 to be large enough for 32bit and 64bit platforms.
   435      /*
   436          seq 60 | sed 's/^/5^/' | bc |
   437          awk 'BEGIN{ print "\t{ 0, \"\" }," }
   438          {
   439              log2 = log(2)/log(10)
   440              printf("\t{ %d, \"%s\" },\t// * %d\n",
   441                  int(log2*NR+1), $0, 2**NR)
   442          }'
   443      */
   444      {0, ""},
   445      {1, "5"},                                           // * 2
   446      {1, "25"},                                          // * 4
   447      {1, "125"},                                         // * 8
   448      {2, "625"},                                         // * 16
   449      {2, "3125"},                                        // * 32
   450      {2, "15625"},                                       // * 64
   451      {3, "78125"},                                       // * 128
   452      {3, "390625"},                                      // * 256
   453      {3, "1953125"},                                     // * 512
   454      {4, "9765625"},                                     // * 1024
   455      {4, "48828125"},                                    // * 2048
   456      {4, "244140625"},                                   // * 4096
   457      {4, "1220703125"},                                  // * 8192
   458      {5, "6103515625"},                                  // * 16384
   459      {5, "30517578125"},                                 // * 32768
   460      {5, "152587890625"},                                // * 65536
   461      {6, "762939453125"},                                // * 131072
   462      {6, "3814697265625"},                               // * 262144
   463      {6, "19073486328125"},                              // * 524288
   464      {7, "95367431640625"},                              // * 1048576
   465      {7, "476837158203125"},                             // * 2097152
   466      {7, "2384185791015625"},                            // * 4194304
   467      {7, "11920928955078125"},                           // * 8388608
   468      {8, "59604644775390625"},                           // * 16777216
   469      {8, "298023223876953125"},                          // * 33554432
   470      {8, "1490116119384765625"},                         // * 67108864
   471      {9, "7450580596923828125"},                         // * 134217728
   472      {9, "37252902984619140625"},                        // * 268435456
   473      {9, "186264514923095703125"},                       // * 536870912
   474      {10, "931322574615478515625"},                      // * 1073741824
   475      {10, "4656612873077392578125"},                     // * 2147483648
   476      {10, "23283064365386962890625"},                    // * 4294967296
   477      {10, "116415321826934814453125"},                   // * 8589934592
   478      {11, "582076609134674072265625"},                   // * 17179869184
   479      {11, "2910383045673370361328125"},                  // * 34359738368
   480      {11, "14551915228366851806640625"},                 // * 68719476736
   481      {12, "72759576141834259033203125"},                 // * 137438953472
   482      {12, "363797880709171295166015625"},                // * 274877906944
   483      {12, "1818989403545856475830078125"},               // * 549755813888
   484      {13, "9094947017729282379150390625"},               // * 1099511627776
   485      {13, "45474735088646411895751953125"},              // * 2199023255552
   486      {13, "227373675443232059478759765625"},             // * 4398046511104
   487      {13, "1136868377216160297393798828125"},            // * 8796093022208
   488      {14, "5684341886080801486968994140625"},            // * 17592186044416
   489      {14, "28421709430404007434844970703125"},           // * 35184372088832
   490      {14, "142108547152020037174224853515625"},          // * 70368744177664
   491      {15, "710542735760100185871124267578125"},          // * 140737488355328
   492      {15, "3552713678800500929355621337890625"},         // * 281474976710656
   493      {15, "17763568394002504646778106689453125"},        // * 562949953421312
   494      {16, "88817841970012523233890533447265625"},        // * 1125899906842624
   495      {16, "444089209850062616169452667236328125"},       // * 2251799813685248
   496      {16, "2220446049250313080847263336181640625"},      // * 4503599627370496
   497      {16, "11102230246251565404236316680908203125"},     // * 9007199254740992
   498      {17, "55511151231257827021181583404541015625"},     // * 18014398509481984
   499      {17, "277555756156289135105907917022705078125"},    // * 36028797018963968
   500      {17, "1387778780781445675529539585113525390625"},   // * 72057594037927936
   501      {18, "6938893903907228377647697925567626953125"},   // * 144115188075855872
   502      {18, "34694469519536141888238489627838134765625"},  // * 288230376151711744
   503      {18, "173472347597680709441192448139190673828125"}, // * 576460752303423488
   504      {19, "867361737988403547205962240695953369140625"}, // * 1152921504606846976
   505  };