github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/f32toa.c (about)

     1  /* Copyright 2020 Alexander Bolz
     2   * 
     3   * Boost Software License - Version 1.0 - August 17th, 2003
     4   * 
     5   * Permission is hereby granted, free of charge, to any person or organization
     6   * obtaining a copy of the software and accompanying documentation covered by
     7   * this license (the "Software") to use, reproduce, display, distribute,
     8   * execute, and transmit the Software, and to prepare derivative works of the
     9   * Software, and to permit third-parties to whom the Software is furnished to
    10   * do so, all subject to the following:
    11   * 
    12   * The copyright notices in the Software and this entire statement, including
    13   * the above license grant, this restriction and the following disclaimer,
    14   * must be included in all copies of the Software, in whole or in part, and
    15   * all derivative works of the Software, unless such copies or derivative
    16   * works are solely in the form of machine-executable object code generated by
    17   * a source language processor.
    18   *
    19   * Unless required by applicable law or agreed to in writing, this software
    20   * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    21   * KIND, either express or implied.
    22   * 
    23   * This file may have been modified by ByteDance authors. All ByteDance
    24   * Modifications are Copyright 2022 ByteDance Authors.
    25   */
    26  
    27  #include "native.h"
    28  #include "tab.h"
    29  #include "test/xassert.h"
    30  
    31  #define F32_BITS         32
    32  #define F32_EXP_BITS     8
    33  #define F32_SIG_BITS     23
    34  #define F32_EXP_MASK     0x7F800000u // middle 8 bits
    35  #define F32_SIG_MASK     0x007FFFFFu // lower 23 bits
    36  #define F32_EXP_BIAS     127
    37  #define F32_INF_NAN_EXP  0xFF
    38  #define F32_HIDDEN_BIT   0x00800000u
    39  
    40  typedef struct {
    41      uint32_t sig;
    42      int32_t exp;
    43  } f32_dec;
    44  
    45  typedef __uint128_t uint128_t;
    46  
    47  static always_inline unsigned ctz10_u32(const uint32_t v) {
    48      xassert(0 <= v && v < 1000000000u);
    49      if (v >= 100000) {
    50          if (v <   1000000) return 6;
    51          if (v <  10000000) return 7;
    52          if (v < 100000000) return 8;
    53          else               return 9;
    54      } else {
    55          if (v <        10) return 1;
    56          if (v <       100) return 2;
    57          if (v <      1000) return 3;
    58          if (v <     10000) return 4;
    59          else               return 5;
    60      }
    61  }
    62  
    63  static always_inline char* format_significand_f32(uint32_t sig, char *out, int cnt) {
    64      char *r = out + cnt;
    65      int ctz = 0;
    66  
    67      /* at most 9 digits here */
    68      if  (sig >= 10000) {
    69          uint32_t c = sig - 10000 * (sig / 10000);
    70          sig /= 10000;
    71          if (c != 0) {
    72              uint32_t c0 = (c % 100) << 1;
    73              uint32_t c1 = (c / 100) << 1;
    74              copy_two_digs(r - 2, Digits + c0);
    75              copy_two_digs(r - 4, Digits + c1);
    76          } else {
    77              ctz = 4;
    78          }
    79          r -= 4;
    80      }
    81  
    82      while (sig >= 100) {
    83          uint32_t c = (sig % 100) << 1;
    84          sig /= 100;
    85          copy_two_digs(r - 2, Digits + c);
    86          r -= 2;
    87      }
    88  
    89      if (sig >= 10) {
    90          uint32_t c = sig << 1;
    91          copy_two_digs(out, Digits + c);
    92      } else {
    93          *out = (char) ('0' + sig);
    94      }
    95  
    96      return out + cnt - ctz;
    97  }
    98  
    99  static always_inline char* format_integer_u32(uint32_t sig, char *out, unsigned cnt) {
   100      char *r = out + cnt;
   101  
   102      /* at most 9 digits here */
   103      if  (sig >= 10000) {
   104          uint32_t c = sig - 10000 * (sig / 10000);
   105          sig /= 10000;
   106          uint32_t c0 = (c % 100) << 1;
   107          uint32_t c1 = (c / 100) << 1;
   108          copy_two_digs(r - 2, Digits + c0);
   109          copy_two_digs(r - 4, Digits + c1);
   110          r -= 4;
   111      }
   112  
   113      while (sig >= 100) {
   114          uint32_t c = (sig % 100) << 1;
   115          sig /= 100;
   116          copy_two_digs(r - 2, Digits + c);
   117          r -= 2;
   118      }
   119  
   120      if (sig >= 10) {
   121          uint32_t c = sig << 1;
   122          copy_two_digs(out, Digits + c);
   123      } else {
   124          *out = (char) ('0' + sig);
   125      }
   126  
   127      return out + cnt;
   128  }
   129  
   130  static always_inline char* format_exponent_f32(f32_dec v, char *out, int cnt) {
   131      char* p = out + 1;
   132      char* end = format_significand_f32(v.sig, p, cnt);
   133      while (*(end - 1) == '0') end--;
   134  
   135      /* Print decimal point if needed */
   136      *out = *p;
   137      if (end - p > 1) {
   138          *p = '.';
   139      } else {
   140          end--;
   141      }
   142  
   143      /* Print the exponent */
   144      *end++ = 'e';
   145      int32_t exp = v.exp + (int32_t) cnt - 1;
   146      if (exp < 0) {
   147          *end++ = '-';
   148          exp = -exp;
   149      } else {
   150          *end++ = '+';
   151      }
   152  
   153      if (exp >= 100) {
   154          int32_t c = exp % 10;
   155          copy_two_digs(end, Digits + 2 * (exp / 10));
   156          end[2] = (char) ('0' + c);
   157          end += 3;
   158      } else if (exp >= 10) {
   159          copy_two_digs(end, Digits + 2 * exp);
   160          end += 2;
   161      } else {
   162          *end++ = (char) ('0' + exp);
   163      }
   164      return end;
   165  }
   166  
   167  static always_inline char* format_decimal_f32(f32_dec v, char* out, int cnt) {
   168      char* p = out;
   169      char* end;
   170      int point = cnt + v.exp;
   171  
   172      /* print leading zeros if fp < 1 */
   173      if (point <= 0) {
   174          *p++ = '0', *p++ = '.';
   175          for (int i = 0; i < -point; i++) {
   176              *p++ = '0';
   177          }
   178      }
   179  
   180      /* add the remaining digits */
   181      end = format_significand_f32(v.sig, p, cnt);
   182      while (*(end - 1) == '0') end--;
   183      if (point <= 0) {
   184          return end;
   185      }
   186  
   187      /* insert point or add trailing zeros */
   188      int digs = end - p, frac = digs - point;
   189      if (digs > point) {
   190          for (int i = 0; i < frac; i++) {
   191              *(end - i) = *(end - i - 1);
   192          }
   193          p[point] = '.';
   194          end++;
   195      } else {
   196          for (int i = 0; i < point - digs; i++) {
   197              *end++ = '0';
   198          }
   199      }
   200      return end;
   201  }
   202  
   203  static always_inline char* write_dec_f32(f32_dec dec, char* p) {
   204      int cnt = ctz10_u32(dec.sig);
   205      int dot = cnt + dec.exp;
   206      int sci_exp = dot - 1;
   207      bool exp_fmt = sci_exp < -6 || sci_exp > 20;
   208      bool has_dot = dot < cnt;
   209  
   210      if (exp_fmt) {
   211          return format_exponent_f32(dec, p, cnt);
   212      }
   213      if (has_dot) {
   214          return format_decimal_f32(dec, p, cnt);
   215      }
   216  
   217      char* end = p + dot;
   218      p = format_integer_u32(dec.sig, p, cnt);
   219      while (p < end) *p++ = '0';
   220      return end;
   221  }
   222  
   223  static always_inline uint32_t f32toraw(float fp) {
   224      union {
   225          uint32_t u32;
   226          float   f32;
   227      } uval;
   228      uval.f32 = fp;
   229      return uval.u32;
   230  }
   231  
   232  static always_inline uint64_t pow10_ceil_sig_f32(int32_t k)
   233  {
   234      // There are unique beta and r such that 10^k = beta 2^r and
   235      // 2^63 <= beta < 2^64, namely r = floor(log_2 10^k) - 63 and
   236      // beta = 2^-r 10^k.
   237      // Let g = ceil(beta), so (g-1) 2^r < 10^k <= g 2^r, with the latter
   238      // value being a pretty good overestimate for 10^k.
   239  
   240      // NB: Since for all the required exponents k, we have g < 2^64,
   241      //     all constants can be stored in 128-bit integers.
   242      // reference from: 
   243      //  https://github.com/abolz/Drachennest/blob/master/src/schubfach_32.cc#L144
   244  
   245  #define KMAX 45
   246  #define KMIN -31
   247      static const uint64_t g[KMAX - KMIN + 1] = {
   248          0x81CEB32C4B43FCF5, // -31
   249          0xA2425FF75E14FC32, // -30
   250          0xCAD2F7F5359A3B3F, // -29
   251          0xFD87B5F28300CA0E, // -28
   252          0x9E74D1B791E07E49, // -27
   253          0xC612062576589DDB, // -26
   254          0xF79687AED3EEC552, // -25
   255          0x9ABE14CD44753B53, // -24
   256          0xC16D9A0095928A28, // -23
   257          0xF1C90080BAF72CB2, // -22
   258          0x971DA05074DA7BEF, // -21
   259          0xBCE5086492111AEB, // -20
   260          0xEC1E4A7DB69561A6, // -19
   261          0x9392EE8E921D5D08, // -18
   262          0xB877AA3236A4B44A, // -17
   263          0xE69594BEC44DE15C, // -16
   264          0x901D7CF73AB0ACDA, // -15
   265          0xB424DC35095CD810, // -14
   266          0xE12E13424BB40E14, // -13
   267          0x8CBCCC096F5088CC, // -12
   268          0xAFEBFF0BCB24AAFF, // -11
   269          0xDBE6FECEBDEDD5BF, // -10
   270          0x89705F4136B4A598, //  -9
   271          0xABCC77118461CEFD, //  -8
   272          0xD6BF94D5E57A42BD, //  -7
   273          0x8637BD05AF6C69B6, //  -6
   274          0xA7C5AC471B478424, //  -5
   275          0xD1B71758E219652C, //  -4
   276          0x83126E978D4FDF3C, //  -3
   277          0xA3D70A3D70A3D70B, //  -2
   278          0xCCCCCCCCCCCCCCCD, //  -1
   279          0x8000000000000000, //   0
   280          0xA000000000000000, //   1
   281          0xC800000000000000, //   2
   282          0xFA00000000000000, //   3
   283          0x9C40000000000000, //   4
   284          0xC350000000000000, //   5
   285          0xF424000000000000, //   6
   286          0x9896800000000000, //   7
   287          0xBEBC200000000000, //   8
   288          0xEE6B280000000000, //   9
   289          0x9502F90000000000, //  10
   290          0xBA43B74000000000, //  11
   291          0xE8D4A51000000000, //  12
   292          0x9184E72A00000000, //  13
   293          0xB5E620F480000000, //  14
   294          0xE35FA931A0000000, //  15
   295          0x8E1BC9BF04000000, //  16
   296          0xB1A2BC2EC5000000, //  17
   297          0xDE0B6B3A76400000, //  18
   298          0x8AC7230489E80000, //  19
   299          0xAD78EBC5AC620000, //  20
   300          0xD8D726B7177A8000, //  21
   301          0x878678326EAC9000, //  22
   302          0xA968163F0A57B400, //  23
   303          0xD3C21BCECCEDA100, //  24
   304          0x84595161401484A0, //  25
   305          0xA56FA5B99019A5C8, //  26
   306          0xCECB8F27F4200F3A, //  27
   307          0x813F3978F8940985, //  28
   308          0xA18F07D736B90BE6, //  29
   309          0xC9F2C9CD04674EDF, //  30
   310          0xFC6F7C4045812297, //  31
   311          0x9DC5ADA82B70B59E, //  32
   312          0xC5371912364CE306, //  33
   313          0xF684DF56C3E01BC7, //  34
   314          0x9A130B963A6C115D, //  35
   315          0xC097CE7BC90715B4, //  36
   316          0xF0BDC21ABB48DB21, //  37
   317          0x96769950B50D88F5, //  38
   318          0xBC143FA4E250EB32, //  39
   319          0xEB194F8E1AE525FE, //  40
   320          0x92EFD1B8D0CF37BF, //  41
   321          0xB7ABC627050305AE, //  42
   322          0xE596B7B0C643C71A, //  43
   323          0x8F7E32CE7BEA5C70, //  44
   324          0xB35DBF821AE4F38C, //  45
   325      };
   326  
   327      xassert(k >= KMIN && k <= KMAX);
   328      return g[k - KMIN];
   329  #undef KMIN
   330  #undef KMAX
   331  }
   332  
   333  static always_inline uint32_t round_odd_f32(uint64_t g, uint32_t cp) {
   334      const uint128_t p = ((uint128_t)g) * cp;
   335      const uint32_t y1 = (uint64_t)(p >> 64);
   336      const uint32_t y0 = ((uint64_t)(p)) >> 32;
   337      return y1 | (y0 > 1);
   338  }
   339  
   340  /**
   341   Rendering float point number into decimal.
   342   The function used Schubfach algorithm, reference:
   343   The Schubfach way to render doubles, Raffaello Giulietti, 2022-03-20.
   344   https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb
   345   https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html
   346   https://github.com/openjdk/jdk/pull/3402 (Java implementation)
   347   https://github.com/abolz/Drachennest (C++ implementation)
   348   */
   349  static always_inline f32_dec f32todec(uint32_t rsig, int32_t rexp, uint32_t c, int32_t q) {
   350      uint32_t cbl, cb, cbr, vbl, vb, vbr, lower, upper, s;
   351      int32_t k, h;
   352      bool even, irregular, w_inside, u_inside;
   353      f32_dec dec;
   354  
   355      even = !(c & 1);
   356      irregular = rsig == 0 && rexp > 1;
   357  
   358      cbl = 4 * c - 2 + irregular;
   359      cb = 4 * c;
   360      cbr = 4 * c + 2;
   361  
   362      k = (q * 1262611 - (irregular ? 524031 : 0)) >> 22;
   363      h = q + ((-k) * 1741647 >> 19) + 1;
   364      uint64_t pow10 = pow10_ceil_sig_f32(-k);
   365      vbl = round_odd_f32(pow10, cbl << h);
   366      vb = round_odd_f32(pow10, cb << h);
   367      vbr = round_odd_f32(pow10, cbr << h);
   368  
   369  
   370      lower = vbl + !even;
   371      upper = vbr - !even;
   372  
   373      s = vb / 4;
   374      if (s >= 10) {
   375          uint64_t sp = s / 10;
   376          bool up_inside = lower <= (40 * sp);
   377          bool wp_inside = (40 * sp + 40) <= upper;
   378          if (up_inside != wp_inside) {
   379              dec.sig = sp + wp_inside;
   380              dec.exp = k + 1;
   381              return dec;
   382          }
   383      }
   384  
   385      u_inside = lower <= (4 * s);
   386      w_inside = (4 * s + 4) <= upper;
   387      if (u_inside != w_inside) {
   388          dec.sig = s + w_inside;
   389          dec.exp = k;
   390          return dec;
   391      }
   392  
   393      uint64_t mid = 4 * s + 2;
   394      bool round_up = vb > mid || (vb == mid && (s & 1) != 0);
   395      dec.sig = s + round_up;
   396      dec.exp = k;
   397      return dec;
   398  }
   399  
   400   int f32toa(char *out, float fp) {
   401      char* p = out;
   402      uint32_t raw = f32toraw(fp);
   403      bool neg;
   404      uint32_t rsig, c;
   405      int32_t rexp, q;
   406  
   407      neg = ((raw >> (F32_BITS - 1)) != 0);
   408      rsig = raw & F32_SIG_MASK;
   409      rexp = (int32_t)((raw & F32_EXP_MASK) >> F32_SIG_BITS);
   410  
   411      /* check infinity and nan */
   412      if (unlikely(rexp == F32_INF_NAN_EXP)) {
   413          return 0;
   414      }
   415  
   416      /* check negative numbers */
   417      *p = '-';
   418      p += neg;
   419  
   420      /* simple case of 0.0 */
   421      if ((raw << 1) ==  0) {
   422          *p++ = '0';
   423          return p - out;
   424      }
   425  
   426      if (likely(rexp != 0)) {
   427          /* double is normal */
   428          c = rsig | F32_HIDDEN_BIT;
   429          q = rexp - F32_EXP_BIAS - F32_SIG_BITS;
   430  
   431          /* fast path for integer */
   432          if (q <= 0 && q >= -F32_SIG_BITS && is_div_pow2(c, -q)) {
   433              uint32_t u = c >> -q;
   434              p = format_integer_u32(u, p, ctz10_u32(u));
   435              return p - out;
   436          }
   437      } else {
   438          c = rsig;
   439          q = 1 - F32_EXP_BIAS - F32_SIG_BITS;
   440      }
   441  
   442      f32_dec dec = f32todec(rsig, rexp, c, q);
   443      p = write_dec_f32(dec, p);
   444      return p - out;
   445  }
   446  
   447  #undef F32_BITS       
   448  #undef F32_EXP_BITS   
   449  #undef F32_SIG_BITS   
   450  #undef F32_EXP_MASK   
   451  #undef F32_SIG_MASK   
   452  #undef F32_EXP_BIAS   
   453  #undef F32_INF_NAN_EXP
   454  #undef F32_HIDDEN_BIT