github.com/bytedance/sonic@v1.11.7-0.20240517092252-d2edb31b167b/native/f32toa.c (about) 1 /* Copyright 2020 Alexander Bolz 2 * 3 * Boost Software License - Version 1.0 - August 17th, 2003 4 * 5 * Permission is hereby granted, free of charge, to any person or organization 6 * obtaining a copy of the software and accompanying documentation covered by 7 * this license (the "Software") to use, reproduce, display, distribute, 8 * execute, and transmit the Software, and to prepare derivative works of the 9 * Software, and to permit third-parties to whom the Software is furnished to 10 * do so, all subject to the following: 11 * 12 * The copyright notices in the Software and this entire statement, including 13 * the above license grant, this restriction and the following disclaimer, 14 * must be included in all copies of the Software, in whole or in part, and 15 * all derivative works of the Software, unless such copies or derivative 16 * works are solely in the form of machine-executable object code generated by 17 * a source language processor. 18 * 19 * Unless required by applicable law or agreed to in writing, this software 20 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 21 * KIND, either express or implied. 22 * 23 * This file may have been modified by ByteDance authors. All ByteDance 24 * Modifications are Copyright 2022 ByteDance Authors. 25 */ 26 27 #include "native.h" 28 #include "tab.h" 29 #include "test/xassert.h" 30 31 #define F32_BITS 32 32 #define F32_EXP_BITS 8 33 #define F32_SIG_BITS 23 34 #define F32_EXP_MASK 0x7F800000u // middle 8 bits 35 #define F32_SIG_MASK 0x007FFFFFu // lower 23 bits 36 #define F32_EXP_BIAS 127 37 #define F32_INF_NAN_EXP 0xFF 38 #define F32_HIDDEN_BIT 0x00800000u 39 40 typedef struct { 41 uint32_t sig; 42 int32_t exp; 43 } f32_dec; 44 45 typedef __uint128_t uint128_t; 46 47 static always_inline unsigned ctz10_u32(const uint32_t v) { 48 xassert(0 <= v && v < 1000000000u); 49 if (v >= 100000) { 50 if (v < 1000000) return 6; 51 if (v < 10000000) return 7; 52 if (v < 100000000) return 8; 53 else return 9; 54 } else { 55 if (v < 10) return 1; 56 if (v < 100) return 2; 57 if (v < 1000) return 3; 58 if (v < 10000) return 4; 59 else return 5; 60 } 61 } 62 63 static always_inline char* format_significand_f32(uint32_t sig, char *out, int cnt) { 64 char *r = out + cnt; 65 int ctz = 0; 66 67 /* at most 9 digits here */ 68 if (sig >= 10000) { 69 uint32_t c = sig - 10000 * (sig / 10000); 70 sig /= 10000; 71 if (c != 0) { 72 uint32_t c0 = (c % 100) << 1; 73 uint32_t c1 = (c / 100) << 1; 74 copy_two_digs(r - 2, Digits + c0); 75 copy_two_digs(r - 4, Digits + c1); 76 } else { 77 ctz = 4; 78 } 79 r -= 4; 80 } 81 82 while (sig >= 100) { 83 uint32_t c = (sig % 100) << 1; 84 sig /= 100; 85 copy_two_digs(r - 2, Digits + c); 86 r -= 2; 87 } 88 89 if (sig >= 10) { 90 uint32_t c = sig << 1; 91 copy_two_digs(out, Digits + c); 92 } else { 93 *out = (char) ('0' + sig); 94 } 95 96 return out + cnt - ctz; 97 } 98 99 static always_inline char* format_integer_u32(uint32_t sig, char *out, unsigned cnt) { 100 char *r = out + cnt; 101 102 /* at most 9 digits here */ 103 if (sig >= 10000) { 104 uint32_t c = sig - 10000 * (sig / 10000); 105 sig /= 10000; 106 uint32_t c0 = (c % 100) << 1; 107 uint32_t c1 = (c / 100) << 1; 108 copy_two_digs(r - 2, Digits + c0); 109 copy_two_digs(r - 4, Digits + c1); 110 r -= 4; 111 } 112 113 while (sig >= 100) { 114 uint32_t c = (sig % 100) << 1; 115 sig /= 100; 116 copy_two_digs(r - 2, Digits + c); 117 r -= 2; 118 } 119 120 if (sig >= 10) { 121 uint32_t c = sig << 1; 122 copy_two_digs(out, Digits + c); 123 } else { 124 *out = (char) ('0' + sig); 125 } 126 127 return out + cnt; 128 } 129 130 static always_inline char* format_exponent_f32(f32_dec v, char *out, int cnt) { 131 char* p = out + 1; 132 char* end = format_significand_f32(v.sig, p, cnt); 133 while (*(end - 1) == '0') end--; 134 135 /* Print decimal point if needed */ 136 *out = *p; 137 if (end - p > 1) { 138 *p = '.'; 139 } else { 140 end--; 141 } 142 143 /* Print the exponent */ 144 *end++ = 'e'; 145 int32_t exp = v.exp + (int32_t) cnt - 1; 146 if (exp < 0) { 147 *end++ = '-'; 148 exp = -exp; 149 } else { 150 *end++ = '+'; 151 } 152 153 if (exp >= 100) { 154 int32_t c = exp % 10; 155 copy_two_digs(end, Digits + 2 * (exp / 10)); 156 end[2] = (char) ('0' + c); 157 end += 3; 158 } else if (exp >= 10) { 159 copy_two_digs(end, Digits + 2 * exp); 160 end += 2; 161 } else { 162 *end++ = (char) ('0' + exp); 163 } 164 return end; 165 } 166 167 static always_inline char* format_decimal_f32(f32_dec v, char* out, int cnt) { 168 char* p = out; 169 char* end; 170 int point = cnt + v.exp; 171 172 /* print leading zeros if fp < 1 */ 173 if (point <= 0) { 174 *p++ = '0', *p++ = '.'; 175 for (int i = 0; i < -point; i++) { 176 *p++ = '0'; 177 } 178 } 179 180 /* add the remaining digits */ 181 end = format_significand_f32(v.sig, p, cnt); 182 while (*(end - 1) == '0') end--; 183 if (point <= 0) { 184 return end; 185 } 186 187 /* insert point or add trailing zeros */ 188 int digs = end - p, frac = digs - point; 189 if (digs > point) { 190 for (int i = 0; i < frac; i++) { 191 *(end - i) = *(end - i - 1); 192 } 193 p[point] = '.'; 194 end++; 195 } else { 196 for (int i = 0; i < point - digs; i++) { 197 *end++ = '0'; 198 } 199 } 200 return end; 201 } 202 203 static always_inline char* write_dec_f32(f32_dec dec, char* p) { 204 int cnt = ctz10_u32(dec.sig); 205 int dot = cnt + dec.exp; 206 int sci_exp = dot - 1; 207 bool exp_fmt = sci_exp < -6 || sci_exp > 20; 208 bool has_dot = dot < cnt; 209 210 if (exp_fmt) { 211 return format_exponent_f32(dec, p, cnt); 212 } 213 if (has_dot) { 214 return format_decimal_f32(dec, p, cnt); 215 } 216 217 char* end = p + dot; 218 p = format_integer_u32(dec.sig, p, cnt); 219 while (p < end) *p++ = '0'; 220 return end; 221 } 222 223 static always_inline uint32_t f32toraw(float fp) { 224 union { 225 uint32_t u32; 226 float f32; 227 } uval; 228 uval.f32 = fp; 229 return uval.u32; 230 } 231 232 static always_inline uint64_t pow10_ceil_sig_f32(int32_t k) 233 { 234 // There are unique beta and r such that 10^k = beta 2^r and 235 // 2^63 <= beta < 2^64, namely r = floor(log_2 10^k) - 63 and 236 // beta = 2^-r 10^k. 237 // Let g = ceil(beta), so (g-1) 2^r < 10^k <= g 2^r, with the latter 238 // value being a pretty good overestimate for 10^k. 239 240 // NB: Since for all the required exponents k, we have g < 2^64, 241 // all constants can be stored in 128-bit integers. 242 // reference from: 243 // https://github.com/abolz/Drachennest/blob/master/src/schubfach_32.cc#L144 244 245 #define KMAX 45 246 #define KMIN -31 247 static const uint64_t g[KMAX - KMIN + 1] = { 248 0x81CEB32C4B43FCF5, // -31 249 0xA2425FF75E14FC32, // -30 250 0xCAD2F7F5359A3B3F, // -29 251 0xFD87B5F28300CA0E, // -28 252 0x9E74D1B791E07E49, // -27 253 0xC612062576589DDB, // -26 254 0xF79687AED3EEC552, // -25 255 0x9ABE14CD44753B53, // -24 256 0xC16D9A0095928A28, // -23 257 0xF1C90080BAF72CB2, // -22 258 0x971DA05074DA7BEF, // -21 259 0xBCE5086492111AEB, // -20 260 0xEC1E4A7DB69561A6, // -19 261 0x9392EE8E921D5D08, // -18 262 0xB877AA3236A4B44A, // -17 263 0xE69594BEC44DE15C, // -16 264 0x901D7CF73AB0ACDA, // -15 265 0xB424DC35095CD810, // -14 266 0xE12E13424BB40E14, // -13 267 0x8CBCCC096F5088CC, // -12 268 0xAFEBFF0BCB24AAFF, // -11 269 0xDBE6FECEBDEDD5BF, // -10 270 0x89705F4136B4A598, // -9 271 0xABCC77118461CEFD, // -8 272 0xD6BF94D5E57A42BD, // -7 273 0x8637BD05AF6C69B6, // -6 274 0xA7C5AC471B478424, // -5 275 0xD1B71758E219652C, // -4 276 0x83126E978D4FDF3C, // -3 277 0xA3D70A3D70A3D70B, // -2 278 0xCCCCCCCCCCCCCCCD, // -1 279 0x8000000000000000, // 0 280 0xA000000000000000, // 1 281 0xC800000000000000, // 2 282 0xFA00000000000000, // 3 283 0x9C40000000000000, // 4 284 0xC350000000000000, // 5 285 0xF424000000000000, // 6 286 0x9896800000000000, // 7 287 0xBEBC200000000000, // 8 288 0xEE6B280000000000, // 9 289 0x9502F90000000000, // 10 290 0xBA43B74000000000, // 11 291 0xE8D4A51000000000, // 12 292 0x9184E72A00000000, // 13 293 0xB5E620F480000000, // 14 294 0xE35FA931A0000000, // 15 295 0x8E1BC9BF04000000, // 16 296 0xB1A2BC2EC5000000, // 17 297 0xDE0B6B3A76400000, // 18 298 0x8AC7230489E80000, // 19 299 0xAD78EBC5AC620000, // 20 300 0xD8D726B7177A8000, // 21 301 0x878678326EAC9000, // 22 302 0xA968163F0A57B400, // 23 303 0xD3C21BCECCEDA100, // 24 304 0x84595161401484A0, // 25 305 0xA56FA5B99019A5C8, // 26 306 0xCECB8F27F4200F3A, // 27 307 0x813F3978F8940985, // 28 308 0xA18F07D736B90BE6, // 29 309 0xC9F2C9CD04674EDF, // 30 310 0xFC6F7C4045812297, // 31 311 0x9DC5ADA82B70B59E, // 32 312 0xC5371912364CE306, // 33 313 0xF684DF56C3E01BC7, // 34 314 0x9A130B963A6C115D, // 35 315 0xC097CE7BC90715B4, // 36 316 0xF0BDC21ABB48DB21, // 37 317 0x96769950B50D88F5, // 38 318 0xBC143FA4E250EB32, // 39 319 0xEB194F8E1AE525FE, // 40 320 0x92EFD1B8D0CF37BF, // 41 321 0xB7ABC627050305AE, // 42 322 0xE596B7B0C643C71A, // 43 323 0x8F7E32CE7BEA5C70, // 44 324 0xB35DBF821AE4F38C, // 45 325 }; 326 327 xassert(k >= KMIN && k <= KMAX); 328 return g[k - KMIN]; 329 #undef KMIN 330 #undef KMAX 331 } 332 333 static always_inline uint32_t round_odd_f32(uint64_t g, uint32_t cp) { 334 const uint128_t p = ((uint128_t)g) * cp; 335 const uint32_t y1 = (uint64_t)(p >> 64); 336 const uint32_t y0 = ((uint64_t)(p)) >> 32; 337 return y1 | (y0 > 1); 338 } 339 340 /** 341 Rendering float point number into decimal. 342 The function used Schubfach algorithm, reference: 343 The Schubfach way to render doubles, Raffaello Giulietti, 2022-03-20. 344 https://drive.google.com/file/d/1gp5xv4CAa78SVgCeWfGqqI4FfYYYuNFb 345 https://mail.openjdk.java.net/pipermail/core-libs-dev/2021-November/083536.html 346 https://github.com/openjdk/jdk/pull/3402 (Java implementation) 347 https://github.com/abolz/Drachennest (C++ implementation) 348 */ 349 static always_inline f32_dec f32todec(uint32_t rsig, int32_t rexp, uint32_t c, int32_t q) { 350 uint32_t cbl, cb, cbr, vbl, vb, vbr, lower, upper, s; 351 int32_t k, h; 352 bool even, irregular, w_inside, u_inside; 353 f32_dec dec; 354 355 even = !(c & 1); 356 irregular = rsig == 0 && rexp > 1; 357 358 cbl = 4 * c - 2 + irregular; 359 cb = 4 * c; 360 cbr = 4 * c + 2; 361 362 k = (q * 1262611 - (irregular ? 524031 : 0)) >> 22; 363 h = q + ((-k) * 1741647 >> 19) + 1; 364 uint64_t pow10 = pow10_ceil_sig_f32(-k); 365 vbl = round_odd_f32(pow10, cbl << h); 366 vb = round_odd_f32(pow10, cb << h); 367 vbr = round_odd_f32(pow10, cbr << h); 368 369 370 lower = vbl + !even; 371 upper = vbr - !even; 372 373 s = vb / 4; 374 if (s >= 10) { 375 uint64_t sp = s / 10; 376 bool up_inside = lower <= (40 * sp); 377 bool wp_inside = (40 * sp + 40) <= upper; 378 if (up_inside != wp_inside) { 379 dec.sig = sp + wp_inside; 380 dec.exp = k + 1; 381 return dec; 382 } 383 } 384 385 u_inside = lower <= (4 * s); 386 w_inside = (4 * s + 4) <= upper; 387 if (u_inside != w_inside) { 388 dec.sig = s + w_inside; 389 dec.exp = k; 390 return dec; 391 } 392 393 uint64_t mid = 4 * s + 2; 394 bool round_up = vb > mid || (vb == mid && (s & 1) != 0); 395 dec.sig = s + round_up; 396 dec.exp = k; 397 return dec; 398 } 399 400 int f32toa(char *out, float fp) { 401 char* p = out; 402 uint32_t raw = f32toraw(fp); 403 bool neg; 404 uint32_t rsig, c; 405 int32_t rexp, q; 406 407 neg = ((raw >> (F32_BITS - 1)) != 0); 408 rsig = raw & F32_SIG_MASK; 409 rexp = (int32_t)((raw & F32_EXP_MASK) >> F32_SIG_BITS); 410 411 /* check infinity and nan */ 412 if (unlikely(rexp == F32_INF_NAN_EXP)) { 413 return 0; 414 } 415 416 /* check negative numbers */ 417 *p = '-'; 418 p += neg; 419 420 /* simple case of 0.0 */ 421 if ((raw << 1) == 0) { 422 *p++ = '0'; 423 return p - out; 424 } 425 426 if (likely(rexp != 0)) { 427 /* double is normal */ 428 c = rsig | F32_HIDDEN_BIT; 429 q = rexp - F32_EXP_BIAS - F32_SIG_BITS; 430 431 /* fast path for integer */ 432 if (q <= 0 && q >= -F32_SIG_BITS && is_div_pow2(c, -q)) { 433 uint32_t u = c >> -q; 434 p = format_integer_u32(u, p, ctz10_u32(u)); 435 return p - out; 436 } 437 } else { 438 c = rsig; 439 q = 1 - F32_EXP_BIAS - F32_SIG_BITS; 440 } 441 442 f32_dec dec = f32todec(rsig, rexp, c, q); 443 p = write_dec_f32(dec, p); 444 return p - out; 445 } 446 447 #undef F32_BITS 448 #undef F32_EXP_BITS 449 #undef F32_SIG_BITS 450 #undef F32_EXP_MASK 451 #undef F32_SIG_MASK 452 #undef F32_EXP_BIAS 453 #undef F32_INF_NAN_EXP 454 #undef F32_HIDDEN_BIT