github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/get_str.c (about)

     1  /* mpn_get_str -- Convert {UP,USIZE} to a base BASE string in STR.
     2  
     3     Contributed to the GNU project by Torbjorn Granlund.
     4  
     5     THE FUNCTIONS IN THIS FILE, EXCEPT mpn_get_str, ARE INTERNAL WITH A MUTABLE
     6     INTERFACE.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN
     7     FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE
     8     GNU MP RELEASE.
     9  
    10  Copyright 1991-1994, 1996, 2000-2002, 2004, 2006-2008, 2011, 2012 Free Software
    11  Foundation, Inc.
    12  
    13  This file is part of the GNU MP Library.
    14  
    15  The GNU MP Library is free software; you can redistribute it and/or modify
    16  it under the terms of either:
    17  
    18    * the GNU Lesser General Public License as published by the Free
    19      Software Foundation; either version 3 of the License, or (at your
    20      option) any later version.
    21  
    22  or
    23  
    24    * the GNU General Public License as published by the Free Software
    25      Foundation; either version 2 of the License, or (at your option) any
    26      later version.
    27  
    28  or both in parallel, as here.
    29  
    30  The GNU MP Library is distributed in the hope that it will be useful, but
    31  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    32  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    33  for more details.
    34  
    35  You should have received copies of the GNU General Public License and the
    36  GNU Lesser General Public License along with the GNU MP Library.  If not,
    37  see https://www.gnu.org/licenses/.  */
    38  
    39  #include "gmp.h"
    40  #include "gmp-impl.h"
    41  #include "longlong.h"
    42  
    43  /* Conversion of U {up,un} to a string in base b.  Internally, we convert to
    44     base B = b^m, the largest power of b that fits a limb.  Basic algorithms:
    45  
    46    A) Divide U repeatedly by B, generating a quotient and remainder, until the
    47       quotient becomes zero.  The remainders hold the converted digits.  Digits
    48       come out from right to left.  (Used in mpn_sb_get_str.)
    49  
    50    B) Divide U by b^g, for g such that 1/b <= U/b^g < 1, generating a fraction.
    51       Then develop digits by multiplying the fraction repeatedly by b.  Digits
    52       come out from left to right.  (Currently not used herein, except for in
    53       code for converting single limbs to individual digits.)
    54  
    55    C) Compute B^1, B^2, B^4, ..., B^s, for s such that B^s is just above
    56       sqrt(U).  Then divide U by B^s, generating quotient and remainder.
    57       Recursively convert the quotient, then the remainder, using the
    58       precomputed powers.  Digits come out from left to right.  (Used in
    59       mpn_dc_get_str.)
    60  
    61    When using algorithm C, algorithm B might be suitable for basecase code,
    62    since the required b^g power will be readily accessible.
    63  
    64    Optimization ideas:
    65    1. The recursive function of (C) could use less temporary memory.  The powtab
    66       allocation could be trimmed with some computation, and the tmp area could
    67       be reduced, or perhaps eliminated if up is reused for both quotient and
    68       remainder (it is currently used just for remainder).
    69    2. Store the powers of (C) in normalized form, with the normalization count.
    70       Quotients will usually need to be left-shifted before each divide, and
    71       remainders will either need to be left-shifted of right-shifted.
    72    3. In the code for developing digits from a single limb, we could avoid using
    73       a full umul_ppmm except for the first (or first few) digits, provided base
    74       is even.  Subsequent digits can be developed using plain multiplication.
    75       (This saves on register-starved machines (read x86) and on all machines
    76       that generate the upper product half using a separate instruction (alpha,
    77       powerpc, IA-64) or lacks such support altogether (sparc64, hppa64).
    78    4. Separate mpn_dc_get_str basecase code from code for small conversions. The
    79       former code will have the exact right power readily available in the
    80       powtab parameter for dividing the current number into a fraction.  Convert
    81       that using algorithm B.
    82    5. Completely avoid division.  Compute the inverses of the powers now in
    83       powtab instead of the actual powers.
    84    6. Decrease powtab allocation for even bases.  E.g. for base 10 we could save
    85       about 30% (1-log(5)/log(10)).
    86  
    87    Basic structure of (C):
    88      mpn_get_str:
    89        if POW2_P (n)
    90  	...
    91        else
    92  	if (un < GET_STR_PRECOMPUTE_THRESHOLD)
    93  	  mpn_sb_get_str (str, base, up, un);
    94  	else
    95  	  precompute_power_tables
    96  	  mpn_dc_get_str
    97  
    98      mpn_dc_get_str:
    99  	mpn_tdiv_qr
   100  	if (qn < GET_STR_DC_THRESHOLD)
   101  	  mpn_sb_get_str
   102  	else
   103  	  mpn_dc_get_str
   104  	if (rn < GET_STR_DC_THRESHOLD)
   105  	  mpn_sb_get_str
   106  	else
   107  	  mpn_dc_get_str
   108  
   109  
   110    The reason for the two threshold values is the cost of
   111    precompute_power_tables.  GET_STR_PRECOMPUTE_THRESHOLD will be considerably
   112    larger than GET_STR_PRECOMPUTE_THRESHOLD.  */
   113  
   114  
   115  /* The x86s and m68020 have a quotient and remainder "div" instruction and
   116     gcc recognises an adjacent "/" and "%" can be combined using that.
   117     Elsewhere "/" and "%" are either separate instructions, or separate
   118     libgcc calls (which unfortunately gcc as of version 3.0 doesn't combine).
   119     A multiply and subtract should be faster than a "%" in those cases.  */
   120  #if HAVE_HOST_CPU_FAMILY_x86            \
   121    || HAVE_HOST_CPU_m68020               \
   122    || HAVE_HOST_CPU_m68030               \
   123    || HAVE_HOST_CPU_m68040               \
   124    || HAVE_HOST_CPU_m68060               \
   125    || HAVE_HOST_CPU_m68360 /* CPU32 */
   126  #define udiv_qrnd_unnorm(q,r,n,d)       \
   127    do {                                  \
   128      mp_limb_t  __q = (n) / (d);         \
   129      mp_limb_t  __r = (n) % (d);         \
   130      (q) = __q;                          \
   131      (r) = __r;                          \
   132    } while (0)
   133  #else
   134  #define udiv_qrnd_unnorm(q,r,n,d)       \
   135    do {                                  \
   136      mp_limb_t  __q = (n) / (d);         \
   137      mp_limb_t  __r = (n) - __q*(d);     \
   138      (q) = __q;                          \
   139      (r) = __r;                          \
   140    } while (0)
   141  #endif
   142  
   143  
   144  /* Convert {up,un} to a string in base base, and put the result in str.
   145     Generate len characters, possibly padding with zeros to the left.  If len is
   146     zero, generate as many characters as required.  Return a pointer immediately
   147     after the last digit of the result string.  Complexity is O(un^2); intended
   148     for small conversions.  */
   149  static unsigned char *
   150  mpn_sb_get_str (unsigned char *str, size_t len,
   151  		mp_ptr up, mp_size_t un, int base)
   152  {
   153    mp_limb_t rl, ul;
   154    unsigned char *s;
   155    size_t l;
   156    /* Allocate memory for largest possible string, given that we only get here
   157       for operands with un < GET_STR_PRECOMPUTE_THRESHOLD and that the smallest
   158       base is 3.  7/11 is an approximation to 1/log2(3).  */
   159  #if TUNE_PROGRAM_BUILD
   160  #define BUF_ALLOC (GET_STR_THRESHOLD_LIMIT * GMP_LIMB_BITS * 7 / 11)
   161  #else
   162  #define BUF_ALLOC (GET_STR_PRECOMPUTE_THRESHOLD * GMP_LIMB_BITS * 7 / 11)
   163  #endif
   164    unsigned char buf[BUF_ALLOC];
   165  #if TUNE_PROGRAM_BUILD
   166    mp_limb_t rp[GET_STR_THRESHOLD_LIMIT];
   167  #else
   168    mp_limb_t rp[GET_STR_PRECOMPUTE_THRESHOLD];
   169  #endif
   170  
   171    if (base == 10)
   172      {
   173        /* Special case code for base==10 so that the compiler has a chance to
   174  	 optimize things.  */
   175  
   176        MPN_COPY (rp + 1, up, un);
   177  
   178        s = buf + BUF_ALLOC;
   179        while (un > 1)
   180  	{
   181  	  int i;
   182  	  mp_limb_t frac, digit;
   183  	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
   184  					 MP_BASES_BIG_BASE_10,
   185  					 MP_BASES_BIG_BASE_INVERTED_10,
   186  					 MP_BASES_NORMALIZATION_STEPS_10);
   187  	  un -= rp[un] == 0;
   188  	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
   189  	  s -= MP_BASES_CHARS_PER_LIMB_10;
   190  #if HAVE_HOST_CPU_FAMILY_x86
   191  	  /* The code below turns out to be a bit slower for x86 using gcc.
   192  	     Use plain code.  */
   193  	  i = MP_BASES_CHARS_PER_LIMB_10;
   194  	  do
   195  	    {
   196  	      umul_ppmm (digit, frac, frac, 10);
   197  	      *s++ = digit;
   198  	    }
   199  	  while (--i);
   200  #else
   201  	  /* Use the fact that 10 in binary is 1010, with the lowest bit 0.
   202  	     After a few umul_ppmm, we will have accumulated enough low zeros
   203  	     to use a plain multiply.  */
   204  	  if (MP_BASES_NORMALIZATION_STEPS_10 == 0)
   205  	    {
   206  	      umul_ppmm (digit, frac, frac, 10);
   207  	      *s++ = digit;
   208  	    }
   209  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 1)
   210  	    {
   211  	      umul_ppmm (digit, frac, frac, 10);
   212  	      *s++ = digit;
   213  	    }
   214  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 2)
   215  	    {
   216  	      umul_ppmm (digit, frac, frac, 10);
   217  	      *s++ = digit;
   218  	    }
   219  	  if (MP_BASES_NORMALIZATION_STEPS_10 <= 3)
   220  	    {
   221  	      umul_ppmm (digit, frac, frac, 10);
   222  	      *s++ = digit;
   223  	    }
   224  	  i = (MP_BASES_CHARS_PER_LIMB_10 - ((MP_BASES_NORMALIZATION_STEPS_10 < 4)
   225  					     ? (4-MP_BASES_NORMALIZATION_STEPS_10)
   226  					     : 0));
   227  	  frac = (frac + 0xf) >> 4;
   228  	  do
   229  	    {
   230  	      frac *= 10;
   231  	      digit = frac >> (GMP_LIMB_BITS - 4);
   232  	      *s++ = digit;
   233  	      frac &= (~(mp_limb_t) 0) >> 4;
   234  	    }
   235  	  while (--i);
   236  #endif
   237  	  s -= MP_BASES_CHARS_PER_LIMB_10;
   238  	}
   239  
   240        ul = rp[1];
   241        while (ul != 0)
   242  	{
   243  	  udiv_qrnd_unnorm (ul, rl, ul, 10);
   244  	  *--s = rl;
   245  	}
   246      }
   247    else /* not base 10 */
   248      {
   249        unsigned chars_per_limb;
   250        mp_limb_t big_base, big_base_inverted;
   251        unsigned normalization_steps;
   252  
   253        chars_per_limb = mp_bases[base].chars_per_limb;
   254        big_base = mp_bases[base].big_base;
   255        big_base_inverted = mp_bases[base].big_base_inverted;
   256        count_leading_zeros (normalization_steps, big_base);
   257  
   258        MPN_COPY (rp + 1, up, un);
   259  
   260        s = buf + BUF_ALLOC;
   261        while (un > 1)
   262  	{
   263  	  int i;
   264  	  mp_limb_t frac;
   265  	  MPN_DIVREM_OR_PREINV_DIVREM_1 (rp, (mp_size_t) 1, rp + 1, un,
   266  					 big_base, big_base_inverted,
   267  					 normalization_steps);
   268  	  un -= rp[un] == 0;
   269  	  frac = (rp[0] + 1) << GMP_NAIL_BITS;
   270  	  s -= chars_per_limb;
   271  	  i = chars_per_limb;
   272  	  do
   273  	    {
   274  	      mp_limb_t digit;
   275  	      umul_ppmm (digit, frac, frac, base);
   276  	      *s++ = digit;
   277  	    }
   278  	  while (--i);
   279  	  s -= chars_per_limb;
   280  	}
   281  
   282        ul = rp[1];
   283        while (ul != 0)
   284  	{
   285  	  udiv_qrnd_unnorm (ul, rl, ul, base);
   286  	  *--s = rl;
   287  	}
   288      }
   289  
   290    l = buf + BUF_ALLOC - s;
   291    while (l < len)
   292      {
   293        *str++ = 0;
   294        len--;
   295      }
   296    while (l != 0)
   297      {
   298        *str++ = *s++;
   299        l--;
   300      }
   301    return str;
   302  }
   303  
   304  
   305  /* Convert {UP,UN} to a string with a base as represented in POWTAB, and put
   306     the string in STR.  Generate LEN characters, possibly padding with zeros to
   307     the left.  If LEN is zero, generate as many characters as required.
   308     Return a pointer immediately after the last digit of the result string.
   309     This uses divide-and-conquer and is intended for large conversions.  */
   310  static unsigned char *
   311  mpn_dc_get_str (unsigned char *str, size_t len,
   312  		mp_ptr up, mp_size_t un,
   313  		const powers_t *powtab, mp_ptr tmp)
   314  {
   315    if (BELOW_THRESHOLD (un, GET_STR_DC_THRESHOLD))
   316      {
   317        if (un != 0)
   318  	str = mpn_sb_get_str (str, len, up, un, powtab->base);
   319        else
   320  	{
   321  	  while (len != 0)
   322  	    {
   323  	      *str++ = 0;
   324  	      len--;
   325  	    }
   326  	}
   327      }
   328    else
   329      {
   330        mp_ptr pwp, qp, rp;
   331        mp_size_t pwn, qn;
   332        mp_size_t sn;
   333  
   334        pwp = powtab->p;
   335        pwn = powtab->n;
   336        sn = powtab->shift;
   337  
   338        if (un < pwn + sn || (un == pwn + sn && mpn_cmp (up + sn, pwp, un - sn) < 0))
   339  	{
   340  	  str = mpn_dc_get_str (str, len, up, un, powtab - 1, tmp);
   341  	}
   342        else
   343  	{
   344  	  qp = tmp;		/* (un - pwn + 1) limbs for qp */
   345  	  rp = up;		/* pwn limbs for rp; overwrite up area */
   346  
   347  	  mpn_tdiv_qr (qp, rp + sn, 0L, up + sn, un - sn, pwp, pwn);
   348  	  qn = un - sn - pwn; qn += qp[qn] != 0;		/* quotient size */
   349  
   350  	  ASSERT (qn < pwn + sn || (qn == pwn + sn && mpn_cmp (qp + sn, pwp, pwn) < 0));
   351  
   352  	  if (len != 0)
   353  	    len = len - powtab->digits_in_base;
   354  
   355  	  str = mpn_dc_get_str (str, len, qp, qn, powtab - 1, tmp + qn);
   356  	  str = mpn_dc_get_str (str, powtab->digits_in_base, rp, pwn + sn, powtab - 1, tmp);
   357  	}
   358      }
   359    return str;
   360  }
   361  
   362  
   363  /* There are no leading zeros on the digits generated at str, but that's not
   364     currently a documented feature.  The current mpz_out_str and mpz_get_str
   365     rely on it.  */
   366  
   367  size_t
   368  mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
   369  {
   370    mp_ptr powtab_mem, powtab_mem_ptr;
   371    mp_limb_t big_base;
   372    size_t digits_in_base;
   373    powers_t powtab[GMP_LIMB_BITS];
   374    int pi;
   375    mp_size_t n;
   376    mp_ptr p, t;
   377    size_t out_len;
   378    mp_ptr tmp;
   379    TMP_DECL;
   380  
   381    /* Special case zero, as the code below doesn't handle it.  */
   382    if (un == 0)
   383      {
   384        str[0] = 0;
   385        return 1;
   386      }
   387  
   388    if (POW2_P (base))
   389      {
   390        /* The base is a power of 2.  Convert from most significant end.  */
   391        mp_limb_t n1, n0;
   392        int bits_per_digit = mp_bases[base].big_base;
   393        int cnt;
   394        int bit_pos;
   395        mp_size_t i;
   396        unsigned char *s = str;
   397        mp_bitcnt_t bits;
   398  
   399        n1 = up[un - 1];
   400        count_leading_zeros (cnt, n1);
   401  
   402        /* BIT_POS should be R when input ends in least significant nibble,
   403  	 R + bits_per_digit * n when input ends in nth least significant
   404  	 nibble. */
   405  
   406        bits = (mp_bitcnt_t) GMP_NUMB_BITS * un - cnt + GMP_NAIL_BITS;
   407        cnt = bits % bits_per_digit;
   408        if (cnt != 0)
   409  	bits += bits_per_digit - cnt;
   410        bit_pos = bits - (mp_bitcnt_t) (un - 1) * GMP_NUMB_BITS;
   411  
   412        /* Fast loop for bit output.  */
   413        i = un - 1;
   414        for (;;)
   415  	{
   416  	  bit_pos -= bits_per_digit;
   417  	  while (bit_pos >= 0)
   418  	    {
   419  	      *s++ = (n1 >> bit_pos) & ((1 << bits_per_digit) - 1);
   420  	      bit_pos -= bits_per_digit;
   421  	    }
   422  	  i--;
   423  	  if (i < 0)
   424  	    break;
   425  	  n0 = (n1 << -bit_pos) & ((1 << bits_per_digit) - 1);
   426  	  n1 = up[i];
   427  	  bit_pos += GMP_NUMB_BITS;
   428  	  *s++ = n0 | (n1 >> bit_pos);
   429  	}
   430  
   431        return s - str;
   432      }
   433  
   434    /* General case.  The base is not a power of 2.  */
   435  
   436    if (BELOW_THRESHOLD (un, GET_STR_PRECOMPUTE_THRESHOLD))
   437      return mpn_sb_get_str (str, (size_t) 0, up, un, base) - str;
   438  
   439    TMP_MARK;
   440  
   441    /* Allocate one large block for the powers of big_base.  */
   442    powtab_mem = TMP_BALLOC_LIMBS (mpn_dc_get_str_powtab_alloc (un));
   443    powtab_mem_ptr = powtab_mem;
   444  
   445    /* Compute a table of powers, were the largest power is >= sqrt(U).  */
   446  
   447    big_base = mp_bases[base].big_base;
   448    digits_in_base = mp_bases[base].chars_per_limb;
   449  
   450    {
   451      mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
   452      mp_limb_t cy;
   453      mp_size_t shift;
   454      size_t ndig;
   455  
   456      DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
   457      xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
   458  
   459      n_pows = 0;
   460      for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
   461        {
   462  	exptab[n_pows] = pn;
   463  	n_pows++;
   464        }
   465      exptab[n_pows] = 1;
   466  
   467      powtab[0].p = &big_base;
   468      powtab[0].n = 1;
   469      powtab[0].digits_in_base = digits_in_base;
   470      powtab[0].base = base;
   471      powtab[0].shift = 0;
   472  
   473      powtab[1].p = powtab_mem_ptr;  powtab_mem_ptr += 2;
   474      powtab[1].p[0] = big_base;
   475      powtab[1].n = 1;
   476      powtab[1].digits_in_base = digits_in_base;
   477      powtab[1].base = base;
   478      powtab[1].shift = 0;
   479  
   480      n = 1;
   481      p = &big_base;
   482      bexp = 1;
   483      shift = 0;
   484      for (pi = 2; pi < n_pows; pi++)
   485        {
   486  	t = powtab_mem_ptr;
   487  	powtab_mem_ptr += 2 * n + 2;
   488  
   489  	ASSERT_ALWAYS (powtab_mem_ptr < powtab_mem + mpn_dc_get_str_powtab_alloc (un));
   490  
   491  	mpn_sqr (t, p, n);
   492  
   493  	digits_in_base *= 2;
   494  	n *= 2;  n -= t[n - 1] == 0;
   495  	bexp *= 2;
   496  
   497  	if (bexp + 1 < exptab[n_pows - pi])
   498  	  {
   499  	    digits_in_base += mp_bases[base].chars_per_limb;
   500  	    cy = mpn_mul_1 (t, t, n, big_base);
   501  	    t[n] = cy;
   502  	    n += cy != 0;
   503  	    bexp += 1;
   504  	  }
   505  	shift *= 2;
   506  	/* Strip low zero limbs.  */
   507  	while (t[0] == 0)
   508  	  {
   509  	    t++;
   510  	    n--;
   511  	    shift++;
   512  	  }
   513  	p = t;
   514  	powtab[pi].p = p;
   515  	powtab[pi].n = n;
   516  	powtab[pi].digits_in_base = digits_in_base;
   517  	powtab[pi].base = base;
   518  	powtab[pi].shift = shift;
   519        }
   520  
   521      for (pi = 1; pi < n_pows; pi++)
   522        {
   523  	t = powtab[pi].p;
   524  	n = powtab[pi].n;
   525  	cy = mpn_mul_1 (t, t, n, big_base);
   526  	t[n] = cy;
   527  	n += cy != 0;
   528  	if (t[0] == 0)
   529  	  {
   530  	    powtab[pi].p = t + 1;
   531  	    n--;
   532  	    powtab[pi].shift++;
   533  	  }
   534  	powtab[pi].n = n;
   535  	powtab[pi].digits_in_base += mp_bases[base].chars_per_limb;
   536        }
   537  
   538  #if 0
   539      { int i;
   540        printf ("Computed table values for base=%d, un=%d, xn=%d:\n", base, un, xn);
   541        for (i = 0; i < n_pows; i++)
   542  	printf ("%2d: %10ld %10ld %11ld %ld\n", i, exptab[n_pows-i], powtab[i].n, powtab[i].digits_in_base, powtab[i].shift);
   543      }
   544  #endif
   545    }
   546  
   547    /* Using our precomputed powers, now in powtab[], convert our number.  */
   548    tmp = TMP_BALLOC_LIMBS (mpn_dc_get_str_itch (un));
   549    out_len = mpn_dc_get_str (str, 0, up, un, powtab + (pi - 1), tmp) - str;
   550    TMP_FREE;
   551  
   552    return out_len;
   553  }