github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/div_qr_1n_pi1.c

/* mpn_div_qr_1n_pi1

   Contributed to the GNU project by Niels Möller

   THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES.  IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.


Copyright 2013 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */

#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"

#if GMP_NAIL_BITS > 0
#error Nail bits not supported
#endif

#ifndef DIV_QR_1N_METHOD
#define DIV_QR_1N_METHOD 2
#endif

/* FIXME: Duplicated in mod_1_1.c. Move to gmp-impl.h */

#if defined (__GNUC__) && ! defined (NO_ASM)

#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add	%6, %k2\n\t"					\
	     "adc	%4, %k1\n\t"					\
	     "sbb	%k0, %k0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((USItype)(a1)), "g" ((USItype)(b1)),			\
	     "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
#endif

#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add	%6, %q2\n\t"					\
	     "adc	%4, %q1\n\t"					\
	     "sbb	%q0, %q0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((UDItype)(a1)), "rme" ((UDItype)(b1)),		\
	     "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
#endif

#if defined (__sparc__) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addxcc	%r3, %4, %1\n\t"				\
	     "subx	%%g0, %%g0, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
	 __CLOBBER_CC)
#endif

#if defined (__sparc__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addccc	%r7, %8, %%g0\n\t"				\
	     "addccc	%r3, %4, %1\n\t"				\
	     "clr	%0\n\t"						\
	     "movcs	%%xcc, -1, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl),		\
	     "rJ" ((al) >> 32), "rI" ((bl) >> 32)			\
	 __CLOBBER_CC)
#if __VIS__ >= 0x300
#undef add_mssaaaa
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "addcc	%r5, %6, %2\n\t"				\
	     "addxccc	%r3, %4, %1\n\t"				\
	     "clr	%0\n\t"						\
	     "movcs	%%xcc, -1, %0"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl)		\
	 __CLOBBER_CC)
#endif
#endif

#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
   processor running in 32-bit mode, since the carry flag then gets the 32-bit
   carry.  */
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "add%I6c	%2, %5, %6\n\t"					\
	     "adde	%1, %3, %4\n\t"					\
	     "subfe	%0, %0, %0\n\t"					\
	     "nor	%0, %0, %0"					\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "r"  (a1), "r" (b1), "%r" (a0), "rI" (b0))
#endif

#if defined (__s390x__) && W_TYPE_SIZE == 64
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  __asm__ (  "algr	%2, %6\n\t"					\
	     "alcgr	%1, %4\n\t"					\
	     "lghi	%0, 0\n\t"					\
	     "alcgr	%0, %0\n\t"					\
	     "lcgr	%0, %0"						\
	   : "=r" (m), "=r" (s1), "=&r" (s0)				\
	   : "1"  ((UDItype)(a1)), "r" ((UDItype)(b1)),			\
	     "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
#endif

#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
#define add_mssaaaa(m, sh, sl, ah, al, bh, bl)				\
  __asm__ (  "adds	%2, %5, %6\n\t"					\
	     "adcs	%1, %3, %4\n\t"					\
	     "movcc	%0, #0\n\t"					\
	     "movcs	%0, #-1"					\
	   : "=r" (m), "=r" (sh), "=&r" (sl)				\
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#endif
#endif /* defined (__GNUC__) */

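/* add_mssaaaa(m, s1, s0, a1, a0, b1, b0) computes the two-limb sum
   (s1,s0) = (a1,a0) + (b1,b0) and sets m to minus the carry out of the
   high limb: the all-ones mask when the sum overflows two limbs, zero
   otherwise (as the generic C fallback below makes explicit).  */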
#ifndef add_mssaaaa
#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0)				\
  do {									\
    UWtype __s0, __s1, __c0, __c1;					\
    __s0 = (a0) + (b0);							\
    __s1 = (a1) + (b1);							\
    __c0 = __s0 < (a0);							\
    __c1 = __s1 < (a1);							\
    (s0) = __s0;							\
    __s1 = __s1 + __c0;							\
    (s1) = __s1;							\
    (m) = - (__c1 + (__s1 < __c0));					\
  } while (0)
#endif

#if DIV_QR_1N_METHOD == 1

/* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}.
   Requires that uh < d. */
mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh,
		   mp_limb_t d, mp_limb_t dinv)
{
  ASSERT (n > 0);
  ASSERT (uh < d);
  ASSERT (d & GMP_NUMB_HIGHBIT);
  ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n));

  do
    {
      mp_limb_t q, ul;

      ul = up[--n];
      udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv);
      qp[n] = q;
    }
  while (n > 0);

  return uh;
}

#elif DIV_QR_1N_METHOD == 2

mp_limb_t
mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t u1,
		   mp_limb_t d, mp_limb_t dinv)
{
  mp_limb_t B2;
  mp_limb_t u0, u2;
  mp_limb_t q0, q1;
  mp_limb_t p0, p1;
  mp_limb_t t;
  mp_size_t j;

  ASSERT (d & GMP_LIMB_HIGHBIT);
  ASSERT (n > 0);
  ASSERT (u1 < d);

  if (n == 1)
    {
      udiv_qrnnd_preinv (qp[0], u1, u1, up[0], d, dinv);
      return u1;
    }

  /* FIXME: Could be precomputed */
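  /* With dinv = floor((B^2-1)/d) - B, the product -d*dinv below equals
     B^2 mod d, which fits in a single limb since d has its high bit
     set.  */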
  B2 = -d*dinv;

  umul_ppmm (q1, q0, dinv, u1);
  umul_ppmm (p1, p0, B2, u1);
  q1 += u1;
  ASSERT (q1 >= u1);
  u0 = up[n-1];	/* Early read, to allow qp == up. */
  qp[n-1] = q1;

  add_mssaaaa (u2, u1, u0, u0, up[n-2], p1, p0);

  /* FIXME: Keep q1 in a variable between iterations, to reduce number
     of memory accesses. */
  for (j = n-2; j-- > 0; )
    {
      mp_limb_t q2, cy;

      /* Additions for the q update:
       *	+-------+
       *        |u1 * v |
       *        +---+---+
       *        | u1|
       *    +---+---+
       *    | 1 | v |  (conditional on u2)
       *    +---+---+
       *        | 1 |  (conditional on u0 + u2 B2 carry)
       *        +---+
       * +      | q0|
       *   -+---+---+---+
       *    | q2| q1| q0|
       *    +---+---+---+
      */
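      /* u2 is 0 or ~0 (the mask from add_mssaaaa), so -u2 supplies the
	 conditional 1 and u2 & dinv the conditional v term from the
	 diagram above.  */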
      umul_ppmm (p1, t, u1, dinv);
      add_ssaaaa (q2, q1, -u2, u2 & dinv, CNST_LIMB(0), u1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), p1);
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), q0);
      q0 = t;

      umul_ppmm (p1, p0, u1, B2);
      ADDC_LIMB (cy, u0, u0, u2 & B2);
      u0 -= (-cy) & d;

      /* Final q update */
      add_ssaaaa (q2, q1, q2, q1, CNST_LIMB(0), cy);
      qp[j+1] = q1;
      MPN_INCR_U (qp+j+2, n-j-2, q2);

      add_mssaaaa (u2, u1, u0, u0, up[j], p1, p0);
    }

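  /* At loop exit u2 is 0 or ~0; fold it into the remainder by
     subtracting d once when it is set, then correct once more if u1 is
     still >= d, counting both corrections in the quotient.  */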
  q1 = (u2 > 0);
  u1 -= (-q1) & d;

  t = (u1 >= d);
  q1 += t;
  u1 -= (-t) & d;

  udiv_qrnnd_preinv (t, u0, u1, u0, d, dinv);
  add_ssaaaa (q1, q0, q1, q0, CNST_LIMB(0), t);

  MPN_INCR_U (qp+1, n-1, q1);

  qp[0] = q0;
  return u0;
}

#else
#error Unknown DIV_QR_1N_METHOD
#endif
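
/* Below is a minimal usage sketch, not part of the original GMP source.
   It assumes the internal invert_limb() macro from gmp-impl.h for the
   precomputed inverse, and the guard macro DIV_QR_1N_PI1_EXAMPLE is
   hypothetical, serving only to keep the sketch out of normal builds.
   The divisor must be normalized (high bit set) and the high input limb
   uh must satisfy uh < d.  */
#ifdef DIV_QR_1N_PI1_EXAMPLE
#include <stdio.h>

int
main (void)
{
  mp_limb_t up[2] = { 123, 456 };	/* least significant limb first */
  mp_limb_t qp[2];
  mp_limb_t d = GMP_NUMB_HIGHBIT | 789;	/* normalized divisor */
  mp_limb_t dinv, r;

  invert_limb (dinv, d);		/* dinv = floor((B^2-1)/d) - B */
  r = mpn_div_qr_1n_pi1 (qp, up, 2, CNST_LIMB(0), d, dinv);

  printf ("low quotient limb %lu, remainder %lu\n",
	  (unsigned long) qp[0], (unsigned long) r);
  return 0;
}
#endif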