github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_8pts.c (about) 1 /* mpn_toom_interpolate_8pts -- Interpolate for toom54, 63, 72. 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2011, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 #include "gmp.h" 38 #include "gmp-impl.h" 39 40 #define BINVERT_3 MODLIMB_INVERSE_3 41 42 #define BINVERT_15 \ 43 ((((GMP_NUMB_MAX >> (GMP_NUMB_BITS % 4)) / 15) * 14 * 16 & GMP_NUMB_MAX) + 15) 44 45 #define BINVERT_45 ((BINVERT_15 * BINVERT_3) & GMP_NUMB_MASK) 46 47 #ifndef mpn_divexact_by3 48 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 49 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,BINVERT_3,0) 50 #else 51 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 52 #endif 53 #endif 54 55 #ifndef mpn_divexact_by45 56 #if GMP_NUMB_BITS % 12 == 0 57 #define mpn_divexact_by45(dst,src,size) \ 58 (63 & 19 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 45))) 59 #else 60 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 61 #define mpn_divexact_by45(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,45,BINVERT_45,0) 62 #else 63 #define mpn_divexact_by45(dst,src,size) mpn_divexact_1(dst,src,size,45) 64 #endif 65 #endif 66 #endif 67 68 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 69 #define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n) 70 #else 71 #define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws) 72 #endif 73 74 #if HAVE_NATIVE_mpn_sublsh_n 75 #define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s) 76 #else 77 static mp_limb_t 78 DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws) 79 { 80 #if USE_MUL_1 && 0 81 return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s)); 82 #else 83 mp_limb_t __cy; 84 __cy = mpn_lshift (ws,src,n,s); 85 return __cy + mpn_sub_n (dst,dst,ws,n); 86 #endif 87 } 88 #endif 89 90 91 #if HAVE_NATIVE_mpn_subrsh 92 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) mpn_subrsh (dst,nd,src,ns,s) 93 #else 94 /* This is not a correct definition, it assumes no carry */ 95 #define DO_mpn_subrsh(dst,nd,src,ns,s,ws) \ 96 do { \ 97 mp_limb_t __cy; \ 98 MPN_DECR_U (dst, nd, src[0] >> s); \ 99 __cy = DO_mpn_sublsh_n (dst, src + 1, ns - 1, GMP_NUMB_BITS - s, ws); \ 100 MPN_DECR_U (dst + ns - 1, nd - ns + 1, __cy); \ 101 } while (0) 102 #endif 103 104 /* Interpolation for Toom-4.5 (or Toom-4), using the evaluation 105 points: infinity(4.5 only), 4, -4, 2, -2, 1, -1, 0. More precisely, 106 we want to compute f(2^(GMP_NUMB_BITS * n)) for a polynomial f of 107 degree 7 (or 6), given the 8 (rsp. 7) values: 108 109 r1 = limit at infinity of f(x) / x^7, 110 r2 = f(4), 111 r3 = f(-4), 112 r4 = f(2), 113 r5 = f(-2), 114 r6 = f(1), 115 r7 = f(-1), 116 r8 = f(0). 117 118 All couples of the form f(n),f(-n) must be already mixed with 119 toom_couple_handling(f(n),...,f(-n),...) 120 121 The result is stored in {pp, spt + 7*n (or 6*n)}. 122 At entry, r8 is stored at {pp, 2n}, 123 r5 is stored at {pp + 3n, 3n + 1}. 124 125 The other values are 2n+... limbs each (with most significant limbs small). 126 127 All intermediate results are positive. 128 Inputs are destroyed. 129 */ 130 131 void 132 mpn_toom_interpolate_8pts (mp_ptr pp, mp_size_t n, 133 mp_ptr r3, mp_ptr r7, 134 mp_size_t spt, mp_ptr ws) 135 { 136 mp_limb_signed_t cy; 137 mp_ptr r5, r1; 138 r5 = (pp + 3 * n); /* 3n+1 */ 139 r1 = (pp + 7 * n); /* spt */ 140 141 /******************************* interpolation *****************************/ 142 143 DO_mpn_subrsh(r3+n, 2 * n + 1, pp, 2 * n, 4, ws); 144 cy = DO_mpn_sublsh_n (r3, r1, spt, 12, ws); 145 MPN_DECR_U (r3 + spt, 3 * n + 1 - spt, cy); 146 147 DO_mpn_subrsh(r5+n, 2 * n + 1, pp, 2 * n, 2, ws); 148 cy = DO_mpn_sublsh_n (r5, r1, spt, 6, ws); 149 MPN_DECR_U (r5 + spt, 3 * n + 1 - spt, cy); 150 151 r7[3*n] -= mpn_sub_n (r7+n, r7+n, pp, 2 * n); 152 cy = mpn_sub_n (r7, r7, r1, spt); 153 MPN_DECR_U (r7 + spt, 3 * n + 1 - spt, cy); 154 155 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 156 ASSERT_NOCARRY(mpn_rshift(r3, r3, 3 * n + 1, 2)); 157 158 ASSERT_NOCARRY(mpn_sub_n (r5, r5, r7, 3 * n + 1)); 159 160 ASSERT_NOCARRY(mpn_sub_n (r3, r3, r5, 3 * n + 1)); 161 162 mpn_divexact_by45 (r3, r3, 3 * n + 1); 163 164 ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1)); 165 166 ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws)); 167 168 /* last interpolation steps... */ 169 /* ... are mixed with recomposition */ 170 171 /***************************** recomposition *******************************/ 172 /* 173 pp[] prior to operations: 174 |_H r1|_L r1|____||_H r5|_M_r5|_L r5|_____|_H r8|_L r8|pp 175 176 summation scheme for remaining operations: 177 |____8|n___7|n___6|n___5|n___4|n___3|n___2|n____|n____|pp 178 |_H r1|_L r1|____||_H*r5|_M r5|_L r5|_____|_H_r8|_L r8|pp 179 ||_H r3|_M r3|_L*r3| 180 ||_H_r7|_M_r7|_L_r7| 181 ||-H r3|-M r3|-L*r3| 182 ||-H*r5|-M_r5|-L_r5| 183 */ 184 185 cy = mpn_add_n (pp + n, pp + n, r7, n); /* Hr8+Lr7-Lr5 */ 186 cy-= mpn_sub_n (pp + n, pp + n, r5, n); 187 if (0 > cy) 188 MPN_DECR_U (r7 + n, 2*n + 1, 1); 189 else 190 MPN_INCR_U (r7 + n, 2*n + 1, cy); 191 192 cy = mpn_sub_n (pp + 2*n, r7 + n, r5 + n, n); /* Mr7-Mr5 */ 193 MPN_DECR_U (r7 + 2*n, n + 1, cy); 194 195 cy = mpn_add_n (pp + 3*n, r5, r7+ 2*n, n+1); /* Hr7+Lr5 */ 196 r5[3*n]+= mpn_add_n (r5 + 2*n, r5 + 2*n, r3, n); /* Hr5+Lr3 */ 197 cy-= mpn_sub_n (pp + 3*n, pp + 3*n, r5 + 2*n, n+1); /* Hr7-Hr5+Lr5-Lr3 */ 198 if (UNLIKELY(0 > cy)) 199 MPN_DECR_U (r5 + n + 1, 2*n, 1); 200 else 201 MPN_INCR_U (r5 + n + 1, 2*n, cy); 202 203 ASSERT_NOCARRY(mpn_sub_n(pp + 4*n, r5 + n, r3 + n, 2*n +1)); /* Mr5-Mr3,Hr5-Hr3 */ 204 205 cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]); 206 MPN_INCR_U (r3 + 2*n, n + 1, cy); 207 cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n); 208 if (LIKELY(spt != n)) 209 MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]); 210 else 211 ASSERT (r3[3*n] + cy == 0); 212 }