github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_6pts.c (about) 1 /* mpn_toom_interpolate_6pts -- Interpolate for toom43, 52 2 3 Contributed to the GNU project by Marco Bodrato. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2009, 2010, 2012 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 #include "gmp.h" 38 #include "gmp-impl.h" 39 40 /* For odd divisors, mpn_divexact_1 works fine with two's complement. */ 41 #ifndef mpn_divexact_by3 42 #if HAVE_NATIVE_mpn_pi1_bdiv_q_1 && MODLIMB_INVERSE_3 43 #define mpn_divexact_by3(dst,src,size) mpn_pi1_bdiv_q_1(dst,src,size,3,MODLIMB_INVERSE_3,0) 44 #else 45 #define mpn_divexact_by3(dst,src,size) mpn_divexact_1(dst,src,size,3) 46 #endif 47 #endif 48 49 /* Interpolation for Toom-3.5, using the evaluation points: infinity, 50 1, -1, 2, -2. More precisely, we want to compute 51 f(2^(GMP_NUMB_BITS * n)) for a polynomial f of degree 5, given the 52 six values 53 54 w5 = f(0), 55 w4 = f(-1), 56 w3 = f(1) 57 w2 = f(-2), 58 w1 = f(2), 59 w0 = limit at infinity of f(x) / x^5, 60 61 The result is stored in {pp, 5*n + w0n}. At entry, w5 is stored at 62 {pp, 2n}, w3 is stored at {pp + 2n, 2n+1}, and w0 is stored at 63 {pp + 5n, w0n}. The other values are 2n + 1 limbs each (with most 64 significant limbs small). f(-1) and f(-2) may be negative, signs 65 determined by the flag bits. All intermediate results are positive. 66 Inputs are destroyed. 67 68 Interpolation sequence was taken from the paper: "Integer and 69 Polynomial Multiplication: Towards Optimal Toom-Cook Matrices". 70 Some slight variations were introduced: adaptation to "gmp 71 instruction set", and a final saving of an operation by interlacing 72 interpolation and recomposition phases. 73 */ 74 75 void 76 mpn_toom_interpolate_6pts (mp_ptr pp, mp_size_t n, enum toom6_flags flags, 77 mp_ptr w4, mp_ptr w2, mp_ptr w1, 78 mp_size_t w0n) 79 { 80 mp_limb_t cy; 81 /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */ 82 mp_limb_t cy4, cy6, embankment; 83 84 ASSERT( n > 0 ); 85 ASSERT( 2*n >= w0n && w0n > 0 ); 86 87 #define w5 pp /* 2n */ 88 #define w3 (pp + 2 * n) /* 2n+1 */ 89 #define w0 (pp + 5 * n) /* w0n */ 90 91 /* Interpolate with sequence: 92 W2 =(W1 - W2)>>2 93 W1 =(W1 - W5)>>1 94 W1 =(W1 - W2)>>1 95 W4 =(W3 - W4)>>1 96 W2 =(W2 - W4)/3 97 W3 = W3 - W4 - W5 98 W1 =(W1 - W3)/3 99 // Last steps are mixed with recomposition... 100 W2 = W2 - W0<<2 101 W4 = W4 - W2 102 W3 = W3 - W1 103 W2 = W2 - W0 104 */ 105 106 /* W2 =(W1 - W2)>>2 */ 107 if (flags & toom6_vm2_neg) 108 mpn_add_n (w2, w1, w2, 2 * n + 1); 109 else 110 mpn_sub_n (w2, w1, w2, 2 * n + 1); 111 mpn_rshift (w2, w2, 2 * n + 1, 2); 112 113 /* W1 =(W1 - W5)>>1 */ 114 w1[2*n] -= mpn_sub_n (w1, w1, w5, 2*n); 115 mpn_rshift (w1, w1, 2 * n + 1, 1); 116 117 /* W1 =(W1 - W2)>>1 */ 118 #if HAVE_NATIVE_mpn_rsh1sub_n 119 mpn_rsh1sub_n (w1, w1, w2, 2 * n + 1); 120 #else 121 mpn_sub_n (w1, w1, w2, 2 * n + 1); 122 mpn_rshift (w1, w1, 2 * n + 1, 1); 123 #endif 124 125 /* W4 =(W3 - W4)>>1 */ 126 if (flags & toom6_vm1_neg) 127 { 128 #if HAVE_NATIVE_mpn_rsh1add_n 129 mpn_rsh1add_n (w4, w3, w4, 2 * n + 1); 130 #else 131 mpn_add_n (w4, w3, w4, 2 * n + 1); 132 mpn_rshift (w4, w4, 2 * n + 1, 1); 133 #endif 134 } 135 else 136 { 137 #if HAVE_NATIVE_mpn_rsh1sub_n 138 mpn_rsh1sub_n (w4, w3, w4, 2 * n + 1); 139 #else 140 mpn_sub_n (w4, w3, w4, 2 * n + 1); 141 mpn_rshift (w4, w4, 2 * n + 1, 1); 142 #endif 143 } 144 145 /* W2 =(W2 - W4)/3 */ 146 mpn_sub_n (w2, w2, w4, 2 * n + 1); 147 mpn_divexact_by3 (w2, w2, 2 * n + 1); 148 149 /* W3 = W3 - W4 - W5 */ 150 mpn_sub_n (w3, w3, w4, 2 * n + 1); 151 w3[2 * n] -= mpn_sub_n (w3, w3, w5, 2 * n); 152 153 /* W1 =(W1 - W3)/3 */ 154 mpn_sub_n (w1, w1, w3, 2 * n + 1); 155 mpn_divexact_by3 (w1, w1, 2 * n + 1); 156 157 /* 158 [1 0 0 0 0 0; 159 0 1 0 0 0 0; 160 1 0 1 0 0 0; 161 0 1 0 1 0 0; 162 1 0 1 0 1 0; 163 0 0 0 0 0 1] 164 165 pp[] prior to operations: 166 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 167 168 summation scheme for remaining operations: 169 |______________5|n_____4|n_____3|n_____2|n______|n______|pp 170 |_H w0__|_L w0__|______||_H w3__|_L w3__|_H w5__|_L w5__| 171 || H w4 | L w4 | 172 || H w2 | L w2 | 173 || H w1 | L w1 | 174 ||-H w1 |-L w1 | 175 |-H w0 |-L w0 ||-H w2 |-L w2 | 176 */ 177 cy = mpn_add_n (pp + n, pp + n, w4, 2 * n + 1); 178 MPN_INCR_U (pp + 3 * n + 1, n, cy); 179 180 /* W2 -= W0<<2 */ 181 #if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1 182 #if HAVE_NATIVE_mpn_sublsh2_n_ip1 183 cy = mpn_sublsh2_n_ip1 (w2, w0, w0n); 184 #else 185 cy = mpn_sublsh_n (w2, w2, w0, w0n, 2); 186 #endif 187 #else 188 /* {W4,2*n+1} is now free and can be overwritten. */ 189 cy = mpn_lshift(w4, w0, w0n, 2); 190 cy+= mpn_sub_n(w2, w2, w4, w0n); 191 #endif 192 MPN_DECR_U (w2 + w0n, 2 * n + 1 - w0n, cy); 193 194 /* W4L = W4L - W2L */ 195 cy = mpn_sub_n (pp + n, pp + n, w2, n); 196 MPN_DECR_U (w3, 2 * n + 1, cy); 197 198 /* W3H = W3H + W2L */ 199 cy4 = w3[2 * n] + mpn_add_n (pp + 3 * n, pp + 3 * n, w2, n); 200 /* W1L + W2H */ 201 cy = w2[2 * n] + mpn_add_n (pp + 4 * n, w1, w2 + n, n); 202 MPN_INCR_U (w1 + n, n + 1, cy); 203 204 /* W0 = W0 + W1H */ 205 if (LIKELY (w0n > n)) 206 cy6 = w1[2 * n] + mpn_add_n (w0, w0, w1 + n, n); 207 else 208 cy6 = mpn_add_n (w0, w0, w1 + n, w0n); 209 210 /* 211 summation scheme for the next operation: 212 |...____5|n_____4|n_____3|n_____2|n______|n______|pp 213 |...w0___|_w1_w2_|_H w3__|_L w3__|_H w5__|_L w5__| 214 ...-w0___|-w1_w2 | 215 */ 216 /* if(LIKELY(w0n>n)) the two operands below DO overlap! */ 217 cy = mpn_sub_n (pp + 2 * n, pp + 2 * n, pp + 4 * n, n + w0n); 218 219 /* embankment is a "dirty trick" to avoid carry/borrow propagation 220 beyond allocated memory */ 221 embankment = w0[w0n - 1] - 1; 222 w0[w0n - 1] = 1; 223 if (LIKELY (w0n > n)) { 224 if (cy4 > cy6) 225 MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6); 226 else 227 MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4); 228 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy); 229 MPN_INCR_U (w0 + n, w0n - n, cy6); 230 } else { 231 MPN_INCR_U (pp + 4 * n, w0n + n, cy4); 232 MPN_DECR_U (pp + 3 * n + w0n, 2 * n, cy + cy6); 233 } 234 w0[w0n - 1] += embankment; 235 236 #undef w5 237 #undef w3 238 #undef w0 239 240 }