github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom_interpolate_5pts.c (about) 1 /* mpn_toom_interpolate_5pts -- Interpolate for toom3, 33, 42. 2 3 Contributed to the GNU project by Robert Harley. 4 Improvements by Paul Zimmermann and Marco Bodrato. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2000-2003, 2005-2007, 2009 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 void 42 mpn_toom_interpolate_5pts (mp_ptr c, mp_ptr v2, mp_ptr vm1, 43 mp_size_t k, mp_size_t twor, int sa, 44 mp_limb_t vinf0) 45 { 46 mp_limb_t cy, saved; 47 mp_size_t twok; 48 mp_size_t kk1; 49 mp_ptr c1, v1, c3, vinf; 50 51 twok = k + k; 52 kk1 = twok + 1; 53 54 c1 = c + k; 55 v1 = c1 + k; 56 c3 = v1 + k; 57 vinf = c3 + k; 58 59 #define v0 (c) 60 /* (1) v2 <- v2-vm1 < v2+|vm1|, (16 8 4 2 1) - (1 -1 1 -1 1) = 61 thus 0 <= v2 < 50*B^(2k) < 2^6*B^(2k) (15 9 3 3 0) 62 */ 63 if (sa) 64 ASSERT_NOCARRY (mpn_add_n (v2, v2, vm1, kk1)); 65 else 66 ASSERT_NOCARRY (mpn_sub_n (v2, v2, vm1, kk1)); 67 68 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 69 v0 v1 hi(vinf) |vm1| v2-vm1 EMPTY */ 70 71 ASSERT_NOCARRY (mpn_divexact_by3 (v2, v2, kk1)); /* v2 <- v2 / 3 */ 72 /* (5 3 1 1 0)*/ 73 74 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 75 v0 v1 hi(vinf) |vm1| (v2-vm1)/3 EMPTY */ 76 77 /* (2) vm1 <- tm1 := (v1 - vm1) / 2 [(1 1 1 1 1) - (1 -1 1 -1 1)] / 2 = 78 tm1 >= 0 (0 1 0 1 0) 79 No carry comes out from {v1, kk1} +/- {vm1, kk1}, 80 and the division by two is exact. 81 If (sa!=0) the sign of vm1 is negative */ 82 if (sa) 83 { 84 #ifdef HAVE_NATIVE_mpn_rsh1add_n 85 mpn_rsh1add_n (vm1, v1, vm1, kk1); 86 #else 87 ASSERT_NOCARRY (mpn_add_n (vm1, v1, vm1, kk1)); 88 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 89 #endif 90 } 91 else 92 { 93 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 94 mpn_rsh1sub_n (vm1, v1, vm1, kk1); 95 #else 96 ASSERT_NOCARRY (mpn_sub_n (vm1, v1, vm1, kk1)); 97 ASSERT_NOCARRY (mpn_rshift (vm1, vm1, kk1, 1)); 98 #endif 99 } 100 101 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 102 v0 v1 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 103 104 /* (3) v1 <- t1 := v1 - v0 (1 1 1 1 1) - (0 0 0 0 1) = (1 1 1 1 0) 105 t1 >= 0 106 */ 107 vinf[0] -= mpn_sub_n (v1, v1, c, twok); 108 109 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 110 v0 v1-v0 hi(vinf) tm1 (v2-vm1)/3 EMPTY */ 111 112 /* (4) v2 <- t2 := ((v2-vm1)/3-t1)/2 = (v2-vm1-3*t1)/6 113 t2 >= 0 [(5 3 1 1 0) - (1 1 1 1 0)]/2 = (2 1 0 0 0) 114 */ 115 #ifdef HAVE_NATIVE_mpn_rsh1sub_n 116 mpn_rsh1sub_n (v2, v2, v1, kk1); 117 #else 118 ASSERT_NOCARRY (mpn_sub_n (v2, v2, v1, kk1)); 119 ASSERT_NOCARRY (mpn_rshift (v2, v2, kk1, 1)); 120 #endif 121 122 /* {c,2k} {c+2k,2k+1} {c+4k+1,2r-1} {t,2k+1} {t+2k+1,2k+1} {t+4k+2,2r} 123 v0 v1-v0 hi(vinf) tm1 (v2-vm1-3t1)/6 EMPTY */ 124 125 /* (5) v1 <- t1-tm1 (1 1 1 1 0) - (0 1 0 1 0) = (1 0 1 0 0) 126 result is v1 >= 0 127 */ 128 ASSERT_NOCARRY (mpn_sub_n (v1, v1, vm1, kk1)); 129 130 /* We do not need to read the value in vm1, so we add it in {c+k, ...} */ 131 cy = mpn_add_n (c1, c1, vm1, kk1); 132 MPN_INCR_U (c3 + 1, twor + k - 1, cy); /* 2n-(3k+1) = 2r+k-1 */ 133 /* Memory allocated for vm1 is now free, it can be recycled ...*/ 134 135 /* (6) v2 <- v2 - 2*vinf, (2 1 0 0 0) - 2*(1 0 0 0 0) = (0 1 0 0 0) 136 result is v2 >= 0 */ 137 saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */ 138 vinf[0] = vinf0; /* Set the right value for vinf0 */ 139 #ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1 140 cy = mpn_sublsh1_n_ip1 (v2, vinf, twor); 141 #else 142 /* Overwrite unused vm1 */ 143 cy = mpn_lshift (vm1, vinf, twor, 1); 144 cy += mpn_sub_n (v2, v2, vm1, twor); 145 #endif 146 MPN_DECR_U (v2 + twor, kk1 - twor, cy); 147 148 /* Current matrix is 149 [1 0 0 0 0; vinf 150 0 1 0 0 0; v2 151 1 0 1 0 0; v1 152 0 1 0 1 0; vm1 153 0 0 0 0 1] v0 154 Some values already are in-place (we added vm1 in the correct position) 155 | vinf| v1 | v0 | 156 | vm1 | 157 One still is in a separated area 158 | +v2 | 159 We have to compute v1-=vinf; vm1 -= v2, 160 |-vinf| 161 | -v2 | 162 Carefully reordering operations we can avoid to compute twice the sum 163 of the high half of v2 plus the low half of vinf. 164 */ 165 166 /* Add the high half of t2 in {vinf} */ 167 if ( LIKELY(twor > k + 1) ) { /* This is the expected flow */ 168 cy = mpn_add_n (vinf, vinf, v2 + k, k + 1); 169 MPN_INCR_U (c3 + kk1, twor - k - 1, cy); /* 2n-(5k+1) = 2r-k-1 */ 170 } else { /* triggered only by very unbalanced cases like 171 (k+k+(k-2))x(k+k+1) , should be handled by toom32 */ 172 ASSERT_NOCARRY (mpn_add_n (vinf, vinf, v2 + k, twor)); 173 } 174 /* (7) v1 <- v1 - vinf, (1 0 1 0 0) - (1 0 0 0 0) = (0 0 1 0 0) 175 result is >= 0 */ 176 /* Side effect: we also subtracted (high half) vm1 -= v2 */ 177 cy = mpn_sub_n (v1, v1, vinf, twor); /* vinf is at most twor long. */ 178 vinf0 = vinf[0]; /* Save again the right value for vinf0 */ 179 vinf[0] = saved; 180 MPN_DECR_U (v1 + twor, kk1 - twor, cy); /* Treat the last bytes. */ 181 182 /* (8) vm1 <- vm1-v2 (0 1 0 1 0) - (0 1 0 0 0) = (0 0 0 1 0) 183 Operate only on the low half. 184 */ 185 cy = mpn_sub_n (c1, c1, v2, k); 186 MPN_DECR_U (v1, kk1, cy); 187 188 /********************* Beginning the final phase **********************/ 189 190 /* Most of the recomposition was done */ 191 192 /* add t2 in {c+3k, ...}, but only the low half */ 193 cy = mpn_add_n (c3, c3, v2, k); 194 vinf[0] += cy; 195 ASSERT(vinf[0] >= cy); /* No carry */ 196 MPN_INCR_U (vinf, twor, vinf0); /* Add vinf0, propagate carry. */ 197 198 #undef v0 199 }