github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom43_mul.c

/* mpn_toom43_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3
   times as large as bn.  Or more accurately, bn < an < 2 bn.

   Contributed to the GNU project by Marco Bodrato.

   The idea of applying Toom to unbalanced multiplication is due to Marco
   Bodrato and Alberto Zanoni.

   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 2009 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */


#include "gmp.h"
#include "gmp-impl.h"

/* Evaluate in: -2, -1, 0, +1, +2, +inf

   <-s-><--n--><--n--><--n-->
    ___ ______ ______ ______
   |a3_|___a2_|___a1_|___a0_|
         |_b2_|___b1_|___b0_|
         <-t--><--n--><--n-->

   v0  =  a0             * b0              #   A(0)*B(0)
   v1  = (a0+ a1+ a2+ a3)*(b0+ b1+ b2)     #   A(1)*B(1)      ah  <= 3   bh <= 2
   vm1 = (a0- a1+ a2- a3)*(b0- b1+ b2)     #  A(-1)*B(-1)    |ah| <= 1  |bh| <= 1
   v2  = (a0+2a1+4a2+8a3)*(b0+2b1+4b2)     #   A(2)*B(2)      ah  <= 14  bh <= 6
   vm2 = (a0-2a1+4a2-8a3)*(b0-2b1+4b2)     #  A(-2)*B(-2)    |ah| <= 9  |bh| <= 4
   vinf=  a3             *          b2     # A(inf)*B(inf)
*/

void
mpn_toom43_mul (mp_ptr pp,
		mp_srcptr ap, mp_size_t an,
		mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
{
  mp_size_t n, s, t;
  enum toom6_flags flags;
  mp_limb_t cy;

#define a0  ap
#define a1  (ap + n)
#define a2  (ap + 2 * n)
#define a3  (ap + 3 * n)
#define b0  bp
#define b1  (bp + n)
#define b2  (bp + 2 * n)

  n = 1 + (3 * an >= 4 * bn ? (an - 1) >> 2 : (bn - 1) / (size_t) 3);

  s = an - 3 * n;
  t = bn - 2 * n;

  ASSERT (0 < s && s <= n);
  ASSERT (0 < t && t <= n);
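  /* A worked instance of the split (illustrative numbers): for an = 100,
     bn = 75 we have 3*an = 300 >= 4*bn, so n = 1 + ((100 - 1) >> 2) = 25,
     giving s = an - 3n = 25 and t = bn - 2n = 25.  A is thus cut into
     three n-limb pieces a0, a1, a2 plus an s-limb piece a3, and B into two
     n-limb pieces b0, b1 plus a t-limb piece b2, matching the diagram
     above. */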
  /* This is true whenever an >= 25 or bn >= 19, I think.  It
     guarantees that we can fit 5 values of size n+1 in the product
     area.  */
  ASSERT (s + t >= 5);

#define v0    pp				/* 2n */
#define vm1   (scratch)				/* 2n+1 */
#define v1    (pp + 2 * n)			/* 2n+1 */
#define vm2   (scratch + 2 * n + 1)		/* 2n+1 */
#define v2    (scratch + 4 * n + 2)		/* 2n+1 */
#define vinf  (pp + 5 * n)			/* s+t */
#define bs1   pp				/* n+1 */
#define bsm1  (scratch + 2 * n + 2)		/* n+1 */
#define asm1  (scratch + 3 * n + 3)		/* n+1 */
#define asm2  (scratch + 4 * n + 4)		/* n+1 */
#define bsm2  (pp + n + 1)			/* n+1 */
#define bs2   (pp + 2 * n + 2)			/* n+1 */
#define as2   (pp + 3 * n + 3)			/* n+1 */
#define as1   (pp + 4 * n + 4)			/* n+1 */

  /* Total scratch need is 6 * n + 3 + 1; we allocate one extra
     limb, because products will overwrite 2n+2 limbs. */

#define a0a2  scratch
#define b0b2  scratch
#define a1a3  asm1
#define b1d   bsm1

  /* Compute as2 and asm2.  */
  flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));

  /* Compute bs2 and bsm2.  */
  b1d[n] = mpn_lshift (b1d, b1, n, 1);			/*      2b1      */
  cy  = mpn_lshift (b0b2, b2, t, 2);			/* 4b2           */
  cy += mpn_add_n (b0b2, b0b2, b0, t);			/* 4b2      + b0 */
  if (t != n)
    cy = mpn_add_1 (b0b2 + t, b0 + t, n - t, cy);
  b0b2[n] = cy;

#if HAVE_NATIVE_mpn_add_n_sub_n
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
    {
      mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
    }
  else
    {
      mpn_add_n_sub_n (bs2, bsm2, b0b2, b1d, n+1);
    }
#else
  mpn_add_n (bs2, b0b2, b1d, n+1);
  if (mpn_cmp (b0b2, b1d, n+1) < 0)
    {
      mpn_sub_n (bsm2, b1d, b0b2, n+1);
      flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
    }
  else
    {
      mpn_sub_n (bsm2, b0b2, b1d, n+1);
    }
#endif

  /* Compute as1 and asm1.  */
  flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2)));
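  /* The +1/-1 evaluations of B below share the partial sum b0 + b2:
     B(1) is (b0 + b2) + b1 and B(-1) is (b0 + b2) - b1.  When b0 + b2 < b1
     the difference would go negative, so its absolute value b1 - (b0 + b2)
     is formed instead and the sign is xored into flags; toom6_vm1_neg then
     records the sign of the whole product vm1 = A(-1) * B(-1). */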
  /* Compute bs1 and bsm1.  */
  bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
#if HAVE_NATIVE_mpn_add_n_sub_n
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
      bs1[n] = cy >> 1;
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
    }
  else
    {
      cy = mpn_add_n_sub_n (bs1, bsm1, bsm1, b1, n);
      bs1[n] = bsm1[n] + (cy >> 1);
      bsm1[n] -= cy & 1;
    }
#else
  bs1[n] = bsm1[n] + mpn_add_n (bs1, bsm1, b1, n);
  if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
    {
      mpn_sub_n (bsm1, b1, bsm1, n);
      flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
    }
  else
    {
      bsm1[n] -= mpn_sub_n (bsm1, bsm1, b1, n);
    }
#endif

  ASSERT (as1[n] <= 3);
  ASSERT (bs1[n] <= 2);
  ASSERT (asm1[n] <= 1);
  ASSERT (bsm1[n] <= 1);
  ASSERT (as2[n] <= 14);
  ASSERT (bs2[n] <= 6);
  ASSERT (asm2[n] <= 9);
  ASSERT (bsm2[n] <= 4);

  /* vm1, 2n+1 limbs */
  mpn_mul_n (vm1, asm1, bsm1, n+1);  /* W4 */

  /* vm2, 2n+1 limbs */
  mpn_mul_n (vm2, asm2, bsm2, n+1);  /* W2 */

  /* v2, 2n+1 limbs */
  mpn_mul_n (v2, as2, bs2, n+1);  /* W1 */

  /* v1, 2n+1 limbs */
  mpn_mul_n (v1, as1, bs1, n+1);  /* W3 */

  /* vinf, s+t limbs */  /* W0 */
  if (s > t)  mpn_mul (vinf, a3, s, b2, t);
  else        mpn_mul (vinf, b2, t, a3, s);

  /* v0, 2n limbs */
  mpn_mul_n (v0, ap, bp, n);  /* W5 */

  mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s);

#undef v0
#undef vm1
#undef v1
#undef vm2
#undef v2
#undef vinf
#undef bs1
#undef bs2
#undef bsm1
#undef bsm2
#undef asm1
#undef asm2
/* #undef as1 */
/* #undef as2 */
#undef a0a2
#undef b0b2
#undef a1a3
#undef b1d
#undef a0
#undef a1
#undef a2
#undef a3
#undef b0
#undef b1
#undef b2
}
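As the header comment warns, mpn_toom43_mul is internal and only safe to
reach through documented interfaces.  The sketch below therefore exercises
the same unbalanced shape through the public mpn_mul entry point; the limb
counts (an = 4000, bn = 3000, so bn < an < 2 bn with an roughly 4/3 bn) are
illustrative assumptions, and whether the call actually dispatches into the
toom43 range depends on the build's tuned thresholds.  Link with -lgmp.

#include <stdio.h>
#include <stdlib.h>
#include <gmp.h>

int
main (void)
{
  mp_size_t an = 4000;	/* assumed sizes with bn < an < 2 bn, an ~ 4/3 bn */
  mp_size_t bn = 3000;
  mp_ptr ap = malloc (an * sizeof (mp_limb_t));
  mp_ptr bp = malloc (bn * sizeof (mp_limb_t));
  mp_ptr pp = malloc ((an + bn) * sizeof (mp_limb_t));	/* product: an+bn limbs */
  mp_limb_t msl;

  mpn_random (ap, an);	/* arbitrary test operands */
  mpn_random (bp, bn);

  /* Documented entry point; the larger operand must come first, and the
     result area must not overlap the inputs.  Returns the top limb. */
  msl = mpn_mul (pp, ap, an, bp, bn);

  printf ("most significant limb: %lu\n", (unsigned long) msl);
  free (ap); free (bp); free (pp);
  return 0;
}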