github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom52_mul.c (about) 1 /* mpn_toom52_mul -- Multiply {ap,an} and {bp,bn} where an is nominally 4/3 2 times as large as bn. Or more accurately, bn < an < 2 bn. 3 4 Contributed to the GNU project by Marco Bodrato. 5 6 The idea of applying toom to unbalanced multiplication is due to Marco 7 Bodrato and Alberto Zanoni. 8 9 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 10 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 11 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 12 13 Copyright 2009 Free Software Foundation, Inc. 14 15 This file is part of the GNU MP Library. 16 17 The GNU MP Library is free software; you can redistribute it and/or modify 18 it under the terms of either: 19 20 * the GNU Lesser General Public License as published by the Free 21 Software Foundation; either version 3 of the License, or (at your 22 option) any later version. 23 24 or 25 26 * the GNU General Public License as published by the Free Software 27 Foundation; either version 2 of the License, or (at your option) any 28 later version. 29 30 or both in parallel, as here. 31 32 The GNU MP Library is distributed in the hope that it will be useful, but 33 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 34 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 35 for more details. 36 37 You should have received copies of the GNU General Public License and the 38 GNU Lesser General Public License along with the GNU MP Library. If not, 39 see https://www.gnu.org/licenses/. */ 40 41 42 #include "gmp.h" 43 #include "gmp-impl.h" 44 45 /* Evaluate in: -2, -1, 0, +1, +2, +inf 46 47 <-s-><--n--><--n--><--n--><--n--> 48 ___ ______ ______ ______ ______ 49 |a4_|___a3_|___a2_|___a1_|___a0_| 50 |b1|___b0_| 51 <t-><--n--> 52 53 v0 = a0 * b0 # A(0)*B(0) 54 v1 = (a0+ a1+ a2+ a3+ a4)*(b0+ b1) # A(1)*B(1) ah <= 4 bh <= 1 55 vm1 = (a0- a1+ a2- a3+ a4)*(b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0 56 v2 = (a0+2a1+4a2+8a3+16a4)*(b0+2b1) # A(2)*B(2) ah <= 30 bh <= 2 57 vm2 = (a0-2a1+4a2-8a3+16a4)*(b0-2b1) # A(-2)*B(-2) |ah| <= 20 |bh|<= 1 58 vinf= a4 * b1 # A(inf)*B(inf) 59 60 Some slight optimization in evaluation are taken from the paper: 61 "Towards Optimal Toom-Cook Multiplication for Univariate and 62 Multivariate Polynomials in Characteristic 2 and 0." 63 */ 64 65 void 66 mpn_toom52_mul (mp_ptr pp, 67 mp_srcptr ap, mp_size_t an, 68 mp_srcptr bp, mp_size_t bn, mp_ptr scratch) 69 { 70 mp_size_t n, s, t; 71 enum toom6_flags flags; 72 73 #define a0 ap 74 #define a1 (ap + n) 75 #define a2 (ap + 2 * n) 76 #define a3 (ap + 3 * n) 77 #define a4 (ap + 4 * n) 78 #define b0 bp 79 #define b1 (bp + n) 80 81 n = 1 + (2 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) >> 1); 82 83 s = an - 4 * n; 84 t = bn - n; 85 86 ASSERT (0 < s && s <= n); 87 ASSERT (0 < t && t <= n); 88 89 /* Ensures that 5 values of n+1 limbs each fits in the product area. 90 Borderline cases are an = 32, bn = 8, n = 7, and an = 36, bn = 9, 91 n = 8. */ 92 ASSERT (s+t >= 5); 93 94 #define v0 pp /* 2n */ 95 #define vm1 (scratch) /* 2n+1 */ 96 #define v1 (pp + 2 * n) /* 2n+1 */ 97 #define vm2 (scratch + 2 * n + 1) /* 2n+1 */ 98 #define v2 (scratch + 4 * n + 2) /* 2n+1 */ 99 #define vinf (pp + 5 * n) /* s+t */ 100 #define bs1 pp /* n+1 */ 101 #define bsm1 (scratch + 2 * n + 2) /* n */ 102 #define asm1 (scratch + 3 * n + 3) /* n+1 */ 103 #define asm2 (scratch + 4 * n + 4) /* n+1 */ 104 #define bsm2 (pp + n + 1) /* n+1 */ 105 #define bs2 (pp + 2 * n + 2) /* n+1 */ 106 #define as2 (pp + 3 * n + 3) /* n+1 */ 107 #define as1 (pp + 4 * n + 4) /* n+1 */ 108 109 /* Scratch need is 6 * n + 3 + 1. We need one extra limb, because 110 products will overwrite 2n+2 limbs. */ 111 112 #define a0a2 scratch 113 #define a1a3 asm1 114 115 /* Compute as2 and asm2. */ 116 flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3)); 117 118 /* Compute bs1 and bsm1. */ 119 if (t == n) 120 { 121 #if HAVE_NATIVE_mpn_add_n_sub_n 122 mp_limb_t cy; 123 124 if (mpn_cmp (b0, b1, n) < 0) 125 { 126 cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n); 127 flags = (enum toom6_flags) (flags ^ toom6_vm1_neg); 128 } 129 else 130 { 131 cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n); 132 } 133 bs1[n] = cy >> 1; 134 #else 135 bs1[n] = mpn_add_n (bs1, b0, b1, n); 136 if (mpn_cmp (b0, b1, n) < 0) 137 { 138 mpn_sub_n (bsm1, b1, b0, n); 139 flags = (enum toom6_flags) (flags ^ toom6_vm1_neg); 140 } 141 else 142 { 143 mpn_sub_n (bsm1, b0, b1, n); 144 } 145 #endif 146 } 147 else 148 { 149 bs1[n] = mpn_add (bs1, b0, n, b1, t); 150 if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0) 151 { 152 mpn_sub_n (bsm1, b1, b0, t); 153 MPN_ZERO (bsm1 + t, n - t); 154 flags = (enum toom6_flags) (flags ^ toom6_vm1_neg); 155 } 156 else 157 { 158 mpn_sub (bsm1, b0, n, b1, t); 159 } 160 } 161 162 /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1 */ 163 mpn_add (bs2, bs1, n+1, b1, t); 164 if (flags & toom6_vm1_neg ) 165 { 166 bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t); 167 flags = (enum toom6_flags) (flags ^ toom6_vm2_neg); 168 } 169 else 170 { 171 bsm2[n] = 0; 172 if (t == n) 173 { 174 if (mpn_cmp (bsm1, b1, n) < 0) 175 { 176 mpn_sub_n (bsm2, b1, bsm1, n); 177 flags = (enum toom6_flags) (flags ^ toom6_vm2_neg); 178 } 179 else 180 { 181 mpn_sub_n (bsm2, bsm1, b1, n); 182 } 183 } 184 else 185 { 186 if (mpn_zero_p (bsm1 + t, n - t) && mpn_cmp (bsm1, b1, t) < 0) 187 { 188 mpn_sub_n (bsm2, b1, bsm1, t); 189 MPN_ZERO (bsm2 + t, n - t); 190 flags = (enum toom6_flags) (flags ^ toom6_vm2_neg); 191 } 192 else 193 { 194 mpn_sub (bsm2, bsm1, n, b1, t); 195 } 196 } 197 } 198 199 /* Compute as1 and asm1. */ 200 flags = (enum toom6_flags) (flags ^ (toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2))); 201 202 ASSERT (as1[n] <= 4); 203 ASSERT (bs1[n] <= 1); 204 ASSERT (asm1[n] <= 2); 205 /* ASSERT (bsm1[n] <= 1); */ 206 ASSERT (as2[n] <=30); 207 ASSERT (bs2[n] <= 2); 208 ASSERT (asm2[n] <= 20); 209 ASSERT (bsm2[n] <= 1); 210 211 /* vm1, 2n+1 limbs */ 212 mpn_mul (vm1, asm1, n+1, bsm1, n); /* W4 */ 213 214 /* vm2, 2n+1 limbs */ 215 mpn_mul_n (vm2, asm2, bsm2, n+1); /* W2 */ 216 217 /* v2, 2n+1 limbs */ 218 mpn_mul_n (v2, as2, bs2, n+1); /* W1 */ 219 220 /* v1, 2n+1 limbs */ 221 mpn_mul_n (v1, as1, bs1, n+1); /* W3 */ 222 223 /* vinf, s+t limbs */ /* W0 */ 224 if (s > t) mpn_mul (vinf, a4, s, b1, t); 225 else mpn_mul (vinf, b1, t, a4, s); 226 227 /* v0, 2n limbs */ 228 mpn_mul_n (v0, ap, bp, n); /* W5 */ 229 230 mpn_toom_interpolate_6pts (pp, n, flags, vm1, vm2, v2, t + s); 231 232 #undef v0 233 #undef vm1 234 #undef v1 235 #undef vm2 236 #undef v2 237 #undef vinf 238 #undef bs1 239 #undef bs2 240 #undef bsm1 241 #undef bsm2 242 #undef asm1 243 #undef asm2 244 #undef as1 245 #undef as2 246 #undef a0a2 247 #undef b0b2 248 #undef a1a3 249 #undef a0 250 #undef a1 251 #undef a2 252 #undef a3 253 #undef b0 254 #undef b1 255 #undef b2 256 257 }