github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/toom22_mul.c (about) 1 /* mpn_toom22_mul -- Multiply {ap,an} and {bp,bn} where an >= bn. Or more 2 accurately, bn <= an < 2bn. 3 4 Contributed to the GNU project by Torbjorn Granlund. 5 6 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 7 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 8 GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 9 10 Copyright 2006-2010, 2012, 2014 Free Software Foundation, Inc. 11 12 This file is part of the GNU MP Library. 13 14 The GNU MP Library is free software; you can redistribute it and/or modify 15 it under the terms of either: 16 17 * the GNU Lesser General Public License as published by the Free 18 Software Foundation; either version 3 of the License, or (at your 19 option) any later version. 20 21 or 22 23 * the GNU General Public License as published by the Free Software 24 Foundation; either version 2 of the License, or (at your option) any 25 later version. 26 27 or both in parallel, as here. 28 29 The GNU MP Library is distributed in the hope that it will be useful, but 30 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 31 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 32 for more details. 33 34 You should have received copies of the GNU General Public License and the 35 GNU Lesser General Public License along with the GNU MP Library. If not, 36 see https://www.gnu.org/licenses/. */ 37 38 39 #include "gmp.h" 40 #include "gmp-impl.h" 41 42 /* Evaluate in: -1, 0, +inf 43 44 <-s--><--n--> 45 ____ ______ 46 |_a1_|___a0_| 47 |b1_|___b0_| 48 <-t-><--n--> 49 50 v0 = a0 * b0 # A(0)*B(0) 51 vm1 = (a0- a1)*(b0- b1) # A(-1)*B(-1) 52 vinf= a1 * b1 # A(inf)*B(inf) 53 */ 54 55 #if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY 56 #define MAYBE_mul_toom22 1 57 #else 58 #define MAYBE_mul_toom22 \ 59 (MUL_TOOM33_THRESHOLD >= 2 * MUL_TOOM22_THRESHOLD) 60 #endif 61 62 #define TOOM22_MUL_N_REC(p, a, b, n, ws) \ 63 do { \ 64 if (! MAYBE_mul_toom22 \ 65 || BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \ 66 mpn_mul_basecase (p, a, n, b, n); \ 67 else \ 68 mpn_toom22_mul (p, a, n, b, n, ws); \ 69 } while (0) 70 71 /* Normally, this calls mul_basecase or toom22_mul. But when when the fraction 72 MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD is large, an initially small 73 relative unbalance will become a larger and larger relative unbalance with 74 each recursion (the difference s-t will be invariant over recursive calls). 75 Therefore, we need to call toom32_mul. FIXME: Suppress depending on 76 MUL_TOOM33_THRESHOLD / MUL_TOOM22_THRESHOLD and on MUL_TOOM22_THRESHOLD. */ 77 #define TOOM22_MUL_REC(p, a, an, b, bn, ws) \ 78 do { \ 79 if (! MAYBE_mul_toom22 \ 80 || BELOW_THRESHOLD (bn, MUL_TOOM22_THRESHOLD)) \ 81 mpn_mul_basecase (p, a, an, b, bn); \ 82 else if (4 * an < 5 * bn) \ 83 mpn_toom22_mul (p, a, an, b, bn, ws); \ 84 else \ 85 mpn_toom32_mul (p, a, an, b, bn, ws); \ 86 } while (0) 87 88 void 89 mpn_toom22_mul (mp_ptr pp, 90 mp_srcptr ap, mp_size_t an, 91 mp_srcptr bp, mp_size_t bn, 92 mp_ptr scratch) 93 { 94 const int __gmpn_cpuvec_initialized = 1; 95 mp_size_t n, s, t; 96 int vm1_neg; 97 mp_limb_t cy, cy2; 98 mp_ptr asm1; 99 mp_ptr bsm1; 100 101 #define a0 ap 102 #define a1 (ap + n) 103 #define b0 bp 104 #define b1 (bp + n) 105 106 s = an >> 1; 107 n = an - s; 108 t = bn - n; 109 110 ASSERT (an >= bn); 111 112 ASSERT (0 < s && s <= n && s >= n - 1); 113 ASSERT (0 < t && t <= s); 114 115 asm1 = pp; 116 bsm1 = pp + n; 117 118 vm1_neg = 0; 119 120 /* Compute asm1. */ 121 if (s == n) 122 { 123 if (mpn_cmp (a0, a1, n) < 0) 124 { 125 mpn_sub_n (asm1, a1, a0, n); 126 vm1_neg = 1; 127 } 128 else 129 { 130 mpn_sub_n (asm1, a0, a1, n); 131 } 132 } 133 else /* n - s == 1 */ 134 { 135 if (a0[s] == 0 && mpn_cmp (a0, a1, s) < 0) 136 { 137 mpn_sub_n (asm1, a1, a0, s); 138 asm1[s] = 0; 139 vm1_neg = 1; 140 } 141 else 142 { 143 asm1[s] = a0[s] - mpn_sub_n (asm1, a0, a1, s); 144 } 145 } 146 147 /* Compute bsm1. */ 148 if (t == n) 149 { 150 if (mpn_cmp (b0, b1, n) < 0) 151 { 152 mpn_sub_n (bsm1, b1, b0, n); 153 vm1_neg ^= 1; 154 } 155 else 156 { 157 mpn_sub_n (bsm1, b0, b1, n); 158 } 159 } 160 else 161 { 162 if (mpn_zero_p (b0 + t, n - t) && mpn_cmp (b0, b1, t) < 0) 163 { 164 mpn_sub_n (bsm1, b1, b0, t); 165 MPN_ZERO (bsm1 + t, n - t); 166 vm1_neg ^= 1; 167 } 168 else 169 { 170 mpn_sub (bsm1, b0, n, b1, t); 171 } 172 } 173 174 #define v0 pp /* 2n */ 175 #define vinf (pp + 2 * n) /* s+t */ 176 #define vm1 scratch /* 2n */ 177 #define scratch_out scratch + 2 * n 178 179 /* vm1, 2n limbs */ 180 TOOM22_MUL_N_REC (vm1, asm1, bsm1, n, scratch_out); 181 182 if (s > t) TOOM22_MUL_REC (vinf, a1, s, b1, t, scratch_out); 183 else TOOM22_MUL_N_REC (vinf, a1, b1, s, scratch_out); 184 185 /* v0, 2n limbs */ 186 TOOM22_MUL_N_REC (v0, ap, bp, n, scratch_out); 187 188 /* H(v0) + L(vinf) */ 189 cy = mpn_add_n (pp + 2 * n, v0 + n, vinf, n); 190 191 /* L(v0) + H(v0) */ 192 cy2 = cy + mpn_add_n (pp + n, pp + 2 * n, v0, n); 193 194 /* L(vinf) + H(vinf) */ 195 cy += mpn_add (pp + 2 * n, pp + 2 * n, n, vinf + n, s + t - n); 196 197 if (vm1_neg) 198 cy += mpn_add_n (pp + n, pp + n, vm1, 2 * n); 199 else 200 cy -= mpn_sub_n (pp + n, pp + n, vm1, 2 * n); 201 202 ASSERT (cy + 1 <= 3); 203 ASSERT (cy2 <= 2); 204 205 MPN_INCR_U (pp + 2 * n, s + t, cy2); 206 if (LIKELY (cy <= 2)) 207 /* if s+t==n, cy is zero, but we should not acces pp[3*n] at all. */ 208 MPN_INCR_U (pp + 3 * n, s + t - n, cy); 209 else 210 MPN_DECR_U (pp + 3 * n, s + t - n, 1); 211 }