github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/mulmid.c (about) 1 /* mpn_mulmid -- middle product 2 3 Contributed by David Harvey. 4 5 THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY 6 SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST 7 GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. 8 9 Copyright 2011 Free Software Foundation, Inc. 10 11 This file is part of the GNU MP Library. 12 13 The GNU MP Library is free software; you can redistribute it and/or modify 14 it under the terms of either: 15 16 * the GNU Lesser General Public License as published by the Free 17 Software Foundation; either version 3 of the License, or (at your 18 option) any later version. 19 20 or 21 22 * the GNU General Public License as published by the Free Software 23 Foundation; either version 2 of the License, or (at your option) any 24 later version. 25 26 or both in parallel, as here. 27 28 The GNU MP Library is distributed in the hope that it will be useful, but 29 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 30 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 31 for more details. 32 33 You should have received copies of the GNU General Public License and the 34 GNU Lesser General Public License along with the GNU MP Library. If not, 35 see https://www.gnu.org/licenses/. */ 36 37 38 #include "gmp.h" 39 #include "gmp-impl.h" 40 41 42 #define CHUNK (200 + MULMID_TOOM42_THRESHOLD) 43 44 45 void 46 mpn_mulmid (mp_ptr rp, 47 mp_srcptr ap, mp_size_t an, 48 mp_srcptr bp, mp_size_t bn) 49 { 50 mp_size_t rn, k; 51 mp_ptr scratch, temp; 52 53 ASSERT (an >= bn); 54 ASSERT (bn >= 1); 55 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an)); 56 ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn)); 57 58 if (bn < MULMID_TOOM42_THRESHOLD) 59 { 60 /* region not tall enough to make toom42 worthwhile for any portion */ 61 62 if (an < CHUNK) 63 { 64 /* region not too wide either, just call basecase directly */ 65 mpn_mulmid_basecase (rp, ap, an, bp, bn); 66 return; 67 } 68 69 /* Region quite wide. For better locality, use basecase on chunks: 70 71 AAABBBCC.. 72 .AAABBBCC. 73 ..AAABBBCC 74 */ 75 76 k = CHUNK - bn + 1; /* number of diagonals per chunk */ 77 78 /* first chunk (marked A in the above diagram) */ 79 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); 80 81 /* remaining chunks (B, C, etc) */ 82 an -= k; 83 84 while (an >= CHUNK) 85 { 86 mp_limb_t t0, t1, cy; 87 ap += k, rp += k; 88 t0 = rp[0], t1 = rp[1]; 89 mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn); 90 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ 91 MPN_INCR_U (rp + 1, k + 1, t1 + cy); 92 an -= k; 93 } 94 95 if (an >= bn) 96 { 97 /* last remaining chunk */ 98 mp_limb_t t0, t1, cy; 99 ap += k, rp += k; 100 t0 = rp[0], t1 = rp[1]; 101 mpn_mulmid_basecase (rp, ap, an, bp, bn); 102 ADDC_LIMB (cy, rp[0], rp[0], t0); 103 MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy); 104 } 105 106 return; 107 } 108 109 /* region is tall enough for toom42 */ 110 111 rn = an - bn + 1; 112 113 if (rn < MULMID_TOOM42_THRESHOLD) 114 { 115 /* region not wide enough to make toom42 worthwhile for any portion */ 116 117 TMP_DECL; 118 119 if (bn < CHUNK) 120 { 121 /* region not too tall either, just call basecase directly */ 122 mpn_mulmid_basecase (rp, ap, an, bp, bn); 123 return; 124 } 125 126 /* Region quite tall. For better locality, use basecase on chunks: 127 128 AAAAA.... 129 .AAAAA... 130 ..BBBBB.. 131 ...BBBBB. 132 ....CCCCC 133 */ 134 135 TMP_MARK; 136 137 temp = TMP_ALLOC_LIMBS (rn + 2); 138 139 /* first chunk (marked A in the above diagram) */ 140 bp += bn - CHUNK, an -= bn - CHUNK; 141 mpn_mulmid_basecase (rp, ap, an, bp, CHUNK); 142 143 /* remaining chunks (B, C, etc) */ 144 bn -= CHUNK; 145 146 while (bn >= CHUNK) 147 { 148 ap += CHUNK, bp -= CHUNK; 149 mpn_mulmid_basecase (temp, ap, an, bp, CHUNK); 150 mpn_add_n (rp, rp, temp, rn + 2); 151 bn -= CHUNK; 152 } 153 154 if (bn) 155 { 156 /* last remaining chunk */ 157 ap += CHUNK, bp -= bn; 158 mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn); 159 mpn_add_n (rp, rp, temp, rn + 2); 160 } 161 162 TMP_FREE; 163 return; 164 } 165 166 /* we're definitely going to use toom42 somewhere */ 167 168 if (bn > rn) 169 { 170 /* slice region into chunks, use toom42 on all chunks except possibly 171 the last: 172 173 AA.... 174 .AA... 175 ..BB.. 176 ...BB. 177 ....CC 178 */ 179 180 TMP_DECL; 181 TMP_MARK; 182 183 temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn)); 184 scratch = temp + rn + 2; 185 186 /* first chunk (marked A in the above diagram) */ 187 bp += bn - rn; 188 mpn_toom42_mulmid (rp, ap, bp, rn, scratch); 189 190 /* remaining chunks (B, C, etc) */ 191 bn -= rn; 192 193 while (bn >= rn) 194 { 195 ap += rn, bp -= rn; 196 mpn_toom42_mulmid (temp, ap, bp, rn, scratch); 197 mpn_add_n (rp, rp, temp, rn + 2); 198 bn -= rn; 199 } 200 201 if (bn) 202 { 203 /* last remaining chunk */ 204 ap += rn, bp -= bn; 205 mpn_mulmid (temp, ap, rn + bn - 1, bp, bn); 206 mpn_add_n (rp, rp, temp, rn + 2); 207 } 208 209 TMP_FREE; 210 } 211 else 212 { 213 /* slice region into chunks, use toom42 on all chunks except possibly 214 the last: 215 216 AAABBBCC.. 217 .AAABBBCC. 218 ..AAABBBCC 219 */ 220 221 TMP_DECL; 222 TMP_MARK; 223 224 scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn)); 225 226 /* first chunk (marked A in the above diagram) */ 227 mpn_toom42_mulmid (rp, ap, bp, bn, scratch); 228 229 /* remaining chunks (B, C, etc) */ 230 rn -= bn; 231 232 while (rn >= bn) 233 { 234 mp_limb_t t0, t1, cy; 235 ap += bn, rp += bn; 236 t0 = rp[0], t1 = rp[1]; 237 mpn_toom42_mulmid (rp, ap, bp, bn, scratch); 238 ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */ 239 MPN_INCR_U (rp + 1, bn + 1, t1 + cy); 240 rn -= bn; 241 } 242 243 TMP_FREE; 244 245 if (rn) 246 { 247 /* last remaining chunk */ 248 mp_limb_t t0, t1, cy; 249 ap += bn, rp += bn; 250 t0 = rp[0], t1 = rp[1]; 251 mpn_mulmid (rp, ap, rn + bn - 1, bp, bn); 252 ADDC_LIMB (cy, rp[0], rp[0], t0); 253 MPN_INCR_U (rp + 1, rn + 1, t1 + cy); 254 } 255 } 256 }