github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/cray/ieee/invert_limb.c (about)

     1  /* mpn_invert_limb -- Invert a normalized limb.
     2  
     3  Copyright 1991, 2000, 2001 Free Software Foundation, Inc.
     4  
     5  This file is part of the GNU MP Library.
     6  
     7  The GNU MP Library is free software; you can redistribute it and/or modify
     8  it under the terms of either:
     9  
    10    * the GNU Lesser General Public License as published by the Free
    11      Software Foundation; either version 3 of the License, or (at your
    12      option) any later version.
    13  
    14  or
    15  
    16    * the GNU General Public License as published by the Free Software
    17      Foundation; either version 2 of the License, or (at your option) any
    18      later version.
    19  
    20  or both in parallel, as here.
    21  
    22  The GNU MP Library is distributed in the hope that it will be useful, but
    23  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    24  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    25  for more details.
    26  
    27  You should have received copies of the GNU General Public License and the
    28  GNU Lesser General Public License along with the GNU MP Library.  If not,
    29  see https://www.gnu.org/licenses/.  */
    30  
    31  #include "gmp.h"
    32  #include "gmp-impl.h"
    33  #include "longlong.h"
    34  
    35  /*
    36    This is needed to make configure define HAVE_NATIVE_mpn_invert_limb:
    37    PROLOGUE(mpn_invert_limb)
    38  */
    39  
    40  static const unsigned short int approx_tab[0x100] =
    41  {
    42    /* 0x400, */
    43    0x3ff,
    44           0x3fc, 0x3f8, 0x3f4, 0x3f0, 0x3ec, 0x3e8, 0x3e4,
    45    0x3e0, 0x3dd, 0x3d9, 0x3d5, 0x3d2, 0x3ce, 0x3ca, 0x3c7,
    46    0x3c3, 0x3c0, 0x3bc, 0x3b9, 0x3b5, 0x3b2, 0x3ae, 0x3ab,
    47    0x3a8, 0x3a4, 0x3a1, 0x39e, 0x39b, 0x397, 0x394, 0x391,
    48    0x38e, 0x38b, 0x387, 0x384, 0x381, 0x37e, 0x37b, 0x378,
    49    0x375, 0x372, 0x36f, 0x36c, 0x369, 0x366, 0x364, 0x361,
    50    0x35e, 0x35b, 0x358, 0x355, 0x353, 0x350, 0x34d, 0x34a,
    51    0x348, 0x345, 0x342, 0x340, 0x33d, 0x33a, 0x338, 0x335,
    52    0x333, 0x330, 0x32e, 0x32b, 0x329, 0x326, 0x324, 0x321,
    53    0x31f, 0x31c, 0x31a, 0x317, 0x315, 0x313, 0x310, 0x30e,
    54    0x30c, 0x309, 0x307, 0x305, 0x303, 0x300, 0x2fe, 0x2fc,
    55    0x2fa, 0x2f7, 0x2f5, 0x2f3, 0x2f1, 0x2ef, 0x2ec, 0x2ea,
    56    0x2e8, 0x2e6, 0x2e4, 0x2e2, 0x2e0, 0x2de, 0x2dc, 0x2da,
    57    0x2d8, 0x2d6, 0x2d4, 0x2d2, 0x2d0, 0x2ce, 0x2cc, 0x2ca,
    58    0x2c8, 0x2c6, 0x2c4, 0x2c2, 0x2c0, 0x2be, 0x2bc, 0x2bb,
    59    0x2b9, 0x2b7, 0x2b5, 0x2b3, 0x2b1, 0x2b0, 0x2ae, 0x2ac,
    60    0x2aa, 0x2a8, 0x2a7, 0x2a5, 0x2a3, 0x2a1, 0x2a0, 0x29e,
    61    0x29c, 0x29b, 0x299, 0x297, 0x295, 0x294, 0x292, 0x291,
    62    0x28f, 0x28d, 0x28c, 0x28a, 0x288, 0x287, 0x285, 0x284,
    63    0x282, 0x280, 0x27f, 0x27d, 0x27c, 0x27a, 0x279, 0x277,
    64    0x276, 0x274, 0x273, 0x271, 0x270, 0x26e, 0x26d, 0x26b,
    65    0x26a, 0x268, 0x267, 0x265, 0x264, 0x263, 0x261, 0x260,
    66    0x25e, 0x25d, 0x25c, 0x25a, 0x259, 0x257, 0x256, 0x255,
    67    0x253, 0x252, 0x251, 0x24f, 0x24e, 0x24d, 0x24b, 0x24a,
    68    0x249, 0x247, 0x246, 0x245, 0x243, 0x242, 0x241, 0x240,
    69    0x23e, 0x23d, 0x23c, 0x23b, 0x239, 0x238, 0x237, 0x236,
    70    0x234, 0x233, 0x232, 0x231, 0x230, 0x22e, 0x22d, 0x22c,
    71    0x22b, 0x22a, 0x229, 0x227, 0x226, 0x225, 0x224, 0x223,
    72    0x222, 0x220, 0x21f, 0x21e, 0x21d, 0x21c, 0x21b, 0x21a,
    73    0x219, 0x218, 0x216, 0x215, 0x214, 0x213, 0x212, 0x211,
    74    0x210, 0x20f, 0x20e, 0x20d, 0x20c, 0x20b, 0x20a, 0x209,
    75    0x208, 0x207, 0x206, 0x205, 0x204, 0x203, 0x202, 0x201,
    76  };
    77  
    78  /* iteration: z = 2z-(z**2)d */
    79  
    80  mp_limb_t
    81  mpn_invert_limb (mp_limb_t d)
    82  {
    83    mp_limb_t z, z2l, z2h, tl, th;
    84    mp_limb_t xh, xl;
    85    mp_limb_t zh, zl;
    86  
    87  #if GMP_LIMB_BITS == 32
    88    z = approx_tab[(d >> 23) - 0x100] << 6;	/* z < 2^16 */
    89  
    90    z2l = z * z;					/* z2l < 2^32 */
    91    umul_ppmm (th, tl, z2l, d);
    92    z = (z << 17) - (th << 1);
    93  #endif
    94  #if GMP_LIMB_BITS == 64
    95    z = approx_tab[(d >> 55) - 0x100] << 6;	/* z < 2^16 */
    96  
    97    z2l = z * z;					/* z2l < 2^32 */
    98    th = z2l * (d >> 32);				/* th < 2^64 */
    99    z = (z << 17) - (th >> 31);			/* z < 2^32 */
   100  
   101    z2l = z * z;
   102    umul_ppmm (th, tl, z2l, d);
   103    z = (z << 33) - (th << 1);
   104  #endif
   105  
   106    umul_ppmm (z2h, z2l, z, z);
   107    umul_ppmm (th, tl, z2h, d);
   108    umul_ppmm (xh, xl, z2l, d);
   109    tl += xh;
   110    th += tl < xh;
   111    th = (th << 2) | (tl >> GMP_LIMB_BITS - 2);
   112    tl = tl << 2;
   113    sub_ddmmss (zh, zl, z << 2, 0, th, tl);
   114  
   115    umul_ppmm (xh, xl, d, zh);
   116    xh += d;		/* add_ssaaaa (xh, xl, xh, xl, d, 0); */
   117    if (~xh != 0)
   118      {
   119        add_ssaaaa (xh, xl, xh, xl, 0, d);
   120        zh++;
   121      }
   122  
   123    add_ssaaaa (xh, xl, xh, xl, 0, d);
   124    if (xh != 0)
   125      zh++;
   126  
   127    return zh;
   128  }