github.com/aergoio/aergo@v1.3.1/libtool/src/gmp-6.1.2/mpn/generic/sec_powm.c (about)

     1  /* mpn_sec_powm -- Compute R = U^E mod M.  Secure variant, side-channel silent
     2     under the assumption that the multiply instruction is side channel silent.
     3  
     4     Contributed to the GNU project by Torbjörn Granlund.
     5  
     6  Copyright 2007-2009, 2011-2014 Free Software Foundation, Inc.
     7  
     8  This file is part of the GNU MP Library.
     9  
    10  The GNU MP Library is free software; you can redistribute it and/or modify
    11  it under the terms of either:
    12  
    13    * the GNU Lesser General Public License as published by the Free
    14      Software Foundation; either version 3 of the License, or (at your
    15      option) any later version.
    16  
    17  or
    18  
    19    * the GNU General Public License as published by the Free Software
    20      Foundation; either version 2 of the License, or (at your option) any
    21      later version.
    22  
    23  or both in parallel, as here.
    24  
    25  The GNU MP Library is distributed in the hope that it will be useful, but
    26  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    27  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
    28  for more details.
    29  
    30  You should have received copies of the GNU General Public License and the
    31  GNU Lesser General Public License along with the GNU MP Library.  If not,
    32  see https://www.gnu.org/licenses/.  */
    33  
    34  
    35  /*
    36    BASIC ALGORITHM, Compute U^E mod M, where M < B^n is odd.
    37  
    38    1. T <- (B^n * U) mod M                Convert to REDC form
    39  
    40    2. Compute table U^0, U^1, U^2... of E-dependent size
    41  
    42    3. While there are more bits in E
    43         W <- power left-to-right base-k
    44  
    45  
    46    TODO:
    47  
    48     * Make getbits a macro, thereby allowing it to update the index operand.
    49       That will simplify the code using getbits.  (Perhaps make getbits' sibling
    50       getbit then have similar form, for symmetry.)
    51  
    52     * Choose window size without looping.  (Superoptimize or think(tm).)
    53  
    54     * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
    55       redc_1 and redc_n.  On such systems, we will switch to redc_2 causing
    56       slowdown.
    57  */
    58  
    59  #include "gmp.h"
    60  #include "gmp-impl.h"
    61  #include "longlong.h"
    62  
    63  #undef MPN_REDC_1_SEC
    64  #define MPN_REDC_1_SEC(rp, up, mp, n, invm)				\
    65    do {									\
    66      mp_limb_t cy;							\
    67      cy = mpn_redc_1 (rp, up, mp, n, invm);				\
    68      mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
    69    } while (0)
    70  
    71  #undef MPN_REDC_2_SEC
    72  #define MPN_REDC_2_SEC(rp, up, mp, n, mip)				\
    73    do {									\
    74      mp_limb_t cy;							\
    75      cy = mpn_redc_2 (rp, up, mp, n, mip);				\
    76      mpn_cnd_sub_n (cy, rp, rp, mp, n);					\
    77    } while (0)
    78  
    79  #if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
    80  #define WANT_REDC_2 1
    81  #endif
    82  
    83  /* Define our own mpn squaring function.  We do this since we cannot use a
    84     native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
    85     SQR_TOOM2_THRESHOLD.  This is so because of fixed size stack allocations
    86     made inside mpn_sqr_basecase.  */
    87  
    88  #if HAVE_NATIVE_mpn_sqr_diagonal
    89  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    90    mpn_sqr_diagonal (rp, up, n)
    91  #else
    92  #define MPN_SQR_DIAGONAL(rp, up, n)					\
    93    do {									\
    94      mp_size_t _i;							\
    95      for (_i = 0; _i < (n); _i++)					\
    96        {									\
    97  	mp_limb_t ul, lpl;						\
    98  	ul = (up)[_i];							\
    99  	umul_ppmm ((rp)[2 * _i + 1], lpl, ul, ul << GMP_NAIL_BITS);	\
   100  	(rp)[2 * _i] = lpl >> GMP_NAIL_BITS;				\
   101        }									\
   102    } while (0)
   103  #endif
   104  
   105  
   106  #if ! HAVE_NATIVE_mpn_sqr_basecase
   107  /* The limit of the generic code is SQR_TOOM2_THRESHOLD.  */
   108  #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
   109  #endif
   110  
   111  #if HAVE_NATIVE_mpn_sqr_basecase
   112  #ifdef TUNE_SQR_TOOM2_MAX
   113  /* We slightly abuse TUNE_SQR_TOOM2_MAX here.  If it is set for an assembly
   114     mpn_sqr_basecase, it comes from SQR_TOOM2_THRESHOLD_MAX in the assembly
   115     file.  An assembly mpn_sqr_basecase that does not define it should allow
   116     any size.  */
   117  #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
   118  #endif
   119  #endif
   120  
   121  #ifdef WANT_FAT_BINARY
   122  /* For fat builds, we use SQR_TOOM2_THRESHOLD which will expand to a read from
   123     __gmpn_cpuvec.  Perhaps any possible sqr_basecase.asm allow any size, and we
   124     limit the use unnecessarily.  We cannot tell, so play it safe.  FIXME.  */
   125  #define SQR_BASECASE_LIM  SQR_TOOM2_THRESHOLD
   126  #endif
   127  
   128  #ifndef SQR_BASECASE_LIM
   129  /* If SQR_BASECASE_LIM is now not defined, use mpn_sqr_basecase for any operand
   130     size.  */
   131  #define mpn_local_sqr(rp,up,n,tp) mpn_sqr_basecase(rp,up,n)
   132  #else
   133  /* Else use mpn_sqr_basecase for its allowed sizes, else mpn_mul_basecase.  */
   134  #define mpn_local_sqr(rp,up,n,tp) \
   135    do {									\
   136      if (BELOW_THRESHOLD (n, SQR_BASECASE_LIM))				\
   137        mpn_sqr_basecase (rp, up, n);					\
   138      else								\
   139        mpn_mul_basecase(rp, up, n, up, n);				\
   140    } while (0)
   141  #endif
   142  
   143  #define getbit(p,bi) \
   144    ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
   145  
   146  /* FIXME: Maybe some things would get simpler if all callers ensure
   147     that bi >= nbits. As far as I understand, with the current code bi
   148     < nbits can happen only for the final iteration. */
   149  static inline mp_limb_t
   150  getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
   151  {
   152    int nbits_in_r;
   153    mp_limb_t r;
   154    mp_size_t i;
   155  
   156    if (bi < nbits)
   157      {
   158        return p[0] & (((mp_limb_t) 1 << bi) - 1);
   159      }
   160    else
   161      {
   162        bi -= nbits;			/* bit index of low bit to extract */
   163        i = bi / GMP_NUMB_BITS;		/* word index of low bit to extract */
   164        bi %= GMP_NUMB_BITS;		/* bit index in low word */
   165        r = p[i] >> bi;			/* extract (low) bits */
   166        nbits_in_r = GMP_NUMB_BITS - bi;	/* number of bits now in r */
   167        if (nbits_in_r < nbits)		/* did we get enough bits? */
   168  	r += p[i + 1] << nbits_in_r;	/* prepend bits from higher word */
   169        return r & (((mp_limb_t ) 1 << nbits) - 1);
   170      }
   171  }
   172  
   173  #ifndef POWM_SEC_TABLE
   174  #if GMP_NUMB_BITS < 50
   175  #define POWM_SEC_TABLE  2,33,96,780,2741
   176  #else
   177  #define POWM_SEC_TABLE  2,130,524,2578
   178  #endif
   179  #endif
   180  
   181  #if TUNE_PROGRAM_BUILD
   182  extern int win_size (mp_bitcnt_t);
   183  #else
   184  static inline int
   185  win_size (mp_bitcnt_t enb)
   186  {
   187    int k;
   188    /* Find k, such that x[k-1] < enb <= x[k].
   189  
   190       We require that x[k] >= k, then it follows that enb > x[k-1] >=
   191       k-1, which implies k <= enb.
   192    */
   193    static const mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
   194    for (k = 1; enb > x[k]; k++)
   195      ;
   196    ASSERT (k <= enb);
   197    return k;
   198  }
   199  #endif
   200  
   201  /* Convert U to REDC form, U_r = B^n * U mod M.
   202     Uses scratch space at tp of size 2un + n + 1.  */
   203  static void
   204  redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
   205  {
   206    MPN_ZERO (tp, n);
   207    MPN_COPY (tp + n, up, un);
   208  
   209    mpn_sec_div_r (tp, un + n, mp, n, tp + un + n);
   210    MPN_COPY (rp, tp, n);
   211  }
   212  
   213  /* {rp, n} <-- {bp, bn} ^ {ep, en} mod {mp, n},
   214     where en = ceil (enb / GMP_NUMB_BITS)
   215     Requires that {mp, n} is odd (and hence also mp[0] odd).
   216     Uses scratch space at tp as defined by mpn_sec_powm_itch.  */
   217  void
   218  mpn_sec_powm (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
   219  	      mp_srcptr ep, mp_bitcnt_t enb,
   220  	      mp_srcptr mp, mp_size_t n, mp_ptr tp)
   221  {
   222    mp_limb_t ip[2], *mip;
   223    int windowsize, this_windowsize;
   224    mp_limb_t expbits;
   225    mp_ptr pp, this_pp;
   226    long i;
   227    int cnd;
   228  
   229    ASSERT (enb > 0);
   230    ASSERT (n > 0);
   231    /* The code works for bn = 0, but the defined scratch space is 2 limbs
   232       greater than we supply, when converting 1 to redc form .  */
   233    ASSERT (bn > 0);
   234    ASSERT ((mp[0] & 1) != 0);
   235  
   236    windowsize = win_size (enb);
   237  
   238  #if WANT_REDC_2
   239    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
   240      {
   241        mip = ip;
   242        binvert_limb (mip[0], mp[0]);
   243        mip[0] = -mip[0];
   244      }
   245    else
   246      {
   247        mip = ip;
   248        mpn_binvert (mip, mp, 2, tp);
   249        mip[0] = -mip[0]; mip[1] = ~mip[1];
   250      }
   251  #else
   252    mip = ip;
   253    binvert_limb (mip[0], mp[0]);
   254    mip[0] = -mip[0];
   255  #endif
   256  
   257    pp = tp;
   258    tp += (n << windowsize);	/* put tp after power table */
   259  
   260    /* Compute pp[0] table entry */
   261    /* scratch: |   n   | 1 |   n+2    |  */
   262    /*          | pp[0] | 1 | redcify  |  */
   263    this_pp = pp;
   264    this_pp[n] = 1;
   265    redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
   266    this_pp += n;
   267  
   268    /* Compute pp[1] table entry.  To avoid excessive scratch usage in the
   269       degenerate situation where B >> M, we let redcify use scratch space which
   270       will later be used by the pp table (element 2 and up).  */
   271    /* scratch: |   n   |   n   |  bn + n + 1  |  */
   272    /*          | pp[0] | pp[1] |   redcify    |  */
   273    redcify (this_pp, bp, bn, mp, n, this_pp + n);
   274  
   275    /* Precompute powers of b and put them in the temporary area at pp.  */
   276    /* scratch: |   n   |   n   | ...  |                    |   2n      |  */
   277    /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  product  |  */
   278    for (i = (1 << windowsize) - 2; i > 0; i--)
   279      {
   280        mpn_mul_basecase (tp, this_pp, n, pp + n, n);
   281        this_pp += n;
   282  #if WANT_REDC_2
   283        if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
   284  	MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
   285        else
   286  	MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
   287  #else
   288        MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
   289  #endif
   290      }
   291  
   292    expbits = getbits (ep, enb, windowsize);
   293    ASSERT_ALWAYS (enb >= windowsize);
   294    enb -= windowsize;
   295  
   296    mpn_sec_tabselect (rp, pp, n, 1 << windowsize, expbits);
   297  
   298    /* Main exponentiation loop.  */
   299    /* scratch: |   n   |   n   | ...  |                    |     3n-4n     |  */
   300    /*          | pp[0] | pp[1] | ...  | pp[2^windowsize-1] |  loop scratch |  */
   301  
   302  #define INNERLOOP							\
   303    while (enb != 0)							\
   304      {									\
   305        expbits = getbits (ep, enb, windowsize);				\
   306        this_windowsize = windowsize;					\
   307        if (enb < windowsize)						\
   308  	{								\
   309  	  this_windowsize -= windowsize - enb;				\
   310  	  enb = 0;							\
   311  	}								\
   312        else								\
   313  	enb -= windowsize;						\
   314  									\
   315        do								\
   316  	{								\
   317  	  mpn_local_sqr (tp, rp, n, tp + 2 * n);			\
   318  	  MPN_REDUCE (rp, tp, mp, n, mip);				\
   319  	  this_windowsize--;						\
   320  	}								\
   321        while (this_windowsize != 0);					\
   322  									\
   323        mpn_sec_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);	\
   324        mpn_mul_basecase (tp, rp, n, tp + 2*n, n);			\
   325  									\
   326        MPN_REDUCE (rp, tp, mp, n, mip);					\
   327      }
   328  
   329  #if WANT_REDC_2
   330    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
   331      {
   332  #undef MPN_MUL_N
   333  #undef MPN_SQR
   334  #undef MPN_REDUCE
   335  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
   336  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
   337  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
   338        INNERLOOP;
   339      }
   340    else
   341      {
   342  #undef MPN_MUL_N
   343  #undef MPN_SQR
   344  #undef MPN_REDUCE
   345  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
   346  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
   347  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2_SEC (rp, tp, mp, n, mip)
   348        INNERLOOP;
   349      }
   350  #else
   351  #undef MPN_MUL_N
   352  #undef MPN_SQR
   353  #undef MPN_REDUCE
   354  #define MPN_MUL_N(r,a,b,n)		mpn_mul_basecase (r,a,n,b,n)
   355  #define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
   356  #define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
   357    INNERLOOP;
   358  #endif
   359  
   360    MPN_COPY (tp, rp, n);
   361    MPN_ZERO (tp + n, n);
   362  
   363  #if WANT_REDC_2
   364    if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
   365      MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
   366    else
   367      MPN_REDC_2_SEC (rp, tp, mp, n, mip);
   368  #else
   369    MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
   370  #endif
   371    cnd = mpn_sub_n (tp, rp, mp, n);	/* we need just retval */
   372    mpn_cnd_sub_n (!cnd, rp, rp, mp, n);
   373  }
   374  
   375  mp_size_t
   376  mpn_sec_powm_itch (mp_size_t bn, mp_bitcnt_t enb, mp_size_t n)
   377  {
   378    int windowsize;
   379    mp_size_t redcify_itch, itch;
   380  
   381    /* The top scratch usage will either be when reducing B in the 2nd redcify
   382       call, or more typically n*2^windowsize + 3n or 4n, in the main loop.  (It
   383       is 3n or 4n depending on if we use mpn_local_sqr or a native
   384       mpn_sqr_basecase.  We assume 4n always for now.) */
   385  
   386    windowsize = win_size (enb);
   387  
   388    /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
   389       the (bn + n) term is due to redcify's own usage, and the rest is due to
   390       mpn_sec_div_r's usage when called from redcify.  */
   391    redcify_itch = (2 * n) + (bn + n) + ((bn + n) + 2 * n + 2);
   392  
   393    /* The n * 2^windowsize term is due to the power table, the 4n term is due to
   394       scratch needs of squaring/multiplication in the exponentiation loop.  */
   395    itch = (n << windowsize) + (4 * n);
   396  
   397    return MAX (itch, redcify_itch);
   398  }