github.com/keysonzzz/kmg@v0.0.0-20151121023212-05317bfd7d39/kmgRand/LcgCalculateConstants/c/rand-primegen.c (about)

     1  /*
     2      This is DJB's code for calculating primes, with a few modifications,
     3      such as making it work with Microsoft's compiler on Windows, and
     4      getting rid of warnings.
     5  */
     6  #include "rand-primegen.h"
     7  
     8  /*
     9  B is 32 times X.
    10  Total memory use for one generator is 2B bytes = 64X bytes.
    11  Covers primes in an interval of length 1920X.
    12  Working set size for one generator is B bits = 4X bytes.
    13  
    14  Speedup by a factor of 2 or 3 for L1 cache instead of L2 cache.
    15  Slowdown by a factor of roughly n for primes past (nB)^2.
    16  
    17  Possible choices of X:
    18    2002 to fit inside an 8K L1 cache (e.g., Pentium).
    19    4004 to fit inside a 16K L1 cache (e.g., Pentium II).
    20    64064 to fit inside a 256K L2 cache.
    21  
    22  There are various word-size limits on X; 1000000 should still be okay.
    23  */
    24  
    25  #define B32 PRIMEGEN_WORDS
    26  #define B (PRIMEGEN_WORDS * 32)
    27  
    28  #ifdef _MSC_VER
    29  #pragma warning(disable:4244)
    30  #endif
    31  
    32  static const uint32_t two[32] = {
    33    0x00000001 , 0x00000002 , 0x00000004 , 0x00000008
    34  , 0x00000010 , 0x00000020 , 0x00000040 , 0x00000080
    35  , 0x00000100 , 0x00000200 , 0x00000400 , 0x00000800
    36  , 0x00001000 , 0x00002000 , 0x00004000 , 0x00008000
    37  , 0x00010000 , 0x00020000 , 0x00040000 , 0x00080000
    38  , 0x00100000 , 0x00200000 , 0x00400000 , 0x00800000
    39  , 0x01000000 , 0x02000000 , 0x04000000 , 0x08000000
    40  , 0x10000000 , 0x20000000 , 0x40000000 , 0x80000000
    41  } ;
    42  
    43  static void clear(register uint32_t (*buf)[B32])
    44  {
    45    register int i;
    46    register int j;
    47  
    48    for (j = 0;j < 16;++j) {
    49      for (i = 0;i < B32;++i)
    50        (*buf)[i] = (uint32_t)~0;
    51      ++buf;
    52    }
    53  }
    54  
    55  static void doit4(register uint32_t *a,register long x,register long y,int64_t start)
    56  {
    57    long i0;
    58    long y0;
    59    register long i;
    60    register uint32_t data;
    61    register uint32_t pos;
    62    register uint32_t bits;
    63  
    64    x += x; x += 15;
    65    y += 15;
    66  
    67    start += 1000000000;
    68    while (start < 0) { start += x; x += 30; }
    69    start -= 1000000000;
    70    i = start;
    71  
    72    while (i < B) { i += x; x += 30; }
    73  
    74    for (;;) {
    75      x -= 30;
    76      if (x <= 15) return;
    77      i -= x;
    78  
    79      while (i < 0) { i += y; y += 30; }
    80  
    81      i0 = i; y0 = y;
    82      while (i < B) {
    83        pos = i; data = i;
    84        pos >>= 5; data &= 31;
    85        i += y; y += 30;
    86        bits = a[pos]; data = two[data];
    87        bits ^= data;
    88        a[pos] = bits;
    89      }
    90      i = i0; y = y0;
    91    }
    92  }
    93  
    94  static void doit6(register uint32_t *a,register long x,register long y,int64_t start)
    95  {
    96    long i0;
    97    long y0;
    98    register long i;
    99    register uint32_t data;
   100    register uint32_t pos;
   101    register uint32_t bits;
   102  
   103    x += 5;
   104    y += 15;
   105  
   106    start += 1000000000;
   107    while (start < 0) { start += x; x += 10; }
   108    start -= 1000000000;
   109    i = start;
   110    while (i < B) { i += x; x += 10; }
   111  
   112    for (;;) {
   113      x -= 10;
   114      if (x <= 5) return;
   115      i -= x;
   116  
   117      while (i < 0) { i += y; y += 30; }
   118  
   119      i0 = i; y0 = y;
   120      while (i < B) {
   121        pos = i; data = i;
   122        pos >>= 5; data &= 31;
   123        i += y; y += 30;
   124        bits = a[pos]; data = two[data];
   125        bits ^= data;
   126        a[pos] = bits;
   127      }
   128      i = i0; y = y0;
   129    }
   130  }
   131  
   132  static void doit12(register uint32_t *a,register long x,register long y,int64_t start)
   133  {
   134    long i0;
   135    long y0;
   136    register long i;
   137    register uint32_t data;
   138    register uint32_t pos;
   139    register uint32_t bits;
   140  
   141    x += 5;
   142  
   143    start += 1000000000;
   144    while (start < 0) { start += x; x += 10; }
   145    start -= 1000000000;
   146    i = start;
   147    while (i < 0) { i += x; x += 10; }
   148  
   149    y += 15;
   150    x += 10;
   151  
   152    for (;;) {
   153      while (i >= B) {
   154        if (x <= y) return;
   155        i -= y;
   156        y += 30;
   157      }
   158      i0 = i;
   159      y0 = y;
   160      while ((i >= 0) && (y < x)) {
   161        pos = i; data = i;
   162        pos >>= 5; data &= 31;
   163        i -= y;
   164        y += 30;
   165        bits = a[pos]; data = two[data];
   166        bits ^= data;
   167        a[pos] = bits;
   168      }
   169      i = i0;
   170      y = y0;
   171      i += x - 10;
   172      x += 10;
   173    }
   174  }
   175  
   176  static const int deltainverse[60] = {
   177   -1,B32 * 0,-1,-1,-1,-1,-1,B32 * 1,-1,-1,-1,B32 * 2,-1,B32 * 3,-1
   178  ,-1,-1,B32 * 4,-1,B32 * 5,-1,-1,-1,B32 * 6,-1,-1,-1,-1,-1,B32 * 7
   179  ,-1,B32 * 8,-1,-1,-1,-1,-1,B32 * 9,-1,-1,-1,B32 * 10,-1,B32 * 11,-1
   180  ,-1,-1,B32 * 12,-1,B32 * 13,-1,-1,-1,B32 * 14,-1,-1,-1,-1,-1,B32 * 15
   181  } ;
   182  
   183  static void squarefree1big(uint32_t (*buf)[B32],uint64_t base,uint32_t q,uint64_t qq)
   184  {
   185    uint64_t i;
   186    uint32_t pos;
   187    int n;
   188    uint64_t bound = base + 60 * B;
   189  
   190    while (qq < bound) {
   191      if (bound < 2000000000)
   192        i = qq - (((uint32_t) base) % ((uint32_t) qq));
   193      else
   194        i = qq - (base % qq);
   195      if (!(i & 1)) i += qq;
   196  
   197      if (i < B * 60) {
   198        pos = i;
   199        n = deltainverse[pos % 60];
   200        if (n >= 0) {
   201          pos /= 60;
   202          (*buf)[n + (pos >> 5)] |= two[pos & 31];
   203        }
   204      }
   205  
   206      qq += q; q += 1800;
   207    }
   208  }
   209  
   210  static void squarefree1(register uint32_t (*buf)[B32],uint64_t L,uint32_t q)
   211  {
   212    uint32_t qq;
   213    register uint32_t qqhigh;
   214    uint32_t i;
   215    register uint32_t ilow;
   216    register uint32_t ihigh;
   217    register int n;
   218    uint64_t base;
   219  
   220    base = 60 * L;
   221    qq = q * q;
   222    q = 60 * q + 900;
   223  
   224    while (qq < B * 60) {
   225      if (base < 2000000000)
   226        i = qq - (((uint32_t) base) % qq);
   227      else
   228        i = qq - (base % qq);
   229      if (!(i & 1)) i += qq;
   230  
   231      if (i < B * 60) {
   232        qqhigh = qq / 60;
   233        ilow = i % 60; ihigh = i / 60;
   234  
   235        qqhigh += qqhigh;
   236        while (ihigh < B) {
   237          n = deltainverse[ilow];
   238          if (n >= 0)
   239            (*buf)[n + (ihigh >> 5)] |= two[ihigh & 31];
   240  
   241          ilow += 2; ihigh += qqhigh;
   242          if (ilow >= 60) { ilow -= 60; ihigh += 1; }
   243        }
   244      }
   245  
   246      qq += q; q += 1800;
   247    }
   248  
   249    squarefree1big(buf,base,q,qq);
   250  }
   251  
   252  static void squarefree49big(uint32_t (*buf)[B32],uint64_t base,uint32_t q,uint64_t qq)
   253  {
   254    uint64_t i;
   255    uint32_t pos;
   256    int n;
   257    uint64_t bound = base + 60 * B;
   258  
   259    while (qq < bound) {
   260      if (bound < 2000000000)
   261        i = qq - (((uint32_t) base) % ((uint32_t) qq));
   262      else
   263        i = qq - (base % qq);
   264      if (!(i & 1)) i += qq;
   265  
   266      if (i < B * 60) {
   267        pos = i;
   268        n = deltainverse[pos % 60];
   269        if (n >= 0) {
   270          pos /= 60;
   271          (*buf)[n + (pos >> 5)] |= two[pos & 31];
   272        }
   273      }
   274  
   275      qq += q; q += 1800;
   276    }
   277  }
   278  
   279  static void squarefree49(register uint32_t (*buf)[B32],uint64_t L,uint32_t q)
   280  {
   281    uint32_t qq;
   282    register uint32_t qqhigh;
   283    uint32_t i;
   284    register uint32_t ilow;
   285    register uint32_t ihigh;
   286    register int n;
   287    uint64_t base = 60 * L;
   288  
   289    qq = q * q;
   290    q = 60 * q + 900;
   291  
   292    while (qq < B * 60) {
   293      if (base < 2000000000)
   294        i = qq - (((uint32_t) base) % qq);
   295      else
   296        i = qq - (base % qq);
   297      if (!(i & 1)) i += qq;
   298  
   299      if (i < B * 60) {
   300        qqhigh = qq / 60;
   301        ilow = i % 60; ihigh = i / 60;
   302  
   303        qqhigh += qqhigh;
   304        qqhigh += 1;
   305        while (ihigh < B) {
   306          n = deltainverse[ilow];
   307          if (n >= 0)
   308            (*buf)[n + (ihigh >> 5)] |= two[ihigh & 31];
   309  
   310          ilow += 38; ihigh += qqhigh;
   311          if (ilow >= 60) { ilow -= 60; ihigh += 1; }
   312        }
   313      }
   314  
   315      qq += q; q += 1800;
   316    }
   317  
   318    squarefree49big(buf,base,q,qq);
   319  }
   320  
   321  /* squares of primes >= 7, < 240 */
   322  uint32_t qqtab[49] = {
   323    49,121,169,289,361,529,841,961,1369,1681
   324   ,1849,2209,2809,3481,3721,4489,5041,5329,6241,6889
   325   ,7921,9409,10201,10609,11449,11881,12769,16129,17161,18769
   326   ,19321,22201,22801,24649,26569,27889,29929,32041,32761,36481
   327   ,37249,38809,39601,44521,49729,51529,52441,54289,57121
   328  } ;
   329  
   330  /* (qq * 11 + 1) / 60 or (qq * 59 + 1) / 60 */
   331  uint32_t qq60tab[49] = {
   332    9,119,31,53,355,97,827,945,251,1653
   333   ,339,405,515,3423,3659,823,4957,977,6137
   334   ,1263,7789,1725,10031,1945,2099,11683,2341,2957
   335   ,16875,3441,18999,21831,22421,4519,4871,5113,5487
   336   ,31507,32215,35873,6829,7115,38941,43779,9117,9447,51567,9953,56169
   337  } ;
   338  
   339  static void squarefreetiny(register uint32_t *a,uint32_t *Lmodqq,int d)
   340  {
   341    int j;
   342    register uint32_t k;
   343    register uint32_t qq;
   344    register uint32_t pos;
   345    register uint32_t data;
   346    register uint32_t bits;
   347  
   348    for (j = 0;j < 49;++j) {
   349      qq = qqtab[j];
   350      k = qq - 1 - ((Lmodqq[j] + qq60tab[j] * d - 1) % qq);
   351      while (k < B) {
   352        pos = k;
   353        data = k;
   354        pos >>= 5;
   355        data &= 31;
   356        k += qq;
   357        bits = a[pos];
   358        data = two[data];
   359        bits |= data;
   360        a[pos] = bits;
   361      }
   362    }
   363  }
   364  
   365  typedef struct { char index; char f; char g; char k; } todo;
   366  
   367  static const todo for4[] = {
   368    {0,2,15,4} , {0,3,5,1} , {0,3,25,11} , {0,5,9,3}
   369  , {0,5,21,9} , {0,7,15,7} , {0,8,15,8} , {0,10,9,8}
   370  , {0,10,21,14} , {0,12,5,10} , {0,12,25,20} , {0,13,15,15}
   371  , {0,15,1,15} , {0,15,11,17} , {0,15,19,21} , {0,15,29,29}
   372  , {3,1,3,0} , {3,1,27,12} , {3,4,3,1} , {3,4,27,13}
   373  , {3,6,7,3} , {3,6,13,5} , {3,6,17,7} , {3,6,23,11}
   374  , {3,9,7,6} , {3,9,13,8} , {3,9,17,10} , {3,9,23,14}
   375  , {3,11,3,8} , {3,11,27,20} , {3,14,3,13} , {3,14,27,25}
   376  , {4,2,1,0} , {4,2,11,2} , {4,2,19,6} , {4,2,29,14}
   377  , {4,7,1,3} , {4,7,11,5} , {4,7,19,9} , {4,7,29,17}
   378  , {4,8,1,4} , {4,8,11,6} , {4,8,19,10} , {4,8,29,18}
   379  , {4,13,1,11} , {4,13,11,13} , {4,13,19,17} , {4,13,29,25}
   380  , {7,1,5,0} , {7,1,25,10} , {7,4,5,1} , {7,4,25,11}
   381  , {7,5,7,2} , {7,5,13,4} , {7,5,17,6} , {7,5,23,10}
   382  , {7,10,7,7} , {7,10,13,9} , {7,10,17,11} , {7,10,23,15}
   383  , {7,11,5,8} , {7,11,25,18} , {7,14,5,13} , {7,14,25,23}
   384  , {9,2,9,1} , {9,2,21,7} , {9,3,1,0} , {9,3,11,2}
   385  , {9,3,19,6} , {9,3,29,14} , {9,7,9,4} , {9,7,21,10}
   386  , {9,8,9,5} , {9,8,21,11} , {9,12,1,9} , {9,12,11,11}
   387  , {9,12,19,15} , {9,12,29,23} , {9,13,9,12} , {9,13,21,18}
   388  , {10,2,5,0} , {10,2,25,10} , {10,5,1,1} , {10,5,11,3}
   389  , {10,5,19,7} , {10,5,29,15} , {10,7,5,3} , {10,7,25,13}
   390  , {10,8,5,4} , {10,8,25,14} , {10,10,1,6} , {10,10,11,8}
   391  , {10,10,19,12} , {10,10,29,20} , {10,13,5,11} , {10,13,25,21}
   392  , {13,1,15,3} , {13,4,15,4} , {13,5,3,1} , {13,5,27,13}
   393  , {13,6,5,2} , {13,6,25,12} , {13,9,5,5} , {13,9,25,15}
   394  , {13,10,3,6} , {13,10,27,18} , {13,11,15,11} , {13,14,15,16}
   395  , {13,15,7,15} , {13,15,13,17} , {13,15,17,19} , {13,15,23,23}
   396  , {14,1,7,0} , {14,1,13,2} , {14,1,17,4} , {14,1,23,8}
   397  , {14,4,7,1} , {14,4,13,3} , {14,4,17,5} , {14,4,23,9}
   398  , {14,11,7,8} , {14,11,13,10} , {14,11,17,12} , {14,11,23,16}
   399  , {14,14,7,13} , {14,14,13,15} , {14,14,17,17} , {14,14,23,21}
   400  } ;
   401  
   402  static const todo for6[] = {
   403    {1,1,2,0} , {1,1,8,1} , {1,1,22,8} , {1,1,28,13}
   404  , {1,3,10,2} , {1,3,20,7} , {1,7,10,4} , {1,7,20,9}
   405  , {1,9,2,4} , {1,9,8,5} , {1,9,22,12} , {1,9,28,17}
   406  , {5,1,4,0} , {5,1,14,3} , {5,1,16,4} , {5,1,26,11}
   407  , {5,5,2,1} , {5,5,8,2} , {5,5,22,9} , {5,5,28,14}
   408  , {5,9,4,4} , {5,9,14,7} , {5,9,16,8} , {5,9,26,15}
   409  , {8,3,2,0} , {8,3,8,1} , {8,3,22,8} , {8,3,28,13}
   410  , {8,5,4,1} , {8,5,14,4} , {8,5,16,5} , {8,5,26,12}
   411  , {8,7,2,2} , {8,7,8,3} , {8,7,22,10} , {8,7,28,15}
   412  , {11,1,10,1} , {11,1,20,6} , {11,3,4,0} , {11,3,14,3}
   413  , {11,3,16,4} , {11,3,26,11} , {11,7,4,2} , {11,7,14,5}
   414  , {11,7,16,6} , {11,7,26,13} , {11,9,10,5} , {11,9,20,10}
   415  } ;
   416  
   417  static const todo for12[] = {
   418    {2,2,1,0} , {2,2,11,-2} , {2,2,19,-6} , {2,2,29,-14}
   419  , {2,3,4,0} , {2,3,14,-3} , {2,3,16,-4} , {2,3,26,-11}
   420  , {2,5,2,1} , {2,5,8,0} , {2,5,22,-7} , {2,5,28,-12}
   421  , {2,7,4,2} , {2,7,14,-1} , {2,7,16,-2} , {2,7,26,-9}
   422  , {2,8,1,3} , {2,8,11,1} , {2,8,19,-3} , {2,8,29,-11}
   423  , {2,10,7,4} , {2,10,13,2} , {2,10,17,0} , {2,10,23,-4}
   424  , {6,1,10,-2} , {6,1,20,-7} , {6,2,7,-1} , {6,2,13,-3}
   425  , {6,2,17,-5} , {6,2,23,-9} , {6,3,2,0} , {6,3,8,-1}
   426  , {6,3,22,-8} , {6,3,28,-13} , {6,4,5,0} , {6,4,25,-10}
   427  , {6,6,5,1} , {6,6,25,-9} , {6,7,2,2} , {6,7,8,1}
   428  , {6,7,22,-6} , {6,7,28,-11} , {6,8,7,2} , {6,8,13,0}
   429  , {6,8,17,-2} , {6,8,23,-6} , {6,9,10,2} , {6,9,20,-3}
   430  , {12,1,4,-1} , {12,1,14,-4} , {12,1,16,-5} , {12,1,26,-12}
   431  , {12,2,5,-1} , {12,2,25,-11} , {12,3,10,-2} , {12,3,20,-7}
   432  , {12,4,1,0} , {12,4,11,-2} , {12,4,19,-6} , {12,4,29,-14}
   433  , {12,6,1,1} , {12,6,11,-1} , {12,6,19,-5} , {12,6,29,-13}
   434  , {12,7,10,0} , {12,7,20,-5} , {12,8,5,2} , {12,8,25,-8}
   435  , {12,9,4,3} , {12,9,14,0} , {12,9,16,-1} , {12,9,26,-8}
   436  , {15,1,2,-1} , {15,1,8,-2} , {15,1,22,-9} , {15,1,28,-14}
   437  , {15,4,7,-1} , {15,4,13,-3} , {15,4,17,-5} , {15,4,23,-9}
   438  , {15,5,4,0} , {15,5,14,-3} , {15,5,16,-4} , {15,5,26,-11}
   439  , {15,6,7,0} , {15,6,13,-2} , {15,6,17,-4} , {15,6,23,-8}
   440  , {15,9,2,3} , {15,9,8,2} , {15,9,22,-5} , {15,9,28,-10}
   441  , {15,10,1,4} , {15,10,11,2} , {15,10,19,-2} , {15,10,29,-10}
   442  } ;
   443  
   444  void primegen_sieve(primegen *pg)
   445  {
   446    uint32_t (*buf)[B32];
   447    uint64_t L;
   448    int i;
   449    uint32_t Lmodqq[49];
   450  
   451    buf = pg->buf;
   452    L = pg->L;
   453  
   454    if (L > 2000000000)
   455      for (i = 0;i < 49;++i)
   456        Lmodqq[i] = L % qqtab[i];
   457    else
   458      for (i = 0;i < 49;++i)
   459        Lmodqq[i] = ((uint32_t) L) % qqtab[i];
   460  
   461    clear(buf);
   462  
   463    for (i = 0;i < 16;++i)
   464      doit4(buf[0],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   465    squarefreetiny(buf[0],Lmodqq,1);
   466    for (;i < 32;++i)
   467      doit4(buf[3],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   468    squarefreetiny(buf[3],Lmodqq,13);
   469    for (;i < 48;++i)
   470      doit4(buf[4],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   471    squarefreetiny(buf[4],Lmodqq,17);
   472    for (;i < 64;++i)
   473      doit4(buf[7],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   474    squarefreetiny(buf[7],Lmodqq,29);
   475    for (;i < 80;++i)
   476      doit4(buf[9],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   477    squarefreetiny(buf[9],Lmodqq,37);
   478    for (;i < 96;++i)
   479      doit4(buf[10],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   480    squarefreetiny(buf[10],Lmodqq,41);
   481    for (;i < 112;++i)
   482      doit4(buf[13],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   483    squarefreetiny(buf[13],Lmodqq,49);
   484    for (;i < 128;++i)
   485      doit4(buf[14],for4[i].f,for4[i].g,(int64_t) for4[i].k - L);
   486    squarefreetiny(buf[14],Lmodqq,53);
   487  
   488    for (i = 0;i < 12;++i)
   489      doit6(buf[1],for6[i].f,for6[i].g,(int64_t) for6[i].k - L);
   490    squarefreetiny(buf[1],Lmodqq,7);
   491    for (;i < 24;++i)
   492      doit6(buf[5],for6[i].f,for6[i].g,(int64_t) for6[i].k - L);
   493    squarefreetiny(buf[5],Lmodqq,19);
   494    for (;i < 36;++i)
   495      doit6(buf[8],for6[i].f,for6[i].g,(int64_t) for6[i].k - L);
   496    squarefreetiny(buf[8],Lmodqq,31);
   497    for (;i < 48;++i)
   498      doit6(buf[11],for6[i].f,for6[i].g,(int64_t) for6[i].k - L);
   499    squarefreetiny(buf[11],Lmodqq,43);
   500  
   501    for (i = 0;i < 24;++i)
   502      doit12(buf[2],for12[i].f,for12[i].g,(int64_t) for12[i].k - L);
   503    squarefreetiny(buf[2],Lmodqq,11);
   504    for (;i < 48;++i)
   505      doit12(buf[6],for12[i].f,for12[i].g,(int64_t) for12[i].k - L);
   506    squarefreetiny(buf[6],Lmodqq,23);
   507    for (;i < 72;++i)
   508      doit12(buf[12],for12[i].f,for12[i].g,(int64_t) for12[i].k - L);
   509    squarefreetiny(buf[12],Lmodqq,47);
   510    for (;i < 96;++i)
   511      doit12(buf[15],for12[i].f,for12[i].g,(int64_t) for12[i].k - L);
   512    squarefreetiny(buf[15],Lmodqq,59);
   513  
   514    squarefree49(buf,L,247);
   515    squarefree49(buf,L,253);
   516    squarefree49(buf,L,257);
   517    squarefree49(buf,L,263);
   518    squarefree1(buf,L,241);
   519    squarefree1(buf,L,251);
   520    squarefree1(buf,L,259);
   521    squarefree1(buf,L,269);
   522  }
   523  
   524  
   525  void primegen_fill(primegen *pg)
   526  {
   527    int i;
   528    uint32_t mask;
   529    uint32_t bits0, bits1, bits2, bits3, bits4, bits5, bits6, bits7;
   530    uint32_t bits8, bits9, bits10, bits11, bits12, bits13, bits14, bits15;
   531    uint64_t base;
   532  
   533    i = pg->pos;
   534    if (i == B32) {
   535      primegen_sieve(pg);
   536      pg->L += B;
   537      i = 0;
   538    }
   539    pg->pos = i + 1;
   540  
   541    bits0 = ~pg->buf[0][i];
   542    bits1 = ~pg->buf[1][i];
   543    bits2 = ~pg->buf[2][i];
   544    bits3 = ~pg->buf[3][i];
   545    bits4 = ~pg->buf[4][i];
   546    bits5 = ~pg->buf[5][i];
   547    bits6 = ~pg->buf[6][i];
   548    bits7 = ~pg->buf[7][i];
   549    bits8 = ~pg->buf[8][i];
   550    bits9 = ~pg->buf[9][i];
   551    bits10 = ~pg->buf[10][i];
   552    bits11 = ~pg->buf[11][i];
   553    bits12 = ~pg->buf[12][i];
   554    bits13 = ~pg->buf[13][i];
   555    bits14 = ~pg->buf[14][i];
   556    bits15 = ~pg->buf[15][i];
   557  
   558    base = pg->base + 1920;
   559    pg->base = base;
   560  
   561    pg->num = 0;
   562  
   563    for (mask = 0x80000000;mask;mask >>= 1) {
   564      base -= 60;
   565      if (bits15 & mask) pg->p[pg->num++] = base + 59;
   566      if (bits14 & mask) pg->p[pg->num++] = base + 53;
   567      if (bits13 & mask) pg->p[pg->num++] = base + 49;
   568      if (bits12 & mask) pg->p[pg->num++] = base + 47;
   569      if (bits11 & mask) pg->p[pg->num++] = base + 43;
   570      if (bits10 & mask) pg->p[pg->num++] = base + 41;
   571      if (bits9 & mask) pg->p[pg->num++] = base + 37;
   572      if (bits8 & mask) pg->p[pg->num++] = base + 31;
   573      if (bits7 & mask) pg->p[pg->num++] = base + 29;
   574      if (bits6 & mask) pg->p[pg->num++] = base + 23;
   575      if (bits5 & mask) pg->p[pg->num++] = base + 19;
   576      if (bits4 & mask) pg->p[pg->num++] = base + 17;
   577      if (bits3 & mask) pg->p[pg->num++] = base + 13;
   578      if (bits2 & mask) pg->p[pg->num++] = base + 11;
   579      if (bits1 & mask) pg->p[pg->num++] = base + 7;
   580      if (bits0 & mask) pg->p[pg->num++] = base + 1;
   581    }
   582  }
   583  
   584  uint64_t primegen_next(primegen *pg)
   585  {
   586    while (!pg->num)
   587      primegen_fill(pg);
   588  
   589    return pg->p[--pg->num];
   590  }
   591  
   592  uint64_t primegen_peek(primegen *pg)
   593  {
   594    while (!pg->num)
   595      primegen_fill(pg);
   596  
   597    return pg->p[pg->num - 1];
   598  }
   599  
   600  void primegen_init(primegen *pg)
   601  {
   602    pg->L = 1;
   603    pg->base = 60;
   604  
   605    pg->pos = PRIMEGEN_WORDS;
   606  
   607    pg->p[0] = 59;
   608    pg->p[1] = 53;
   609    pg->p[2] = 47;
   610    pg->p[3] = 43;
   611    pg->p[4] = 41;
   612    pg->p[5] = 37;
   613    pg->p[6] = 31;
   614    pg->p[7] = 29;
   615    pg->p[8] = 23;
   616    pg->p[9] = 19;
   617    pg->p[10] = 17;
   618    pg->p[11] = 13;
   619    pg->p[12] = 11;
   620    pg->p[13] = 7;
   621    pg->p[14] = 5;
   622    pg->p[15] = 3;
   623    pg->p[16] = 2;
   624  
   625    pg->num = 17;
   626  }
   627  
   628  
   629  static const unsigned long pop[256] = {
   630   0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5
   631  ,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
   632  ,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
   633  ,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
   634  ,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6
   635  ,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
   636  ,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7
   637  ,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
   638  };
   639  
   640  uint64_t primegen_count(primegen *pg,uint64_t to)
   641  {
   642    uint64_t count = 0;
   643    register int pos;
   644    register int j;
   645    register uint32_t bits;
   646    register uint32_t smallcount;
   647  
   648    for (;;) {
   649      while (pg->num) {
   650        if (pg->p[pg->num - 1] >= to) return count;
   651        ++count;
   652        --pg->num;
   653      }
   654  
   655      smallcount = 0;
   656      pos = pg->pos;
   657      while ((pos < B32) && (pg->base + 1920 < to)) {
   658        for (j = 0;j < 16;++j) {
   659      bits = ~pg->buf[j][pos];
   660      smallcount += pop[bits & 255]; bits >>= 8;
   661      smallcount += pop[bits & 255]; bits >>= 8;
   662      smallcount += pop[bits & 255]; bits >>= 8;
   663      smallcount += pop[bits & 255];
   664        }
   665        pg->base += 1920;
   666        ++pos;
   667      }
   668      pg->pos = pos;
   669      count += smallcount;
   670  
   671      if (pos == B32)
   672        while (pg->base + B * 60 < to) {
   673          primegen_sieve(pg);
   674          pg->L += B;
   675  
   676      smallcount = 0;
   677          for (j = 0;j < 16;++j)
   678        for (pos = 0;pos < B32;++pos) {
   679          bits = ~pg->buf[j][pos];
   680          smallcount += pop[bits & 255]; bits >>= 8;
   681          smallcount += pop[bits & 255]; bits >>= 8;
   682          smallcount += pop[bits & 255]; bits >>= 8;
   683          smallcount += pop[bits & 255];
   684        }
   685          count += smallcount;
   686          pg->base += B * 60;
   687        }
   688  
   689      primegen_fill(pg);
   690    }
   691  }
   692  
   693  void primegen_skipto(primegen *pg,uint64_t to)
   694  {
   695    int pos;
   696  
   697    for (;;) {
   698      while (pg->num) {
   699        if (pg->p[pg->num - 1] >= to) return;
   700        --pg->num;
   701      }
   702  
   703      pos = pg->pos;
   704      while ((pos < B32) && (pg->base + 1920 < to)) {
   705        pg->base += 1920;
   706        ++pos;
   707      }
   708      pg->pos = pos;
   709      if (pos == B32)
   710        while (pg->base + B * 60 < to) {
   711          pg->L += B;
   712          pg->base += B * 60;
   713        }
   714  
   715      primegen_fill(pg);
   716    }
   717  }