github.com/keysonzzz/kmg@v0.0.0-20151121023212-05317bfd7d39/kmgRand/LcgCalculateConstants/c/rand-lcg.c (about)

     1  /*
     2      This is a "linear-congruent-generator", a type of random number
     3      generator. We use it scan IPv4 addresses (and ports) in random
     4      order, without having to keep 'state' about which ones we've
     5      already scanned.
     6  
     7  */
     8  
     9  #include "rand-lcg.h"
    10  #include "rand-primegen.h" /* DJB's prime factoring code */
    11  
    12  
    13  #include <math.h>  /* for 'sqrt()', may need -lm for gcc */
    14  #include <stdint.h>
    15  #include <stdio.h>
    16  #include <stdlib.h>
    17  #include <string.h>
    18  #include <time.h>
    19  
    20  
    21  
    22  /**
    23   * A 64 bit number can't have more than 16 prime factors. The first factors
    24   * are:
    25   * 2*3*5*7*11*13*17*19*23*29*31*37*41*43*47*53 = 0xC443F2F861D29C3A
    26   *                                                 0123456789abcdef
    27   * We zero termiante this list, so we are going to reserve 20 slots.
    28   */
    29  typedef uint64_t PRIMEFACTORS[20];
    30  
    31  
    32  /****************************************************************************
    33   * Break down the number into prime factors using DJB's sieve code, which
    34   * is about 5 to 10 times faster than the Seive of Eratosthenes.
    35   *
    36   * @param number
    37   *      The integer that we are factoring. It can be any value up to 64 bits
    38   *      in size.
    39   * @param factors
    40   *      The list of all the prime factors, zero terminated.
    41   * @param non_factors
    42   *      A list of smallest numbers that aren't prime factors. We return
    43   *      this because we are going to use prime non-factors for finding
    44   *      interesting numbers.
    45   ****************************************************************************/
    46  unsigned
    47  sieve_prime_factors(uint64_t number, PRIMEFACTORS factors, PRIMEFACTORS non_factors, double *elapsed)
    48  {
    49      primegen pg;
    50      clock_t start;
    51      clock_t stop;
    52      uint64_t prime;
    53      uint64_t max;
    54      unsigned factor_count = 0;
    55      unsigned non_factor_count = 0;
    56  
    57      /*
    58       * We only need to seive up to the square-root of the target number. Only
    59       * one prime factor can be bigger than the square root, so once we find
    60       * all the other primes, the square root is the only one left.
    61       * Note: you have to link to the 'm' math library for some gcc platforms.
    62       */
    63      max = (uint64_t)sqrt(number + 1.0);
    64  
    65      /*
    66       * Init the DJB primegen library.
    67       */
    68      primegen_init(&pg);
    69  
    70      /*
    71       * Enumerate all the primes starting with 2
    72       */
    73      start = clock();
    74      for (;;) {
    75  
    76          /* Seive the next prime */
    77          prime = primegen_next(&pg);
    78  
    79          /* If we've reached the square root, then that's as far as we need
    80           * to go */
    81          if (prime > max)
    82              break;
    83  
    84          /* If this prime is not a factor (evenly divisible with no remainder)
    85           * then loop back and get the next prime */
    86          if ((number % prime) != 0) {
    87              if (non_factor_count < 12)
    88                  non_factors[non_factor_count++] = prime;
    89              continue;
    90          }
    91  
    92          /* Else we've found a prime factor, so add this to the list of primes */
    93          factors[factor_count++] = prime;
    94  
    95          /* At the end, we may have one prime factor left that's bigger than the
    96           * sqrt. Therefore, as we go along, divide the original number
    97           * (possibly several times) by the prime factor so that this large
    98           * remaining factor will be the only one left */
    99          while ((number % prime) == 0)
   100              number /= prime;
   101  
   102          /* exit early if we've found all prime factors. comment out this
   103           * code if you want to benchmark it */
   104          if (number == 1 && non_factor_count > 10)
   105              break;
   106      }
   107  
   108      /*
   109       * See if there is one last prime that's bigger than the square root.
   110       * Note: This is the only number that can be larger than 32-bits in the
   111       * way this code is written.
   112       */
   113      if (number != 1)
   114          factors[factor_count++] = number;
   115  
   116      /*
   117       * Zero terminate the results.
   118       */
   119      factors[factor_count] = 0;
   120      non_factors[non_factor_count] = 0;
   121  
   122      /*
   123       * Since prime factorization takes a long time, especially on slow
   124       * CPUs, we benchmark it to keep track of performance.
   125       */
   126      stop = clock();
   127      if (elapsed)
   128          *elapsed = ((double)stop - (double)start)/(double)CLOCKS_PER_SEC;
   129  
   130      /* should always be at least 1, because if the number itself is prime,
   131       * then that's it's only prime factor */
   132      return factor_count;
   133  }
   134  
   135  
   136  
   137  /****************************************************************************
   138   * Do a pseudo-random 1-to-1 translation of a number within a range to
   139   * another number in that range.
   140   *
   141   * The constants 'a' and 'c' must be chosen to match the LCG algorithm
   142   * to fit 'm' (range).
   143   *
   144   * This the same as the function 'rand()', except all the constants and
   145   * seeds are specified as parameters.
   146   *
   147   * @param index
   148   *      The index within the range that we are randomizing.
   149   * @param a
   150   *      The 'multiplier' of the LCG algorithm.
   151   * @param c
   152   *      The 'increment' of the LCG algorithm.
   153   * @param range
   154   *      The 'modulus' of the LCG algorithm.
   155   ****************************************************************************/
   156  uint64_t
   157  lcg_rand(uint64_t index, uint64_t a, uint64_t c, uint64_t range)
   158  {
   159      return (index * a + c) % range;
   160  }
   161  
   162  
   163  /****************************************************************************
   164   * Verify the LCG algorithm. You shouldn't do this for large ranges,
   165   * because we'll run out of memory. Therefore, this algorithm allocates
   166   * a buffer only up to a smaller range. We still have to traverse the
   167   * entire range of numbers, but we only need store values for a smaller
   168   * range. If 10% of the range checks out, then there's a good chance
   169   * it applies to the other 90% as well.
   170   *
   171   * This works by counting the results of rand(), which should be produced
   172   * exactly once.
   173   ****************************************************************************/
   174  unsigned
   175  lcg_verify(uint64_t a, uint64_t c, uint64_t range, uint64_t max)
   176  {
   177      unsigned char *list;
   178      uint64_t i;
   179      unsigned is_success = 1;
   180  
   181      /* Allocate a list of 1-byte counters */
   182      list = (unsigned char *)malloc((size_t)((range<max)?range:max));
   183      if (list == NULL)
   184          exit(1);
   185      memset(list, 0, (size_t)((range<max)?range:max));
   186  
   187      /* For all numbers in the range, verify increment the counter for the
   188       * the output. */
   189      for (i=0; i<range; i++) {
   190          uint64_t x = lcg_rand(i, a, c, range);
   191          if (x < max)
   192              list[x]++;
   193      }
   194  
   195      /* Now check the output to make sure that every counter is set exactly
   196       * to the value of '1'. */
   197      for (i=0; i<max && i<range; i++) {
   198          if (list[i] != 1)
   199              is_success = 0;
   200      }
   201  
   202      free(list);
   203  
   204      return is_success;
   205  }
   206  
   207  
   208  /****************************************************************************
   209   * Count the number of digits in a number so that we can pretty-print a
   210   * bunch of numbers in nice columns.
   211   ****************************************************************************/
   212  unsigned
   213  count_digits(uint64_t num)
   214  {
   215      unsigned result = 0;
   216  
   217      while (num) {
   218          result++;
   219          num /= 10;
   220      }
   221  
   222      return result;
   223  }
   224  
   225  /****************************************************************************
   226   * Tell whether the number has any prime factors in common with the list
   227   * of factors. In other words, if it's not coprime with the other number.
   228   * @param c
   229   *      The number we want to see has common factors with the other number.
   230   * @param factors
   231   *      The factors from the other number
   232   * @return
   233   *      !is_coprime(c, factors)
   234   ****************************************************************************/
   235  uint64_t
   236  has_factors_in_common(uint64_t c, PRIMEFACTORS factors)
   237  {
   238      unsigned i;
   239  
   240      for (i=0; factors[i]; i++) {
   241          if ((c % factors[i]) == 0)
   242              return factors[i]; /* found a common factor */
   243      }
   244      return 0; /* no factors in common */
   245  }
   246  
   247  
   248  /****************************************************************************
   249   * Given a range, calculate some possible constants for the LCG algorithm
   250   * for randomizing the order of the array.
   251   * @parm m
   252   *      The range for which we'll be finding random numbers. If we are
   253   *      looking for random numbers between [0..100), this number will
   254   *      be 100.
   255   * @parm a
   256   *      The LCG 'a' constant that will be the result of this function.
   257   * @param c
   258   *      The LCG 'c' constant that will be the result of this function. This
   259   *      should be set to 0 on the input to this function, or a suggested
   260   *      value.
   261   ****************************************************************************/
   262  void
   263  lcg_calculate_constants(uint64_t m, uint64_t *out_a, uint64_t *inout_c, int is_debug)
   264  {
   265      uint64_t a;
   266      uint64_t c = *inout_c;
   267      double elapsed = 0.0; /* Benchmark of 'sieve' algorithm */
   268      PRIMEFACTORS factors; /* List of prime factors of 'm' */
   269      PRIMEFACTORS non_factors;
   270      unsigned i;
   271  
   272      /*
   273       * Find all the prime factors of the number. This step can take several
   274       * seconds for 48 bit numbers, which is why we benchmark how long it
   275       * takes.
   276       */
   277      sieve_prime_factors(m, factors, non_factors, &elapsed);
   278  
   279      /*
   280       * Calculate the 'a-1' constant. It must share all the prime factors
   281       * with the range, and if the range is a multiple of 4, must also
   282       * be a multiple of 4
   283       */
   284      if (factors[0] == m) {
   285          /* this number has no prime factors, so we can choose anything.
   286           * Therefore, we are going to pick something at random */
   287          unsigned j;
   288  
   289          a = 1;
   290          for (j=0; non_factors[j] && j < 5; j++)
   291              a *= non_factors[j];
   292      } else {
   293          //unsigned j;
   294          a = 1;
   295          for (i=0; factors[i]; i++)
   296              a = a * factors[i];
   297          if ((m % 4) == 0)
   298              a *= 2;
   299  
   300          /*for (j=0; j<0 && non_factors[j]; j++)
   301              a *= non_factors[j];*/
   302      }
   303      a += 1;
   304  
   305      /*
   306       * Calculate the 'c' constant. It must have no prime factors in
   307       * common with the range.
   308       */
   309      if (c == 0)
   310          c = 2531011 ; /* something random */
   311      while (has_factors_in_common(c, factors))
   312          c++;
   313  
   314      if (is_debug) {
   315          /*
   316           * print the results
   317           */
   318          //printf("sizeof(int) = %llu-bits\n", (uint64_t)(sizeof(size_t)*8));
   319          printf("elapsed     = %5.3f-seconds\n", elapsed);
   320          printf("factors     = ");
   321          for (i=0; factors[i]; i++)
   322              printf("%llu ", factors[i]);
   323          printf("%s\n", factors[0]?"":"(none)");
   324          printf("m           = %-24llu (0x%llx)\n", m, m);
   325          printf("a           = %-24llu (0x%llx)\n", a, a);
   326          printf("c           = %-24llu (0x%llx)\n", c, c);
   327          printf("c%%m         = %-24llu (0x%llx)\n", c%m, c%m);
   328          printf("a%%m         = %-24llu (0x%llx)\n", a%m, a%m);
   329  
   330          if (m < 1000000000) {
   331              if (lcg_verify(a, c+1, m, 280))
   332                  printf("verify      = success\n");
   333              else
   334                  printf("verify      = failure\n");
   335          } else {
   336              printf("verify      = too big to check\n");
   337          }
   338  
   339  
   340          /*
   341           * Print some first numbers. We use these to visually inspect whether
   342           * the results are random or not.
   343           */
   344          {
   345              unsigned count = 0;
   346              uint64_t x = 0;
   347              unsigned digits = count_digits(m);
   348  
   349              for (i=0; i<100 && i < m; i++) {
   350                  x = lcg_rand(x, a, c, m);
   351                  count += printf("%*llu ", digits, x);
   352                  if (count >= 70) {
   353                      count = 0;
   354                      printf("\n");
   355                  }
   356              }
   357              printf("\n");
   358          }
   359      }
   360  
   361      *out_a = a;
   362      *inout_c = c;
   363  }
   364  
   365  /***************************************************************************
   366   ***************************************************************************/
   367  int
   368  randlcg_selftest()
   369  {
   370      unsigned i;
   371      int is_success = 0;
   372      uint64_t m, a, c;
   373  
   374  
   375      m = 3015 * 3;
   376  
   377      for (i=0; i<5; i++) {
   378          a = 0;
   379          c = 0;
   380  
   381          m += 10 + i;
   382  
   383          lcg_calculate_constants(m, &a, &c, 0);
   384  
   385          is_success = lcg_verify(a, c, m, m);
   386  
   387          if (!is_success) {
   388              fprintf(stderr, "LCG: randomization failed\n");
   389              return 1; /*fail*/
   390          }
   391      }
   392  
   393      return 0; /*success*/
   394  }