github.com/razvanm/vanadium-go-1.3@v0.0.0-20160721203343-4a65068e5915/test/bench/shootout/fasta.c (about)

     1  /*
     2  Redistribution and use in source and binary forms, with or without
     3  modification, are permitted provided that the following conditions are met:
     4  
     5      * Redistributions of source code must retain the above copyright
     6      notice, this list of conditions and the following disclaimer.
     7  
     8      * Redistributions in binary form must reproduce the above copyright
     9      notice, this list of conditions and the following disclaimer in the
    10      documentation and/or other materials provided with the distribution.
    11  
    12      * Neither the name of "The Computer Language Benchmarks Game" nor the
    13      name of "The Computer Language Shootout Benchmarks" nor the names of
    14      its contributors may be used to endorse or promote products derived
    15      from this software without specific prior written permission.
    16  
    17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    18  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    19  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    20  ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    21  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    22  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    23  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    24  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    25  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    26  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    27  POSSIBILITY OF SUCH DAMAGE.
    28  */
    29  
    30  /*
    31   * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
    32   */
    33  
    34  /*  The Computer Language Benchmarks Game
    35   *  http://shootout.alioth.debian.org/
    36   *
    37   *  contributed by Petr Prokhorenkov
    38   */
    39  
    40  #include <stdio.h>
    41  #include <stdlib.h>
    42  #include <string.h>
    43  
    44  #ifndef fwrite_unlocked
    45  // not available on OS X 
    46  #define fwrite_unlocked fwrite
    47  #define fputc_unlocked fputc
    48  #define fputs_unlocked fputs
    49  #endif
    50  
    51  #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
    52  #define unlikely(x) __builtin_expect((x), 0)
    53  
    54  #define IM 139968
    55  #define IA 3877
    56  #define IC 29573
    57  
    58  #define LINE_LEN 60
    59  #define LOOKUP_SIZE 4096
    60  #define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
    61  
    62  typedef unsigned random_t;
    63  
    64  void
    65  random_init(random_t *random) {
    66      *random = 42;
    67  }
    68  
    69  // Special version with result rescaled to LOOKUP_SCALE.
    70  static inline
    71  float
    72  random_next_lookup(random_t *random) {
    73      *random = (*random*IA + IC)%IM;
    74  
    75      return (*random)*(LOOKUP_SCALE/IM);
    76  }
    77  
    78  struct amino_acid {
    79     char sym;
    80     float prob;
    81     float cprob_lookup;
    82  };
    83  
    84  void
    85  repeat(const char *alu, const char *title, int n) {
    86      int len = strlen(alu);
    87      char buffer[len + LINE_LEN];
    88      int pos = 0;
    89  
    90      memcpy(buffer, alu, len);
    91      memcpy(buffer + len, alu, LINE_LEN);
    92  
    93      fputs_unlocked(title, stdout);
    94      while (n > 0) {
    95          int bytes = n > LINE_LEN ? LINE_LEN : n;
    96  
    97          fwrite_unlocked(buffer + pos, bytes, 1, stdout);
    98          pos += bytes;
    99          if (pos > len) {
   100              pos -= len;
   101          }
   102          fputc_unlocked('\n', stdout);
   103          n -= bytes;
   104      }
   105  }
   106  
   107  /*
   108   * Lookup table contains mapping from real values to cumulative
   109   * probabilities. Careful selection of table size allows lookup
   110   * virtually in constant time.
   111   *
   112   * All cumulative probabilities are rescaled to LOOKUP_SCALE,
   113   * this allows to save one multiplication operation on each iteration
   114   * in randomize().
   115   */
   116  
   117  void *
   118  fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
   119      float p = 0;
   120      int i, j;
   121  
   122      for (i = 0; i < amino_acid_size; i++) {
   123          p += amino_acid[i].prob;
   124          amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
   125      }
   126  
   127      // Prevent rounding error.
   128      amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
   129  
   130      for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
   131          while (amino_acid[j].cprob_lookup < i) {
   132              j++;
   133          }
   134          lookup[i] = &amino_acid[j];
   135      }
   136  
   137      return 0;
   138  }
   139  
   140  void
   141  randomize(struct amino_acid *amino_acid, int amino_acid_size,
   142          const char *title, int n, random_t *rand) {
   143      struct amino_acid *lookup[LOOKUP_SIZE];
   144      char line_buffer[LINE_LEN + 1];
   145      int i, j;
   146  
   147      line_buffer[LINE_LEN] = '\n';
   148  
   149      fill_lookup(lookup, amino_acid, amino_acid_size);
   150  
   151      fputs_unlocked(title, stdout);
   152  
   153      for (i = 0, j = 0; i < n; i++, j++) {
   154          if (j == LINE_LEN) {
   155              fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
   156              j = 0;
   157          }
   158  
   159          float r = random_next_lookup(rand);
   160          struct amino_acid *u = lookup[(short)r];
   161          while (unlikely(u->cprob_lookup < r)) {
   162              ++u;
   163          }
   164          line_buffer[j] = u->sym;
   165      }
   166      line_buffer[j] = '\n';
   167      fwrite_unlocked(line_buffer, j + 1, 1, stdout);
   168  }
   169  
   170  struct amino_acid amino_acid[] = {
   171     { 'a', 0.27 },
   172     { 'c', 0.12 },
   173     { 'g', 0.12 },
   174     { 't', 0.27 },
   175  
   176     { 'B', 0.02 },
   177     { 'D', 0.02 },
   178     { 'H', 0.02 },
   179     { 'K', 0.02 },
   180     { 'M', 0.02 },
   181     { 'N', 0.02 },
   182     { 'R', 0.02 },
   183     { 'S', 0.02 },
   184     { 'V', 0.02 },
   185     { 'W', 0.02 },
   186     { 'Y', 0.02 },
   187  };
   188  
   189  struct amino_acid homo_sapiens[] = {
   190     { 'a', 0.3029549426680 },
   191     { 'c', 0.1979883004921 },
   192     { 'g', 0.1975473066391 },
   193     { 't', 0.3015094502008 },
   194  };
   195  
   196  static const char alu[] =
   197     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
   198     "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
   199     "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
   200     "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
   201     "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
   202     "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
   203     "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
   204  
   205  int
   206  main(int argc, const char **argv) {
   207      int n = argc > 1 ? atoi( argv[1] ) : 512;
   208      random_t rand;
   209  
   210      random_init(&rand);
   211  
   212      repeat(alu, ">ONE Homo sapiens alu\n", n*2);
   213      randomize(amino_acid, ARRAY_SIZE(amino_acid),
   214              ">TWO IUB ambiguity codes\n", n*3, &rand);
   215      randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
   216              ">THREE Homo sapiens frequency\n", n*5, &rand);
   217  
   218      return 0;
   219  }