golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/shootout/fasta.c (about)

     1  // +build ignore
     2  
     3  /*
     4  Redistribution and use in source and binary forms, with or without
     5  modification, are permitted provided that the following conditions are met:
     6  
     7      * Redistributions of source code must retain the above copyright
     8      notice, this list of conditions and the following disclaimer.
     9  
    10      * Redistributions in binary form must reproduce the above copyright
    11      notice, this list of conditions and the following disclaimer in the
    12      documentation and/or other materials provided with the distribution.
    13  
    14      * Neither the name of "The Computer Language Benchmarks Game" nor the
    15      name of "The Computer Language Shootout Benchmarks" nor the names of
    16      its contributors may be used to endorse or promote products derived
    17      from this software without specific prior written permission.
    18  
    19  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    20  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    21  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    22  ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    23  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    24  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    25  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    26  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    27  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    28  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    29  POSSIBILITY OF SUCH DAMAGE.
    30  */
    31  
    32  /*
    33   * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3
    34   */
    35  
    36  /*  The Computer Language Benchmarks Game
    37   *  http://shootout.alioth.debian.org/
    38   *
    39   *  contributed by Petr Prokhorenkov
    40   */
    41  
    42  #include <stdio.h>
    43  #include <stdlib.h>
    44  #include <string.h>
    45  
    46  #ifndef fwrite_unlocked
    47  // not available on OS X 
    48  #define fwrite_unlocked fwrite
    49  #define fputc_unlocked fputc
    50  #define fputs_unlocked fputs
    51  #endif
    52  
    53  #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0]))
    54  #define unlikely(x) __builtin_expect((x), 0)
    55  
    56  #define IM 139968
    57  #define IA 3877
    58  #define IC 29573
    59  
    60  #define LINE_LEN 60
    61  #define LOOKUP_SIZE 4096
    62  #define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1))
    63  
    64  typedef unsigned random_t;
    65  
    66  void
    67  random_init(random_t *random) {
    68      *random = 42;
    69  }
    70  
    71  // Special version with result rescaled to LOOKUP_SCALE.
    72  static inline
    73  float
    74  random_next_lookup(random_t *random) {
    75      *random = (*random*IA + IC)%IM;
    76  
    77      return (*random)*(LOOKUP_SCALE/IM);
    78  }
    79  
    80  struct amino_acid {
    81     char sym;
    82     float prob;
    83     float cprob_lookup;
    84  };
    85  
    86  void
    87  repeat(const char *alu, const char *title, int n) {
    88      int len = strlen(alu);
    89      char buffer[len + LINE_LEN];
    90      int pos = 0;
    91  
    92      memcpy(buffer, alu, len);
    93      memcpy(buffer + len, alu, LINE_LEN);
    94  
    95      fputs_unlocked(title, stdout);
    96      while (n > 0) {
    97          int bytes = n > LINE_LEN ? LINE_LEN : n;
    98  
    99          fwrite_unlocked(buffer + pos, bytes, 1, stdout);
   100          pos += bytes;
   101          if (pos > len) {
   102              pos -= len;
   103          }
   104          fputc_unlocked('\n', stdout);
   105          n -= bytes;
   106      }
   107  }
   108  
   109  /*
   110   * Lookup table contains mapping from real values to cumulative
   111   * probabilities. Careful selection of table size allows lookup
   112   * virtually in constant time.
   113   *
   114   * All cumulative probabilities are rescaled to LOOKUP_SCALE,
   115   * this allows to save one multiplication operation on each iteration
   116   * in randomize().
   117   */
   118  
   119  void *
   120  fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) {
   121      float p = 0;
   122      int i, j;
   123  
   124      for (i = 0; i < amino_acid_size; i++) {
   125          p += amino_acid[i].prob;
   126          amino_acid[i].cprob_lookup = p*LOOKUP_SCALE;
   127      }
   128  
   129      // Prevent rounding error.
   130      amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1;
   131  
   132      for (i = 0, j = 0; i < LOOKUP_SIZE; i++) {
   133          while (amino_acid[j].cprob_lookup < i) {
   134              j++;
   135          }
   136          lookup[i] = &amino_acid[j];
   137      }
   138  
   139      return 0;
   140  }
   141  
   142  void
   143  randomize(struct amino_acid *amino_acid, int amino_acid_size,
   144          const char *title, int n, random_t *rand) {
   145      struct amino_acid *lookup[LOOKUP_SIZE];
   146      char line_buffer[LINE_LEN + 1];
   147      int i, j;
   148  
   149      line_buffer[LINE_LEN] = '\n';
   150  
   151      fill_lookup(lookup, amino_acid, amino_acid_size);
   152  
   153      fputs_unlocked(title, stdout);
   154  
   155      for (i = 0, j = 0; i < n; i++, j++) {
   156          if (j == LINE_LEN) {
   157              fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout);
   158              j = 0;
   159          }
   160  
   161          float r = random_next_lookup(rand);
   162          struct amino_acid *u = lookup[(short)r];
   163          while (unlikely(u->cprob_lookup < r)) {
   164              ++u;
   165          }
   166          line_buffer[j] = u->sym;
   167      }
   168      line_buffer[j] = '\n';
   169      fwrite_unlocked(line_buffer, j + 1, 1, stdout);
   170  }
   171  
   172  struct amino_acid amino_acid[] = {
   173     { 'a', 0.27 },
   174     { 'c', 0.12 },
   175     { 'g', 0.12 },
   176     { 't', 0.27 },
   177  
   178     { 'B', 0.02 },
   179     { 'D', 0.02 },
   180     { 'H', 0.02 },
   181     { 'K', 0.02 },
   182     { 'M', 0.02 },
   183     { 'N', 0.02 },
   184     { 'R', 0.02 },
   185     { 'S', 0.02 },
   186     { 'V', 0.02 },
   187     { 'W', 0.02 },
   188     { 'Y', 0.02 },
   189  };
   190  
   191  struct amino_acid homo_sapiens[] = {
   192     { 'a', 0.3029549426680 },
   193     { 'c', 0.1979883004921 },
   194     { 'g', 0.1975473066391 },
   195     { 't', 0.3015094502008 },
   196  };
   197  
   198  static const char alu[] =
   199     "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG"
   200     "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA"
   201     "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA"
   202     "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT"
   203     "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC"
   204     "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG"
   205     "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA";
   206  
   207  int
   208  main(int argc, const char **argv) {
   209      int n = argc > 1 ? atoi( argv[1] ) : 512;
   210      random_t rand;
   211  
   212      random_init(&rand);
   213  
   214      repeat(alu, ">ONE Homo sapiens alu\n", n*2);
   215      randomize(amino_acid, ARRAY_SIZE(amino_acid),
   216              ">TWO IUB ambiguity codes\n", n*3, &rand);
   217      randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens),
   218              ">THREE Homo sapiens frequency\n", n*5, &rand);
   219  
   220      return 0;
   221  }