golang.org/x/exp@v0.0.0-20240506185415-9bf2ced13842/shootout/fasta.c (about) 1 // +build ignore 2 3 /* 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions are met: 6 7 * Redistributions of source code must retain the above copyright 8 notice, this list of conditions and the following disclaimer. 9 10 * Redistributions in binary form must reproduce the above copyright 11 notice, this list of conditions and the following disclaimer in the 12 documentation and/or other materials provided with the distribution. 13 14 * Neither the name of "The Computer Language Benchmarks Game" nor the 15 name of "The Computer Language Shootout Benchmarks" nor the names of 16 its contributors may be used to endorse or promote products derived 17 from this software without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 23 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * http://shootout.alioth.debian.org/u32/program.php?test=fasta&lang=gcc&id=3 34 */ 35 36 /* The Computer Language Benchmarks Game 37 * http://shootout.alioth.debian.org/ 38 * 39 * contributed by Petr Prokhorenkov 40 */ 41 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <string.h> 45 46 #ifndef fwrite_unlocked 47 // not available on OS X 48 #define fwrite_unlocked fwrite 49 #define fputc_unlocked fputc 50 #define fputs_unlocked fputs 51 #endif 52 53 #define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) 54 #define unlikely(x) __builtin_expect((x), 0) 55 56 #define IM 139968 57 #define IA 3877 58 #define IC 29573 59 60 #define LINE_LEN 60 61 #define LOOKUP_SIZE 4096 62 #define LOOKUP_SCALE ((float)(LOOKUP_SIZE - 1)) 63 64 typedef unsigned random_t; 65 66 void 67 random_init(random_t *random) { 68 *random = 42; 69 } 70 71 // Special version with result rescaled to LOOKUP_SCALE. 72 static inline 73 float 74 random_next_lookup(random_t *random) { 75 *random = (*random*IA + IC)%IM; 76 77 return (*random)*(LOOKUP_SCALE/IM); 78 } 79 80 struct amino_acid { 81 char sym; 82 float prob; 83 float cprob_lookup; 84 }; 85 86 void 87 repeat(const char *alu, const char *title, int n) { 88 int len = strlen(alu); 89 char buffer[len + LINE_LEN]; 90 int pos = 0; 91 92 memcpy(buffer, alu, len); 93 memcpy(buffer + len, alu, LINE_LEN); 94 95 fputs_unlocked(title, stdout); 96 while (n > 0) { 97 int bytes = n > LINE_LEN ? LINE_LEN : n; 98 99 fwrite_unlocked(buffer + pos, bytes, 1, stdout); 100 pos += bytes; 101 if (pos > len) { 102 pos -= len; 103 } 104 fputc_unlocked('\n', stdout); 105 n -= bytes; 106 } 107 } 108 109 /* 110 * Lookup table contains mapping from real values to cumulative 111 * probabilities. Careful selection of table size allows lookup 112 * virtually in constant time. 113 * 114 * All cumulative probabilities are rescaled to LOOKUP_SCALE, 115 * this allows to save one multiplication operation on each iteration 116 * in randomize(). 117 */ 118 119 void * 120 fill_lookup(struct amino_acid **lookup, struct amino_acid *amino_acid, int amino_acid_size) { 121 float p = 0; 122 int i, j; 123 124 for (i = 0; i < amino_acid_size; i++) { 125 p += amino_acid[i].prob; 126 amino_acid[i].cprob_lookup = p*LOOKUP_SCALE; 127 } 128 129 // Prevent rounding error. 130 amino_acid[amino_acid_size - 1].cprob_lookup = LOOKUP_SIZE - 1; 131 132 for (i = 0, j = 0; i < LOOKUP_SIZE; i++) { 133 while (amino_acid[j].cprob_lookup < i) { 134 j++; 135 } 136 lookup[i] = &amino_acid[j]; 137 } 138 139 return 0; 140 } 141 142 void 143 randomize(struct amino_acid *amino_acid, int amino_acid_size, 144 const char *title, int n, random_t *rand) { 145 struct amino_acid *lookup[LOOKUP_SIZE]; 146 char line_buffer[LINE_LEN + 1]; 147 int i, j; 148 149 line_buffer[LINE_LEN] = '\n'; 150 151 fill_lookup(lookup, amino_acid, amino_acid_size); 152 153 fputs_unlocked(title, stdout); 154 155 for (i = 0, j = 0; i < n; i++, j++) { 156 if (j == LINE_LEN) { 157 fwrite_unlocked(line_buffer, LINE_LEN + 1, 1, stdout); 158 j = 0; 159 } 160 161 float r = random_next_lookup(rand); 162 struct amino_acid *u = lookup[(short)r]; 163 while (unlikely(u->cprob_lookup < r)) { 164 ++u; 165 } 166 line_buffer[j] = u->sym; 167 } 168 line_buffer[j] = '\n'; 169 fwrite_unlocked(line_buffer, j + 1, 1, stdout); 170 } 171 172 struct amino_acid amino_acid[] = { 173 { 'a', 0.27 }, 174 { 'c', 0.12 }, 175 { 'g', 0.12 }, 176 { 't', 0.27 }, 177 178 { 'B', 0.02 }, 179 { 'D', 0.02 }, 180 { 'H', 0.02 }, 181 { 'K', 0.02 }, 182 { 'M', 0.02 }, 183 { 'N', 0.02 }, 184 { 'R', 0.02 }, 185 { 'S', 0.02 }, 186 { 'V', 0.02 }, 187 { 'W', 0.02 }, 188 { 'Y', 0.02 }, 189 }; 190 191 struct amino_acid homo_sapiens[] = { 192 { 'a', 0.3029549426680 }, 193 { 'c', 0.1979883004921 }, 194 { 'g', 0.1975473066391 }, 195 { 't', 0.3015094502008 }, 196 }; 197 198 static const char alu[] = 199 "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTG" 200 "GGAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGA" 201 "GACCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAA" 202 "AATACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAAT" 203 "CCCAGCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAAC" 204 "CCGGGAGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTG" 205 "CACTCCAGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"; 206 207 int 208 main(int argc, const char **argv) { 209 int n = argc > 1 ? atoi( argv[1] ) : 512; 210 random_t rand; 211 212 random_init(&rand); 213 214 repeat(alu, ">ONE Homo sapiens alu\n", n*2); 215 randomize(amino_acid, ARRAY_SIZE(amino_acid), 216 ">TWO IUB ambiguity codes\n", n*3, &rand); 217 randomize(homo_sapiens, ARRAY_SIZE(homo_sapiens), 218 ">THREE Homo sapiens frequency\n", n*5, &rand); 219 220 return 0; 221 }