/* The Computer Language Shootout
   http://shootout.alioth.debian.org/

   Contributed by Josh Goldfoot
   to compile, use gcc -O3

   This revision uses "simple_hash.h," available from
   http://cvs.alioth.debian.org/cgi-bin/cvsweb.cgi/shootout/bench/Include/?cvsroot=shootout

*/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <limits.h>

enum { ht_num_primes = 28 };

/* Candidate bucket counts, in ascending order; ht_create() picks from here. */
static const unsigned long ht_prime_list[ht_num_primes] = {
    53ul,         97ul,         193ul,        389ul,        769ul,
    1543ul,       3079ul,       6151ul,       12289ul,      24593ul,
    49157ul,      98317ul,      196613ul,     393241ul,     786433ul,
    1572869ul,    3145739ul,    6291469ul,    12582917ul,   25165843ul,
    50331653ul,   100663319ul,  201326611ul,  402653189ul,  805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
};

/* One entry of a bucket chain: an owned copy of the key plus its counter. */
struct ht_node {
    char *key;
    int val;
    struct ht_node *next;
};

/* Chained hash table with a built-in single iterator (iter_index/iter_next). */
struct ht_ht {
    int size;                  /* number of buckets in tbl */
    struct ht_node **tbl;      /* bucket heads, NULL when empty */
    int iter_index;            /* next bucket the iterator will inspect */
    struct ht_node *iter_next; /* next node in the bucket being iterated */
    int items;                 /* number of nodes stored */
#ifdef HT_DEBUG
    int collisions;
#endif /* HT_DEBUG */
};

/* Returns the counter stored in node. */
static inline int ht_val(struct ht_node *node) {
    return(node->val);
}

/* Returns the key string stored in node (still owned by the node). */
static inline char *ht_key(struct ht_node *node) {
    return(node->key);
}

/* Maps key to a bucket index in [0, ht->size) using val = 5 * val + char. */
static inline int ht_hashcode(struct ht_ht *ht, char *key) {
    unsigned long val = 0;
    for (; *key; ++key) val = 5 * val + *key;
    return(val % ht->size);
}

/*
 * Allocates a node holding a private copy of key, with val = 0 and no
 * successor.  Prints a diagnostic and exits with status 1 if either
 * allocation fails.
 */
struct ht_node *ht_node_create(char *key) {
    char *newkey;
    struct ht_node *node;

    if ((node = malloc(sizeof *node)) == NULL) {
        perror("malloc ht_node");
        exit(1);
    }
    if ((newkey = malloc(strlen(key) + 1)) == NULL) {
        perror("strdup newkey");
        exit(1);
    }
    strcpy(newkey, key);
    node->key = newkey;
    node->val = 0;
    node->next = NULL;
    return(node);
}

/*
 * Creates an empty table whose bucket count is the first listed prime that
 * is >= size.  Non-positive sizes get the smallest prime; requests larger
 * than every prime representable in an int get the largest such prime.
 * Prints a diagnostic and exits with status 1 if allocation fails.
 * The result must be released with ht_destroy().
 */
struct ht_ht *ht_create(int size) {
    int i = 0;
    unsigned long want = size > 0 ? (unsigned long)size : 0ul;
    struct ht_ht *ht = malloc(sizeof *ht);

    if (ht == NULL) {
        perror("malloc ht_ht");
        exit(1);
    }
    /* Stop at the end of the list and never pick a prime that would
       overflow the int-typed size field. */
    while (i + 1 < ht_num_primes
           && ht_prime_list[i] < want
           && ht_prime_list[i + 1] <= (unsigned long)INT_MAX) {
        i++;
    }
    ht->size = (int)ht_prime_list[i];
    ht->tbl = calloc((size_t)ht->size, sizeof *ht->tbl);
    if (ht->tbl == NULL) {
        perror("calloc ht_ht table");
        exit(1);
    }
    ht->iter_index = 0;
    ht->iter_next = NULL;
    ht->items = 0;
#ifdef HT_DEBUG
    ht->collisions = 0;
#endif /* HT_DEBUG */
    return(ht);
}

/*
 * Frees every node (and its key), the bucket array and the table itself.
 * Passing NULL is a no-op, so callers may destroy the result of
 * generate_frequencies() without checking it first.
 */
void ht_destroy(struct ht_ht *ht) {
    struct ht_node *cur, *next;
    int i;
#ifdef HT_DEBUG
    int chain_len;
    int max_chain_len = 0;
    int density = 0;
#endif /* HT_DEBUG */

    if (ht == NULL)
        return;

#ifdef HT_DEBUG
    fprintf(stderr, " HT: size %d\n", ht->size);
    fprintf(stderr, " HT: items %d\n", ht->items);
    fprintf(stderr, " HT: collisions %d\n", ht->collisions);
#endif /* HT_DEBUG */
    for (i = 0; i < ht->size; i++) {
        next = ht->tbl[i];
#ifdef HT_DEBUG
        if (next) {
            density++;
        }
        chain_len = 0;
#endif /* HT_DEBUG */
        while (next) {
            cur = next;
            next = next->next;
            free(cur->key);
            free(cur);
#ifdef HT_DEBUG
            chain_len++;
#endif /* HT_DEBUG */
        }
#ifdef HT_DEBUG
        if (chain_len > max_chain_len)
            max_chain_len = chain_len;
#endif /* HT_DEBUG */
    }
    free(ht->tbl);
    free(ht);
#ifdef HT_DEBUG
    fprintf(stderr, " HT: density %d\n", density);
    fprintf(stderr, " HT: max chain len %d\n", max_chain_len);
#endif /* HT_DEBUG */
}

/* Returns the node whose key equals key, or NULL if there is none. */
struct ht_node *ht_find(struct ht_ht *ht, char *key) {
    int hash_code = ht_hashcode(ht, key);
    struct ht_node *node = ht->tbl[hash_code];

    while (node) {
        if (strcmp(key, node->key) == 0) return(node);
        node = node->next;
    }
    return NULL;
}

/*
 * Returns the node whose key equals key; if there is none, appends a new
 * node (val = 0) to the end of the key's bucket chain and returns that.
 */
struct ht_node *ht_find_new(struct ht_ht *ht, char *key) {
    int hash_code = ht_hashcode(ht, key);
    struct ht_node *prev = NULL, *node = ht->tbl[hash_code];

    while (node) {
        if (strcmp(key, node->key) == 0) return(node);
        prev = node;
        node = node->next;
#ifdef HT_DEBUG
        ht->collisions++;
#endif /* HT_DEBUG */
    }
    ht->items++;
    if (prev) {
        return(prev->next = ht_node_create(key));
    } else {
        return(ht->tbl[hash_code] = ht_node_create(key));
    }
}

/*
 * Hash Table iterator data/functions
 */

/*
 * Advances the table's iterator and returns the next node, or NULL once
 * every bucket has been visited.  The iterator state lives in the table.
 */
struct ht_node *ht_next(struct ht_ht *ht) {
    unsigned long index;
    struct ht_node *node = ht->iter_next;

    if (node) {
        /* Still inside a bucket chain. */
        ht->iter_next = node->next;
        return(node);
    } else {
        /* Scan forward for the next non-empty bucket. */
        while (ht->iter_index < ht->size) {
            index = ht->iter_index++;
            if (ht->tbl[index]) {
                ht->iter_next = ht->tbl[index]->next;
                return(ht->tbl[index]);
            }
        }
    }
    return NULL;
}

/* Resets the iterator and returns the first node, or NULL if the table is empty. */
struct ht_node *ht_first(struct ht_ht *ht) {
    ht->iter_index = 0;
    ht->iter_next = NULL;
    return(ht_next(ht));
}

/* Returns the number of nodes stored in the table. */
static inline int ht_count(struct ht_ht *ht) {
    return(ht->items);
}

/*
 * Suggests a table size for fragments of length fl taken from buflen
 * characters: the smaller of (buflen - fl) and a power of four that starts
 * at 4 and is multiplied by 4 at most fl - 1 times, stopping early once it
 * reaches (buflen - fl).
 */
long
hash_table_size (int fl, long buflen)
{
    long maxsize1, maxsize2;

    maxsize1 = buflen - fl;
    maxsize2 = 4;
    while (--fl > 0 && maxsize2 < maxsize1)
        maxsize2 = maxsize2 * 4;
    if (maxsize1 < maxsize2)
        return maxsize1;
    return maxsize2;
}

/*
 * Counts every fl-character window of buffer[0 .. buflen-1] in a new table.
 *
 * Each window is NUL-terminated in place while it is looked up and restored
 * afterwards, so buffer must be writable and buffer[buflen] must be a valid
 * byte (the last window temporarily overwrites it).
 *
 * Returns NULL when fl > buflen (no window fits); otherwise the caller owns
 * the table and must release it with ht_destroy().
 */
struct ht_ht *
generate_frequencies (int fl, char *buffer, long buflen)
{
    struct ht_ht *ht;
    char *reader;
    long i;
    char nulled;

    if (fl > buflen)
        return NULL;

    ht = ht_create (hash_table_size (fl, buflen));
    for (i = 0; i < buflen - fl + 1; i++)
    {
        reader = &(buffer[i]);
        nulled = reader[fl];
        reader[fl] = 0x00;
        ht_find_new (ht, reader)->val++;
        reader[fl] = nulled;
    }
    return ht;
}

/* A (fragment, count) pair used while ordering the output. */
typedef struct ssorter
{
    char *string;
    int num;
} sorter;

/*
 * Prints every distinct fl-character fragment of buffer with its share of
 * all fragments as a percentage, highest count first, followed by an empty
 * line.  When no fragment fits (fl > buflen) only the empty line is printed.
 */
void
write_frequencies (int fl, char *buffer, long buflen)
{
    struct ht_ht *ht;
    long total, i, j, size;
    struct ht_node *nd;
    sorter *s;
    sorter tmp;

    ht = generate_frequencies (fl, buffer, buflen);
    if (ht == NULL)
    {
        /* Nothing to count: emit the empty table instead of dereferencing NULL. */
        printf ("\n");
        return;
    }

    total = 0;
    size = 0;
    for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
    {
        total = total + nd->val;
        size++;
    }

    s = calloc ((size_t) size, sizeof *s);
    if (s == NULL && size > 0)
    {
        perror ("calloc sorter");
        exit (1);
    }

    i = 0;
    for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
    {
        s[i].string = nd->key;
        s[i++].num = nd->val;
    }

    /* Exchange sort, descending by count.  Kept as is (rather than qsort)
       so the relative order of equal counts, and thus the output, is unchanged. */
    for (i = 0; i < size - 1; i++)
        for (j = i + 1; j < size; j++)
            if (s[i].num < s[j].num)
            {
                tmp = s[i];
                s[i] = s[j];
                s[j] = tmp;
            }

    for (i = 0; i < size; i++)
        printf ("%s %.3f\n", s[i].string, 100 * (float) s[i].num / total);
    printf ("\n");

    ht_destroy (ht);
    free (s);
}

/*
 * Prints how many times searchFor occurs in buffer, followed by a tab and
 * searchFor itself.  Prints a count of 0 when searchFor is longer than the
 * buffer.
 */
void
write_count (char *searchFor, char *buffer, long buflen)
{
    struct ht_ht *ht;

    ht = generate_frequencies ((int) strlen (searchFor), buffer, buflen);
    if (ht == NULL)
    {
        /* searchFor cannot occur in a shorter buffer. */
        printf ("%d\t%s\n", 0, searchFor);
        return;
    }
    printf ("%d\t%s\n", ht_find_new (ht, searchFor)->val, searchFor);
    ht_destroy (ht);
}

#define NRUNS 50

/*
 * Reads Results/knucleotide-input.txt, skips ahead to the first line that
 * starts with ">TH", loads the sequence lines that follow (up to the next
 * line starting with '>', ignoring lines starting with ';'), upper-cases
 * them and prints the frequency tables and fragment counts.
 * Returns 0 on success and 2 on an allocation or file-open failure.
 */
int
main (void)
{
    char c;
    char *line, *buffer, *tmp, *x;
    int i, linelen, nothree;
    long k, buflen, seqlen;
    FILE *f;

    line = malloc (256);
    if (!line)
        return 2;
    seqlen = 0;
    nothree = 1;

    f = fopen ("Results/knucleotide-input.txt", "r");
    if (f == NULL)
    {
        free (line);
        return 2;
    }

    /* Skip everything up to and including the ">TH..." header line. */
    while (nothree && fgets (line, 255, f))
    {
        if (line[0] == '>' && line[1] == 'T' && line[2] == 'H')
            nothree = 0;
    }
    free (line);

    buflen = 10240;
    buffer = malloc (buflen + 1);
    if (!buffer)
    {
        fclose (f);
        return 2;
    }
    buffer[0] = 0;
    x = buffer;

    /* Each fgets() writes at most 255 bytes at x; the "+ 512" slack check
       below keeps that much room available after every accepted line. */
    while (fgets (x, 255, f))
    {
        linelen = strlen (x);
        if (linelen)
        {
            if (x[linelen - 1] == '\n')
                linelen--;
            c = x[0];
            if (c == '>')
                break;
            else if (c != ';')
            {
                seqlen = seqlen + linelen;
                if (seqlen + 512 >= buflen)
                {
                    buflen = buflen + 10240;
                    tmp = realloc (buffer, buflen + 1);
                    if (tmp == NULL)
                    {
                        free (buffer);
                        fclose (f);
                        return 2;
                    }
                    buffer = tmp;
                    x = &(buffer[seqlen]);
                }
                else
                    x = &(x[linelen]);
                x[0] = 0;
            }
        }
    }

    /* toupper() requires a value representable as unsigned char (or EOF). */
    for (k = 0; k < seqlen; k++)
        buffer[k] = (char) toupper ((unsigned char) buffer[k]);

    write_frequencies (1, buffer, seqlen);
    write_frequencies (2, buffer, seqlen);
    write_count ("GGT", buffer, seqlen);
    write_count ("GGTA", buffer, seqlen);
    write_count ("GGTATT", buffer, seqlen);
    write_count ("GGTATTTTAATT", buffer, seqlen);
    write_count ("GGTATTTTAATTTATAGT", buffer, seqlen);

    /* Repeated build/teardown of the 6-mer table; ht_destroy() tolerates the
       NULL returned when the sequence is shorter than 6 characters. */
    for (i = 0; i < NRUNS; i++) {
        struct ht_ht *ht = generate_frequencies (6, buffer, seqlen);
        ht_destroy (ht);
    }

    free (buffer);
    fclose (f);
    return 0;
}

/**********
 build & benchmark results

BUILD COMMANDS FOR: knucleotide.gcc

Fri Sep 15 13:56:07 PDT 2006

/usr/bin/gcc -pipe -Wall -O3 -fomit-frame-pointer -funroll-loops -march=pentium4 knucleotide.c -o knucleotide.gcc_run

=================================================================
COMMAND LINE (%A is single numeric argument):

knucleotide.gcc_run %A
N=2500

*******/