github.com/ccccaoqing/test@v0.0.0-20220510085219-3985d23445c0/test/bench/shootout/k-nucleotide.c (about) 1 /* 2 Redistribution and use in source and binary forms, with or without 3 modification, are permitted provided that the following conditions are met: 4 5 * Redistributions of source code must retain the above copyright 6 notice, this list of conditions and the following disclaimer. 7 8 * Redistributions in binary form must reproduce the above copyright 9 notice, this list of conditions and the following disclaimer in the 10 documentation and/or other materials provided with the distribution. 11 12 * Neither the name of "The Computer Language Benchmarks Game" nor the 13 name of "The Computer Language Shootout Benchmarks" nor the names of 14 its contributors may be used to endorse or promote products derived 15 from this software without specific prior written permission. 16 17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include <stdio.h> 31 #include <string.h> 32 #include <ctype.h> 33 #include <stdlib.h> 34 #include <glib.h> 35 36 typedef struct stat_s stat_t; 37 struct stat_s 38 { 39 const gchar *key; 40 long stat; 41 }; 42 43 #define MAX_ELM (8192 / sizeof (stat_t)) 44 45 static int 46 generate_frequencies (int fl, char *buffer, long buflen, 47 GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc) 48 { 49 gchar *key; 50 long i; 51 52 if (fl > buflen) return 0; 53 if (fl == 0) return 0; 54 55 for (i = 0; i < buflen - fl + 1; ++i) 56 { 57 char nulled; 58 stat_t *stat; 59 60 nulled = buffer[i + fl]; 61 buffer[i + fl] = '\0'; 62 63 key = g_string_chunk_insert_const(sc, buffer + i); 64 65 stat = g_hash_table_lookup(ht, key); 66 if (!stat) 67 { 68 stat = g_trash_stack_pop(ts); 69 if (!stat) 70 { 71 int j; 72 73 stat = malloc(sizeof (stat_t) * MAX_ELM); 74 g_ptr_array_add(roots, stat); 75 76 for (j = 1; j < MAX_ELM; ++j) 77 g_trash_stack_push(ts, stat + j); 78 } 79 stat->stat = 1; 80 stat->key = key; 81 82 g_hash_table_insert(ht, key, stat); 83 } 84 else 85 stat->stat++; 86 87 buffer[i + fl] = nulled; 88 } 89 90 return buflen - fl + 1; 91 } 92 93 static int 94 cmp_func(gconstpointer a, gconstpointer b) 95 { 96 const stat_t *left = a; 97 const stat_t *right = b; 98 99 return right->stat - left->stat; 100 } 101 102 static void 103 sorted_list(gpointer key, gpointer value, gpointer user_data) 104 { 105 stat_t *data = value; 106 GList **lst = user_data; 107 108 *lst = g_list_insert_sorted(*lst, data, cmp_func); 109 } 110 111 static void 112 display_stat(gpointer data, gpointer user_data) 113 { 114 long *total = user_data; 115 stat_t *st = data; 116 117 printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total); 118 } 119 120 void 121 write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots) 122 { 123 GStringChunk *sc; 124 GHashTable *ht; 125 GList *lst; 126 long total; 127 128 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */); 129 sc = g_string_chunk_new(buflen); 130 lst = NULL; 131 132 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc); 133 134 if (!total) goto on_error; 135 136 g_hash_table_foreach(ht, sorted_list, &lst); 137 g_list_foreach(lst, display_stat, &total); 138 g_list_free(lst); 139 140 on_error: 141 g_hash_table_destroy(ht); 142 g_string_chunk_free(sc); 143 } 144 145 void 146 write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots) 147 { 148 GStringChunk *sc; 149 GHashTable *ht; 150 stat_t *result; 151 GList *lst; 152 long total; 153 long fl; 154 155 fl = strlen(searchFor); 156 157 ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */); 158 sc = g_string_chunk_new(buflen); 159 lst = NULL; 160 result = NULL; 161 162 total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc); 163 164 if (!total) goto on_error; 165 166 result = g_hash_table_lookup(ht, searchFor); 167 168 on_error: 169 printf("%ld\t%s\n", result ? result->stat : 0, searchFor); 170 171 g_hash_table_destroy(ht); 172 g_string_chunk_free(sc); 173 } 174 175 int 176 main () 177 { 178 char buffer[4096]; 179 GTrashStack *ts; 180 GPtrArray *roots; 181 GString *stuff; 182 gchar *s; 183 int len; 184 185 roots = g_ptr_array_new(); 186 ts = NULL; 187 188 while (fgets(buffer, sizeof (buffer), stdin)) 189 if (strncmp(buffer, ">THREE", 6) == 0) 190 break; 191 192 stuff = g_string_new(NULL); 193 194 while (fgets(buffer, sizeof (buffer), stdin)) 195 { 196 size_t sz; 197 198 if (buffer[0] == '>') 199 break; 200 201 sz = strlen(buffer); 202 if (buffer[sz - 1] == '\n') 203 --sz; 204 205 stuff = g_string_append_len(stuff, buffer, sz); 206 } 207 208 stuff = g_string_ascii_up(stuff); 209 len = stuff->len; 210 s = g_string_free(stuff, FALSE); 211 212 write_frequencies(1, s, len, &ts, roots); 213 printf("\n"); 214 write_frequencies(2, s, len, &ts, roots); 215 printf("\n"); 216 write_count("GGT", s, len, &ts, roots); 217 write_count("GGTA", s, len, &ts, roots); 218 write_count("GGTATT", s, len, &ts, roots); 219 write_count("GGTATTTTAATT", s, len, &ts, roots); 220 write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots); 221 222 free(s); 223 224 g_ptr_array_foreach(roots, (GFunc)free, NULL); 225 g_ptr_array_free(roots, TRUE); 226 227 return 0; 228 }