github.com/ccccaoqing/test@v0.0.0-20220510085219-3985d23445c0/test/bench/shootout/k-nucleotide.c (about)

     1  /*
     2  Redistribution and use in source and binary forms, with or without
     3  modification, are permitted provided that the following conditions are met:
     4  
     5      * Redistributions of source code must retain the above copyright
     6      notice, this list of conditions and the following disclaimer.
     7  
     8      * Redistributions in binary form must reproduce the above copyright
     9      notice, this list of conditions and the following disclaimer in the
    10      documentation and/or other materials provided with the distribution.
    11  
    12      * Neither the name of "The Computer Language Benchmarks Game" nor the
    13      name of "The Computer Language Shootout Benchmarks" nor the names of
    14      its contributors may be used to endorse or promote products derived
    15      from this software without specific prior written permission.
    16  
    17  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
    18  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    19  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    20  ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
    21  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
    22  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    23  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    24  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    25  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    26  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    27  POSSIBILITY OF SUCH DAMAGE.
    28  */
    29  
    30  #include <stdio.h>
    31  #include <string.h>
    32  #include <ctype.h>
    33  #include <stdlib.h>
    34  #include <glib.h>
    35  
    36  typedef struct stat_s stat_t;
    37  struct stat_s
    38  {
    39     const gchar *key;
    40     long stat;
    41  };
    42  
    43  #define MAX_ELM (8192 / sizeof (stat_t))
    44  
    45  static int
    46  generate_frequencies (int fl, char *buffer, long buflen,
    47  		      GHashTable *ht, GTrashStack **ts, GPtrArray *roots, GStringChunk *sc)
    48  {
    49     gchar *key;
    50     long i;
    51  
    52     if (fl > buflen) return 0;
    53     if (fl == 0) return 0;
    54  
    55     for (i = 0; i < buflen - fl + 1; ++i)
    56       {
    57  	char nulled;
    58  	stat_t *stat;
    59  
    60  	nulled = buffer[i + fl];
    61  	buffer[i + fl] = '\0';
    62  
    63  	key = g_string_chunk_insert_const(sc, buffer + i);
    64  
    65  	stat = g_hash_table_lookup(ht, key);
    66  	if (!stat)
    67  	  {
    68  	     stat = g_trash_stack_pop(ts);
    69  	     if (!stat)
    70  	       {
    71  		  int j;
    72  
    73  		  stat = malloc(sizeof (stat_t) * MAX_ELM);
    74  		  g_ptr_array_add(roots, stat);
    75  
    76  		  for (j = 1; j < MAX_ELM; ++j)
    77  		    g_trash_stack_push(ts, stat + j);
    78  	       }
    79  	     stat->stat = 1;
    80  	     stat->key = key;
    81  
    82  	     g_hash_table_insert(ht, key, stat);
    83  	  }
    84  	else
    85  	  stat->stat++;
    86  
    87  	buffer[i + fl] = nulled;
    88       }
    89  
    90     return buflen - fl + 1;
    91  }
    92  
    93  static int
    94  cmp_func(gconstpointer a, gconstpointer b)
    95  {
    96     const stat_t *left = a;
    97     const stat_t *right = b;
    98  
    99     return right->stat - left->stat;
   100  }
   101  
   102  static void
   103  sorted_list(gpointer key, gpointer value, gpointer user_data)
   104  {
   105     stat_t *data = value;
   106     GList **lst = user_data;
   107  
   108     *lst = g_list_insert_sorted(*lst, data, cmp_func);
   109  }
   110  
   111  static void
   112  display_stat(gpointer data, gpointer user_data)
   113  {
   114     long *total = user_data;
   115     stat_t *st = data;
   116  
   117     printf("%s %.3f\n", st->key, 100 * (float) st->stat / *total);
   118  }
   119  
   120  void
   121  write_frequencies (int fl, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
   122  {
   123     GStringChunk *sc;
   124     GHashTable *ht;
   125     GList *lst;
   126     long total;
   127  
   128     ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
   129     sc = g_string_chunk_new(buflen);
   130     lst = NULL;
   131  
   132     total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
   133  
   134     if (!total) goto on_error;
   135  
   136     g_hash_table_foreach(ht, sorted_list, &lst);
   137     g_list_foreach(lst, display_stat, &total);
   138     g_list_free(lst);
   139  
   140   on_error:
   141     g_hash_table_destroy(ht);
   142     g_string_chunk_free(sc);
   143  }
   144  
   145  void
   146  write_count (char *searchFor, char *buffer, long buflen, GTrashStack **ts, GPtrArray *roots)
   147  {
   148     GStringChunk *sc;
   149     GHashTable *ht;
   150     stat_t *result;
   151     GList *lst;
   152     long total;
   153     long fl;
   154  
   155     fl = strlen(searchFor);
   156  
   157     ht = g_hash_table_new_full(g_str_hash, g_str_equal, NULL /* free key */, NULL /* free value */);
   158     sc = g_string_chunk_new(buflen);
   159     lst = NULL;
   160     result = NULL;
   161  
   162     total = generate_frequencies (fl, buffer, buflen, ht, ts, roots, sc);
   163  
   164     if (!total) goto on_error;
   165  
   166     result = g_hash_table_lookup(ht, searchFor);
   167  
   168   on_error:
   169     printf("%ld\t%s\n", result ? result->stat : 0, searchFor);
   170  
   171     g_hash_table_destroy(ht);
   172     g_string_chunk_free(sc);
   173  }
   174  
   175  int
   176  main ()
   177  {
   178     char buffer[4096];
   179     GTrashStack *ts;
   180     GPtrArray *roots;
   181     GString *stuff;
   182     gchar *s;
   183     int len;
   184  
   185     roots = g_ptr_array_new();
   186     ts = NULL;
   187  
   188     while (fgets(buffer, sizeof (buffer), stdin))
   189       if (strncmp(buffer, ">THREE", 6) == 0)
   190         break;
   191  
   192     stuff = g_string_new(NULL);
   193  
   194     while (fgets(buffer, sizeof (buffer), stdin))
   195       {
   196  	size_t sz;
   197  
   198  	if (buffer[0] == '>')
   199  	  break;
   200  
   201  	sz = strlen(buffer);
   202  	if (buffer[sz - 1] == '\n')
   203  	  --sz;
   204  
   205  	stuff = g_string_append_len(stuff, buffer, sz);
   206       }
   207  
   208     stuff = g_string_ascii_up(stuff);
   209     len = stuff->len;
   210     s = g_string_free(stuff, FALSE);
   211  
   212     write_frequencies(1, s, len, &ts, roots);
   213     printf("\n");
   214     write_frequencies(2, s, len, &ts, roots);
   215     printf("\n");
   216     write_count("GGT", s, len, &ts, roots);
   217     write_count("GGTA", s, len, &ts, roots);
   218     write_count("GGTATT", s, len, &ts, roots);
   219     write_count("GGTATTTTAATT", s, len, &ts, roots);
   220     write_count("GGTATTTTAATTTATAGT", s, len, &ts, roots);
   221  
   222     free(s);
   223  
   224     g_ptr_array_foreach(roots, (GFunc)free, NULL);
   225     g_ptr_array_free(roots, TRUE);
   226  
   227     return 0;
   228  }