modernc.org/ccgo/v3@v3.16.14/lib/testdata/CompCert-3.6/test/c/knucleotide.c (about)

     1  /* The Computer Language Shootout
     2     http://shootout.alioth.debian.org/
     3  
     4     Contributed by Josh Goldfoot
     5     to compile, use gcc -O3
     6  
     7     This revision uses "simple_hash.h," available from
     8     http://cvs.alioth.debian.org/cgi-bin/cvsweb.cgi/shootout/bench/Include/?cvsroot=shootout
     9  
    10  */
    11  #include <stdio.h>
    12  #include <string.h>
    13  #include <ctype.h>
    14  #include <stdlib.h>
    15  
    16  enum { ht_num_primes = 28 };
    17  
    18  static unsigned long ht_prime_list[ht_num_primes] = {
    19      53ul,         97ul,         193ul,       389ul,       769ul,
    20      1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
    21      49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
    22      1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
    23      50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul, 
    24      1610612741ul, 3221225473ul, 4294967291ul
    25  };
    26  
    27  struct ht_node {
    28      char *key;
    29      int val;
    30      struct ht_node *next;
    31  };
    32  
    33  struct ht_ht {
    34      int size;
    35      struct ht_node **tbl;
    36      int iter_index;
    37      struct ht_node *iter_next;
    38      int items;
    39  #ifdef HT_DEBUG
    40      int collisions;
    41  #endif /* HT_DEBUG */
    42  };
    43  
    44  static inline int ht_val(struct ht_node *node) {
    45      return(node->val);
    46  }
    47  
    48  static inline char *ht_key(struct ht_node *node) {
    49      return(node->key);
    50  }
    51  
    52  static inline int ht_hashcode(struct ht_ht *ht, char *key) {
    53      unsigned long val = 0;
    54      for (; *key; ++key) val = 5 * val + *key;
    55      return(val % ht->size);
    56  }
    57  
    58  struct ht_node *ht_node_create(char *key) {
    59      char *newkey;
    60      struct ht_node *node;
    61      if ((node = (struct ht_node *)malloc(sizeof(struct ht_node))) == 0) {
    62  	perror("malloc ht_node");
    63  	exit(1);
    64      }
    65      if ((newkey = malloc(strlen(key) + 1)) == 0) {
    66  	perror("strdup newkey");
    67  	exit(1);
    68      }
    69      strcpy(newkey, key);
    70      node->key = newkey;
    71      node->val = 0;
    72      node->next = (struct ht_node *)NULL;
    73      return(node);
    74  }
    75  
    76  struct ht_ht *ht_create(int size) {
    77      int i = 0;
    78      struct ht_ht *ht = (struct ht_ht *)malloc(sizeof(struct ht_ht));
    79      while (ht_prime_list[i] < size) { i++; }
    80      ht->size = ht_prime_list[i];
    81      ht->tbl = (struct ht_node **)calloc(ht->size, sizeof(struct ht_node *));
    82      ht->iter_index = 0;
    83      ht->iter_next = 0;
    84      ht->items = 0;
    85  #ifdef HT_DEBUG
    86      ht->collisions = 0;
    87  #endif /* HT_DEBUG */
    88      return(ht);
    89  }
    90  
    91  void ht_destroy(struct ht_ht *ht) {
    92      struct ht_node *cur, *next;
    93      int i;
    94  #ifdef HT_DEBUG
    95      int chain_len;
    96      int max_chain_len = 0;
    97      int density = 0;
    98      fprintf(stderr, " HT: size            %d\n", ht->size);
    99      fprintf(stderr, " HT: items           %d\n", ht->items);
   100      fprintf(stderr, " HT: collisions      %d\n", ht->collisions);
   101  #endif /* HT_DEBUG */
   102      for (i=0; i<ht->size; i++) {
   103  	next = ht->tbl[i];
   104  #ifdef HT_DEBUG
   105  	if (next) {
   106  	    density++;
   107  	}
   108  	chain_len = 0;
   109  #endif /* HT_DEBUG */
   110  	while (next) {
   111  	    cur = next;
   112  	    next = next->next;
   113  	    free(cur->key);
   114  	    free(cur);
   115  #ifdef HT_DEBUG
   116  	    chain_len++;
   117  #endif /* HT_DEBUG */
   118  	}
   119  #ifdef HT_DEBUG
   120  	if (chain_len > max_chain_len)
   121  	    max_chain_len = chain_len;
   122  #endif /* HT_DEBUG */
   123      }
   124      free(ht->tbl);
   125      free(ht);
   126  #ifdef HT_DEBUG
   127      fprintf(stderr, " HT: density         %d\n", density);
   128      fprintf(stderr, " HT: max chain len   %d\n", max_chain_len);
   129  #endif /* HT_DEBUG */
   130  }
   131  
   132  struct ht_node *ht_find(struct ht_ht *ht, char *key) {
   133      int hash_code = ht_hashcode(ht, key);
   134      struct ht_node *node = ht->tbl[hash_code];
   135      while (node) {
   136  	if (strcmp(key, node->key) == 0) return(node);
   137  	node = node->next;
   138      }
   139      return((struct ht_node *)NULL);
   140  }
   141  
   142  struct ht_node *ht_find_new(struct ht_ht *ht, char *key) {
   143      int hash_code = ht_hashcode(ht, key);
   144      struct ht_node *prev = 0, *node = ht->tbl[hash_code];
   145      while (node) {
   146  	if (strcmp(key, node->key) == 0) return(node);
   147  	prev = node;
   148  	node = node->next;
   149  #ifdef HT_DEBUG
   150  	ht->collisions++;
   151  #endif /* HT_DEBUG */
   152      }
   153      ht->items++;
   154      if (prev) {
   155  	return(prev->next = ht_node_create(key));
   156      } else {
   157  	return(ht->tbl[hash_code] = ht_node_create(key));
   158      }
   159  }
   160  
   161  /*
   162   *  Hash Table iterator data/functions
   163   */
   164  struct ht_node *ht_next(struct ht_ht *ht) {
   165      unsigned long index;
   166      struct ht_node *node = ht->iter_next;
   167      if (node) {
   168  	ht->iter_next = node->next;
   169  	return(node);
   170      } else {
   171  	while (ht->iter_index < ht->size) {
   172  	    index = ht->iter_index++;
   173  	    if (ht->tbl[index]) {
   174  		ht->iter_next = ht->tbl[index]->next;
   175  		return(ht->tbl[index]);
   176  	    }
   177  	}
   178      }
   179      return((struct ht_node *)NULL);
   180  }
   181  
   182  struct ht_node *ht_first(struct ht_ht *ht) {
   183      ht->iter_index = 0;
   184      ht->iter_next = (struct ht_node *)NULL;
   185      return(ht_next(ht));
   186  }
   187  
   188  static inline int ht_count(struct ht_ht *ht) {
   189      return(ht->items);
   190  }
   191  
   192  long
   193  hash_table_size (int fl, long buflen)
   194  {
   195    long maxsize1, maxsize2;
   196  
   197    maxsize1 = buflen - fl;
   198    maxsize2 = 4;
   199    while (--fl > 0 && maxsize2 < maxsize1)
   200      maxsize2 = maxsize2 * 4;
   201    if (maxsize1 < maxsize2)
   202      return maxsize1;
   203    return maxsize2;
   204  }
   205  
   206  struct ht_ht *
   207  generate_frequencies (int fl, char *buffer, long buflen)
   208  {
   209    struct ht_ht *ht;
   210    char *reader;
   211    long i;
   212    char nulled;
   213  
   214    if (fl > buflen)
   215      return NULL;
   216  
   217    ht = ht_create (hash_table_size (fl, buflen));
   218    for (i = 0; i < buflen - fl + 1; i++)
   219      {
   220        reader = &(buffer[i]);
   221        nulled = reader[fl];
   222        reader[fl] = 0x00;
   223        ht_find_new (ht, reader)->val++;
   224        reader[fl] = nulled;
   225      }
   226    return ht;
   227  }
   228  
   229  typedef struct ssorter
   230  {
   231    char *string;
   232    int num;
   233  } sorter;
   234  
   235  void
   236  write_frequencies (int fl, char *buffer, long buflen)
   237  {
   238    struct ht_ht *ht;
   239    long total, i, j, size;
   240    struct ht_node *nd;
   241    sorter *s;
   242    sorter tmp;
   243  
   244    ht = generate_frequencies (fl, buffer, buflen);
   245    total = 0;
   246    size = 0;
   247    for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
   248      {
   249        total = total + nd->val;
   250        size++;
   251      }
   252    s = calloc (size, sizeof (sorter));
   253    i = 0;
   254    for (nd = ht_first (ht); nd != NULL; nd = ht_next (ht))
   255      {
   256        s[i].string = nd->key;
   257        s[i++].num = nd->val;
   258      }
   259    for (i = 0; i < size - 1; i++)
   260      for (j = i + 1; j < size; j++)
   261        if (s[i].num < s[j].num)
   262  	{
   263  	  memcpy (&tmp, &(s[i]), sizeof (sorter));
   264  	  memcpy (&(s[i]), &(s[j]), sizeof (sorter));
   265  	  memcpy (&(s[j]), &tmp, sizeof (sorter));
   266  	}
   267    for (i = 0; i < size; i++)
   268      printf ("%s %.3f\n", s[i].string, 100 * (float) s[i].num / total);
   269    printf ("\n");
   270    ht_destroy (ht);
   271    free (s);
   272  }
   273  
   274  void
   275  write_count (char *searchFor, char *buffer, long buflen)
   276  {
   277    struct ht_ht *ht;
   278  
   279    ht = generate_frequencies (strlen (searchFor), buffer, buflen);
   280    printf ("%d\t%s\n", ht_find_new (ht, searchFor)->val, searchFor);
   281    ht_destroy (ht);
   282  }
   283  
   284  #define NRUNS 50
   285  
   286  int
   287  main ()
   288  {
   289    char c;
   290    char *line, *buffer, *tmp, *x;
   291    int i, linelen, nothree;
   292    long buflen, seqlen;
   293    FILE * f;
   294  
   295    line = malloc (256);
   296    if (!line)
   297      return 2;
   298    seqlen = 0;
   299    nothree = 1;
   300  
   301    f = fopen("Results/knucleotide-input.txt", "r");
   302    if (f == NULL) return 2;
   303  
   304    while (nothree && fgets (line, 255, f))
   305      if (line[0] == '>' && line[1] == 'T' && line[2] == 'H')
   306        nothree = 0;
   307    free (line);
   308  
   309    buflen = 10240;
   310    buffer = malloc (buflen + 1);
   311    if (!buffer)
   312      return 2;
   313    x = buffer;
   314  
   315    while (fgets (x, 255, f))
   316      {
   317        linelen = strlen (x);
   318        if (linelen)
   319  	{
   320  	  if (x[linelen - 1] == '\n')
   321  	    linelen--;
   322  	  c = x[0];
   323  	  if (c == '>')
   324  	    break;
   325  	  else if (c != ';')
   326  	    {
   327  	      seqlen = seqlen + linelen;
   328  	      if (seqlen + 512 >= buflen)
   329  		{
   330  		  buflen = buflen + 10240;
   331  		  tmp = realloc (buffer, buflen + 1);
   332  		  if (tmp == NULL)
   333  		    return 2;
   334  		  buffer = tmp;
   335  		  x = &(buffer[seqlen]);
   336  		}
   337  	      else
   338  		x = &(x[linelen]);
   339  	      x[0] = 0;
   340  	    }
   341  	}
   342      }
   343    for (i = 0; i < seqlen; i++)
   344      buffer[i] = toupper (buffer[i]);
   345    write_frequencies (1, buffer, seqlen);
   346    write_frequencies (2, buffer, seqlen);
   347    write_count ("GGT", buffer, seqlen);
   348    write_count ("GGTA", buffer, seqlen);
   349    write_count ("GGTATT", buffer, seqlen);
   350    write_count ("GGTATTTTAATT", buffer, seqlen);
   351    write_count ("GGTATTTTAATTTATAGT", buffer, seqlen);
   352    for (i = 0; i < NRUNS; i++) {
   353      struct ht_ht *  ht = generate_frequencies (6, buffer, seqlen);
   354      ht_destroy(ht);
   355    }
   356    free (buffer);
   357    fclose (f);
   358    return 0;
   359  }
   360  
   361  /**********
   362   build & benchmark results
   363  
   364  BUILD COMMANDS FOR: knucleotide.gcc
   365  
   366  Fri Sep 15 13:56:07 PDT 2006
   367  
   368  /usr/bin/gcc -pipe -Wall -O3 -fomit-frame-pointer -funroll-loops -march=pentium4  knucleotide.c -o knucleotide.gcc_run
   369  
   370  =================================================================
   371  COMMAND LINE (%A is single numeric argument):
   372  
   373  knucleotide.gcc_run %A
   374  N=2500
   375  
   376  *******/