github.com/biogo/biogo@v1.0.4/index/kmerindex/kmerindex_test.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package kmerindex
     6  
     7  import (
     8  	"github.com/biogo/biogo/alphabet"
     9  	"github.com/biogo/biogo/seq/linear"
    10  	"github.com/biogo/biogo/util"
    11  
    12  	"math/rand"
    13  	"strings"
    14  	"testing"
    15  
    16  	"gopkg.in/check.v1"
    17  )
    18  
    19  // Tests
    20  func Test(t *testing.T) { check.TestingT(t) }
    21  
    22  type S struct {
    23  	*linear.Seq
    24  }
    25  
    26  var _ = check.Suite(&S{})
    27  
    28  var (
    29  	maxKmerLen = 14
    30  	testLen    = 1000
    31  )
    32  
    33  func (s *S) SetUpSuite(c *check.C) {
    34  	s.Seq = linear.NewSeq("", nil, alphabet.DNA)
    35  	s.Seq.Seq = make(alphabet.Letters, testLen)
    36  	for i := range s.Seq.Seq {
    37  		s.Seq.Seq[i] = [...]alphabet.Letter{'A', 'C', 'G', 'T', 'a', 'c', 'g', 't'}[rand.Int()%8]
    38  	}
    39  }
    40  
    41  func (s *S) TestKmerIndexCheck(c *check.C) {
    42  	for k := MinKmerLen; k <= maxKmerLen; k++ {
    43  		if i, err := New(k, s.Seq); err != nil {
    44  			c.Fatalf("New KmerIndex failed: %v", err)
    45  		} else {
    46  			ok, _ := i.Check()
    47  			c.Check(ok, check.Equals, false)
    48  			i.Build()
    49  			ok, f := i.Check()
    50  			c.Check(f, check.Equals, s.Seq.Len()-k+1)
    51  			c.Check(ok, check.Equals, true)
    52  		}
    53  	}
    54  }
    55  
    56  func (s *S) TestKmerFrequencies(c *check.C) {
    57  	for k := MinKmerLen; k <= maxKmerLen; k++ {
    58  		if i, err := New(k, s.Seq); err != nil {
    59  			c.Fatalf("New KmerIndex failed: %v", err)
    60  		} else {
    61  			freqs, ok := i.KmerFrequencies()
    62  			c.Check(ok, check.Equals, true)
    63  			hashFreqs := make(map[string]int)
    64  			for i := 0; i+k <= s.Seq.Len(); i++ {
    65  				hashFreqs[strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i:i+k])))]++
    66  			}
    67  			for key := range freqs {
    68  				c.Check(freqs[key], check.Equals, hashFreqs[i.Format(key)],
    69  					check.Commentf("key %x, string of %q\n", key, i.Format(key)))
    70  			}
    71  			for key := range hashFreqs {
    72  				if keyKmer, err := i.KmerOf(key); err != nil {
    73  					c.Fatal(err)
    74  				} else {
    75  					c.Check(freqs[keyKmer], check.Equals, hashFreqs[key],
    76  						check.Commentf("keyKmer %x, string of %q, key %q\n", keyKmer, i.Format(keyKmer), key))
    77  				}
    78  			}
    79  		}
    80  	}
    81  }
    82  
    83  func (s *S) TestKmerPositions(c *check.C) {
    84  	for k := MinKmerLen; k < maxKmerLen; k++ { // don't test full range to time's sake
    85  		if i, err := New(k, s.Seq); err != nil {
    86  			c.Fatalf("New KmerIndex failed: %v", err)
    87  		} else {
    88  			i.Build()
    89  			hashPos := make(map[string][]int)
    90  			for i := 0; i+k <= s.Seq.Len(); i++ {
    91  				p := strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i : i+k])))
    92  				hashPos[p] = append(hashPos[p], i)
    93  			}
    94  			pos, ok := i.KmerIndex()
    95  			c.Check(ok, check.Equals, true)
    96  			for p := range pos {
    97  				c.Check(pos[p], check.DeepEquals, hashPos[i.Format(p)])
    98  			}
    99  		}
   100  	}
   101  }
   102  
   103  func (s *S) TestKmerPositionsString(c *check.C) {
   104  	for k := MinKmerLen; k < maxKmerLen; k++ { // don't test full range to time's sake
   105  		if i, err := New(k, s.Seq); err != nil {
   106  			c.Fatalf("New KmerIndex failed: %v", err)
   107  		} else {
   108  			i.Build()
   109  			hashPos := make(map[string][]int)
   110  			for i := 0; i+k <= s.Seq.Len(); i++ {
   111  				p := strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i : i+k])))
   112  				hashPos[p] = append(hashPos[p], i)
   113  			}
   114  			pos, ok := i.StringKmerIndex()
   115  			c.Check(ok, check.Equals, true)
   116  			for p := range pos {
   117  				c.Check(pos[p], check.DeepEquals, hashPos[p])
   118  			}
   119  		}
   120  	}
   121  }
   122  
   123  func (s *S) TestKmerKmerUtilities(c *check.C) {
   124  	for k := MinKmerLen; k <= 8; k++ { // again not testing all exhaustively
   125  		for kmer := Kmer(0); uint(kmer) <= util.Pow4(k)-1; kmer++ {
   126  			// Interconversion between string and Kmer
   127  			s, err := Format(kmer, k, alphabet.DNA)
   128  			c.Assert(err, check.Equals, nil)
   129  			rk, err := KmerOf(k, alphabet.DNA.LetterIndex(), s)
   130  			c.Assert(err, check.Equals, nil)
   131  			c.Check(rk, check.Equals, kmer)
   132  
   133  			// Complementation
   134  			dc := ComplementOf(k, ComplementOf(k, kmer))
   135  			skmer, _ := Format(kmer, k, alphabet.DNA)
   136  			sdc, _ := Format(dc, k, alphabet.DNA)
   137  			c.Check(dc, check.Equals, kmer, check.Commentf("kmer: %s\ndouble complement: %s\n", skmer, sdc))
   138  
   139  			// GC content
   140  			ks, _ := Format(kmer, k, alphabet.DNA)
   141  			gc := 0
   142  			for _, b := range ks {
   143  				if b == 'g' || b == 'c' {
   144  					gc++
   145  				}
   146  			}
   147  			c.Check(GCof(k, kmer), check.Equals, float64(gc)/float64(k))
   148  		}
   149  	}
   150  }