github.com/biogo/biogo@v1.0.4/index/kmerindex/kmerindex_test.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package kmerindex 6 7 import ( 8 "github.com/biogo/biogo/alphabet" 9 "github.com/biogo/biogo/seq/linear" 10 "github.com/biogo/biogo/util" 11 12 "math/rand" 13 "strings" 14 "testing" 15 16 "gopkg.in/check.v1" 17 ) 18 19 // Tests 20 func Test(t *testing.T) { check.TestingT(t) } 21 22 type S struct { 23 *linear.Seq 24 } 25 26 var _ = check.Suite(&S{}) 27 28 var ( 29 maxKmerLen = 14 30 testLen = 1000 31 ) 32 33 func (s *S) SetUpSuite(c *check.C) { 34 s.Seq = linear.NewSeq("", nil, alphabet.DNA) 35 s.Seq.Seq = make(alphabet.Letters, testLen) 36 for i := range s.Seq.Seq { 37 s.Seq.Seq[i] = [...]alphabet.Letter{'A', 'C', 'G', 'T', 'a', 'c', 'g', 't'}[rand.Int()%8] 38 } 39 } 40 41 func (s *S) TestKmerIndexCheck(c *check.C) { 42 for k := MinKmerLen; k <= maxKmerLen; k++ { 43 if i, err := New(k, s.Seq); err != nil { 44 c.Fatalf("New KmerIndex failed: %v", err) 45 } else { 46 ok, _ := i.Check() 47 c.Check(ok, check.Equals, false) 48 i.Build() 49 ok, f := i.Check() 50 c.Check(f, check.Equals, s.Seq.Len()-k+1) 51 c.Check(ok, check.Equals, true) 52 } 53 } 54 } 55 56 func (s *S) TestKmerFrequencies(c *check.C) { 57 for k := MinKmerLen; k <= maxKmerLen; k++ { 58 if i, err := New(k, s.Seq); err != nil { 59 c.Fatalf("New KmerIndex failed: %v", err) 60 } else { 61 freqs, ok := i.KmerFrequencies() 62 c.Check(ok, check.Equals, true) 63 hashFreqs := make(map[string]int) 64 for i := 0; i+k <= s.Seq.Len(); i++ { 65 hashFreqs[strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i:i+k])))]++ 66 } 67 for key := range freqs { 68 c.Check(freqs[key], check.Equals, hashFreqs[i.Format(key)], 69 check.Commentf("key %x, string of %q\n", key, i.Format(key))) 70 } 71 for key := range hashFreqs { 72 if keyKmer, err := i.KmerOf(key); err != nil { 73 c.Fatal(err) 74 } else { 75 c.Check(freqs[keyKmer], check.Equals, hashFreqs[key], 76 check.Commentf("keyKmer %x, string of %q, key %q\n", keyKmer, i.Format(keyKmer), key)) 77 } 78 } 79 } 80 } 81 } 82 83 func (s *S) TestKmerPositions(c *check.C) { 84 for k := MinKmerLen; k < maxKmerLen; k++ { // don't test full range to time's sake 85 if i, err := New(k, s.Seq); err != nil { 86 c.Fatalf("New KmerIndex failed: %v", err) 87 } else { 88 i.Build() 89 hashPos := make(map[string][]int) 90 for i := 0; i+k <= s.Seq.Len(); i++ { 91 p := strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i : i+k]))) 92 hashPos[p] = append(hashPos[p], i) 93 } 94 pos, ok := i.KmerIndex() 95 c.Check(ok, check.Equals, true) 96 for p := range pos { 97 c.Check(pos[p], check.DeepEquals, hashPos[i.Format(p)]) 98 } 99 } 100 } 101 } 102 103 func (s *S) TestKmerPositionsString(c *check.C) { 104 for k := MinKmerLen; k < maxKmerLen; k++ { // don't test full range to time's sake 105 if i, err := New(k, s.Seq); err != nil { 106 c.Fatalf("New KmerIndex failed: %v", err) 107 } else { 108 i.Build() 109 hashPos := make(map[string][]int) 110 for i := 0; i+k <= s.Seq.Len(); i++ { 111 p := strings.ToLower(string(alphabet.LettersToBytes(s.Seq.Seq[i : i+k]))) 112 hashPos[p] = append(hashPos[p], i) 113 } 114 pos, ok := i.StringKmerIndex() 115 c.Check(ok, check.Equals, true) 116 for p := range pos { 117 c.Check(pos[p], check.DeepEquals, hashPos[p]) 118 } 119 } 120 } 121 } 122 123 func (s *S) TestKmerKmerUtilities(c *check.C) { 124 for k := MinKmerLen; k <= 8; k++ { // again not testing all exhaustively 125 for kmer := Kmer(0); uint(kmer) <= util.Pow4(k)-1; kmer++ { 126 // Interconversion between string and Kmer 127 s, err := Format(kmer, k, alphabet.DNA) 128 c.Assert(err, check.Equals, nil) 129 rk, err := KmerOf(k, alphabet.DNA.LetterIndex(), s) 130 c.Assert(err, check.Equals, nil) 131 c.Check(rk, check.Equals, kmer) 132 133 // Complementation 134 dc := ComplementOf(k, ComplementOf(k, kmer)) 135 skmer, _ := Format(kmer, k, alphabet.DNA) 136 sdc, _ := Format(dc, k, alphabet.DNA) 137 c.Check(dc, check.Equals, kmer, check.Commentf("kmer: %s\ndouble complement: %s\n", skmer, sdc)) 138 139 // GC content 140 ks, _ := Format(kmer, k, alphabet.DNA) 141 gc := 0 142 for _, b := range ks { 143 if b == 'g' || b == 'c' { 144 gc++ 145 } 146 } 147 c.Check(GCof(k, kmer), check.Equals, float64(gc)/float64(k)) 148 } 149 } 150 }