github.com/biogo/biogo@v1.0.4/io/seqio/fastq/fastq_test.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package fastq
     6  
     7  import (
     8  	"github.com/biogo/biogo/alphabet"
     9  	"github.com/biogo/biogo/seq/linear"
    10  
    11  	"bytes"
    12  	"io"
    13  	"testing"
    14  
    15  	"gopkg.in/check.v1"
    16  )
    17  
    18  // Helpers
    19  func constructQL(l [][]alphabet.Letter, q [][]alphabet.Qphred) (ql []alphabet.QLetters) {
    20  	if len(l) != len(q) {
    21  		panic("test data length mismatch")
    22  	}
    23  	ql = make([]alphabet.QLetters, len(l))
    24  	for i := range ql {
    25  		if len(l[i]) != len(q[i]) {
    26  			panic("test data length mismatch")
    27  		}
    28  		if len(l[i]) == 0 {
    29  			continue
    30  		}
    31  		ql[i] = make(alphabet.QLetters, len(l[i]))
    32  		for j := range ql[i] {
    33  			ql[i][j] = alphabet.QLetter{L: l[i][j], Q: q[i][j]}
    34  		}
    35  	}
    36  
    37  	return
    38  }
    39  
    40  // Tests
    41  func Test(t *testing.T) { check.TestingT(t) }
    42  
    43  type S struct{}
    44  
    45  var _ = check.Suite(&S{})
    46  
    47  var (
    48  	expectedIds = []string{
    49  		"FC12044_91407_8_200_981_857",
    50  		"FC12044_91407_8_200_8_865",
    51  		"FC12044_91407_8_200_292_484",
    52  		"FC12044_91407_8_200_675_16",
    53  		"FC12044_91407_8_200_285_136",
    54  	}
    55  
    56  	expectedQLetters = constructQL(
    57  		[][]alphabet.Letter{
    58  			[]alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"),
    59  			[]alphabet.Letter("TTTCCCACCCCAGGAAGCCTTGGAC"),
    60  			[]alphabet.Letter("TCAGCCTCCGTGCCCAGCCCACTCC"),
    61  			[]alphabet.Letter("CTCGGGAGGCTGAGGCAGGGGGGTT"),
    62  			[]alphabet.Letter("CCAAATCTTGAATTGTAGCTCCCCT"),
    63  		},
    64  		[][]alphabet.Qphred{
    65  			{49, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50},
    66  			{55, 55, 55, 37, 42, 46, 49, 46, 44, 42, 46, 46, 49, 44, 40, 44, 49, 40, 40, 42, 42, 46, 49, 37, 37},
    67  			{55, 48, 55, 46, 50, 55, 55, 55, 55, 55, 52, 55, 55, 55, 55, 40, 55, 55, 55, 55, 48, 51, 46, 55, 37},
    68  			{46, 55, 51, 55, 55, 55, 50, 55, 55, 48, 55, 55, 46, 55, 55, 42, 44, 55, 55, 44, 55, 46, 42, 48, 37},
    69  			{46, 50, 55, 46, 48, 55, 55, 55, 55, 55, 50, 55, 55, 52, 55, 55, 51, 55, 55, 55, 55, 51, 49, 44, 50},
    70  		},
    71  	)
    72  
    73  	plusStart = constructQL(
    74  		[][]alphabet.Letter{
    75  			[]alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"),
    76  		},
    77  		[][]alphabet.Qphred{
    78  			{10, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50},
    79  		},
    80  	)
    81  	atStart = constructQL(
    82  		[][]alphabet.Letter{
    83  			[]alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"),
    84  		},
    85  		[][]alphabet.Qphred{
    86  			{31, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50},
    87  		},
    88  	)
    89  )
    90  
    91  var (
    92  	fqTests = []struct {
    93  		fq       string
    94  		verbatim bool
    95  		ids      []string
    96  		seqs     []alphabet.QLetters
    97  	}{
    98  		{
    99  			fq: `@FC12044_91407_8_200_981_857
   100  AACGAGGGGCGCGACTTGACCTTGG
   101  +FC12044_91407_8_200_981_857
   102  RXMSSXXXXSXQXQXFSXQFQKMXS
   103  @FC12044_91407_8_200_8_865
   104  TTTCCCACCCCAGGAAGCCTTGGAC
   105  +FC12044_91407_8_200_8_865
   106  XXXFKOROMKOORMIMRIIKKORFF
   107  @FC12044_91407_8_200_292_484
   108  TCAGCCTCCGTGCCCAGCCCACTCC
   109  +FC12044_91407_8_200_292_484
   110  XQXOSXXXXXUXXXXIXXXXQTOXF
   111  @FC12044_91407_8_200_675_16
   112  CTCGGGAGGCTGAGGCAGGGGGGTT
   113  +FC12044_91407_8_200_675_16
   114  OXTXXXSXXQXXOXXKMXXMXOKQF
   115  @FC12044_91407_8_200_285_136
   116  CCAAATCTTGAATTGTAGCTCCCCT
   117  +FC12044_91407_8_200_285_136
   118  OSXOQXXXXXSXXUXXTXXXXTRMS
   119  `,
   120  			verbatim: true,
   121  			ids:      expectedIds,
   122  			seqs: []alphabet.QLetters{
   123  				expectedQLetters[0],
   124  				expectedQLetters[1],
   125  				expectedQLetters[2],
   126  				expectedQLetters[3],
   127  				expectedQLetters[4],
   128  			},
   129  		},
   130  		{
   131  			fq: `@FC12044_91407_8_200_981_857
   132  AACGAGGGGCGCGACTTGACCTTGG
   133  +FC12044_91407_8_200_981_857
   134  @XMSSXXXXSXQXQXFSXQFQKMXS
   135  @FC12044_91407_8_200_8_865
   136  TTTCCCACCCCAGGAAGCCTTGGAC
   137  +FC12044_91407_8_200_8_865
   138  XXXFKOROMKOORMIMRIIKKORFF
   139  @FC12044_91407_8_200_292_484
   140  TCAGCCTCCGTGCCCAGCCCACTCC
   141  +FC12044_91407_8_200_292_484
   142  XQXOSXXXXXUXXXXIXXXXQTOXF
   143  @FC12044_91407_8_200_675_16
   144  CTCGGGAGGCTGAGGCAGGGGGGTT
   145  +FC12044_91407_8_200_675_16
   146  OXTXXXSXXQXXOXXKMXXMXOKQF
   147  @FC12044_91407_8_200_285_136
   148  CCAAATCTTGAATTGTAGCTCCCCT
   149  +FC12044_91407_8_200_285_136
   150  OSXOQXXXXXSXXUXXTXXXXTRMS
   151  `,
   152  			verbatim: true,
   153  			ids:      expectedIds,
   154  			seqs: []alphabet.QLetters{
   155  				atStart[0],
   156  				expectedQLetters[1],
   157  				expectedQLetters[2],
   158  				expectedQLetters[3],
   159  				expectedQLetters[4],
   160  			},
   161  		},
   162  		{
   163  			fq: `@FC12044_91407_8_200_981_857
   164  AACGAGGGGCGCGACTTGACCTTGG
   165  +FC12044_91407_8_200_981_857
   166  +XMSSXXXXSXQXQXFSXQFQKMXS
   167  @FC12044_91407_8_200_8_865
   168  TTTCCCACCCCAGGAAGCCTTGGAC
   169  +FC12044_91407_8_200_8_865
   170  XXXFKOROMKOORMIMRIIKKORFF
   171  @FC12044_91407_8_200_292_484
   172  TCAGCCTCCGTGCCCAGCCCACTCC
   173  +FC12044_91407_8_200_292_484
   174  XQXOSXXXXXUXXXXIXXXXQTOXF
   175  @FC12044_91407_8_200_675_16
   176  CTCGGGAGGCTGAGGCAGGGGGGTT
   177  +FC12044_91407_8_200_675_16
   178  OXTXXXSXXQXXOXXKMXXMXOKQF
   179  @FC12044_91407_8_200_285_136
   180  CCAAATCTTGAATTGTAGCTCCCCT
   181  +FC12044_91407_8_200_285_136
   182  OSXOQXXXXXSXXUXXTXXXXTRMS
   183  `,
   184  			verbatim: true,
   185  			ids:      expectedIds,
   186  			seqs: []alphabet.QLetters{
   187  				plusStart[0],
   188  				expectedQLetters[1],
   189  				expectedQLetters[2],
   190  				expectedQLetters[3],
   191  				expectedQLetters[4],
   192  			},
   193  		},
   194  		{
   195  			fq: `@FC12044_91407_8_200_981_857
   196  AACGAGGGGCGCGACTTGACCTTGG
   197  +FC12044_91407_8_200_981_857
   198  RXMSSXXXXSXQXQXFSXQFQKMXS
   199  @FC12044_91407_8_200_8_865
   200  TTTCCCACCCCAGGAAGCCTTGGAC
   201  +FC12044_91407_8_200_8_865
   202  XXXFKOROMKOORMIMRIIKKORFF
   203  @FC12044_91407_8_200_292_484
   204  TCAGCCTCCGTGCCCAGCCCACTCC
   205  +FC12044_91407_8_200_292_484
   206  XQXOSXXXXXUXXXXIXXXXQTOXF
   207  @FC12044_91407_8_200_675_16
   208  CTCGGGAGGCTGAGGCAGGGGGGTT
   209  +FC12044_91407_8_200_675_16
   210  OXTXXXSXXQXXOXXKMXXMXOKQF
   211  @FC12044_91407_8_200_285_136
   212  
   213  +FC12044_91407_8_200_285_136
   214  
   215  `,
   216  			verbatim: true,
   217  			ids:      expectedIds,
   218  			seqs: []alphabet.QLetters{
   219  				expectedQLetters[0],
   220  				expectedQLetters[1],
   221  				expectedQLetters[2],
   222  				expectedQLetters[3],
   223  				nil,
   224  			},
   225  		},
   226  		{
   227  			fq: `@FC12044_91407_8_200_981_857
   228  
   229  +FC12044_91407_8_200_981_857
   230  
   231  @FC12044_91407_8_200_8_865
   232  TTTCCCACCCCAGGAAGCCTTGGAC
   233  +FC12044_91407_8_200_8_865
   234  XXXFKOROMKOORMIMRIIKKORFF
   235  @FC12044_91407_8_200_292_484
   236  TCAGCCTCCGTGCCCAGCCCACTCC
   237  +FC12044_91407_8_200_292_484
   238  XQXOSXXXXXUXXXXIXXXXQTOXF
   239  @FC12044_91407_8_200_675_16
   240  CTCGGGAGGCTGAGGCAGGGGGGTT
   241  +FC12044_91407_8_200_675_16
   242  OXTXXXSXXQXXOXXKMXXMXOKQF
   243  @FC12044_91407_8_200_285_136
   244  CCAAATCTTGAATTGTAGCTCCCCT
   245  +FC12044_91407_8_200_285_136
   246  OSXOQXXXXXSXXUXXTXXXXTRMS
   247  `,
   248  			verbatim: true,
   249  			ids:      expectedIds,
   250  			seqs: []alphabet.QLetters{
   251  				nil,
   252  				expectedQLetters[1],
   253  				expectedQLetters[2],
   254  				expectedQLetters[3],
   255  				expectedQLetters[4],
   256  			},
   257  		},
   258  		{
   259  			fq: `@FC12044_91407_8_200_981_857
   260  AACGAGGGGCGCGACTTGACCTTGG
   261  +FC12044_91407_8_200_981_857
   262  RXMSSXXXXSXQXQXFSXQFQKMXS
   263  @FC12044_91407_8_200_8_865
   264  
   265  +FC12044_91407_8_200_8_865
   266  
   267  @FC12044_91407_8_200_292_484
   268  TCAGCCTCCGTGCCCAGCCCACTCC
   269  +FC12044_91407_8_200_292_484
   270  XQXOSXXXXXUXXXXIXXXXQTOXF
   271  @FC12044_91407_8_200_675_16
   272  CTCGGGAGGCTGAGGCAGGGGGGTT
   273  +FC12044_91407_8_200_675_16
   274  OXTXXXSXXQXXOXXKMXXMXOKQF
   275  @FC12044_91407_8_200_285_136
   276  CCAAATCTTGAATTGTAGCTCCCCT
   277  +FC12044_91407_8_200_285_136
   278  OSXOQXXXXXSXXUXXTXXXXTRMS
   279  `,
   280  			verbatim: true,
   281  			ids:      expectedIds,
   282  			seqs: []alphabet.QLetters{
   283  				expectedQLetters[0],
   284  				nil,
   285  				expectedQLetters[2],
   286  				expectedQLetters[3],
   287  				expectedQLetters[4],
   288  			},
   289  		},
   290  		{
   291  			fq: `@FC12044_91407_8_200_981_857
   292  AACGAGGGGCGCGACTTGACCTTGG
   293  +FC12044_91407_8_200_981_857
   294  RXMSSXXXXSXQXQXFSXQFQKMXS
   295  
   296  @FC12044_91407_8_200_8_865
   297  TTTCCCACCCCAGGAAGCCTTGGAC
   298  +FC12044_91407_8_200_8_865
   299  
   300  XXXFKOROMKOORMIMRIIKKORFF
   301  @FC12044_91407_8_200_292_484
   302  
   303  TCAGCCTCCGTGCCCAGCCCACTCC
   304  
   305  +FC12044_91407_8_200_292_484
   306  XQXOSXXXXXUXXXXIXXXXQTOXF
   307  @FC12044_91407_8_200_675_16
   308  
   309  CTCGGGAGGCTGAGGCAGGGGGGTT
   310  +FC12044_91407_8_200_675_16
   311  OXTXXXSXXQXXOXXKMXXMXOKQF
   312  @FC12044_91407_8_200_285_136
   313  CCAAATCTTGAATTGTAGCTCCCCT
   314  +FC12044_91407_8_200_285_136
   315  
   316  OSXOQXXXXXSXXUXXTXXXXTRMS`,
   317  			verbatim: false,
   318  			ids:      expectedIds,
   319  			seqs: []alphabet.QLetters{
   320  				expectedQLetters[0],
   321  				expectedQLetters[1],
   322  				expectedQLetters[2],
   323  				expectedQLetters[3],
   324  				expectedQLetters[4],
   325  			},
   326  		},
   327  		{
   328  			fq: `@FC12044_91407_8_200_981_857
   329  AACGAGGGGCGCGACTTGACCTTGG
   330  +FC12044_91407_8_200_981_857
   331  RXMSSXXXXSXQXQXFSXQFQKMXS
   332  
   333  @FC12044_91407_8_200_8_865
   334  TTTCCCACCCCAGGAAGCCTTGGAC
   335  +FC12044_91407_8_200_8_865
   336  
   337  XXXFKOROMKOORMIMRIIKKORFF
   338  @FC12044_91407_8_200_292_484
   339  
   340  TCAGCCTCCGTGCCCAGCCCACTCC
   341  
   342  +FC12044_91407_8_200_292_484
   343  XQXOSXXXXXUXXXXIXXXXQTOXF
   344  @FC12044_91407_8_200_675_16
   345  
   346  CTCGGGAGGCTGAGGCAGGGGGGTT
   347  +FC12044_91407_8_200_675_16
   348  OXTXXXSXXQXXOXXKMXXMXOKQF
   349  @FC12044_91407_8_200_285_136
   350  
   351  +FC12044_91407_8_200_285_136
   352  
   353  `,
   354  			verbatim: false,
   355  			ids:      expectedIds,
   356  			seqs: []alphabet.QLetters{
   357  				expectedQLetters[0],
   358  				expectedQLetters[1],
   359  				expectedQLetters[2],
   360  				expectedQLetters[3],
   361  				nil,
   362  			},
   363  		},
   364  		{
   365  			fq: `@FC12044_91407_8_200_981_857
   366  AACGAGGGGCGCGACTTGACCTTGG
   367  +FC12044_91407_8_200_981_857
   368  RXMSSXXXXSXQXQXFSXQFQKMXS
   369  
   370  @FC12044_91407_8_200_8_865
   371  TTTCCCACCCCAGGAAGCCTTGGAC
   372  +FC12044_91407_8_200_8_865
   373  
   374  XXXFKOROMKOORMIMRIIKKORFF
   375  @FC12044_91407_8_200_292_484
   376  
   377  TCAGCCTCCGTGCCCAGCCCACTCC
   378  
   379  +FC12044_91407_8_200_292_484
   380  XQXOSXXXXXUXXXXIXXXXQTOXF
   381  @FC12044_91407_8_200_675_16
   382  
   383  CTCGGGAGGCTGAGGCAGGGGGGTT
   384  +FC12044_91407_8_200_675_16
   385  OXTXXXSXXQXXOXXKMXXMXOKQF
   386  @FC12044_91407_8_200_285_136
   387  +FC12044_91407_8_200_285_136`,
   388  			verbatim: false,
   389  			ids:      expectedIds,
   390  			seqs: []alphabet.QLetters{
   391  				expectedQLetters[0],
   392  				expectedQLetters[1],
   393  				expectedQLetters[2],
   394  				expectedQLetters[3],
   395  				nil,
   396  			},
   397  		},
   398  	}
   399  )
   400  
   401  func (s *S) TestReadFastq(c *check.C) {
   402  	for _, t := range fqTests {
   403  		r := NewReader(bytes.NewBufferString(t.fq), linear.NewQSeq("", nil, alphabet.DNA, alphabet.Sanger))
   404  		var n int
   405  		for n = 0; ; n++ {
   406  			if s, err := r.Read(); err != nil {
   407  				if err == io.EOF {
   408  					break
   409  				} else {
   410  					c.Fatalf("Failed to read %s in %q: %s", expectedIds[n], t.fq, err)
   411  				}
   412  			} else {
   413  				l := s.(*linear.QSeq)
   414  				header := l.Name()
   415  				if desc := l.Description(); len(desc) > 0 {
   416  					header += " " + desc
   417  				}
   418  				c.Check(header, check.Equals, t.ids[n])
   419  				c.Check(l.Slice(), check.DeepEquals, t.seqs[n])
   420  			}
   421  		}
   422  		c.Check(n, check.Equals, len(t.ids))
   423  	}
   424  }
   425  
   426  func (s *S) TestWriteFastq(c *check.C) {
   427  	for i, t := range fqTests {
   428  		if !t.verbatim {
   429  			continue
   430  		}
   431  		for j := 0; j < 2; j++ {
   432  			var n int
   433  			b := &bytes.Buffer{}
   434  			w := NewWriter(b)
   435  			w.QID = j == 0
   436  			seq := linear.NewQSeq("", nil, alphabet.DNA, alphabet.Sanger)
   437  
   438  			for i := range expectedIds {
   439  				seq.ID = t.ids[i]
   440  				seq.Seq = t.seqs[i]
   441  				_n, err := w.Write(seq)
   442  				c.Assert(err, check.Equals, nil, check.Commentf("Failed to write to buffer: %s", err))
   443  				n += _n
   444  			}
   445  
   446  			c.Check(n, check.Equals, b.Len())
   447  
   448  			if w.QID {
   449  				c.Check(string(b.Bytes()), check.Equals, t.fq, check.Commentf("Write test %d", i))
   450  			}
   451  		}
   452  	}
   453  }