github.com/biogo/biogo@v1.0.4/io/seqio/fastq/fastq_test.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package fastq 6 7 import ( 8 "github.com/biogo/biogo/alphabet" 9 "github.com/biogo/biogo/seq/linear" 10 11 "bytes" 12 "io" 13 "testing" 14 15 "gopkg.in/check.v1" 16 ) 17 18 // Helpers 19 func constructQL(l [][]alphabet.Letter, q [][]alphabet.Qphred) (ql []alphabet.QLetters) { 20 if len(l) != len(q) { 21 panic("test data length mismatch") 22 } 23 ql = make([]alphabet.QLetters, len(l)) 24 for i := range ql { 25 if len(l[i]) != len(q[i]) { 26 panic("test data length mismatch") 27 } 28 if len(l[i]) == 0 { 29 continue 30 } 31 ql[i] = make(alphabet.QLetters, len(l[i])) 32 for j := range ql[i] { 33 ql[i][j] = alphabet.QLetter{L: l[i][j], Q: q[i][j]} 34 } 35 } 36 37 return 38 } 39 40 // Tests 41 func Test(t *testing.T) { check.TestingT(t) } 42 43 type S struct{} 44 45 var _ = check.Suite(&S{}) 46 47 var ( 48 expectedIds = []string{ 49 "FC12044_91407_8_200_981_857", 50 "FC12044_91407_8_200_8_865", 51 "FC12044_91407_8_200_292_484", 52 "FC12044_91407_8_200_675_16", 53 "FC12044_91407_8_200_285_136", 54 } 55 56 expectedQLetters = constructQL( 57 [][]alphabet.Letter{ 58 []alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"), 59 []alphabet.Letter("TTTCCCACCCCAGGAAGCCTTGGAC"), 60 []alphabet.Letter("TCAGCCTCCGTGCCCAGCCCACTCC"), 61 []alphabet.Letter("CTCGGGAGGCTGAGGCAGGGGGGTT"), 62 []alphabet.Letter("CCAAATCTTGAATTGTAGCTCCCCT"), 63 }, 64 [][]alphabet.Qphred{ 65 {49, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50}, 66 {55, 55, 55, 37, 42, 46, 49, 46, 44, 42, 46, 46, 49, 44, 40, 44, 49, 40, 40, 42, 42, 46, 49, 37, 37}, 67 {55, 48, 55, 46, 50, 55, 55, 55, 55, 55, 52, 55, 55, 55, 55, 40, 55, 55, 55, 55, 48, 51, 46, 55, 37}, 68 {46, 55, 51, 55, 55, 55, 50, 55, 55, 48, 55, 55, 46, 55, 55, 42, 44, 55, 55, 44, 55, 46, 42, 48, 37}, 69 {46, 50, 55, 46, 48, 55, 55, 55, 55, 55, 50, 55, 55, 52, 55, 55, 51, 55, 55, 55, 55, 51, 49, 44, 50}, 70 }, 71 ) 72 73 plusStart = constructQL( 74 [][]alphabet.Letter{ 75 []alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"), 76 }, 77 [][]alphabet.Qphred{ 78 {10, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50}, 79 }, 80 ) 81 atStart = constructQL( 82 [][]alphabet.Letter{ 83 []alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"), 84 }, 85 [][]alphabet.Qphred{ 86 {31, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50}, 87 }, 88 ) 89 ) 90 91 var ( 92 fqTests = []struct { 93 fq string 94 verbatim bool 95 ids []string 96 seqs []alphabet.QLetters 97 }{ 98 { 99 fq: `@FC12044_91407_8_200_981_857 100 AACGAGGGGCGCGACTTGACCTTGG 101 +FC12044_91407_8_200_981_857 102 RXMSSXXXXSXQXQXFSXQFQKMXS 103 @FC12044_91407_8_200_8_865 104 TTTCCCACCCCAGGAAGCCTTGGAC 105 +FC12044_91407_8_200_8_865 106 XXXFKOROMKOORMIMRIIKKORFF 107 @FC12044_91407_8_200_292_484 108 TCAGCCTCCGTGCCCAGCCCACTCC 109 +FC12044_91407_8_200_292_484 110 XQXOSXXXXXUXXXXIXXXXQTOXF 111 @FC12044_91407_8_200_675_16 112 CTCGGGAGGCTGAGGCAGGGGGGTT 113 +FC12044_91407_8_200_675_16 114 OXTXXXSXXQXXOXXKMXXMXOKQF 115 @FC12044_91407_8_200_285_136 116 CCAAATCTTGAATTGTAGCTCCCCT 117 +FC12044_91407_8_200_285_136 118 OSXOQXXXXXSXXUXXTXXXXTRMS 119 `, 120 verbatim: true, 121 ids: expectedIds, 122 seqs: []alphabet.QLetters{ 123 expectedQLetters[0], 124 expectedQLetters[1], 125 expectedQLetters[2], 126 expectedQLetters[3], 127 expectedQLetters[4], 128 }, 129 }, 130 { 131 fq: `@FC12044_91407_8_200_981_857 132 AACGAGGGGCGCGACTTGACCTTGG 133 +FC12044_91407_8_200_981_857 134 @XMSSXXXXSXQXQXFSXQFQKMXS 135 @FC12044_91407_8_200_8_865 136 TTTCCCACCCCAGGAAGCCTTGGAC 137 +FC12044_91407_8_200_8_865 138 XXXFKOROMKOORMIMRIIKKORFF 139 @FC12044_91407_8_200_292_484 140 TCAGCCTCCGTGCCCAGCCCACTCC 141 +FC12044_91407_8_200_292_484 142 XQXOSXXXXXUXXXXIXXXXQTOXF 143 @FC12044_91407_8_200_675_16 144 CTCGGGAGGCTGAGGCAGGGGGGTT 145 +FC12044_91407_8_200_675_16 146 OXTXXXSXXQXXOXXKMXXMXOKQF 147 @FC12044_91407_8_200_285_136 148 CCAAATCTTGAATTGTAGCTCCCCT 149 +FC12044_91407_8_200_285_136 150 OSXOQXXXXXSXXUXXTXXXXTRMS 151 `, 152 verbatim: true, 153 ids: expectedIds, 154 seqs: []alphabet.QLetters{ 155 atStart[0], 156 expectedQLetters[1], 157 expectedQLetters[2], 158 expectedQLetters[3], 159 expectedQLetters[4], 160 }, 161 }, 162 { 163 fq: `@FC12044_91407_8_200_981_857 164 AACGAGGGGCGCGACTTGACCTTGG 165 +FC12044_91407_8_200_981_857 166 +XMSSXXXXSXQXQXFSXQFQKMXS 167 @FC12044_91407_8_200_8_865 168 TTTCCCACCCCAGGAAGCCTTGGAC 169 +FC12044_91407_8_200_8_865 170 XXXFKOROMKOORMIMRIIKKORFF 171 @FC12044_91407_8_200_292_484 172 TCAGCCTCCGTGCCCAGCCCACTCC 173 +FC12044_91407_8_200_292_484 174 XQXOSXXXXXUXXXXIXXXXQTOXF 175 @FC12044_91407_8_200_675_16 176 CTCGGGAGGCTGAGGCAGGGGGGTT 177 +FC12044_91407_8_200_675_16 178 OXTXXXSXXQXXOXXKMXXMXOKQF 179 @FC12044_91407_8_200_285_136 180 CCAAATCTTGAATTGTAGCTCCCCT 181 +FC12044_91407_8_200_285_136 182 OSXOQXXXXXSXXUXXTXXXXTRMS 183 `, 184 verbatim: true, 185 ids: expectedIds, 186 seqs: []alphabet.QLetters{ 187 plusStart[0], 188 expectedQLetters[1], 189 expectedQLetters[2], 190 expectedQLetters[3], 191 expectedQLetters[4], 192 }, 193 }, 194 { 195 fq: `@FC12044_91407_8_200_981_857 196 AACGAGGGGCGCGACTTGACCTTGG 197 +FC12044_91407_8_200_981_857 198 RXMSSXXXXSXQXQXFSXQFQKMXS 199 @FC12044_91407_8_200_8_865 200 TTTCCCACCCCAGGAAGCCTTGGAC 201 +FC12044_91407_8_200_8_865 202 XXXFKOROMKOORMIMRIIKKORFF 203 @FC12044_91407_8_200_292_484 204 TCAGCCTCCGTGCCCAGCCCACTCC 205 +FC12044_91407_8_200_292_484 206 XQXOSXXXXXUXXXXIXXXXQTOXF 207 @FC12044_91407_8_200_675_16 208 CTCGGGAGGCTGAGGCAGGGGGGTT 209 +FC12044_91407_8_200_675_16 210 OXTXXXSXXQXXOXXKMXXMXOKQF 211 @FC12044_91407_8_200_285_136 212 213 +FC12044_91407_8_200_285_136 214 215 `, 216 verbatim: true, 217 ids: expectedIds, 218 seqs: []alphabet.QLetters{ 219 expectedQLetters[0], 220 expectedQLetters[1], 221 expectedQLetters[2], 222 expectedQLetters[3], 223 nil, 224 }, 225 }, 226 { 227 fq: `@FC12044_91407_8_200_981_857 228 229 +FC12044_91407_8_200_981_857 230 231 @FC12044_91407_8_200_8_865 232 TTTCCCACCCCAGGAAGCCTTGGAC 233 +FC12044_91407_8_200_8_865 234 XXXFKOROMKOORMIMRIIKKORFF 235 @FC12044_91407_8_200_292_484 236 TCAGCCTCCGTGCCCAGCCCACTCC 237 +FC12044_91407_8_200_292_484 238 XQXOSXXXXXUXXXXIXXXXQTOXF 239 @FC12044_91407_8_200_675_16 240 CTCGGGAGGCTGAGGCAGGGGGGTT 241 +FC12044_91407_8_200_675_16 242 OXTXXXSXXQXXOXXKMXXMXOKQF 243 @FC12044_91407_8_200_285_136 244 CCAAATCTTGAATTGTAGCTCCCCT 245 +FC12044_91407_8_200_285_136 246 OSXOQXXXXXSXXUXXTXXXXTRMS 247 `, 248 verbatim: true, 249 ids: expectedIds, 250 seqs: []alphabet.QLetters{ 251 nil, 252 expectedQLetters[1], 253 expectedQLetters[2], 254 expectedQLetters[3], 255 expectedQLetters[4], 256 }, 257 }, 258 { 259 fq: `@FC12044_91407_8_200_981_857 260 AACGAGGGGCGCGACTTGACCTTGG 261 +FC12044_91407_8_200_981_857 262 RXMSSXXXXSXQXQXFSXQFQKMXS 263 @FC12044_91407_8_200_8_865 264 265 +FC12044_91407_8_200_8_865 266 267 @FC12044_91407_8_200_292_484 268 TCAGCCTCCGTGCCCAGCCCACTCC 269 +FC12044_91407_8_200_292_484 270 XQXOSXXXXXUXXXXIXXXXQTOXF 271 @FC12044_91407_8_200_675_16 272 CTCGGGAGGCTGAGGCAGGGGGGTT 273 +FC12044_91407_8_200_675_16 274 OXTXXXSXXQXXOXXKMXXMXOKQF 275 @FC12044_91407_8_200_285_136 276 CCAAATCTTGAATTGTAGCTCCCCT 277 +FC12044_91407_8_200_285_136 278 OSXOQXXXXXSXXUXXTXXXXTRMS 279 `, 280 verbatim: true, 281 ids: expectedIds, 282 seqs: []alphabet.QLetters{ 283 expectedQLetters[0], 284 nil, 285 expectedQLetters[2], 286 expectedQLetters[3], 287 expectedQLetters[4], 288 }, 289 }, 290 { 291 fq: `@FC12044_91407_8_200_981_857 292 AACGAGGGGCGCGACTTGACCTTGG 293 +FC12044_91407_8_200_981_857 294 RXMSSXXXXSXQXQXFSXQFQKMXS 295 296 @FC12044_91407_8_200_8_865 297 TTTCCCACCCCAGGAAGCCTTGGAC 298 +FC12044_91407_8_200_8_865 299 300 XXXFKOROMKOORMIMRIIKKORFF 301 @FC12044_91407_8_200_292_484 302 303 TCAGCCTCCGTGCCCAGCCCACTCC 304 305 +FC12044_91407_8_200_292_484 306 XQXOSXXXXXUXXXXIXXXXQTOXF 307 @FC12044_91407_8_200_675_16 308 309 CTCGGGAGGCTGAGGCAGGGGGGTT 310 +FC12044_91407_8_200_675_16 311 OXTXXXSXXQXXOXXKMXXMXOKQF 312 @FC12044_91407_8_200_285_136 313 CCAAATCTTGAATTGTAGCTCCCCT 314 +FC12044_91407_8_200_285_136 315 316 OSXOQXXXXXSXXUXXTXXXXTRMS`, 317 verbatim: false, 318 ids: expectedIds, 319 seqs: []alphabet.QLetters{ 320 expectedQLetters[0], 321 expectedQLetters[1], 322 expectedQLetters[2], 323 expectedQLetters[3], 324 expectedQLetters[4], 325 }, 326 }, 327 { 328 fq: `@FC12044_91407_8_200_981_857 329 AACGAGGGGCGCGACTTGACCTTGG 330 +FC12044_91407_8_200_981_857 331 RXMSSXXXXSXQXQXFSXQFQKMXS 332 333 @FC12044_91407_8_200_8_865 334 TTTCCCACCCCAGGAAGCCTTGGAC 335 +FC12044_91407_8_200_8_865 336 337 XXXFKOROMKOORMIMRIIKKORFF 338 @FC12044_91407_8_200_292_484 339 340 TCAGCCTCCGTGCCCAGCCCACTCC 341 342 +FC12044_91407_8_200_292_484 343 XQXOSXXXXXUXXXXIXXXXQTOXF 344 @FC12044_91407_8_200_675_16 345 346 CTCGGGAGGCTGAGGCAGGGGGGTT 347 +FC12044_91407_8_200_675_16 348 OXTXXXSXXQXXOXXKMXXMXOKQF 349 @FC12044_91407_8_200_285_136 350 351 +FC12044_91407_8_200_285_136 352 353 `, 354 verbatim: false, 355 ids: expectedIds, 356 seqs: []alphabet.QLetters{ 357 expectedQLetters[0], 358 expectedQLetters[1], 359 expectedQLetters[2], 360 expectedQLetters[3], 361 nil, 362 }, 363 }, 364 { 365 fq: `@FC12044_91407_8_200_981_857 366 AACGAGGGGCGCGACTTGACCTTGG 367 +FC12044_91407_8_200_981_857 368 RXMSSXXXXSXQXQXFSXQFQKMXS 369 370 @FC12044_91407_8_200_8_865 371 TTTCCCACCCCAGGAAGCCTTGGAC 372 +FC12044_91407_8_200_8_865 373 374 XXXFKOROMKOORMIMRIIKKORFF 375 @FC12044_91407_8_200_292_484 376 377 TCAGCCTCCGTGCCCAGCCCACTCC 378 379 +FC12044_91407_8_200_292_484 380 XQXOSXXXXXUXXXXIXXXXQTOXF 381 @FC12044_91407_8_200_675_16 382 383 CTCGGGAGGCTGAGGCAGGGGGGTT 384 +FC12044_91407_8_200_675_16 385 OXTXXXSXXQXXOXXKMXXMXOKQF 386 @FC12044_91407_8_200_285_136 387 +FC12044_91407_8_200_285_136`, 388 verbatim: false, 389 ids: expectedIds, 390 seqs: []alphabet.QLetters{ 391 expectedQLetters[0], 392 expectedQLetters[1], 393 expectedQLetters[2], 394 expectedQLetters[3], 395 nil, 396 }, 397 }, 398 } 399 ) 400 401 func (s *S) TestReadFastq(c *check.C) { 402 for _, t := range fqTests { 403 r := NewReader(bytes.NewBufferString(t.fq), linear.NewQSeq("", nil, alphabet.DNA, alphabet.Sanger)) 404 var n int 405 for n = 0; ; n++ { 406 if s, err := r.Read(); err != nil { 407 if err == io.EOF { 408 break 409 } else { 410 c.Fatalf("Failed to read %s in %q: %s", expectedIds[n], t.fq, err) 411 } 412 } else { 413 l := s.(*linear.QSeq) 414 header := l.Name() 415 if desc := l.Description(); len(desc) > 0 { 416 header += " " + desc 417 } 418 c.Check(header, check.Equals, t.ids[n]) 419 c.Check(l.Slice(), check.DeepEquals, t.seqs[n]) 420 } 421 } 422 c.Check(n, check.Equals, len(t.ids)) 423 } 424 } 425 426 func (s *S) TestWriteFastq(c *check.C) { 427 for i, t := range fqTests { 428 if !t.verbatim { 429 continue 430 } 431 for j := 0; j < 2; j++ { 432 var n int 433 b := &bytes.Buffer{} 434 w := NewWriter(b) 435 w.QID = j == 0 436 seq := linear.NewQSeq("", nil, alphabet.DNA, alphabet.Sanger) 437 438 for i := range expectedIds { 439 seq.ID = t.ids[i] 440 seq.Seq = t.seqs[i] 441 _n, err := w.Write(seq) 442 c.Assert(err, check.Equals, nil, check.Commentf("Failed to write to buffer: %s", err)) 443 n += _n 444 } 445 446 c.Check(n, check.Equals, b.Len()) 447 448 if w.QID { 449 c.Check(string(b.Bytes()), check.Equals, t.fq, check.Commentf("Write test %d", i)) 450 } 451 } 452 } 453 }