github.com/biogo/biogo@v1.0.4/io/seqio/seqio_test.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package seqio_test 6 7 import ( 8 "bytes" 9 10 "github.com/biogo/biogo/alphabet" 11 "github.com/biogo/biogo/io/seqio" 12 "github.com/biogo/biogo/io/seqio/fasta" 13 "github.com/biogo/biogo/io/seqio/fastq" 14 "github.com/biogo/biogo/seq/linear" 15 16 "testing" 17 18 "gopkg.in/check.v1" 19 ) 20 21 func TestSeqio(t *testing.T) { 22 var ( 23 _ seqio.Reader = (*fasta.Reader)(nil) 24 _ seqio.Reader = (*fastq.Reader)(nil) 25 _ seqio.Writer = (*fasta.Writer)(nil) 26 _ seqio.Writer = (*fastq.Writer)(nil) 27 ) 28 } 29 30 // Tests 31 func Test(t *testing.T) { check.TestingT(t) } 32 33 type S struct{} 34 35 var _ = check.Suite(&S{}) 36 37 var ( 38 testaln0 = `>AK1H_ECOLI/114-431 DESCRIPTION HERE 39 CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAE 40 STRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRAD 41 CCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLH 42 PRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------ 43 NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQS 44 DCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF--- 45 ---FAALARANINIVAIA 46 >AKH_HAEIN 114-431 47 -----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQ 48 LLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNG 49 SDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLP 50 TLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESL 51 Q----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS 52 ---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQ 53 AKGIAARF------FSALAQANISIVAIA 54 >AKH1_MAIZE/117-440 55 -----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREV 56 LVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDG 57 SDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILS 58 TLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENG 59 DLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS 60 ---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMAS 61 TPGVSATL------FDALAKANINVRAIA 62 >AK2H_ECOLI/112-431 63 -----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAR 64 EFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNG 65 SDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLP 66 LLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARI 67 VTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSE 68 VADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQ 69 PVEFTWQSDDGISLVAVL 70 >AK1_BACSU/66-374 71 -----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAG 72 FLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGG 73 SDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLP 74 VVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDV 75 FERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIV 76 YTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI 77 ------VSALSEKEIPILQSA 78 >AK2_BACST/63-370 79 -----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAG 80 ITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGG 81 SDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIK 82 EISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHL 83 IVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA---- 84 ----------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEG 85 ADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS 86 >AK2_BACSU/63-373 87 -----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAG 88 IRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGG 89 SDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLE 90 GISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNL 91 IVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ---- 92 ----------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEF 93 EKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS 94 >AKAB_CORFL/63-379 95 -----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAG 96 VLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGG 97 SDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLE 98 KLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEA 99 VLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV---- 100 ----------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNW 101 TNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS 102 >AKAB_MYCSM/63-379 103 -----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAG 104 VITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGG 105 SDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLD 106 TVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDA 107 ILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI---- 108 ----------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGF 109 SQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS 110 >AK3_ECOLI/106-407 111 -----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRK 112 VMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGG 113 SDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRID 114 EIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRA 115 LAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVA 116 LTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG------- 117 ---NDLSKACGVGKEVF 118 >AK_YEAST/134-472 A COMMENT FOR YEAST 119 -----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPS 120 DFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGY 121 TDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLD 122 SVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGEST 123 PPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTI 124 LDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVG 125 KHMKQYIGIAG---TMFTTLAEEGINIEMIS 126 ` 127 128 expectNfa = []string{ 129 "AK1H_ECOLI/114-431 DESCRIPTION HERE", 130 "AKH_HAEIN 114-431", 131 "AKH1_MAIZE/117-440", 132 "AK2H_ECOLI/112-431", 133 "AK1_BACSU/66-374", 134 "AK2_BACST/63-370", 135 "AK2_BACSU/63-373", 136 "AKAB_CORFL/63-379", 137 "AKAB_MYCSM/63-379", 138 "AK3_ECOLI/106-407", 139 "AK_YEAST/134-472 A COMMENT FOR YEAST", 140 } 141 142 expectSfa = [][]alphabet.Letter{ 143 []alphabet.Letter("CPDSINAALICRGEKMSIAIMAGVLEARGH-N--VTVIDPVEKLLAVG-HYLESTVDIAESTRRIAASRIP------A-DHMVLMAGFTAGN-EKGELVVLGRNGSDYSAAVLAACLRADCCEIWTDVNGVYTCDP-------------RQVPDARLLKSMSYQEAMELSY--FGAKVLHPRTITPIAQFQIPCLIKNTGNPQAPGTL-IG--ASRDEDELP----VKGISNLN------NMAMFSVSGP-GMKGMVGMAARVFAAMS-------RARISVVLITQSSSEYSISFCVPQSDCVRAERAMLEEFY-----LELKEGLLEPLAVAERLAIISV-VGDGLRTLRGISAKF------FAALARANINIVAIA"), 144 []alphabet.Letter("-----------------VEDAVKATIDCRGEKLSIAMMKAWFEARGY-S--VHIVDPVKQLLAKG-GYLESSVEIEESTKRVDAANIA--K-DKVVLMAGF---TAGNEKGELVLLGRNGSDYSAAC-----------------LAACLGASVCEIWTDVDGVYTCDP--RLVPDARLLPTLSYREAMELSYFGAKVIHPRTIGPLLPQNIPCVIKNTGNPSAPGSI-ID--GNVKSESLQ----VKGITNLDNLAMFNVSGPGMQGM---VGMASRVFSAMSGAGISVILITQSSSEYS---ISFCVPVKSAEVAKTVLETEFA-----NELNEHQLEPIEVIKDLSIISV-VGDGMKQAKGIAARF------FSALAQANISIVAIA"), 145 []alphabet.Letter("-----------------ATESFSDFVVGHGELWSAQMLSYAIQKSGT-P--CSWMDTREVLVVNPSGANQVDPDYLESEKRLEKWFSRC-P-AETIIATGF---IASTPENIPTTLKRDGSDFSAAI-----------------IGSLVKARQVTIWTDVDGVFSADP--RKVSEAVILSTLSYQEAWEMSYFGANVLHPRTIIPVMKYNIPIVIRNIFNTSAPGTM-IC--QQPANENGDLEACVKAFATIDKLALVNVEGTGMAGV---PGTANAIFGAVKDVGANVIMISQASSEHS---VCFAVPEKEVALVSAALHARFR-----EALAAGRLSKVEVIHNCSILAT-VGLRMASTPGVSATL------FDALAKANINVRAIA"), 146 []alphabet.Letter("-----------------INDAVYAEVVGHGEVWSARLMSAVLNQQG-----LPAAWLDAREFLRAERAAQPQVDEGLSYPLLQQLLVQH-P-GKRLVVTGF---ISRNNAGETVLLGRNGSDYSATQ-----------------IGALAGVSRVTIWSDVAGVYSADP--RKVKDACLLPLLRLDEASELARLAAPVLHARTLQPVSGSEIDLQLRCSYTPDQGSTRIERVLASGTGARIVTSHDDVCLI-EFQVPASQDFKLAHKEI--DQILKRAQVRPLAVGVHNDRQLLQFCYTSEVADSALKILDEAG---------LPGELRLRQGLALVAMVGAGVTRNPLHCHRFWQQLKGQPVEFTWQSDDGISLVAVL"), 147 []alphabet.Letter("-----------------ISPREQDLLLSCGETISSVVFTSMLLDNGVKA--AALTGAQAGFLTNDQHTNAKIIEMKPER--LFSVLAN----HDAVVVAGF---QGATEKGDTTTIGRGGSDTSAAA-----------------LGAAVDAEYIDIFTDVEGVMTADP--RVVENAKPLPVVTYTEICNLAYQGAKVISPRAVEIAMQAKVPIRVRSTYS-NDKGTLVTSHHSSKVGSDVFERLITGIAH-VKDVTQFKVPAKIGQYN-----VQTEVFKAMANAGISVDFFNITPSEIVYTVAGNKTETAQR------------ILMDMGYDPMVTRNCAKVSAVGAGIMGVPGVTSKI------VSALSEKEIPILQSA"), 148 []alphabet.Letter("-----------------KRE--MDMLLSTGEQVSIALLAMSLHEKGYKA--VSLTGWQAGITTEEMHGNARIMNIDTT--RIRRCLDE----GAIVIVAGF---QGVTETGEITTLGRGGSDTTAVA-----------------LAAALKAEKCDIYTDVTGVFTTDP--RYVKTARKIKEISYDEMLELANLGAGVLHPRAVEFAKNYEVPLEVRSSME-NERGTMVK--EEVSMEQHLIVRGIAFEDQ-VTRVTVVGIEKYLQSVA--------TIFTALANRGINVDIIIQNA--------------------TNSETAS--VSFSIRTEDLPETLQVLQ-------------ALEGADVHYESGLAKVSI-VGSGMISNPGVAARV------FEVLADQGIEIKMVS"), 149 []alphabet.Letter("-----------------KRE--MDMLLATGEQVTISLLSMALQEKGYDA--VSYTGWQAGIRTEAIHGNARITDIDTS--VLADQLEK----GKIVIVAGF---QGMTEDCEITTLGRGGSDTTAVA-----------------LAAALKVDKCDIYTDVPGVFTTDP--RYVKSARKLEGISYDEMLELANLGAGVLHPRAVEFAKNYQVPLEVRSSTE-TEAGTLIE--EESSMEQNLIVRGIAFEDQ-ITRVTIYGLTSGLTTLS--------TIFTTLAKRNINVDIIIQTQ--------------------AEDKTG---ISFSVKTEDADQTVAVLEEYK---------DALEFEKIETESKLAKVSI-VGSGMVSNPGVAAEM------FAVLAQKNILIKMVS"), 150 []alphabet.Letter("-----------------ARE--MDMLLTAGERISNALVAMAIESLGAEA--QSFTGSQAGVLTTERHGNARIVDVTPG--RVREALDE----GKICIVAGF--QGVNKETRDVTTLGRGGSDTTAVA-----------------LAAALNADVCEIYSDVDGVYTADP--RIVPNAQKLEKLSFEEMLELAAVGSKILVLRSVEYARAFNVPLRVRSSYS-NDPGTLIAGSMEDIPVEEAVLTGVATDKS-EAKVTVLGISDKPGEAA--------KVFRALADAEINIDMVLQNV--------------------SSVEDGTTDITFTCPRADGRRAMEILKKLQ---------VQGNWTNVLYDDQVDKVSL-VGAGMKSHPGVTAEF------MEALRDVNVNIELIS"), 151 []alphabet.Letter("-----------------PRE--MDMLLTAGERISNALVAMAIESLGAQA--RSFTGSQAGVITTGTHGNAKIIDVTPG--RLRDALDE----GQIVLVAGF--QGVSQDSKDVTTLGRGGSDTTAVA-----------------VAAALDADVCEIYTDVDGIFTADP--RIVPNARHLDTVSFEEMLEMAACGAKVLMLRCVEYARRYNVPIHVRSSYS-DKPGTIVKGSIEDIPMEDAILTGVAHDRS-EAKVTVVGLPDVPGYAA--------KVFRAVAEADVNIDMVLQNI--------------------SKIEDGKTDITFTCARDNGPRAVEKLSALK---------SEIGFSQVLYDDHIGKVSL-IGAGMRSHPGVTATF------CEALAEAGINIDLIS"), 152 []alphabet.Letter("-----------------TSPALTDELVSHGELMSTLLFVEILRERD--V--QAQWFDVRKVMRTNDRFGRAEPDIAALAELAALQLLPR-LNEGLVITQGF---IGSENKGRTTTLGRGGSDYTAAL-----------------LAEALHASRVDIWTDVPGIYTTDP--RVVSAAKRIDEIAFAEAAEMATFGAKVLHPATLLPAVRSDIPVFVGSSKDPRAGGTLVCNKTENPPLFRALAL--RRNQT-LLTLHSLNMLHSRGFLA--------EVFGILARHNISVDLITTSEVSVALTLDTTGSTSTG----------DTLLTQSLLMELSALCRVEVEEGLALVALIG----------NDLSKACGVGKEVF"), 153 []alphabet.Letter("-----------------VSSRTVDLVMSCGEKLSCLFMTALCNDRGCKAKYVDLSHIVPSDFSASALDNSFYTFLVQALKEKLAPFVSA-KERIVPVFTGF---FGLVPTGLLNGVGRGYTDLCAAL-----------------IAVAVNADELQVWKEVDGIFTADP--RKVPEARLLDSVTPEEASELTYYGSEVIHPFTMEQVIRAKIPIRIKNVQNPLGNGTIIYPDNVAKKGESTPPHPPENLSS----SFYEKRKRGATAITTKN----DIFVINIHSNKKTLSHGFLAQIFTILDKYKLVVDLISTSEVHVSMALPIPDADS-LKSLRQAEEKLRILGSVDITKKLSIVSLVGKHMKQYIGIAG---TMFTTLAEEGINIEMIS"), 154 } 155 ) 156 157 func (s *S) TestReadFasta(c *check.C) { 158 var ( 159 obtainNfa []string 160 obtainSfa [][]alphabet.Letter 161 ) 162 163 sc := seqio.NewScanner( 164 fasta.NewReader( 165 bytes.NewBufferString(testaln0), 166 linear.NewSeq("", nil, alphabet.Protein), 167 ), 168 ) 169 for sc.Next() { 170 t := sc.Seq().(*linear.Seq) 171 header := t.Name() 172 if desc := t.Description(); len(desc) > 0 { 173 header += " " + desc 174 } 175 obtainNfa = append(obtainNfa, header) 176 obtainSfa = append(obtainSfa, t.Slice().(alphabet.Letters)) 177 } 178 c.Check(sc.Error(), check.Equals, nil) 179 c.Check(obtainNfa, check.DeepEquals, expectNfa) 180 for i := range obtainSfa { 181 c.Check(len(obtainSfa[i]), check.Equals, len(expectSfa[i])) 182 c.Check(obtainSfa[i], check.DeepEquals, expectSfa[i]) 183 } 184 } 185 186 func (s *S) TestReadFromFunc(c *check.C) { 187 var ( 188 obtainNfa []string 189 obtainSfa [][]alphabet.Letter 190 ) 191 192 sc := seqio.NewScannerFromFunc( 193 fasta.NewReader( 194 bytes.NewBufferString(testaln0), 195 linear.NewSeq("", nil, alphabet.Protein), 196 ).Read, 197 ) 198 for sc.Next() { 199 t := sc.Seq().(*linear.Seq) 200 header := t.Name() 201 if desc := t.Description(); len(desc) > 0 { 202 header += " " + desc 203 } 204 obtainNfa = append(obtainNfa, header) 205 obtainSfa = append(obtainSfa, t.Slice().(alphabet.Letters)) 206 } 207 c.Check(sc.Error(), check.Equals, nil) 208 c.Check(obtainNfa, check.DeepEquals, expectNfa) 209 for i := range obtainSfa { 210 c.Check(len(obtainSfa[i]), check.Equals, len(expectSfa[i])) 211 c.Check(obtainSfa[i], check.DeepEquals, expectSfa[i]) 212 } 213 } 214 215 // Helper 216 func constructQL(l [][]alphabet.Letter, q [][]alphabet.Qphred) (ql [][]alphabet.QLetter) { 217 if len(l) != len(q) { 218 panic("test data length mismatch") 219 } 220 ql = make([][]alphabet.QLetter, len(l)) 221 for i := range ql { 222 if len(l[i]) != len(q[i]) { 223 panic("test data length mismatch") 224 } 225 ql[i] = make([]alphabet.QLetter, len(l[i])) 226 for j := range ql[i] { 227 ql[i][j] = alphabet.QLetter{L: l[i][j], Q: q[i][j]} 228 } 229 } 230 231 return 232 } 233 234 var ( 235 fq0 = `@FC12044_91407_8_200_406_24 236 GTTAGCTCCCACCTTAAGATGTTTA 237 +FC12044_91407_8_200_406_24 238 SXXTXXXXXXXXXTTSUXSSXKTMQ 239 @FC12044_91407_8_200_720_610 240 CTCTGTGGCACCCCATCCCTCACTT 241 +FC12044_91407_8_200_720_610 242 OXXXXXXXXXXXXXXXXXTSXQTXU 243 @FC12044_91407_8_200_345_133 244 GATTTTTTAACAATAAACGTACATA 245 +FC12044_91407_8_200_345_133 246 OQTOOSFORTFFFIIOFFFFFFFFF 247 @FC12044_91407_8_200_106_131 248 GTTGCCCAGGCTCGTCTTGAACTCC 249 +FC12044_91407_8_200_106_131 250 XXXXXXXXXXXXXXSXXXXISTXQS 251 @FC12044_91407_8_200_916_471 252 TGATTGAAGGTAGGGTAGCATACTG 253 +FC12044_91407_8_200_916_471 254 XXXXXXXXXXXXXXXUXXUSXXTXW 255 @FC12044_91407_8_200_57_85 256 GCTCCAATAGCGCAGAGGAAACCTG 257 +FC12044_91407_8_200_57_85 258 XFXMXSXXSXXXOSQROOSROFQIQ 259 @FC12044_91407_8_200_10_437 260 GCTGCTTGGGAGGCTGAGGCAGGAG 261 +FC12044_91407_8_200_10_437 262 USXSXXXXXXUXXXSXQXXUQXXKS 263 @FC12044_91407_8_200_154_436 264 AGACCTTTGGATACAATGAACGACT 265 +FC12044_91407_8_200_154_436 266 MKKMQTSRXMSQTOMRFOOIFFFFF 267 @FC12044_91407_8_200_336_64 268 AGGGAATTTTAGAGGAGGGCTGCCG 269 +FC12044_91407_8_200_336_64 270 STQMOSXSXSQXQXXKXXXKFXFFK 271 @FC12044_91407_8_200_620_233 272 TCTCCATGTTGGTCAGGCTGGTCTC 273 +FC12044_91407_8_200_620_233 274 XXXXXXXXXXXXXXXXXXXXXSXSW 275 @FC12044_91407_8_200_902_349 276 TGAACGTCGAGACGCAAGGCCCGCC 277 +FC12044_91407_8_200_902_349 278 XMXSSXMXXSXQSXTSQXFKSKTOF 279 @FC12044_91407_8_200_40_618 280 CTGTCCCCACGGCGGGGGGGCCTGG 281 +FC12044_91407_8_200_40_618 282 TXXXXSXXXXXXXXXXXXXRKFOXS 283 @FC12044_91407_8_200_83_511 284 GATGTACTCTTACACCCAGACTTTG 285 +FC12044_91407_8_200_83_511 286 SOXXXXXUXXXXXXQKQKKROOQSU 287 @FC12044_91407_8_200_76_246 288 TCAAGGGTGGATCTTGGCTCCCAGT 289 +FC12044_91407_8_200_76_246 290 XTXTUXXXXXRXXXTXXSUXSRFXQ 291 @FC12044_91407_8_200_303_427 292 TTGCGACAGAGTTTTGCTCTTGTCC 293 +FC12044_91407_8_200_303_427 294 XXQROXXXXIXFQXXXOIQSSXUFF 295 @FC12044_91407_8_200_31_299 296 TCTGCTCCAGCTCCAAGACGCCGCC 297 +FC12044_91407_8_200_31_299 298 XRXTSXXXRXXSXQQOXQTSQSXKQ 299 @FC12044_91407_8_200_553_135 300 TACGGAGCCGCGGGCGGGAAAGGCG 301 +FC12044_91407_8_200_553_135 302 XSQQXXXXXXXXXXSXXMFFQXTKU 303 @FC12044_91407_8_200_139_74 304 CCTCCCAGGTTCAAGCGATTATCCT 305 +FC12044_91407_8_200_139_74 306 RMXUSXTXXQXXQUXXXSQISISSO 307 @FC12044_91407_8_200_108_33 308 GTCATGGCGGCCCGCGCGGGGAGCG 309 +FC12044_91407_8_200_108_33 310 OOOSSXXSXXOMKMOFMKFOKFFFF 311 @FC12044_91407_8_200_980_965 312 ACAGTGGGTTCTTAAAGAAGAGTCG 313 +FC12044_91407_8_200_980_965 314 TOSSRXXXSSMSXMOMXIRXOXFFS 315 @FC12044_91407_8_200_981_857 316 AACGAGGGGCGCGACTTGACCTTGG 317 +FC12044_91407_8_200_981_857 318 RXMSSXXXXSXQXQXFSXQFQKMXS 319 @FC12044_91407_8_200_8_865 320 TTTCCCACCCCAGGAAGCCTTGGAC 321 +FC12044_91407_8_200_8_865 322 XXXFKOROMKOORMIMRIIKKORFF 323 @FC12044_91407_8_200_292_484 324 TCAGCCTCCGTGCCCAGCCCACTCC 325 +FC12044_91407_8_200_292_484 326 XQXOSXXXXXUXXXXIXXXXQTOXF 327 @FC12044_91407_8_200_675_16 328 CTCGGGAGGCTGAGGCAGGGGGGTT 329 +FC12044_91407_8_200_675_16 330 OXTXXXSXXQXXOXXKMXXMXOKQF 331 @FC12044_91407_8_200_285_136 332 CCAAATCTTGAATTGTAGCTCCCCT 333 +FC12044_91407_8_200_285_136 334 OSXOQXXXXXSXXUXXTXXXXTRMS 335 ` 336 337 expectNfq = []string{ 338 "FC12044_91407_8_200_406_24", 339 "FC12044_91407_8_200_720_610", 340 "FC12044_91407_8_200_345_133", 341 "FC12044_91407_8_200_106_131", 342 "FC12044_91407_8_200_916_471", 343 "FC12044_91407_8_200_57_85", 344 "FC12044_91407_8_200_10_437", 345 "FC12044_91407_8_200_154_436", 346 "FC12044_91407_8_200_336_64", 347 "FC12044_91407_8_200_620_233", 348 "FC12044_91407_8_200_902_349", 349 "FC12044_91407_8_200_40_618", 350 "FC12044_91407_8_200_83_511", 351 "FC12044_91407_8_200_76_246", 352 "FC12044_91407_8_200_303_427", 353 "FC12044_91407_8_200_31_299", 354 "FC12044_91407_8_200_553_135", 355 "FC12044_91407_8_200_139_74", 356 "FC12044_91407_8_200_108_33", 357 "FC12044_91407_8_200_980_965", 358 "FC12044_91407_8_200_981_857", 359 "FC12044_91407_8_200_8_865", 360 "FC12044_91407_8_200_292_484", 361 "FC12044_91407_8_200_675_16", 362 "FC12044_91407_8_200_285_136", 363 } 364 365 expectSfq = [][]alphabet.Letter{ 366 []alphabet.Letter("GTTAGCTCCCACCTTAAGATGTTTA"), 367 []alphabet.Letter("CTCTGTGGCACCCCATCCCTCACTT"), 368 []alphabet.Letter("GATTTTTTAACAATAAACGTACATA"), 369 []alphabet.Letter("GTTGCCCAGGCTCGTCTTGAACTCC"), 370 []alphabet.Letter("TGATTGAAGGTAGGGTAGCATACTG"), 371 []alphabet.Letter("GCTCCAATAGCGCAGAGGAAACCTG"), 372 []alphabet.Letter("GCTGCTTGGGAGGCTGAGGCAGGAG"), 373 []alphabet.Letter("AGACCTTTGGATACAATGAACGACT"), 374 []alphabet.Letter("AGGGAATTTTAGAGGAGGGCTGCCG"), 375 []alphabet.Letter("TCTCCATGTTGGTCAGGCTGGTCTC"), 376 []alphabet.Letter("TGAACGTCGAGACGCAAGGCCCGCC"), 377 []alphabet.Letter("CTGTCCCCACGGCGGGGGGGCCTGG"), 378 []alphabet.Letter("GATGTACTCTTACACCCAGACTTTG"), 379 []alphabet.Letter("TCAAGGGTGGATCTTGGCTCCCAGT"), 380 []alphabet.Letter("TTGCGACAGAGTTTTGCTCTTGTCC"), 381 []alphabet.Letter("TCTGCTCCAGCTCCAAGACGCCGCC"), 382 []alphabet.Letter("TACGGAGCCGCGGGCGGGAAAGGCG"), 383 []alphabet.Letter("CCTCCCAGGTTCAAGCGATTATCCT"), 384 []alphabet.Letter("GTCATGGCGGCCCGCGCGGGGAGCG"), 385 []alphabet.Letter("ACAGTGGGTTCTTAAAGAAGAGTCG"), 386 []alphabet.Letter("AACGAGGGGCGCGACTTGACCTTGG"), 387 []alphabet.Letter("TTTCCCACCCCAGGAAGCCTTGGAC"), 388 []alphabet.Letter("TCAGCCTCCGTGCCCAGCCCACTCC"), 389 []alphabet.Letter("CTCGGGAGGCTGAGGCAGGGGGGTT"), 390 []alphabet.Letter("CCAAATCTTGAATTGTAGCTCCCCT"), 391 } 392 393 expectQ = [][]alphabet.Qphred{ 394 {50, 55, 55, 51, 55, 55, 55, 55, 55, 55, 55, 55, 55, 51, 51, 50, 52, 55, 50, 50, 55, 42, 51, 44, 48}, 395 {46, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 51, 50, 55, 48, 51, 55, 52}, 396 {46, 48, 51, 46, 46, 50, 37, 46, 49, 51, 37, 37, 37, 40, 40, 46, 37, 37, 37, 37, 37, 37, 37, 37, 37}, 397 {55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 50, 55, 55, 55, 55, 40, 50, 51, 55, 48, 50}, 398 {55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 52, 55, 55, 52, 50, 55, 55, 51, 55, 54}, 399 {55, 37, 55, 44, 55, 50, 55, 55, 50, 55, 55, 55, 46, 50, 48, 49, 46, 46, 50, 49, 46, 37, 48, 40, 48}, 400 {52, 50, 55, 50, 55, 55, 55, 55, 55, 55, 52, 55, 55, 55, 50, 55, 48, 55, 55, 52, 48, 55, 55, 42, 50}, 401 {44, 42, 42, 44, 48, 51, 50, 49, 55, 44, 50, 48, 51, 46, 44, 49, 37, 46, 46, 40, 37, 37, 37, 37, 37}, 402 {50, 51, 48, 44, 46, 50, 55, 50, 55, 50, 48, 55, 48, 55, 55, 42, 55, 55, 55, 42, 37, 55, 37, 37, 42}, 403 {55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 50, 55, 50, 54}, 404 {55, 44, 55, 50, 50, 55, 44, 55, 55, 50, 55, 48, 50, 55, 51, 50, 48, 55, 37, 42, 50, 42, 51, 46, 37}, 405 {51, 55, 55, 55, 55, 50, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 49, 42, 37, 46, 55, 50}, 406 {50, 46, 55, 55, 55, 55, 55, 52, 55, 55, 55, 55, 55, 55, 48, 42, 48, 42, 42, 49, 46, 46, 48, 50, 52}, 407 {55, 51, 55, 51, 52, 55, 55, 55, 55, 55, 49, 55, 55, 55, 51, 55, 55, 50, 52, 55, 50, 49, 37, 55, 48}, 408 {55, 55, 48, 49, 46, 55, 55, 55, 55, 40, 55, 37, 48, 55, 55, 55, 46, 40, 48, 50, 50, 55, 52, 37, 37}, 409 {55, 49, 55, 51, 50, 55, 55, 55, 49, 55, 55, 50, 55, 48, 48, 46, 55, 48, 51, 50, 48, 50, 55, 42, 48}, 410 {55, 50, 48, 48, 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, 50, 55, 55, 44, 37, 37, 48, 55, 51, 42, 52}, 411 {49, 44, 55, 52, 50, 55, 51, 55, 55, 48, 55, 55, 48, 52, 55, 55, 55, 50, 48, 40, 50, 40, 50, 50, 46}, 412 {46, 46, 46, 50, 50, 55, 55, 50, 55, 55, 46, 44, 42, 44, 46, 37, 44, 42, 37, 46, 42, 37, 37, 37, 37}, 413 {51, 46, 50, 50, 49, 55, 55, 55, 50, 50, 44, 50, 55, 44, 46, 44, 55, 40, 49, 55, 46, 55, 37, 37, 50}, 414 {49, 55, 44, 50, 50, 55, 55, 55, 55, 50, 55, 48, 55, 48, 55, 37, 50, 55, 48, 37, 48, 42, 44, 55, 50}, 415 {55, 55, 55, 37, 42, 46, 49, 46, 44, 42, 46, 46, 49, 44, 40, 44, 49, 40, 40, 42, 42, 46, 49, 37, 37}, 416 {55, 48, 55, 46, 50, 55, 55, 55, 55, 55, 52, 55, 55, 55, 55, 40, 55, 55, 55, 55, 48, 51, 46, 55, 37}, 417 {46, 55, 51, 55, 55, 55, 50, 55, 55, 48, 55, 55, 46, 55, 55, 42, 44, 55, 55, 44, 55, 46, 42, 48, 37}, 418 {46, 50, 55, 46, 48, 55, 55, 55, 55, 55, 50, 55, 55, 52, 55, 55, 51, 55, 55, 55, 55, 51, 49, 44, 50}, 419 } 420 421 expectQL = constructQL(expectSfq, expectQ) 422 ) 423 424 func (s *S) TestReadFastq(c *check.C) { 425 var ( 426 obtainNfq []string 427 obtainQL [][]alphabet.QLetter 428 ) 429 430 sc := seqio.NewScanner( 431 fastq.NewReader( 432 bytes.NewBufferString(fq0), 433 linear.NewQSeq("", nil, alphabet.DNA, alphabet.Sanger), 434 ), 435 ) 436 for sc.Next() { 437 t := sc.Seq().(*linear.QSeq) 438 header := t.Name() 439 if desc := t.Description(); len(desc) > 0 { 440 header += " " + desc 441 } 442 obtainNfq = append(obtainNfq, header) 443 obtainQL = append(obtainQL, (t.Slice().(alphabet.QLetters))) 444 } 445 c.Check(sc.Error(), check.Equals, nil) 446 c.Check(obtainNfq, check.DeepEquals, expectNfq) 447 c.Check(obtainQL, check.DeepEquals, expectQL) 448 }