github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/index/index_test.go

// Copyright ©2013 The bíogo Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package index

import (
	"bytes"
	"flag"
	"io"
	"strings"
	"testing"

	"github.com/Schaudge/hts/bgzf"

	"gopkg.in/check.v1"
)

var conc = flag.Int("conc", 1, "sets the level of concurrency for compression")

func Test(t *testing.T) { check.TestingT(t) }

type S struct{}

var _ = check.Suite(&S{})

// conceptualBAMdata is the BAM corresponding to:
//
//	@HD	VN:1.0	SO:coordinate
//	@SQ	SN:conceptual	LN:134217728
//	60m66m:bin0	0	conceptual	62914561	40	6291456M	*	0	0	*	*
//	70m76m:bin2	0	conceptual	73400321	40	6291456M	*	0	0	*	*
//	73m75m:bin18	0	conceptual	76546049	40	2097152M	*	0	0	*	*
//
// This is a coordinate-translated version of the conceptual example in the
// SAM spec using binning as actually used by BAM rather than as presented.
var conceptualBAMdata = []byte{
	// sam.Header block [{File:0, Block:0}, {File:0, Block:87}).
	0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
	0x06, 0x00, 0x42, 0x43, 0x02, 0x00, 0x64, 0x00, 0x73, 0x72,
	0xf4, 0x65, 0xb4, 0x60, 0x60, 0x60, 0x70, 0xf0, 0x70, 0xe1,
	0x0c, 0xf3, 0xb3, 0x32, 0xd4, 0x33, 0xe0, 0x0c, 0xf6, 0xb7,
	0x4a, 0xce, 0xcf, 0x2f, 0x4a, 0xc9, 0xcc, 0x4b, 0x2c, 0x49,
	0xe5, 0x72, 0x08, 0x0e, 0xe4, 0x0c, 0xf6, 0x03, 0x8a, 0xe4,
	0x25, 0xa7, 0x16, 0x94, 0x94, 0x26, 0xe6, 0x70, 0xfa, 0x00,
	0x95, 0x19, 0x9b, 0x18, 0x19, 0x9a, 0x9b, 0x1b, 0x59, 0x70,
	0x31, 0x02, 0xf5, 0x72, 0x03, 0x31, 0x42, 0x1e, 0xc8, 0x61,
	0xe0, 0x00, 0x00, 0x42, 0x51, 0xcc, 0xea, 0x57, 0x00, 0x00,
	0x00,

	// Record block [{File:101, Block:0}, {File:101, Block:157}).
	0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
	0x06, 0x00, 0x42, 0x43, 0x02, 0x00, 0x62, 0x00, 0x33, 0x60,
	0x80, 0x81, 0x03, 0xcc, 0x3c, 0x1a, 0x0c, 0x0c, 0x8c, 0x50,
	0xde, 0x7f, 0x28, 0x00, 0xb1, 0xcd, 0x0c, 0x72, 0xcd, 0xcc,
	0x72, 0xad, 0x92, 0x32, 0xf3, 0x0c, 0x40, 0x5c, 0x36, 0x03,
	0xb8, 0x9e, 0x04, 0x16, 0x1e, 0x0d, 0x26, 0xac, 0x7a, 0xcc,
	0x0d, 0x72, 0xcd, 0x21, 0x7a, 0x8c, 0xc0, 0x7a, 0x0c, 0xe1,
	0x7a, 0x26, 0xb0, 0xf0, 0x6a, 0x08, 0x61, 0xd7, 0x63, 0x9c,
	0x6b, 0x6e, 0x0a, 0xd6, 0x63, 0x68, 0x01, 0xe2, 0x33, 0x01,
	0x00, 0x5a, 0x80, 0xfe, 0xec, 0x9d, 0x00, 0x00, 0x00,

	// Magic block [{File:200, Block:0}, {File:200, Block:0}).
	0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff,
	0x06, 0x00, 0x42, 0x43, 0x02, 0x00, 0x1b, 0x00, 0x03, 0x00,
	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

	// End {File:228, Block:0}
}

var conceptualChunks = []bgzf.Chunk{
	{Begin: bgzf.Offset{File: 0, Block: 0}, End: bgzf.Offset{File: 0, Block: 87}},        // header
	{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 101, Block: 52}},    // 60m66m:bin0
	{Begin: bgzf.Offset{File: 101, Block: 52}, End: bgzf.Offset{File: 101, Block: 104}},  // 70m76m:bin2
	{Begin: bgzf.Offset{File: 101, Block: 104}, End: bgzf.Offset{File: 101, Block: 157}}, // 73m75m:bin18
	{Begin: bgzf.Offset{File: 228, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},     // EOF
}

// Test for issue #6 https://github.com/biogo/hts/issues/6
func (s *S) TestChunkReader(c *check.C) {
	br, err := bgzf.NewReader(bytes.NewReader(conceptualBAMdata), *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()
	cr, err := NewChunkReader(br, conceptualChunks)
	c.Assert(err, check.Equals, nil)
	defer cr.Close()
	// 2 is shorter than the length of the first block.
	// This panics prior to the fix.
	n, err := cr.Read(make([]byte, 2))
	c.Check(n, check.Equals, 2)
	c.Check(err, check.Equals, nil)
}

// Test for issue #8 https://github.com/biogo/hts/issues/8
func (s *S) TestIssue8(c *check.C) {
	br, err := bgzf.NewReader(bytes.NewReader(conceptualBAMdata), *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()
	cr, err := NewChunkReader(br, conceptualChunks[:2])
	c.Assert(err, check.Equals, nil)
	defer cr.Close()
	var last []byte
	for {
		p := make([]byte, 1024)
		n, err := cr.Read(p)
		if n != 0 {
			c.Check(p[:n], check.Not(check.DeepEquals), last[:min(n, len(last))])
		}
		last = p
		if err != nil {
			if err == io.EOF {
				break
			}
			c.Fatalf("unexpected error: %v", err)
		}
	}
}

// issue10Tests are test cases for https://github.com/biogo/hts/issues/10.
var issue10Tests = []struct {
	words     []wordBlocks
	chunks    []string
	canSquash bool
	canTrunc  bool
}{
	{
		// This is semantically identical to the test case given in issue 10.
		words:     commonWords,
		chunks:    []string{"<three>", "<five>"},
		canSquash: true,
		canTrunc:  false,
	},
	{
		words:     commonWords,
		chunks:    []string{"<one>", "<two>", "<three>"},
		canSquash: true,
		canTrunc:  false,
	},
	{
		words:     commonWords,
		chunks:    []string{"<two>", "<three>", "<four>", "<five>"},
		canSquash: true,
		canTrunc:  true,
	},
	{
		words:     commonWords,
		chunks:    []string{"<three>", "<four>"},
		canSquash: true,
		canTrunc:  true,
	},
	{
		words:     commonWords,
		chunks:    []string{"<seven>", "<eight>"},
		canSquash: true,
		canTrunc:  true,
	},
	{
		words:     commonWords,
		chunks:    []string{"<zero>", "<one>", "<two>", "<three>", "<four>", "<five>", "<six>", "<seven>", "<eight>"},
		canSquash: true,
		canTrunc:  true,
	},
	{
		// This case would never happen with an htslib-like index, but
		// it is a possible use case and not prohibited, so test it.
		words:  commonWords,
		chunks: []string{"<three>", "<zero>", "<five>", "<seven>", "<two>", "<eight>", "<five>"},

		// Not in order.
		canSquash: false,
		canTrunc:  false,
	},
}

var commonWords = []wordBlocks{
	// Begin:{File:0 Block:0} End:{File:0 Block:6}
	// Begin:{File:0 Block:6} End:{File:0 Block:11}
	{word: "<zero>"}, {word: "<one>", flush: true},
	// Begin:{File:43 Block:0} End:{File:43 Block:5}
	// Begin:{File:43 Block:5} End:{File:43 Block:12}
	// Begin:{File:43 Block:12} End:{File:43 Block:18}
	{word: "<two>"}, {word: "<three>"}, {word: "<four>", flush: true},
	// Begin:{File:93 Block:0} End:{File:93 Block:6}
	// Begin:{File:93 Block:6} End:{File:93 Block:11}
	{word: "<five>"}, {word: "<six>"}, {word: "<seven>", flush: true},
	// Begin:{File:142 Block:0} End:{File:142 Block:7}
	{word: "<eight>"},
}

type wordBlocks struct {
	word  string
	flush bool
}

type word int

func (w word) RefID() int { return 0 }
func (w word) Start() int { return int(w) }
func (w word) End() int   { return int(w + 1) }

func (s *S) TestIssue10(c *check.C) {
	for _, test := range issue10Tests {
		var buf bytes.Buffer

		// Write the set of words to a bgzf stream.
		w := bgzf.NewWriter(&buf, *conc)
		for _, wb := range test.words {
			w.Write([]byte(wb.word))
			if wb.flush {
				w.Flush()
			}
		}
		w.Close()

		for _, strategy := range []MergeStrategy{nil, adjacent} {
			if strategy != nil && !test.canSquash {
				continue
			}
			for _, clean := range []bool{false, true} {
				for _, truncFinal := range []bool{false, true} {
					if truncFinal && !test.canTrunc {
						continue
					}
					// Build an index into the words.
					r, err := bgzf.NewReader(bytes.NewReader(buf.Bytes()), *conc)
					c.Assert(err, check.Equals, nil)
					idx := make(map[string]bgzf.Chunk)
					for i, wb := range test.words {
						p := make([]byte, len(wb.word))
						n, err := r.Read(p)
						c.Assert(err, check.Equals, nil)
						c.Assert(string(p[:n]), check.Equals, wb.word)

						last := r.LastChunk()
						if !clean {
							// This simulates the index construction behaviour
							// that appears to be what is done by htslib. The
							// behaviour of bgzf is to elide seeks that will not
							// result in a productive read.
							if i != 0 && test.words[i-1].flush {
								last.Begin = idx[test.words[i-1].word].End
							}
						}
						idx[wb.word] = last
					}

					var chunks []bgzf.Chunk
					for _, w := range test.chunks {
						chunks = append(chunks, idx[w])
					}
					var want string
					if truncFinal {
						want = strings.Join(test.chunks[:len(test.chunks)-1], "")
						chunks[len(chunks)-2].End = chunks[len(chunks)-1].Begin
						chunks = chunks[:len(chunks)-1]
					} else {
						want = strings.Join(test.chunks, "")
					}

					if strategy != nil {
						chunks = strategy(chunks)
					}
					cr, err := NewChunkReader(r, chunks)
					c.Assert(err, check.Equals, nil)

					var got bytes.Buffer
					io.Copy(&got, cr)
					c.Check(got.String(), check.Equals, want,
						check.Commentf("clean=%t merge=%t trunc=%t chunks=%+v", clean, strategy != nil, truncFinal, chunks),
					)
				}
			}
		}
	}
}
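
// TestChunkReaderFullRecords is an illustrative sketch added by the editor,
// not part of the original test suite. It shows the read path exercised by
// the fixtures above: open a bgzf.Reader over conceptualBAMdata, select a
// subset of conceptualChunks, and drain them through a ChunkReader. The
// expected length of 157 bytes is assumed from the contiguous record chunk
// offsets {File:101, Block:0} through {File:101, Block:157} documented above.
func (s *S) TestChunkReaderFullRecords(c *check.C) {
	br, err := bgzf.NewReader(bytes.NewReader(conceptualBAMdata), *conc)
	c.Assert(err, check.Equals, nil)
	defer br.Close()

	// Skip the header and EOF chunks; read only the three record chunks.
	cr, err := NewChunkReader(br, conceptualChunks[1:4])
	c.Assert(err, check.Equals, nil)
	defer cr.Close()

	var got bytes.Buffer
	_, err = io.Copy(&got, cr)
	c.Check(err, check.Equals, nil)
	c.Check(got.Len(), check.Equals, 157)
}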