github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/csi/csi_test.go (about) 1 // Copyright ©2015 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csi 6 7 import ( 8 "bytes" 9 "testing" 10 11 "github.com/Schaudge/hts/bgzf" 12 "github.com/Schaudge/hts/bgzf/index" 13 14 "gopkg.in/check.v1" 15 ) 16 17 func Test(t *testing.T) { check.TestingT(t) } 18 19 type S struct{} 20 21 var _ = check.Suite(&S{}) 22 23 // conceptualCSIv1data is an uncompressed CSIv1 for the alignments in the BAM 24 // corresponding to: 25 // 26 // @HD VN:1.0 SO:coordinate 27 // @SQ SN:conceptual LN:134217728 28 // 60m66m:bin0 0 conceptual 62914561 40 6291456M * 0 0 * * 29 // 70m76m:bin2 0 conceptual 73400321 40 6291456M * 0 0 * * 30 // 73m75m:bin18 0 conceptual 76546049 40 2097152M * 0 0 * * 31 // 32 // This is a coordinate-translated version of the conceptual example in the 33 // SAM spec using binning as actually used by BAM rather than as presented. 34 /* 35 0x43, 0x53, 0x49, 0x01, // Index\1 36 0x0e, 0x00, 0x00, 0x00, // min_shift 37 0x05, 0x00, 0x00, 0x00, // depth 38 0x00, 0x00, 0x00, 0x00, // l_aux 39 40 // no aux 41 42 0x01, 0x00, 0x00, 0x00, // n_ref 43 44 0x02, 0x00, 0x00, 0x00, // n_bin 45 46 0x00, 0x00, 0x00, 0x00, // bin 47 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset 48 0x01, 0x00, 0x00, 0x00, // n_chunk 49 50 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_beg {101,0} 51 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_end {228,0} 52 53 // Not mentioned in the Index spec. 54 0x4a, 0x92, 0x00, 0x00, // bin 55 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset 56 0x02, 0x00, 0x00, 0x00, // n_chunk - always 2 57 58 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_beg 59 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_end 60 61 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mapped_count 62 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_count 63 64 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_no_coor 65 */ 66 var conceptualCSIv1data = []byte{ 67 0x43, 0x53, 0x49, 0x01, 0x0e, 0x00, 0x00, 0x00, 68 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 69 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 70 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 71 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 72 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 73 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 74 0x4a, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 75 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 76 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 77 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 78 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 79 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 80 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 81 } 82 83 var chunkTests = []struct { 84 beg, end int 85 expect []bgzf.Chunk 86 }{ 87 { 88 beg: 65000, end: 71000, // Index does not use tiles, so this is hit. 89 expect: []bgzf.Chunk{ 90 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 91 }, 92 }, 93 { 94 beg: 77594624, end: 80740352, // 73m77m:bin2+bin18 - This is the equivalent to the given example. 95 expect: []bgzf.Chunk{ 96 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 97 }, 98 }, 99 { 100 beg: 62914561, end: 68157440, // 60m65m:bin0+bin2 101 expect: []bgzf.Chunk{ 102 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 103 }, 104 }, 105 { 106 beg: 72351744, end: 80740352, // 69m77m:bin0+bin2+18 107 expect: []bgzf.Chunk{ 108 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 109 }, 110 }, 111 { 112 beg: 61865984, end: 80740352, // 59m77m:bin0+bin2+bin18 113 expect: []bgzf.Chunk{ 114 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 115 }, 116 }, 117 { 118 beg: 80740352, end: 81788928, // 77m78m - Not in covered region, but Index does not use tiles, so this is hit. 119 expect: []bgzf.Chunk{ 120 {Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}}, 121 }, 122 }, 123 } 124 125 func (s *S) TestConceptualCSIv1(c *check.C) { 126 csi, err := ReadFrom(bytes.NewReader(conceptualCSIv1data)) 127 c.Assert(err, check.Equals, nil) 128 129 for _, test := range chunkTests { 130 c.Check(csi.Chunks(0, test.beg, test.end), check.DeepEquals, test.expect, 131 check.Commentf("Unexpected result for [%d,%d).", test.beg, test.end), 132 ) 133 } 134 stats, ok := csi.ReferenceStats(0) 135 c.Check(ok, check.Equals, true) 136 c.Check(stats, check.Equals, index.ReferenceStats{ 137 Chunk: bgzf.Chunk{ 138 Begin: bgzf.Offset{File: 101, Block: 0}, 139 End: bgzf.Offset{File: 228, Block: 0}, 140 }, 141 Mapped: 3, Unmapped: 0}) 142 unmapped, ok := csi.Unmapped() 143 c.Check(ok, check.Equals, true) 144 c.Check(unmapped, check.Equals, uint64(0)) 145 } 146 147 // conceptualCSIv2data is an uncompressed CSIv1 for the alignments in the BAM 148 // corresponding to: 149 // 150 // @HD VN:1.0 SO:coordinate 151 // @SQ SN:conceptual LN:134217728 152 // 60m66m:bin0 0 conceptual 62914561 40 6291456M * 0 0 * * 153 // 70m76m:bin2 0 conceptual 73400321 40 6291456M * 0 0 * * 154 // 73m75m:bin18 0 conceptual 76546049 40 2097152M * 0 0 * * 155 // 156 // This is a coordinate-translated version of the conceptual example in the 157 // SAM spec using binning as actually used by BAM rather than as presented. 158 /* 159 0x43, 0x53, 0x49, 0x02, // Index\1 160 0x0e, 0x00, 0x00, 0x00, // min_shift 161 0x05, 0x00, 0x00, 0x00, // depth 162 0x00, 0x00, 0x00, 0x00, // l_aux 163 164 // no aux 165 166 0x01, 0x00, 0x00, 0x00, // n_ref 167 168 0x02, 0x00, 0x00, 0x00, // n_bin 169 170 0x00, 0x00, 0x00, 0x00, // bin 171 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset 172 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_rec 173 0x01, 0x00, 0x00, 0x00, // n_chunk 174 175 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_beg 176 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_end 177 178 0x4a, 0x92, 0x00, 0x00, // bin 179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset 180 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_rec 181 0x02, 0x00, 0x00, 0x00, // n_chunk 182 183 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_beg 184 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_end 185 186 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mapped_count 187 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_count 188 189 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 190 */ 191 var conceptualCSIv2data = []byte{ 192 0x43, 0x53, 0x49, 0x02, 0x0e, 0x00, 0x00, 0x00, 193 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 194 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 195 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 196 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 197 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 198 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 199 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 200 0x4a, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 201 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 202 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 203 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 204 0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, 205 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 206 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 207 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 208 } 209 210 func (s *S) TestConceptualCSIv2(c *check.C) { 211 csi, err := ReadFrom(bytes.NewReader(conceptualCSIv2data)) 212 c.Assert(err, check.Equals, nil) 213 214 for _, test := range chunkTests { 215 c.Check(csi.Chunks(0, test.beg, test.end), check.DeepEquals, test.expect, 216 check.Commentf("Unexpected result for [%d,%d).", test.beg, test.end), 217 ) 218 } 219 stats, ok := csi.ReferenceStats(0) 220 c.Check(ok, check.Equals, true) 221 c.Check(stats, check.Equals, index.ReferenceStats{ 222 Chunk: bgzf.Chunk{ 223 Begin: bgzf.Offset{File: 101, Block: 0}, 224 End: bgzf.Offset{File: 228, Block: 0}, 225 }, 226 Mapped: 3, Unmapped: 0}) 227 unmapped, ok := csi.Unmapped() 228 c.Check(ok, check.Equals, true) 229 c.Check(unmapped, check.Equals, uint64(0)) 230 } 231 232 func uint64ptr(i uint64) *uint64 { 233 return &i 234 } 235 236 var csiTestData = []struct { 237 csi *Index 238 err error 239 }{ 240 { 241 csi: &Index{ 242 minShift: 14, depth: 5, 243 refs: []refIndex{ 244 { 245 bins: []bin{ 246 { 247 bin: 4681, left: bgzf.Offset{File: 98, Block: 0}, 248 chunks: []bgzf.Chunk{ 249 {Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 250 }, 251 }, 252 }, 253 stats: &index.ReferenceStats{ 254 Chunk: bgzf.Chunk{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 255 Mapped: 8, 256 Unmapped: 1, 257 }, 258 }, 259 }, 260 unmapped: uint64ptr(1), 261 isSorted: true, 262 }, 263 err: nil, 264 }, 265 { 266 csi: &Index{ 267 minShift: 14, depth: 5, 268 refs: []refIndex{ 269 { 270 bins: []bin{ 271 { 272 bin: 4681, left: bgzf.Offset{File: 98, Block: 0}, 273 chunks: []bgzf.Chunk{ 274 {Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 275 }, 276 }, 277 }, 278 stats: &index.ReferenceStats{ 279 Chunk: bgzf.Chunk{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 280 Mapped: 8, 281 Unmapped: 1, 282 }, 283 }, 284 }, 285 unmapped: nil, 286 isSorted: true, 287 }, 288 err: nil, 289 }, 290 { 291 csi: &Index{ 292 minShift: 14, depth: 5, 293 refs: []refIndex{ 294 { 295 bins: []bin{ 296 { 297 bin: 4681, left: bgzf.Offset{File: 98, Block: 0}, 298 chunks: []bgzf.Chunk{ 299 {Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 300 }, 301 }, 302 }, 303 stats: nil, 304 }, 305 }, 306 unmapped: uint64ptr(1), 307 isSorted: true, 308 }, 309 err: nil, 310 }, 311 { 312 csi: &Index{ 313 minShift: 14, depth: 5, 314 refs: []refIndex{ 315 { 316 bins: []bin{ 317 { 318 bin: 4681, left: bgzf.Offset{File: 98, Block: 0}, 319 chunks: []bgzf.Chunk{ 320 {Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}}, 321 }, 322 }, 323 }, 324 stats: nil, 325 }, 326 }, 327 unmapped: nil, 328 isSorted: true, 329 }, 330 err: nil, 331 }, 332 { 333 csi: &Index{ 334 Auxilliary: []byte("Various commentary strings"), 335 minShift: 14, depth: 5, 336 refs: func() []refIndex { 337 idx := [86]refIndex{ 338 23: { 339 bins: []bin{ 340 { 341 bin: 0x2070, 342 chunks: []bgzf.Chunk{ 343 { 344 Begin: bgzf.Offset{File: 0x1246, Block: 0x0}, 345 End: bgzf.Offset{File: 0x1246, Block: 0x1cf9}, 346 }, 347 }, 348 }, 349 }, 350 stats: &index.ReferenceStats{ 351 Chunk: bgzf.Chunk{ 352 Begin: bgzf.Offset{File: 0x1246, Block: 0x0}, 353 End: bgzf.Offset{File: 0x1246, Block: 0x1cf9}, 354 }, 355 Mapped: 0, Unmapped: 0x1d, 356 }, 357 }, 358 24: { 359 bins: []bin{ 360 { 361 bin: 0x124a, 362 chunks: []bgzf.Chunk{ 363 { 364 Begin: bgzf.Offset{File: 0x1246, Block: 0x1cf9}, 365 End: bgzf.Offset{File: 0x1246, Block: 0x401d}, 366 }, 367 }, 368 }, 369 }, 370 stats: &index.ReferenceStats{ 371 Chunk: bgzf.Chunk{ 372 Begin: bgzf.Offset{File: 0x1246, Block: 0x1cf9}, 373 End: bgzf.Offset{File: 0x1246, Block: 0x401d}, 374 }, 375 Mapped: 0, Unmapped: 0x25, 376 }, 377 }, 378 72: { 379 bins: []bin{ 380 { 381 bin: 0x1253, 382 chunks: []bgzf.Chunk{ 383 { 384 Begin: bgzf.Offset{File: 0x1246, Block: 0x401d}, 385 End: bgzf.Offset{File: 0x1246, Block: 0x41f5}, 386 }, 387 }, 388 }, 389 }, 390 stats: &index.ReferenceStats{ 391 Chunk: bgzf.Chunk{ 392 Begin: bgzf.Offset{File: 0x1246, Block: 0x401d}, 393 End: bgzf.Offset{File: 0x1246, Block: 0x41f5}, 394 }, 395 Mapped: 0, Unmapped: 0x2, 396 }, 397 }, 398 } 399 return idx[:] 400 }(), 401 unmapped: uint64ptr(932), 402 isSorted: true, 403 }, 404 err: nil, 405 }, 406 } 407 408 func (s *S) TestCSIRoundtrip(c *check.C) { 409 for i, test := range csiTestData { 410 for test.csi.Version = 1; test.csi.Version <= 2; test.csi.Version++ { 411 var buf bytes.Buffer 412 c.Assert(WriteTo(&buf, test.csi), check.Equals, nil) 413 got, err := ReadFrom(&buf) 414 c.Assert(err, check.Equals, nil, check.Commentf("Test %d", i)) 415 c.Check(got, check.DeepEquals, test.csi, check.Commentf("Test %d", i)) 416 } 417 } 418 }