github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/csi/csi_test.go (about)

     1  // Copyright ©2015 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csi
     6  
     7  import (
     8  	"bytes"
     9  	"testing"
    10  
    11  	"github.com/Schaudge/hts/bgzf"
    12  	"github.com/Schaudge/hts/bgzf/index"
    13  
    14  	"gopkg.in/check.v1"
    15  )
    16  
    17  func Test(t *testing.T) { check.TestingT(t) }
    18  
    19  type S struct{}
    20  
    21  var _ = check.Suite(&S{})
    22  
    23  // conceptualCSIv1data is an uncompressed CSIv1 for the alignments in the BAM
    24  // corresponding to:
    25  //
    26  // @HD	VN:1.0	SO:coordinate
    27  // @SQ	SN:conceptual	LN:134217728
    28  // 60m66m:bin0	0	conceptual	62914561	40	6291456M	*	0	0	*	*
    29  // 70m76m:bin2	0	conceptual	73400321	40	6291456M	*	0	0	*	*
    30  // 73m75m:bin18	0	conceptual	76546049	40	2097152M	*	0	0	*	*
    31  //
    32  // This is a coordinate-translated version of the conceptual example in the
    33  // SAM spec using binning as actually used by BAM rather than as presented.
    34  /*
    35  	0x43, 0x53, 0x49, 0x01, // Index\1
    36  	0x0e, 0x00, 0x00, 0x00, // min_shift
    37  	0x05, 0x00, 0x00, 0x00, // depth
    38  	0x00, 0x00, 0x00, 0x00, // l_aux
    39  
    40  	// no aux
    41  
    42  	0x01, 0x00, 0x00, 0x00, // n_ref
    43  
    44  		0x02, 0x00, 0x00, 0x00, // n_bin
    45  
    46  			0x00, 0x00, 0x00, 0x00, // bin
    47  			0x00, 0x00, 0x65, 0x00,	0x00, 0x00, 0x00, 0x00, // loffset
    48  			0x01, 0x00, 0x00, 0x00, // n_chunk
    49  
    50  				0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_beg {101,0}
    51  				0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_end {228,0}
    52  
    53  			// Not mentioned in the Index spec.
    54  			0x4a, 0x92, 0x00, 0x00, // bin
    55  			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset
    56  			0x02, 0x00, 0x00, 0x00, // n_chunk - always 2
    57  
    58  				0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_beg
    59  				0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_end
    60  
    61  				0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mapped_count
    62  				0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_count
    63  
    64  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_no_coor
    65  */
    66  var conceptualCSIv1data = []byte{
    67  	0x43, 0x53, 0x49, 0x01, 0x0e, 0x00, 0x00, 0x00,
    68  	0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    69  	0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
    70  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00,
    71  	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
    72  	0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00,
    73  	0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00,
    74  	0x4a, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    75  	0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
    76  	0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00,
    77  	0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00,
    78  	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    79  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    80  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    81  }
    82  
    83  var chunkTests = []struct {
    84  	beg, end int
    85  	expect   []bgzf.Chunk
    86  }{
    87  	{
    88  		beg: 65000, end: 71000, // Index does not use tiles, so this is hit.
    89  		expect: []bgzf.Chunk{
    90  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
    91  		},
    92  	},
    93  	{
    94  		beg: 77594624, end: 80740352, // 73m77m:bin2+bin18 - This is the equivalent to the given example.
    95  		expect: []bgzf.Chunk{
    96  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
    97  		},
    98  	},
    99  	{
   100  		beg: 62914561, end: 68157440, // 60m65m:bin0+bin2
   101  		expect: []bgzf.Chunk{
   102  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
   103  		},
   104  	},
   105  	{
   106  		beg: 72351744, end: 80740352, // 69m77m:bin0+bin2+18
   107  		expect: []bgzf.Chunk{
   108  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
   109  		},
   110  	},
   111  	{
   112  		beg: 61865984, end: 80740352, // 59m77m:bin0+bin2+bin18
   113  		expect: []bgzf.Chunk{
   114  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
   115  		},
   116  	},
   117  	{
   118  		beg: 80740352, end: 81788928, // 77m78m - Not in covered region, but Index does not use tiles, so this is hit.
   119  		expect: []bgzf.Chunk{
   120  			{Begin: bgzf.Offset{File: 101, Block: 0}, End: bgzf.Offset{File: 228, Block: 0}},
   121  		},
   122  	},
   123  }
   124  
   125  func (s *S) TestConceptualCSIv1(c *check.C) {
   126  	csi, err := ReadFrom(bytes.NewReader(conceptualCSIv1data))
   127  	c.Assert(err, check.Equals, nil)
   128  
   129  	for _, test := range chunkTests {
   130  		c.Check(csi.Chunks(0, test.beg, test.end), check.DeepEquals, test.expect,
   131  			check.Commentf("Unexpected result for [%d,%d).", test.beg, test.end),
   132  		)
   133  	}
   134  	stats, ok := csi.ReferenceStats(0)
   135  	c.Check(ok, check.Equals, true)
   136  	c.Check(stats, check.Equals, index.ReferenceStats{
   137  		Chunk: bgzf.Chunk{
   138  			Begin: bgzf.Offset{File: 101, Block: 0},
   139  			End:   bgzf.Offset{File: 228, Block: 0},
   140  		},
   141  		Mapped: 3, Unmapped: 0})
   142  	unmapped, ok := csi.Unmapped()
   143  	c.Check(ok, check.Equals, true)
   144  	c.Check(unmapped, check.Equals, uint64(0))
   145  }
   146  
   147  // conceptualCSIv2data is an uncompressed CSIv1 for the alignments in the BAM
   148  // corresponding to:
   149  //
   150  // @HD	VN:1.0	SO:coordinate
   151  // @SQ	SN:conceptual	LN:134217728
   152  // 60m66m:bin0	0	conceptual	62914561	40	6291456M	*	0	0	*	*
   153  // 70m76m:bin2	0	conceptual	73400321	40	6291456M	*	0	0	*	*
   154  // 73m75m:bin18	0	conceptual	76546049	40	2097152M	*	0	0	*	*
   155  //
   156  // This is a coordinate-translated version of the conceptual example in the
   157  // SAM spec using binning as actually used by BAM rather than as presented.
   158  /*
   159  	0x43, 0x53, 0x49, 0x02, // Index\1
   160  	0x0e, 0x00, 0x00, 0x00, // min_shift
   161  	0x05, 0x00, 0x00, 0x00, // depth
   162  	0x00, 0x00, 0x00, 0x00, // l_aux
   163  
   164  	// no aux
   165  
   166  	0x01, 0x00, 0x00, 0x00, // n_ref
   167  
   168  		0x02, 0x00, 0x00, 0x00, // n_bin
   169  
   170  			0x00, 0x00, 0x00, 0x00, // bin
   171  			0x00, 0x00, 0x65, 0x00,	0x00, 0x00, 0x00, 0x00, // loffset
   172  			0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_rec
   173  			0x01, 0x00, 0x00, 0x00, // n_chunk
   174  
   175  				0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_beg
   176  				0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // chunk_end
   177  
   178  			0x4a, 0x92, 0x00, 0x00, // bin
   179  			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // loffset
   180  			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // n_rec
   181  			0x02, 0x00, 0x00, 0x00, // n_chunk
   182  
   183  			0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_beg
   184  			0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_end
   185  
   186  			0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // mapped_count
   187  			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // unmapped_count
   188  
   189  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   190  */
   191  var conceptualCSIv2data = []byte{
   192  	0x43, 0x53, 0x49, 0x02, 0x0e, 0x00, 0x00, 0x00,
   193  	0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   194  	0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
   195  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00,
   196  	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   197  	0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
   198  	0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00,
   199  	0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00,
   200  	0x4a, 0x92, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   201  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   202  	0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00,
   203  	0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00,
   204  	0x00, 0x00, 0xe4, 0x00, 0x00, 0x00, 0x00, 0x00,
   205  	0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   206  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   207  	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
   208  }
   209  
   210  func (s *S) TestConceptualCSIv2(c *check.C) {
   211  	csi, err := ReadFrom(bytes.NewReader(conceptualCSIv2data))
   212  	c.Assert(err, check.Equals, nil)
   213  
   214  	for _, test := range chunkTests {
   215  		c.Check(csi.Chunks(0, test.beg, test.end), check.DeepEquals, test.expect,
   216  			check.Commentf("Unexpected result for [%d,%d).", test.beg, test.end),
   217  		)
   218  	}
   219  	stats, ok := csi.ReferenceStats(0)
   220  	c.Check(ok, check.Equals, true)
   221  	c.Check(stats, check.Equals, index.ReferenceStats{
   222  		Chunk: bgzf.Chunk{
   223  			Begin: bgzf.Offset{File: 101, Block: 0},
   224  			End:   bgzf.Offset{File: 228, Block: 0},
   225  		},
   226  		Mapped: 3, Unmapped: 0})
   227  	unmapped, ok := csi.Unmapped()
   228  	c.Check(ok, check.Equals, true)
   229  	c.Check(unmapped, check.Equals, uint64(0))
   230  }
   231  
   232  func uint64ptr(i uint64) *uint64 {
   233  	return &i
   234  }
   235  
   236  var csiTestData = []struct {
   237  	csi *Index
   238  	err error
   239  }{
   240  	{
   241  		csi: &Index{
   242  			minShift: 14, depth: 5,
   243  			refs: []refIndex{
   244  				{
   245  					bins: []bin{
   246  						{
   247  							bin: 4681, left: bgzf.Offset{File: 98, Block: 0},
   248  							chunks: []bgzf.Chunk{
   249  								{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   250  							},
   251  						},
   252  					},
   253  					stats: &index.ReferenceStats{
   254  						Chunk:    bgzf.Chunk{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   255  						Mapped:   8,
   256  						Unmapped: 1,
   257  					},
   258  				},
   259  			},
   260  			unmapped: uint64ptr(1),
   261  			isSorted: true,
   262  		},
   263  		err: nil,
   264  	},
   265  	{
   266  		csi: &Index{
   267  			minShift: 14, depth: 5,
   268  			refs: []refIndex{
   269  				{
   270  					bins: []bin{
   271  						{
   272  							bin: 4681, left: bgzf.Offset{File: 98, Block: 0},
   273  							chunks: []bgzf.Chunk{
   274  								{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   275  							},
   276  						},
   277  					},
   278  					stats: &index.ReferenceStats{
   279  						Chunk:    bgzf.Chunk{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   280  						Mapped:   8,
   281  						Unmapped: 1,
   282  					},
   283  				},
   284  			},
   285  			unmapped: nil,
   286  			isSorted: true,
   287  		},
   288  		err: nil,
   289  	},
   290  	{
   291  		csi: &Index{
   292  			minShift: 14, depth: 5,
   293  			refs: []refIndex{
   294  				{
   295  					bins: []bin{
   296  						{
   297  							bin: 4681, left: bgzf.Offset{File: 98, Block: 0},
   298  							chunks: []bgzf.Chunk{
   299  								{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   300  							},
   301  						},
   302  					},
   303  					stats: nil,
   304  				},
   305  			},
   306  			unmapped: uint64ptr(1),
   307  			isSorted: true,
   308  		},
   309  		err: nil,
   310  	},
   311  	{
   312  		csi: &Index{
   313  			minShift: 14, depth: 5,
   314  			refs: []refIndex{
   315  				{
   316  					bins: []bin{
   317  						{
   318  							bin: 4681, left: bgzf.Offset{File: 98, Block: 0},
   319  							chunks: []bgzf.Chunk{
   320  								{Begin: bgzf.Offset{File: 98, Block: 0}, End: bgzf.Offset{File: 401, Block: 0}},
   321  							},
   322  						},
   323  					},
   324  					stats: nil,
   325  				},
   326  			},
   327  			unmapped: nil,
   328  			isSorted: true,
   329  		},
   330  		err: nil,
   331  	},
   332  	{
   333  		csi: &Index{
   334  			Auxilliary: []byte("Various commentary strings"),
   335  			minShift:   14, depth: 5,
   336  			refs: func() []refIndex {
   337  				idx := [86]refIndex{
   338  					23: {
   339  						bins: []bin{
   340  							{
   341  								bin: 0x2070,
   342  								chunks: []bgzf.Chunk{
   343  									{
   344  										Begin: bgzf.Offset{File: 0x1246, Block: 0x0},
   345  										End:   bgzf.Offset{File: 0x1246, Block: 0x1cf9},
   346  									},
   347  								},
   348  							},
   349  						},
   350  						stats: &index.ReferenceStats{
   351  							Chunk: bgzf.Chunk{
   352  								Begin: bgzf.Offset{File: 0x1246, Block: 0x0},
   353  								End:   bgzf.Offset{File: 0x1246, Block: 0x1cf9},
   354  							},
   355  							Mapped: 0, Unmapped: 0x1d,
   356  						},
   357  					},
   358  					24: {
   359  						bins: []bin{
   360  							{
   361  								bin: 0x124a,
   362  								chunks: []bgzf.Chunk{
   363  									{
   364  										Begin: bgzf.Offset{File: 0x1246, Block: 0x1cf9},
   365  										End:   bgzf.Offset{File: 0x1246, Block: 0x401d},
   366  									},
   367  								},
   368  							},
   369  						},
   370  						stats: &index.ReferenceStats{
   371  							Chunk: bgzf.Chunk{
   372  								Begin: bgzf.Offset{File: 0x1246, Block: 0x1cf9},
   373  								End:   bgzf.Offset{File: 0x1246, Block: 0x401d},
   374  							},
   375  							Mapped: 0, Unmapped: 0x25,
   376  						},
   377  					},
   378  					72: {
   379  						bins: []bin{
   380  							{
   381  								bin: 0x1253,
   382  								chunks: []bgzf.Chunk{
   383  									{
   384  										Begin: bgzf.Offset{File: 0x1246, Block: 0x401d},
   385  										End:   bgzf.Offset{File: 0x1246, Block: 0x41f5},
   386  									},
   387  								},
   388  							},
   389  						},
   390  						stats: &index.ReferenceStats{
   391  							Chunk: bgzf.Chunk{
   392  								Begin: bgzf.Offset{File: 0x1246, Block: 0x401d},
   393  								End:   bgzf.Offset{File: 0x1246, Block: 0x41f5},
   394  							},
   395  							Mapped: 0, Unmapped: 0x2,
   396  						},
   397  					},
   398  				}
   399  				return idx[:]
   400  			}(),
   401  			unmapped: uint64ptr(932),
   402  			isSorted: true,
   403  		},
   404  		err: nil,
   405  	},
   406  }
   407  
   408  func (s *S) TestCSIRoundtrip(c *check.C) {
   409  	for i, test := range csiTestData {
   410  		for test.csi.Version = 1; test.csi.Version <= 2; test.csi.Version++ {
   411  			var buf bytes.Buffer
   412  			c.Assert(WriteTo(&buf, test.csi), check.Equals, nil)
   413  			got, err := ReadFrom(&buf)
   414  			c.Assert(err, check.Equals, nil, check.Commentf("Test %d", i))
   415  			c.Check(got, check.DeepEquals, test.csi, check.Commentf("Test %d", i))
   416  		}
   417  	}
   418  }