github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bam/merger_test.go (about)

     1  // Copyright ©2017 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bam
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"io"
    11  	"sort"
    12  	"testing"
    13  
    14  	"github.com/Schaudge/hts/sam"
    15  )
    16  
    17  type byFunc struct {
    18  	less func(a, b *sam.Record) bool
    19  	recs []*sam.Record
    20  }
    21  
    22  func (r byFunc) Len() int           { return len(r.recs) }
    23  func (r byFunc) Less(i, j int) bool { return r.less(r.recs[i], r.recs[j]) }
    24  func (r byFunc) Swap(i, j int)      { r.recs[i], r.recs[j] = r.recs[j], r.recs[i] }
    25  
    26  func sortBAM(r io.Reader, so sam.SortOrder, less func(a, b *sam.Record) bool, fn func(*sam.Record), shard int) error {
    27  	br, err := NewReader(r, 0)
    28  	if err != nil {
    29  		return fmt.Errorf("failed to open bam reader: %v", err)
    30  	}
    31  	defer br.Close()
    32  
    33  	h := br.Header().Clone()
    34  	h.SortOrder = so
    35  
    36  	recs := make([]*sam.Record, 0, shard)
    37  	var t []*Reader
    38  	it := sam.NewIterator(br)
    39  	for {
    40  		var n int
    41  		for it.Next() {
    42  			recs = append(recs, it.Record())
    43  			if len(recs) == cap(recs) {
    44  				r, err := writeSorted(h, recs, less)
    45  				if err != nil {
    46  					return err
    47  				}
    48  				defer r.Close()
    49  				t = append(t, r)
    50  				n, recs = len(recs), recs[:0]
    51  			}
    52  		}
    53  		if len(recs) != 0 {
    54  			r, err := writeSorted(h, recs, less)
    55  			if err != nil {
    56  				return err
    57  			}
    58  			defer r.Close()
    59  			t = append(t, r)
    60  			break
    61  		}
    62  		err = it.Error()
    63  		if n == 0 || err != nil {
    64  			break
    65  		}
    66  	}
    67  	if err != nil {
    68  		return fmt.Errorf("error during bam reading: %v", err)
    69  	}
    70  
    71  	m, err := NewMerger(less, t...)
    72  	if err != nil {
    73  		return fmt.Errorf("failed to create merger: %v", err)
    74  	}
    75  	sorted := sam.NewIterator(m)
    76  	for sorted.Next() {
    77  		fn(sorted.Record())
    78  	}
    79  	err = sorted.Error()
    80  	if err != nil {
    81  		return fmt.Errorf("error during bam reading: %v", err)
    82  	}
    83  
    84  	return nil
    85  }
    86  
    87  func writeSorted(h *sam.Header, recs []*sam.Record, less func(a, b *sam.Record) bool) (*Reader, error) {
    88  	if less != nil {
    89  		sort.Sort(byFunc{less, recs})
    90  	}
    91  
    92  	var buf bytes.Buffer
    93  
    94  	bw, err := NewWriter(&buf, h, 0)
    95  	if err != nil {
    96  		return nil, fmt.Errorf("failed to open bam writer: %v", err)
    97  	}
    98  	for _, r := range recs {
    99  		err = bw.Write(r)
   100  		if err != nil {
   101  			return nil, fmt.Errorf("failed to write record: %v", err)
   102  		}
   103  	}
   104  	err = bw.Close()
   105  	if err != nil {
   106  		return nil, fmt.Errorf("failed to close bam writer: %v", err)
   107  	}
   108  
   109  	r, err := NewReader(&buf, 0)
   110  	if err != nil {
   111  		return nil, fmt.Errorf("failed to open bam writer: %v", err)
   112  	}
   113  	return r, err
   114  }
   115  
   116  var mergerTests = []struct {
   117  	r func() io.Reader
   118  
   119  	so     sam.SortOrder
   120  	less   func(a, b *sam.Record) bool
   121  	expect func(a, b *sam.Record) bool
   122  	shard  int
   123  }{
   124  	{
   125  		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
   126  		so:     sam.QueryName,
   127  		less:   nil,
   128  		expect: (*sam.Record).LessByName,
   129  		shard:  199,
   130  	},
   131  	{
   132  		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
   133  		so:     sam.QueryName,
   134  		less:   nil,
   135  		expect: (*sam.Record).LessByName,
   136  		shard:  1e5,
   137  	},
   138  	{
   139  		r:      func() io.Reader { return bytes.NewReader(bamHG00096_1000) },
   140  		so:     sam.Unsorted,
   141  		less:   nil,
   142  		expect: (*sam.Record).LessByCoordinate, // HG00096 is sorted by coordinate.
   143  		shard:  199,
   144  	},
   145  }
   146  
   147  func TestMerger(t *testing.T) {
   148  	for _, test := range mergerTests {
   149  		var recs []*sam.Record
   150  		fn := func(r *sam.Record) {
   151  			recs = append(recs, r)
   152  		}
   153  
   154  		var less func(a, b *sam.Record) bool
   155  		switch test.so {
   156  		case sam.UnknownOrder:
   157  			less = test.less
   158  		case sam.Unsorted:
   159  		case sam.QueryName:
   160  			less = (*sam.Record).LessByName
   161  		case sam.Coordinate:
   162  			less = (*sam.Record).LessByCoordinate
   163  		}
   164  		err := sortBAM(test.r(), test.so, less, fn, test.shard)
   165  		if err != nil {
   166  			t.Errorf("unexpected error: %v", err)
   167  		}
   168  		if less == nil {
   169  			continue
   170  		}
   171  		if !sort.IsSorted(byFunc{test.expect, recs}) {
   172  			t.Error("not in expected sort order")
   173  		}
   174  	}
   175  }