github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bam/merger_test.go (about) 1 // Copyright ©2017 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bam 6 7 import ( 8 "bytes" 9 "fmt" 10 "io" 11 "sort" 12 "testing" 13 14 "github.com/Schaudge/hts/sam" 15 ) 16 17 type byFunc struct { 18 less func(a, b *sam.Record) bool 19 recs []*sam.Record 20 } 21 22 func (r byFunc) Len() int { return len(r.recs) } 23 func (r byFunc) Less(i, j int) bool { return r.less(r.recs[i], r.recs[j]) } 24 func (r byFunc) Swap(i, j int) { r.recs[i], r.recs[j] = r.recs[j], r.recs[i] } 25 26 func sortBAM(r io.Reader, so sam.SortOrder, less func(a, b *sam.Record) bool, fn func(*sam.Record), shard int) error { 27 br, err := NewReader(r, 0) 28 if err != nil { 29 return fmt.Errorf("failed to open bam reader: %v", err) 30 } 31 defer br.Close() 32 33 h := br.Header().Clone() 34 h.SortOrder = so 35 36 recs := make([]*sam.Record, 0, shard) 37 var t []*Reader 38 it := sam.NewIterator(br) 39 for { 40 var n int 41 for it.Next() { 42 recs = append(recs, it.Record()) 43 if len(recs) == cap(recs) { 44 r, err := writeSorted(h, recs, less) 45 if err != nil { 46 return err 47 } 48 defer r.Close() 49 t = append(t, r) 50 n, recs = len(recs), recs[:0] 51 } 52 } 53 if len(recs) != 0 { 54 r, err := writeSorted(h, recs, less) 55 if err != nil { 56 return err 57 } 58 defer r.Close() 59 t = append(t, r) 60 break 61 } 62 err = it.Error() 63 if n == 0 || err != nil { 64 break 65 } 66 } 67 if err != nil { 68 return fmt.Errorf("error during bam reading: %v", err) 69 } 70 71 m, err := NewMerger(less, t...) 72 if err != nil { 73 return fmt.Errorf("failed to create merger: %v", err) 74 } 75 sorted := sam.NewIterator(m) 76 for sorted.Next() { 77 fn(sorted.Record()) 78 } 79 err = sorted.Error() 80 if err != nil { 81 return fmt.Errorf("error during bam reading: %v", err) 82 } 83 84 return nil 85 } 86 87 func writeSorted(h *sam.Header, recs []*sam.Record, less func(a, b *sam.Record) bool) (*Reader, error) { 88 if less != nil { 89 sort.Sort(byFunc{less, recs}) 90 } 91 92 var buf bytes.Buffer 93 94 bw, err := NewWriter(&buf, h, 0) 95 if err != nil { 96 return nil, fmt.Errorf("failed to open bam writer: %v", err) 97 } 98 for _, r := range recs { 99 err = bw.Write(r) 100 if err != nil { 101 return nil, fmt.Errorf("failed to write record: %v", err) 102 } 103 } 104 err = bw.Close() 105 if err != nil { 106 return nil, fmt.Errorf("failed to close bam writer: %v", err) 107 } 108 109 r, err := NewReader(&buf, 0) 110 if err != nil { 111 return nil, fmt.Errorf("failed to open bam writer: %v", err) 112 } 113 return r, err 114 } 115 116 var mergerTests = []struct { 117 r func() io.Reader 118 119 so sam.SortOrder 120 less func(a, b *sam.Record) bool 121 expect func(a, b *sam.Record) bool 122 shard int 123 }{ 124 { 125 r: func() io.Reader { return bytes.NewReader(bamHG00096_1000) }, 126 so: sam.QueryName, 127 less: nil, 128 expect: (*sam.Record).LessByName, 129 shard: 199, 130 }, 131 { 132 r: func() io.Reader { return bytes.NewReader(bamHG00096_1000) }, 133 so: sam.QueryName, 134 less: nil, 135 expect: (*sam.Record).LessByName, 136 shard: 1e5, 137 }, 138 { 139 r: func() io.Reader { return bytes.NewReader(bamHG00096_1000) }, 140 so: sam.Unsorted, 141 less: nil, 142 expect: (*sam.Record).LessByCoordinate, // HG00096 is sorted by coordinate. 143 shard: 199, 144 }, 145 } 146 147 func TestMerger(t *testing.T) { 148 for _, test := range mergerTests { 149 var recs []*sam.Record 150 fn := func(r *sam.Record) { 151 recs = append(recs, r) 152 } 153 154 var less func(a, b *sam.Record) bool 155 switch test.so { 156 case sam.UnknownOrder: 157 less = test.less 158 case sam.Unsorted: 159 case sam.QueryName: 160 less = (*sam.Record).LessByName 161 case sam.Coordinate: 162 less = (*sam.Record).LessByCoordinate 163 } 164 err := sortBAM(test.r(), test.so, less, fn, test.shard) 165 if err != nil { 166 t.Errorf("unexpected error: %v", err) 167 } 168 if less == nil { 169 continue 170 } 171 if !sort.IsSorted(byFunc{test.expect, recs}) { 172 t.Error("not in expected sort order") 173 } 174 } 175 }