github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bam/merger_example_test.go (about) 1 // Copyright ©2017 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bam_test 6 7 import ( 8 "fmt" 9 "io" 10 "io/ioutil" 11 "log" 12 "os" 13 "sort" 14 15 "github.com/Schaudge/hts/bam" 16 "github.com/Schaudge/hts/sam" 17 ) 18 19 func ExampleMerger_sortByCoordinate() { 20 // Inputs. 21 var ( 22 // Input source of BAM data. 23 r io.Reader 24 25 // Operation to perform on each record of 26 // sorted stream. 27 fn func(*sam.Record) 28 ) 29 30 // Specify sort chunk size. 31 const chunk = 1e5 32 33 // Open source. 34 br, err := bam.NewReader(r, 0) 35 if err != nil { 36 log.Fatalf("failed to open bam reader: %v", err) 37 } 38 defer br.Close() 39 40 // Make header with coordinate sort order. 41 h := br.Header().Clone() 42 h.SortOrder = sam.Coordinate 43 44 // Create file system workspace and prepare 45 // for clean up. 46 dir, err := ioutil.TempDir("", "") 47 if err != nil { 48 log.Fatalf("failed to create temp directory: %v", err) 49 } 50 defer func() { 51 os.RemoveAll(dir) 52 r := recover() 53 if r != nil { 54 log.Fatal(r) 55 } 56 }() 57 58 // Limit number of records for each sort chunk. 59 recs := make([]*sam.Record, 0, chunk) 60 61 // Keep the collection of shards for merging. 62 var t []*bam.Reader 63 64 it := sam.NewIterator(br) 65 for { 66 var n int 67 for it.Next() { 68 recs = append(recs, it.Record()) 69 if len(recs) == cap(recs) { 70 r, err := writeChunk(dir, h, recs) 71 if err != nil { 72 log.Panic(err) 73 } 74 t = append(t, r) 75 n, recs = len(recs), recs[:0] 76 } 77 } 78 if len(recs) != 0 { 79 r, err := writeChunk(dir, h, recs) 80 if err != nil { 81 log.Panic(err) 82 } 83 t = append(t, r) 84 break 85 } 86 err = it.Error() 87 if n == 0 || err != nil { 88 break 89 } 90 } 91 if err != nil { 92 log.Panicf("error during bam reading: %v", err) 93 } 94 95 // Create merge using the coordinate sort order. 96 m, err := bam.NewMerger(nil, t...) 97 if err != nil { 98 log.Panicf("failed to created merger: %v", err) 99 } 100 sorted := sam.NewIterator(m) 101 for sorted.Next() { 102 // Operate on coordinate sorted stream. 103 fn(sorted.Record()) 104 } 105 // Close the underlying Readers. 106 for i, r := range t { 107 err = r.Close() 108 if err != nil { 109 log.Printf("failed to close reader %d: %v", i, err) 110 } 111 } 112 err = sorted.Error() 113 if err != nil { 114 log.Panicf("error during bam reading: %v", err) 115 } 116 } 117 118 // writeChunk writes out the records in recs to the given directory 119 // after sorting them. 120 func writeChunk(dir string, h *sam.Header, recs []*sam.Record) (*bam.Reader, error) { 121 sort.Sort(byCoordinate(recs)) 122 123 f, err := ioutil.TempFile(dir, "") 124 if err != nil { 125 return nil, fmt.Errorf("failed to create temp file in %q: %v", dir, err) 126 } 127 128 bw, err := bam.NewWriter(f, h, 0) 129 if err != nil { 130 return nil, fmt.Errorf("failed to open bam writer: %v", err) 131 } 132 for _, r := range recs { 133 err = bw.Write(r) 134 if err != nil { 135 return nil, fmt.Errorf("failed to write record: %v", err) 136 } 137 } 138 err = bw.Close() 139 if err != nil { 140 return nil, fmt.Errorf("failed to close bam writer: %v", err) 141 } 142 err = f.Sync() 143 if err != nil { 144 return nil, fmt.Errorf("failed to sync file: %v", err) 145 } 146 147 // Make a reader of the written data. 148 _, err = f.Seek(0, os.SEEK_SET) 149 if err != nil { 150 return nil, fmt.Errorf("failed to seek to start: %v", err) 151 } 152 r, err := bam.NewReader(f, 0) 153 if err != nil { 154 return nil, fmt.Errorf("failed to open bam writer: %v", err) 155 } 156 return r, err 157 } 158 159 // byCoordinate implements the coordinate sort order. 160 type byCoordinate []*sam.Record 161 162 func (r byCoordinate) Len() int { return len(r) } 163 func (r byCoordinate) Less(i, j int) bool { return r[i].LessByCoordinate(r[j]) } 164 func (r byCoordinate) Swap(i, j int) { r[i], r[j] = r[j], r[i] }