github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bam/merger_example_test.go (about)

     1  // Copyright ©2017 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bam_test
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"log"
    12  	"os"
    13  	"sort"
    14  
    15  	"github.com/Schaudge/hts/bam"
    16  	"github.com/Schaudge/hts/sam"
    17  )
    18  
    19  func ExampleMerger_sortByCoordinate() {
    20  	// Inputs.
    21  	var (
    22  		// Input source of BAM data.
    23  		r io.Reader
    24  
    25  		// Operation to perform on each record of
    26  		// sorted stream.
    27  		fn func(*sam.Record)
    28  	)
    29  
    30  	// Specify sort chunk size.
    31  	const chunk = 1e5
    32  
    33  	// Open source.
    34  	br, err := bam.NewReader(r, 0)
    35  	if err != nil {
    36  		log.Fatalf("failed to open bam reader: %v", err)
    37  	}
    38  	defer br.Close()
    39  
    40  	// Make header with coordinate sort order.
    41  	h := br.Header().Clone()
    42  	h.SortOrder = sam.Coordinate
    43  
    44  	// Create file system workspace and prepare
    45  	// for clean up.
    46  	dir, err := ioutil.TempDir("", "")
    47  	if err != nil {
    48  		log.Fatalf("failed to create temp directory: %v", err)
    49  	}
    50  	defer func() {
    51  		os.RemoveAll(dir)
    52  		r := recover()
    53  		if r != nil {
    54  			log.Fatal(r)
    55  		}
    56  	}()
    57  
    58  	// Limit number of records for each sort chunk.
    59  	recs := make([]*sam.Record, 0, chunk)
    60  
    61  	// Keep the collection of shards for merging.
    62  	var t []*bam.Reader
    63  
    64  	it := sam.NewIterator(br)
    65  	for {
    66  		var n int
    67  		for it.Next() {
    68  			recs = append(recs, it.Record())
    69  			if len(recs) == cap(recs) {
    70  				r, err := writeChunk(dir, h, recs)
    71  				if err != nil {
    72  					log.Panic(err)
    73  				}
    74  				t = append(t, r)
    75  				n, recs = len(recs), recs[:0]
    76  			}
    77  		}
    78  		if len(recs) != 0 {
    79  			r, err := writeChunk(dir, h, recs)
    80  			if err != nil {
    81  				log.Panic(err)
    82  			}
    83  			t = append(t, r)
    84  			break
    85  		}
    86  		err = it.Error()
    87  		if n == 0 || err != nil {
    88  			break
    89  		}
    90  	}
    91  	if err != nil {
    92  		log.Panicf("error during bam reading: %v", err)
    93  	}
    94  
    95  	// Create merge using the coordinate sort order.
    96  	m, err := bam.NewMerger(nil, t...)
    97  	if err != nil {
    98  		log.Panicf("failed to created merger: %v", err)
    99  	}
   100  	sorted := sam.NewIterator(m)
   101  	for sorted.Next() {
   102  		// Operate on coordinate sorted stream.
   103  		fn(sorted.Record())
   104  	}
   105  	// Close the underlying Readers.
   106  	for i, r := range t {
   107  		err = r.Close()
   108  		if err != nil {
   109  			log.Printf("failed to close reader %d: %v", i, err)
   110  		}
   111  	}
   112  	err = sorted.Error()
   113  	if err != nil {
   114  		log.Panicf("error during bam reading: %v", err)
   115  	}
   116  }
   117  
   118  // writeChunk writes out the records in recs to the given directory
   119  // after sorting them.
   120  func writeChunk(dir string, h *sam.Header, recs []*sam.Record) (*bam.Reader, error) {
   121  	sort.Sort(byCoordinate(recs))
   122  
   123  	f, err := ioutil.TempFile(dir, "")
   124  	if err != nil {
   125  		return nil, fmt.Errorf("failed to create temp file in %q: %v", dir, err)
   126  	}
   127  
   128  	bw, err := bam.NewWriter(f, h, 0)
   129  	if err != nil {
   130  		return nil, fmt.Errorf("failed to open bam writer: %v", err)
   131  	}
   132  	for _, r := range recs {
   133  		err = bw.Write(r)
   134  		if err != nil {
   135  			return nil, fmt.Errorf("failed to write record: %v", err)
   136  		}
   137  	}
   138  	err = bw.Close()
   139  	if err != nil {
   140  		return nil, fmt.Errorf("failed to close bam writer: %v", err)
   141  	}
   142  	err = f.Sync()
   143  	if err != nil {
   144  		return nil, fmt.Errorf("failed to sync file: %v", err)
   145  	}
   146  
   147  	// Make a reader of the written data.
   148  	_, err = f.Seek(0, os.SEEK_SET)
   149  	if err != nil {
   150  		return nil, fmt.Errorf("failed to seek to start: %v", err)
   151  	}
   152  	r, err := bam.NewReader(f, 0)
   153  	if err != nil {
   154  		return nil, fmt.Errorf("failed to open bam writer: %v", err)
   155  	}
   156  	return r, err
   157  }
   158  
   159  // byCoordinate implements the coordinate sort order.
   160  type byCoordinate []*sam.Record
   161  
   162  func (r byCoordinate) Len() int           { return len(r) }
   163  func (r byCoordinate) Less(i, j int) bool { return r[i].LessByCoordinate(r[j]) }
   164  func (r byCoordinate) Swap(i, j int)      { r[i], r[j] = r[j], r[i] }