github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/overlap_example_test.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package sam_test
     6  
     7  import (
     8  	"fmt"
     9  
    10  	"github.com/Schaudge/hts/sam"
    11  )
    12  
    13  func min(a, b int) int {
    14  	if a > b {
    15  		return b
    16  	}
    17  	return a
    18  }
    19  
    20  func max(a, b int) int {
    21  	if a < b {
    22  		return b
    23  	}
    24  	return a
    25  }
    26  
    27  // Overlap returns the length of the overlap between the alignment
    28  // of the SAM record and the interval specified.
    29  //
    30  // Note that this will count repeated matches to the same reference
    31  // location if CigarBack operations are used.
    32  func Overlap(r *sam.Record, start, end int) int {
    33  	var overlap int
    34  	pos := r.Pos
    35  	for _, co := range r.Cigar {
    36  		t := co.Type()
    37  		con := t.Consumes()
    38  		lr := co.Len() * con.Reference
    39  		if con.Query == con.Reference {
    40  			o := min(pos+lr, end) - max(pos, start)
    41  			if o > 0 {
    42  				overlap += o
    43  			}
    44  		}
    45  		pos += lr
    46  	}
    47  
    48  	return overlap
    49  }
    50  
    51  func ExampleConsume() {
    52  	// Example alignments from the SAM specification:
    53  	//
    54  	// @HD	VN:1.5	SO:coordinate
    55  	// @SQ	SN:ref	LN:45
    56  	// @CO	--------------------------------------------------------
    57  	// @CO	Coor     12345678901234  5678901234567890123456789012345
    58  	// @CO	ref      AGCATGTTAGATAA**GATAGCTGTGCTAGTAGGCAGTCAGCGCCAT
    59  	// @CO	--------------------------------------------------------
    60  	// @CO	+r001/1        TTAGATAAAGGATA*CTG
    61  	// @CO	+r002         aaaAGATAA*GGATA
    62  	// @CO	+r003       gcctaAGCTAA
    63  	// @CO	+r004                     ATAGCT..............TCAGC
    64  	// @CO	-r003                            ttagctTAGGC
    65  	// @CO	-r001/2                                        CAGCGGCAT
    66  	// @CO	--------------------------------------------------------
    67  	// r001	99	ref	7	30	8M2I4M1D3M	=	37	39	TTAGATAAAGGATACTG	*
    68  	// r002	0	ref	9	30	3S6M1P1I4M	*	0	0	AAAAGATAAGGATA	*
    69  	// r003	0	ref	9	30	5S6M	*	0	0	GCCTAAGCTAA	*	SA:Z:ref,29,-,6H5M,17,0;
    70  	// r004	0	ref	16	30	6M14N5M	*	0	0	ATAGCTTCAGC	*
    71  	// r003	2064	ref	29	17	6H5M	*	0	0	TAGGC	*	SA:Z:ref,9,+,5S6M,30,1;
    72  	// r001	147	ref	37	30	9M	=	7	-39	CAGCGGCAT	*	NM:i:1
    73  
    74  	const (
    75  		refStart = 0
    76  		refEnd   = 45
    77  	)
    78  
    79  	records := []*sam.Record{
    80  		{Name: "r001/1", Pos: 6, Cigar: []sam.CigarOp{
    81  			sam.NewCigarOp(sam.CigarMatch, 8),
    82  			sam.NewCigarOp(sam.CigarInsertion, 2),
    83  			sam.NewCigarOp(sam.CigarMatch, 4),
    84  			sam.NewCigarOp(sam.CigarDeletion, 1),
    85  			sam.NewCigarOp(sam.CigarMatch, 3),
    86  		}},
    87  		{Name: "r002", Pos: 8, Cigar: []sam.CigarOp{
    88  			sam.NewCigarOp(sam.CigarSoftClipped, 3),
    89  			sam.NewCigarOp(sam.CigarMatch, 6),
    90  			sam.NewCigarOp(sam.CigarPadded, 1),
    91  			sam.NewCigarOp(sam.CigarInsertion, 1),
    92  			sam.NewCigarOp(sam.CigarMatch, 4),
    93  		}},
    94  		{Name: "r003", Pos: 8, Cigar: []sam.CigarOp{
    95  			sam.NewCigarOp(sam.CigarSoftClipped, 5),
    96  			sam.NewCigarOp(sam.CigarMatch, 6),
    97  		}},
    98  		{Name: "r004", Pos: 15, Cigar: []sam.CigarOp{
    99  			sam.NewCigarOp(sam.CigarMatch, 6),
   100  			sam.NewCigarOp(sam.CigarSkipped, 14),
   101  			sam.NewCigarOp(sam.CigarMatch, 5),
   102  		}},
   103  		{Name: "r003", Pos: 28, Cigar: []sam.CigarOp{
   104  			sam.NewCigarOp(sam.CigarHardClipped, 6),
   105  			sam.NewCigarOp(sam.CigarMatch, 5),
   106  		}},
   107  		{Name: "r001/2", Pos: 36, Cigar: []sam.CigarOp{
   108  			sam.NewCigarOp(sam.CigarMatch, 9),
   109  		}},
   110  	}
   111  
   112  	for _, r := range records {
   113  		fmt.Printf("%q overlaps reference by %d letters\n", r.Name, Overlap(r, refStart, refEnd))
   114  	}
   115  
   116  	// Output:
   117  	//
   118  	// "r001/1" overlaps reference by 15 letters
   119  	// "r002" overlaps reference by 10 letters
   120  	// "r003" overlaps reference by 6 letters
   121  	// "r004" overlaps reference by 11 letters
   122  	// "r003" overlaps reference by 5 letters
   123  	// "r001/2" overlaps reference by 9 letters
   124  }