github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/overlap_example_test.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package sam_test 6 7 import ( 8 "fmt" 9 10 "github.com/Schaudge/hts/sam" 11 ) 12 13 func min(a, b int) int { 14 if a > b { 15 return b 16 } 17 return a 18 } 19 20 func max(a, b int) int { 21 if a < b { 22 return b 23 } 24 return a 25 } 26 27 // Overlap returns the length of the overlap between the alignment 28 // of the SAM record and the interval specified. 29 // 30 // Note that this will count repeated matches to the same reference 31 // location if CigarBack operations are used. 32 func Overlap(r *sam.Record, start, end int) int { 33 var overlap int 34 pos := r.Pos 35 for _, co := range r.Cigar { 36 t := co.Type() 37 con := t.Consumes() 38 lr := co.Len() * con.Reference 39 if con.Query == con.Reference { 40 o := min(pos+lr, end) - max(pos, start) 41 if o > 0 { 42 overlap += o 43 } 44 } 45 pos += lr 46 } 47 48 return overlap 49 } 50 51 func ExampleConsume() { 52 // Example alignments from the SAM specification: 53 // 54 // @HD VN:1.5 SO:coordinate 55 // @SQ SN:ref LN:45 56 // @CO -------------------------------------------------------- 57 // @CO Coor 12345678901234 5678901234567890123456789012345 58 // @CO ref AGCATGTTAGATAA**GATAGCTGTGCTAGTAGGCAGTCAGCGCCAT 59 // @CO -------------------------------------------------------- 60 // @CO +r001/1 TTAGATAAAGGATA*CTG 61 // @CO +r002 aaaAGATAA*GGATA 62 // @CO +r003 gcctaAGCTAA 63 // @CO +r004 ATAGCT..............TCAGC 64 // @CO -r003 ttagctTAGGC 65 // @CO -r001/2 CAGCGGCAT 66 // @CO -------------------------------------------------------- 67 // r001 99 ref 7 30 8M2I4M1D3M = 37 39 TTAGATAAAGGATACTG * 68 // r002 0 ref 9 30 3S6M1P1I4M * 0 0 AAAAGATAAGGATA * 69 // r003 0 ref 9 30 5S6M * 0 0 GCCTAAGCTAA * SA:Z:ref,29,-,6H5M,17,0; 70 // r004 0 ref 16 30 6M14N5M * 0 0 ATAGCTTCAGC * 71 // r003 2064 ref 29 17 6H5M * 0 0 TAGGC * SA:Z:ref,9,+,5S6M,30,1; 72 // r001 147 ref 37 30 9M = 7 -39 CAGCGGCAT * NM:i:1 73 74 const ( 75 refStart = 0 76 refEnd = 45 77 ) 78 79 records := []*sam.Record{ 80 {Name: "r001/1", Pos: 6, Cigar: []sam.CigarOp{ 81 sam.NewCigarOp(sam.CigarMatch, 8), 82 sam.NewCigarOp(sam.CigarInsertion, 2), 83 sam.NewCigarOp(sam.CigarMatch, 4), 84 sam.NewCigarOp(sam.CigarDeletion, 1), 85 sam.NewCigarOp(sam.CigarMatch, 3), 86 }}, 87 {Name: "r002", Pos: 8, Cigar: []sam.CigarOp{ 88 sam.NewCigarOp(sam.CigarSoftClipped, 3), 89 sam.NewCigarOp(sam.CigarMatch, 6), 90 sam.NewCigarOp(sam.CigarPadded, 1), 91 sam.NewCigarOp(sam.CigarInsertion, 1), 92 sam.NewCigarOp(sam.CigarMatch, 4), 93 }}, 94 {Name: "r003", Pos: 8, Cigar: []sam.CigarOp{ 95 sam.NewCigarOp(sam.CigarSoftClipped, 5), 96 sam.NewCigarOp(sam.CigarMatch, 6), 97 }}, 98 {Name: "r004", Pos: 15, Cigar: []sam.CigarOp{ 99 sam.NewCigarOp(sam.CigarMatch, 6), 100 sam.NewCigarOp(sam.CigarSkipped, 14), 101 sam.NewCigarOp(sam.CigarMatch, 5), 102 }}, 103 {Name: "r003", Pos: 28, Cigar: []sam.CigarOp{ 104 sam.NewCigarOp(sam.CigarHardClipped, 6), 105 sam.NewCigarOp(sam.CigarMatch, 5), 106 }}, 107 {Name: "r001/2", Pos: 36, Cigar: []sam.CigarOp{ 108 sam.NewCigarOp(sam.CigarMatch, 9), 109 }}, 110 } 111 112 for _, r := range records { 113 fmt.Printf("%q overlaps reference by %d letters\n", r.Name, Overlap(r, refStart, refEnd)) 114 } 115 116 // Output: 117 // 118 // "r001/1" overlaps reference by 15 letters 119 // "r002" overlaps reference by 10 letters 120 // "r003" overlaps reference by 6 letters 121 // "r004" overlaps reference by 11 letters 122 // "r003" overlaps reference by 5 letters 123 // "r001/2" overlaps reference by 9 letters 124 }