github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/paper/examples/flagstat/flagstat.go (about) 1 // Copyright ©2017 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This program tabulates statistics on a bam file from the sam flag. 6 // It replicates functionality in samtools flagstat. 7 package main 8 9 import ( 10 "fmt" 11 "io" 12 "log" 13 "os" 14 15 "github.com/Schaudge/hts/bam" 16 "github.com/Schaudge/hts/bgzf" 17 "github.com/Schaudge/hts/sam" 18 ) 19 20 const ( 21 pass = iota 22 fail 23 ) 24 25 func main() { 26 if len(os.Args) != 2 { 27 log.Fatal("Expecting a single bam argument") 28 } 29 f, err := os.Open(os.Args[1]) 30 if err != nil { 31 log.Fatal(err) 32 } 33 defer f.Close() 34 ok, err := bgzf.HasEOF(f) 35 if err != nil { 36 log.Fatal(err) 37 } 38 if !ok { 39 log.Println("EOF block missing") 40 } 41 42 b, err := bam.NewReader(f, 0) 43 if err != nil { 44 log.Fatal(err) 45 } 46 defer b.Close() 47 b.Omit(bam.AllVariableLengthData) 48 49 // counts is indexed by [pass/fail][sam.Flag] where we have 12 possible sam Flags. 50 var counts [2][12]uint64 51 // track mates on different chromosomes. 52 var mates [2]struct{ allMapQ, highMapQ uint64 } 53 var good, singletons, paired [2]uint64 54 var qc int 55 for { 56 read, err := b.Read() 57 if err == io.EOF { 58 break 59 } 60 if err != nil { 61 log.Fatal(err) 62 } 63 if read.Flags&sam.QCFail == 0 { 64 qc = pass 65 } else { 66 qc = fail 67 } 68 69 for i := Paired; i <= Supplementary; i++ { 70 if read.Flags&(1<<i) != 0 { 71 counts[qc][i]++ 72 } 73 } 74 75 const goodMask = sam.ProperPair | sam.Unmapped 76 if read.Flags&goodMask == sam.ProperPair { 77 good[qc]++ 78 } 79 80 const mapMask = sam.MateUnmapped | sam.Unmapped 81 switch read.Flags & mapMask { 82 case sam.MateUnmapped: 83 singletons[qc]++ 84 case 0: 85 paired[qc]++ 86 if read.MateRef != read.Ref && read.MateRef != nil && read.Ref != nil { 87 if read.MapQ > 4 { 88 mates[qc].highMapQ++ 89 } 90 mates[qc].allMapQ++ 91 } 92 } 93 } 94 95 // extract counts to match output from samtools flagstat. 96 fmt.Printf("%d + %d in total (QC-passed reads + QC-failed reads)\n", counts[pass][Paired], counts[fail][Paired]) 97 fmt.Printf("%d + %d in total secondary\n", counts[pass][Secondary], counts[fail][Secondary]) 98 fmt.Printf("%d + %d in total supplementary\n", counts[pass][Supplementary], counts[fail][Supplementary]) 99 fmt.Printf("%d + %d duplicates\n", counts[pass][Duplicate], counts[fail][Duplicate]) 100 mappedPass := counts[pass][Paired] - counts[pass][Unmapped] 101 mappedFail := counts[fail][Paired] - counts[fail][Unmapped] 102 fmt.Printf("%d + %d mapped (%s : %s)\n", mappedPass, mappedFail, percent(mappedPass, counts[pass][Paired]), percent(mappedFail, counts[fail][Paired])) 103 fmt.Printf("%d + %d paired in sequencing\n", counts[pass][Paired], counts[fail][Paired]) 104 fmt.Printf("%d + %d read1\n", counts[pass][Read1], counts[fail][Read1]) 105 fmt.Printf("%d + %d read2\n", counts[pass][Read2], counts[fail][Read2]) 106 fmt.Printf("%d + %d properly paired (%s : %s)\n", good[pass], good[fail], percent(good[pass], counts[pass][Paired]), percent(good[fail], counts[fail][Paired])) 107 fmt.Printf("%d + %d with itself and mate mapped\n", paired[pass], paired[fail]) 108 fmt.Printf("%d + %d singletons (%s : %s)\n", singletons[pass], singletons[fail], percent(singletons[pass], counts[pass][Paired]), percent(singletons[fail], counts[fail][Paired])) 109 fmt.Printf("%d + %d with mate mapped to a different chr\n", mates[pass].allMapQ, mates[fail].allMapQ) 110 fmt.Printf("%d + %d with mate mapped to a different chr (mapQ>=5)\n", mates[pass].highMapQ, mates[fail].highMapQ) 111 } 112 113 func percent(n, total uint64) string { 114 if total == 0 { 115 return "N/A" 116 } 117 return fmt.Sprintf("%.2f%%", 100*float64(n)/float64(total)) 118 } 119 120 // The flag indexes for SAM flags. Reflects sam.Flag order. 121 const ( 122 Paired uint = iota // The read is paired in sequencing, no matter whether it is mapped in a pair. 123 ProperPair // The read is mapped in a proper pair. 124 Unmapped // The read itself is unmapped; conflictive with ProperPair. 125 MateUnmapped // The mate is unmapped. 126 Reverse // The read is mapped to the reverse strand. 127 MateReverse // The mate is mapped to the reverse strand. 128 Read1 // This is read1. 129 Read2 // This is read2. 130 Secondary // Not primary alignment. 131 QCFail // QC failure. 132 Duplicate // Optical or PCR duplicate. 133 Supplementary // Supplementary alignment, indicates alignment is part of a chimeric alignment. 134 )