github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/paper/examples/flagstat/flagstat.go (about)

     1  // Copyright ©2017 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This program tabulates statistics on a bam file from the sam flag.
     6  // It replicates functionality in samtools flagstat.
     7  package main
     8  
     9  import (
    10  	"fmt"
    11  	"io"
    12  	"log"
    13  	"os"
    14  
    15  	"github.com/Schaudge/hts/bam"
    16  	"github.com/Schaudge/hts/bgzf"
    17  	"github.com/Schaudge/hts/sam"
    18  )
    19  
    20  const (
    21  	pass = iota
    22  	fail
    23  )
    24  
    25  func main() {
    26  	if len(os.Args) != 2 {
    27  		log.Fatal("Expecting a single bam argument")
    28  	}
    29  	f, err := os.Open(os.Args[1])
    30  	if err != nil {
    31  		log.Fatal(err)
    32  	}
    33  	defer f.Close()
    34  	ok, err := bgzf.HasEOF(f)
    35  	if err != nil {
    36  		log.Fatal(err)
    37  	}
    38  	if !ok {
    39  		log.Println("EOF block missing")
    40  	}
    41  
    42  	b, err := bam.NewReader(f, 0)
    43  	if err != nil {
    44  		log.Fatal(err)
    45  	}
    46  	defer b.Close()
    47  	b.Omit(bam.AllVariableLengthData)
    48  
    49  	// counts is indexed by [pass/fail][sam.Flag] where we have 12 possible sam Flags.
    50  	var counts [2][12]uint64
    51  	// track mates on different chromosomes.
    52  	var mates [2]struct{ allMapQ, highMapQ uint64 }
    53  	var good, singletons, paired [2]uint64
    54  	var qc int
    55  	for {
    56  		read, err := b.Read()
    57  		if err == io.EOF {
    58  			break
    59  		}
    60  		if err != nil {
    61  			log.Fatal(err)
    62  		}
    63  		if read.Flags&sam.QCFail == 0 {
    64  			qc = pass
    65  		} else {
    66  			qc = fail
    67  		}
    68  
    69  		for i := Paired; i <= Supplementary; i++ {
    70  			if read.Flags&(1<<i) != 0 {
    71  				counts[qc][i]++
    72  			}
    73  		}
    74  
    75  		const goodMask = sam.ProperPair | sam.Unmapped
    76  		if read.Flags&goodMask == sam.ProperPair {
    77  			good[qc]++
    78  		}
    79  
    80  		const mapMask = sam.MateUnmapped | sam.Unmapped
    81  		switch read.Flags & mapMask {
    82  		case sam.MateUnmapped:
    83  			singletons[qc]++
    84  		case 0:
    85  			paired[qc]++
    86  			if read.MateRef != read.Ref && read.MateRef != nil && read.Ref != nil {
    87  				if read.MapQ > 4 {
    88  					mates[qc].highMapQ++
    89  				}
    90  				mates[qc].allMapQ++
    91  			}
    92  		}
    93  	}
    94  
    95  	// extract counts to match output from samtools flagstat.
    96  	fmt.Printf("%d + %d in total (QC-passed reads + QC-failed reads)\n", counts[pass][Paired], counts[fail][Paired])
    97  	fmt.Printf("%d + %d in total secondary\n", counts[pass][Secondary], counts[fail][Secondary])
    98  	fmt.Printf("%d + %d in total supplementary\n", counts[pass][Supplementary], counts[fail][Supplementary])
    99  	fmt.Printf("%d + %d duplicates\n", counts[pass][Duplicate], counts[fail][Duplicate])
   100  	mappedPass := counts[pass][Paired] - counts[pass][Unmapped]
   101  	mappedFail := counts[fail][Paired] - counts[fail][Unmapped]
   102  	fmt.Printf("%d + %d mapped (%s : %s)\n", mappedPass, mappedFail, percent(mappedPass, counts[pass][Paired]), percent(mappedFail, counts[fail][Paired]))
   103  	fmt.Printf("%d + %d paired in sequencing\n", counts[pass][Paired], counts[fail][Paired])
   104  	fmt.Printf("%d + %d read1\n", counts[pass][Read1], counts[fail][Read1])
   105  	fmt.Printf("%d + %d read2\n", counts[pass][Read2], counts[fail][Read2])
   106  	fmt.Printf("%d + %d properly paired (%s : %s)\n", good[pass], good[fail], percent(good[pass], counts[pass][Paired]), percent(good[fail], counts[fail][Paired]))
   107  	fmt.Printf("%d + %d with itself and mate mapped\n", paired[pass], paired[fail])
   108  	fmt.Printf("%d + %d singletons (%s : %s)\n", singletons[pass], singletons[fail], percent(singletons[pass], counts[pass][Paired]), percent(singletons[fail], counts[fail][Paired]))
   109  	fmt.Printf("%d + %d with mate mapped to a different chr\n", mates[pass].allMapQ, mates[fail].allMapQ)
   110  	fmt.Printf("%d + %d with mate mapped to a different chr (mapQ>=5)\n", mates[pass].highMapQ, mates[fail].highMapQ)
   111  }
   112  
   113  func percent(n, total uint64) string {
   114  	if total == 0 {
   115  		return "N/A"
   116  	}
   117  	return fmt.Sprintf("%.2f%%", 100*float64(n)/float64(total))
   118  }
   119  
   120  // The flag indexes for SAM flags. Reflects sam.Flag order.
   121  const (
   122  	Paired        uint = iota // The read is paired in sequencing, no matter whether it is mapped in a pair.
   123  	ProperPair                // The read is mapped in a proper pair.
   124  	Unmapped                  // The read itself is unmapped; conflictive with ProperPair.
   125  	MateUnmapped              // The mate is unmapped.
   126  	Reverse                   // The read is mapped to the reverse strand.
   127  	MateReverse               // The mate is mapped to the reverse strand.
   128  	Read1                     // This is read1.
   129  	Read2                     // This is read2.
   130  	Secondary                 // Not primary alignment.
   131  	QCFail                    // QC failure.
   132  	Duplicate                 // Optical or PCR duplicate.
   133  	Supplementary             // Supplementary alignment, indicates alignment is part of a chimeric alignment.
   134  )