github.com/vertgenlab/gonomics@v1.0.0/cmd/vcfFormat/vcfFormat.go (about)

     1  // Command Group: "VCF Tools"
     2  
     3  // Options alter VCF formatting
     4  package main
     5  
     6  import (
     7  	"flag"
     8  	"fmt"
     9  	"log"
    10  	"strings"
    11  
    12  	"github.com/vertgenlab/gonomics/convert"
    13  	"github.com/vertgenlab/gonomics/dna"
    14  	"github.com/vertgenlab/gonomics/exception"
    15  	"github.com/vertgenlab/gonomics/fasta"
    16  	"github.com/vertgenlab/gonomics/fileio"
    17  	"github.com/vertgenlab/gonomics/vcf"
    18  )
    19  
    20  func vcfFormat(infile string, outfile string, ensemblToUCSC bool, UCSCToEnsembl bool, fixVcfRecords bool, ref string, clearInfo bool, tableOutput bool) {
    21  	if ensemblToUCSC && UCSCToEnsembl {
    22  		log.Fatalf("Both conversions (UCSCToEnsembl and EnsemblToUCSC) are incompatible.")
    23  	}
    24  
    25  	var maxAlts int
    26  	if tableOutput {
    27  		maxAlts = getMaxAltCount(infile)
    28  	}
    29  
    30  	ch, header := vcf.GoReadToChan(infile)
    31  	out := fileio.EasyCreate(outfile)
    32  	var err error
    33  
    34  	var refMap map[string][]dna.Base
    35  	var infoOrder []vcf.InfoHeader
    36  	var formatOrder []vcf.FormatHeader
    37  
    38  	if tableOutput {
    39  		infoOrder, formatOrder = writeTableHeader(out, header, maxAlts)
    40  	} else { // normal vcf output
    41  		vcf.NewWriteHeader(out, header)
    42  	}
    43  
    44  	if fixVcfRecords {
    45  		refMap = fasta.ToMap(fasta.Read(ref))
    46  	}
    47  
    48  	s := new(strings.Builder)
    49  	for v := range ch {
    50  		if clearInfo {
    51  			v.Info = "."
    52  		}
    53  		if fixVcfRecords {
    54  			v = vcf.FixVcf(v, refMap)
    55  		}
    56  		if ensemblToUCSC {
    57  			v.Chr = convert.EnsemblToUCSC(v.Chr)
    58  		}
    59  		if UCSCToEnsembl {
    60  			v.Chr = convert.UCSCToEnsembl(v.Chr)
    61  		}
    62  		if tableOutput {
    63  			writeAsTable(s, out, v, header, infoOrder, formatOrder, maxAlts)
    64  		} else { // normal vcf output
    65  			vcf.WriteVcf(out, v)
    66  		}
    67  	}
    68  
    69  	err = out.Close()
    70  	exception.PanicOnErr(err)
    71  }
    72  
    73  func usage() {
    74  	fmt.Print(
    75  		"vcfFormat - Options alter VCF formatting.\n" +
    76  			"Usage:\n" +
    77  			"vcfFormat input.vcf output.vcf\n" +
    78  			"options:\n")
    79  	flag.PrintDefaults()
    80  }
    81  
    82  func main() {
    83  	var expectedNumArgs int = 2
    84  	var ensemblToUCSC *bool = flag.Bool("ensemblToUCSC", false, "Changes chromosome format type.")
    85  	var UCSCToEnsembl *bool = flag.Bool("UCSCToEnsembl", false, "Changes chromosome format type.")
    86  	var clearInfo *bool = flag.Bool("clearInfo", false, "Removes the information in the INFO field and replaces it with a '.'")
    87  	var fixVcfRecords *bool = flag.Bool("fix", false, "Fixes improperly formatted vcf records (e.g. '-' in ALT field")
    88  	var ref *string = flag.String("ref", "", "Reference fasta. Only needed if using -fix.")
    89  	var tableOutput *bool = flag.Bool("csv", false, "Output as CSV file for spreadsheet analysis. Requires well-formed header.")
    90  
    91  	flag.Usage = usage
    92  	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
    93  	flag.Parse()
    94  
    95  	if len(flag.Args()) != expectedNumArgs {
    96  		flag.Usage()
    97  		log.Fatalf("Error: expecting %d arguments, but got %d\n",
    98  			expectedNumArgs, len(flag.Args()))
    99  	}
   100  
   101  	infile := flag.Arg(0)
   102  	outfile := flag.Arg(1)
   103  	vcfFormat(infile, outfile, *ensemblToUCSC, *UCSCToEnsembl, *fixVcfRecords, *ref, *clearInfo, *tableOutput)
   104  }