github.com/vertgenlab/gonomics@v1.0.0/cmd/vcfFormat/vcfFormat.go (about) 1 // Command Group: "VCF Tools" 2 3 // Options alter VCF formatting 4 package main 5 6 import ( 7 "flag" 8 "fmt" 9 "log" 10 "strings" 11 12 "github.com/vertgenlab/gonomics/convert" 13 "github.com/vertgenlab/gonomics/dna" 14 "github.com/vertgenlab/gonomics/exception" 15 "github.com/vertgenlab/gonomics/fasta" 16 "github.com/vertgenlab/gonomics/fileio" 17 "github.com/vertgenlab/gonomics/vcf" 18 ) 19 20 func vcfFormat(infile string, outfile string, ensemblToUCSC bool, UCSCToEnsembl bool, fixVcfRecords bool, ref string, clearInfo bool, tableOutput bool) { 21 if ensemblToUCSC && UCSCToEnsembl { 22 log.Fatalf("Both conversions (UCSCToEnsembl and EnsemblToUCSC) are incompatible.") 23 } 24 25 var maxAlts int 26 if tableOutput { 27 maxAlts = getMaxAltCount(infile) 28 } 29 30 ch, header := vcf.GoReadToChan(infile) 31 out := fileio.EasyCreate(outfile) 32 var err error 33 34 var refMap map[string][]dna.Base 35 var infoOrder []vcf.InfoHeader 36 var formatOrder []vcf.FormatHeader 37 38 if tableOutput { 39 infoOrder, formatOrder = writeTableHeader(out, header, maxAlts) 40 } else { // normal vcf output 41 vcf.NewWriteHeader(out, header) 42 } 43 44 if fixVcfRecords { 45 refMap = fasta.ToMap(fasta.Read(ref)) 46 } 47 48 s := new(strings.Builder) 49 for v := range ch { 50 if clearInfo { 51 v.Info = "." 52 } 53 if fixVcfRecords { 54 v = vcf.FixVcf(v, refMap) 55 } 56 if ensemblToUCSC { 57 v.Chr = convert.EnsemblToUCSC(v.Chr) 58 } 59 if UCSCToEnsembl { 60 v.Chr = convert.UCSCToEnsembl(v.Chr) 61 } 62 if tableOutput { 63 writeAsTable(s, out, v, header, infoOrder, formatOrder, maxAlts) 64 } else { // normal vcf output 65 vcf.WriteVcf(out, v) 66 } 67 } 68 69 err = out.Close() 70 exception.PanicOnErr(err) 71 } 72 73 func usage() { 74 fmt.Print( 75 "vcfFormat - Options alter VCF formatting.\n" + 76 "Usage:\n" + 77 "vcfFormat input.vcf output.vcf\n" + 78 "options:\n") 79 flag.PrintDefaults() 80 } 81 82 func main() { 83 var expectedNumArgs int = 2 84 var ensemblToUCSC *bool = flag.Bool("ensemblToUCSC", false, "Changes chromosome format type.") 85 var UCSCToEnsembl *bool = flag.Bool("UCSCToEnsembl", false, "Changes chromosome format type.") 86 var clearInfo *bool = flag.Bool("clearInfo", false, "Removes the information in the INFO field and replaces it with a '.'") 87 var fixVcfRecords *bool = flag.Bool("fix", false, "Fixes improperly formatted vcf records (e.g. '-' in ALT field") 88 var ref *string = flag.String("ref", "", "Reference fasta. Only needed if using -fix.") 89 var tableOutput *bool = flag.Bool("csv", false, "Output as CSV file for spreadsheet analysis. Requires well-formed header.") 90 91 flag.Usage = usage 92 log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile) 93 flag.Parse() 94 95 if len(flag.Args()) != expectedNumArgs { 96 flag.Usage() 97 log.Fatalf("Error: expecting %d arguments, but got %d\n", 98 expectedNumArgs, len(flag.Args())) 99 } 100 101 infile := flag.Arg(0) 102 outfile := flag.Arg(1) 103 vcfFormat(infile, outfile, *ensemblToUCSC, *UCSCToEnsembl, *fixVcfRecords, *ref, *clearInfo, *tableOutput) 104 }