github.com/vertgenlab/gonomics@v1.0.0/cmd/gsw/ggTools.go (about) 1 package main 2 3 import ( 4 "flag" 5 "fmt" 6 "log" 7 "os" 8 "strings" 9 10 "github.com/vertgenlab/gonomics/axt" 11 "github.com/vertgenlab/gonomics/bed" 12 "github.com/vertgenlab/gonomics/chain" 13 "github.com/vertgenlab/gonomics/fasta" 14 "github.com/vertgenlab/gonomics/fileio" 15 "github.com/vertgenlab/gonomics/genomeGraph" 16 "github.com/vertgenlab/gonomics/vcf" 17 ) 18 19 type GgToolsSettings struct { 20 Cmd *flag.FlagSet 21 TargetFa string 22 QueryFa string 23 FmtOutput string 24 Out string 25 } 26 27 func ggToolsUsage() { 28 fmt.Printf( 29 " ggtools\tGenomic utilities to create, manipulate and operate on genome graphs\n") 30 } 31 32 func ggtoolsExtend() { 33 fmt.Print( 34 "Usage:\n" + 35 " gsw ggtools [options] input.file\n\n" + 36 "Required:\n" + 37 " -t, --target\tTarget reference fasta file\n" + 38 " -q, --query\tQuery fasta file required for chain file inputs\n\n" + 39 "Options:\n" + 40 " -f, --format\t Pick file format for output, only applies to file conversions utils\n" + 41 " -o, --out\t [.chain/.gg/.sam/.vcf/.gz] (default: /dev/stdout)\n\n") 42 } 43 44 func initGgtoolsArgs() *GgToolsSettings { 45 ggT := &GgToolsSettings{Cmd: flag.NewFlagSet("ggtools", flag.ExitOnError)} 46 ggT.Cmd.StringVar(&ggT.FmtOutput, "format", "", "Pick file format for output, only applies to file conversions utils") 47 ggT.Cmd.StringVar(&ggT.TargetFa, "target", "", "Specify target reference fasta file") 48 ggT.Cmd.StringVar(&ggT.QueryFa, "query", "", "Specify query fasta file") 49 ggT.Cmd.StringVar(&ggT.Out, "out", "/dev/stdout", "Output filename, [.gg/.vcf]") 50 51 ggT.Cmd.StringVar(&ggT.TargetFa, "t", "", "Specify target reference fasta file") 52 ggT.Cmd.StringVar(&ggT.QueryFa, "q", "", "Specify query fasta file") 53 ggT.Cmd.StringVar(&ggT.FmtOutput, "f", "", "Pick file format for output, only applies to file conversions utils") 54 ggT.Cmd.StringVar(&ggT.Out, "o", "/dev/stdout", "Output filename, [.gg/.vcf/.sam]") 55 ggT.Cmd.Usage = ggtoolsExtend 56 return ggT 57 } 58 func RunGgTools() { 59 ggT := initGgtoolsArgs() 60 ggT.Cmd.Parse(os.Args[2:]) 61 if len(ggT.Cmd.Args()) != 1 { 62 ggT.Cmd.Usage() 63 } else { 64 inFile := ggT.Cmd.Arg(0) 65 log.Printf("Reading: %s\n", inFile) 66 switch true { 67 case chain.IsChainFile(inFile): 68 log.Printf("Input chain detected...\n") 69 if strings.Compare(ggT.TargetFa, "") != 0 && strings.Compare(ggT.QueryFa, "") != 0 { 70 log.Printf("Converting chain to %s...\n", ggT.FmtOutput) 71 convertChains(inFile, ggT.TargetFa, ggT.QueryFa, ggT.FmtOutput, ggT.Out) 72 } else { 73 ggT.Cmd.Usage() 74 log.Fatalf("Error: Must specify both target and query fasta files...\n") 75 } 76 case vcf.IsVcfFile(inFile): 77 log.Printf("Input vcf detected...\n") 78 if strings.Compare(ggT.TargetFa, "") != 0 { 79 log.Printf("Converting vcf to %s...\n", ggT.FmtOutput) 80 genomeGraph.Write(ggT.Out, vcfToGenomeGraph(inFile, ggT.TargetFa)) 81 } else { 82 ggT.Cmd.Usage() 83 log.Fatalf("Error: Must specify target reference fasta file...\n") 84 } 85 case axt.IsAxtFile(inFile): 86 log.Printf("Input axt detected...\n") 87 if strings.Compare(ggT.TargetFa, "") != 0 { 88 log.Printf("Converting axt alignment to %s...\n", ggT.FmtOutput) 89 convertAxt(inFile, ggT.FmtOutput, ggT.TargetFa, ggT.Out) 90 } 91 default: 92 // /ggT.Cmd.Usage() 93 errorMessage() 94 } 95 log.Printf("Finished writing %s file\n-xoxo gg\n", ggT.FmtOutput) 96 } 97 } 98 99 func isFasta(filename string) bool { 100 if strings.HasSuffix(filename, ".fasta") || strings.HasSuffix(filename, ".fa") || strings.HasSuffix(filename, ".fasta.gz") || strings.HasSuffix(filename, ".fa.gz") { 101 return true 102 } else { 103 //log.Fatalf("Error a fasta reference must be provided this is specific file conversion\n") 104 return false 105 } 106 } 107 108 func chainToBed(chainFmt, bedFmt string, target bool, out string) { 109 reader, _ := chain.GoReadToChan(chainFmt) 110 111 outFile := fileio.MustCreate(out) 112 defer outFile.Close() 113 for each := range reader { 114 bed.WriteBed(outFile, chain.ChainToBed(each, target)) 115 } 116 } 117 118 /* 119 func chainToAxt(chainFmt string, axtFmt string) { 120 input := fileio.EasyOpen(axtFmt) 121 defer input.Close() 122 reader, writer := make(chan *chain.Chain), make(chan *axt.Axt) 123 go chain.ReadToChan(input, reader) 124 var i int = 0 125 var wg sync.WaitGroup 126 wg.Add(1) 127 for each := range reader { 128 writer <- chain.ChainToAxt(each, i) 129 i++ 130 } 131 132 }*/ 133 134 func vcfSplitChrNoN(vcfInput string) map[string][]vcf.Vcf { 135 reader := fileio.EasyOpen(vcfInput) 136 defer reader.Close() 137 vcf.ReadHeader(reader) 138 var data vcf.Vcf 139 var done bool 140 chrMap := make(map[string][]vcf.Vcf) 141 for data, done = vcf.NextVcf(reader); !done; data, done = vcf.NextVcf(reader) { 142 if !strings.Contains(data.Ref, "N") && !strings.Contains(data.Alt[0], "N") { 143 chrMap[data.Chr] = append(chrMap[data.Chr], data) 144 } 145 } 146 return chrMap 147 } 148 149 func FaVcfChannels(ref string, vcfInput string) *genomeGraph.GenomeGraph { 150 vcfFilteredMap := vcfSplitChrNoN(vcfInput) 151 faReader := fasta.GoReadToChan(ref) 152 gg := genomeGraph.VariantGraph(faReader, vcfFilteredMap) 153 return gg 154 } 155 156 /* 157 //axt is the select/target regions we are looking for overlaps from 158 func findAxtBedOverlap(axtFmt string, bedFmt string, output string, invert bool) { 159 query := make(chan *bed.Bed) 160 go bed.ReadToChan(bedFmt, query) 161 162 target := mkAxtMap(axtFmt) 163 164 answer := fileio.MustCreate(output) 165 defer answer.Close() 166 for eachRegion := range query { 167 if overlapAxtCompareBeds(target[eachRegion.Chrom], eachRegion) { 168 //TODO: consider changing this so people can input n fields 169 bed.WriteBed(answer, eachRegion, 5) 170 } else { 171 if invert { 172 bed.WriteBed(answer, eachRegion, 5) 173 } 174 } 175 } 176 } 177 /* 178 func mkAxtMap(axtFmt string) map[string][]*axt.Axt { 179 input := fileio.EasyOpen(axtFmt) 180 reader := make(chan *axt.Axt) 181 182 target := make(map[string][]*axt.Axt) 183 184 go axt.ReadToChan(input, reader) 185 for each := range reader { 186 target[each.RName] = append(target[each.RName], each) 187 } 188 return target 189 } 190 191 //check if a single bed record has overlap 192 func overlapAxtCompareBeds(target []*axt.Axt, query *bed.Bed) bool { 193 for _, each := range target { 194 if axt.OverlapAxtBed(each, query) { 195 return true 196 } 197 } 198 return false 199 } 200 201 //check if a single bed record has overlap 202 func overlapChainCompareBeds(target []*chain.Chain, query *bed.Bed) bool { 203 for _, each := range target { 204 //TODO: can only select regions from target at the moment, will add query option 205 if chain.OverlapChainBed(each, query, true) { 206 return true 207 } 208 } 209 return false 210 }*/