github.com/vertgenlab/gonomics@v1.0.0/cmd/gsw/chainHelper.go (about) 1 package main 2 3 import ( 4 "strings" 5 6 "github.com/vertgenlab/gonomics/axt" 7 "github.com/vertgenlab/gonomics/chain" 8 "github.com/vertgenlab/gonomics/dna" 9 "github.com/vertgenlab/gonomics/fasta" 10 "github.com/vertgenlab/gonomics/fileio" 11 "github.com/vertgenlab/gonomics/genomeGraph" 12 "github.com/vertgenlab/gonomics/vcf" 13 ) 14 15 func convertChains(chainFile, targetFa, queryFa, format, output string) { 16 switch format { 17 case "axt": 18 axtChannel := goChainToAxt(chainFile, targetFa, queryFa) 19 file := fileio.EasyCreate(output) 20 var idx int = 0 21 for i := range axtChannel { 22 axt.WriteToFileHandle(file, i, idx) 23 } 24 case "vcf": 25 vcfChannel := goChainToVcf(chainFile, targetFa, queryFa) 26 file := fileio.EasyCreate(output) 27 vcf.NewWriteHeader(file, vcf.NewHeader()) 28 for i := range vcfChannel { 29 vcf.WriteVcf(file, i) 30 } 31 file.Close() 32 case "gg": 33 genomeGraph.Write(output, chainToGenomeGraph(chainFile, targetFa, queryFa)) 34 default: 35 ggToolsUsage() 36 errorMessage() 37 } 38 } 39 40 func goChainToAxt(chainFile, targetFa, queryFa string) <-chan axt.Axt { 41 target, query := fasta.Read(targetFa), fasta.Read(queryFa) 42 chainFa := chain.GoReadSeqChain(chainFile, target, query) 43 ans := make(chan axt.Axt, 2408) 44 go workThreadChainAxt(chainFa, ans) 45 return ans 46 } 47 48 func goChainToVcf(chainFile, targetFa, queryFa string) <-chan vcf.Vcf { 49 ans := make(chan vcf.Vcf, 2408) 50 axtChannel := goChainToAxt(chainFile, targetFa, queryFa) 51 go workThreadAxtVcf(axtChannel, ans) 52 return ans 53 } 54 55 func chainToGenomeGraph(chainFile, targetFa, queryFa string) *genomeGraph.GenomeGraph { 56 target, query := fasta.Read(targetFa), fasta.Read(queryFa) 57 chainFa := chain.GoReadSeqChain(chainFile, target, query) 58 axtChannel := make(chan axt.Axt, 2408) 59 go workThreadChainAxt(chainFa, axtChannel) 60 61 vcfChannel := make(chan vcf.Vcf, 2408) 62 go workThreadAxtVcf(axtChannel, vcfChannel) 63 64 chrVcfMap := make(map[string][]vcf.Vcf) 65 for i := range vcfChannel { 66 chrVcfMap[i.Chr] = append(chrVcfMap[i.Chr], i) 67 } 68 //set up fa channel 69 ref := goFaChannel(target) 70 //return the genome graph 71 return genomeGraph.VariantGraph(ref, chrVcfMap) 72 } 73 74 func workThreadChainAxt(chFa chain.SeqChain, ans chan<- axt.Axt) { 75 for i := range chFa.Chains { 76 ans <- chain.ToAxt(i, chFa.TSeq[i.TName], chFa.QSeq[i.QName]) 77 } 78 close(ans) 79 } 80 81 func workThreadAxtVcf(axtChannel <-chan axt.Axt, ans chan<- vcf.Vcf) { 82 var j int = 0 83 var curr []vcf.Vcf 84 for i := range axtChannel { 85 //filter for uniq 86 curr = filterVcfPos(axt.ToVcf(i)) 87 for j = 0; j < len(curr); j++ { 88 if !strings.Contains(curr[j].Ref, "N") && !strings.Contains(curr[j].Alt[0], "N") { 89 ans <- curr[j] 90 } 91 } 92 } 93 close(ans) 94 } 95 96 // FilterVcfPos will filter out records that appear as the same position more than once, keeping the first one it encounters. In addition, if records contains Ns, those records will also be filtered out. 97 func filterVcfPos(vcfs []vcf.Vcf) []vcf.Vcf { 98 vcf.Sort(vcfs) 99 var answer []vcf.Vcf 100 chrVcfMap := make(map[string][]vcf.Vcf) 101 102 var ref []dna.Base 103 var alt [][]dna.Base 104 var containsN bool 105 for _, v := range vcfs { 106 chrVcfMap[v.Chr] = append(chrVcfMap[v.Chr], v) 107 } 108 var i int 109 var curr []vcf.Vcf 110 for key := range chrVcfMap { 111 curr = chrVcfMap[key] 112 encountered := make(map[int]bool) 113 for i = 0; i < len(curr); i++ { 114 if encountered[curr[i].Pos] { 115 //do not add 116 } else { 117 encountered[curr[i].Pos] = true 118 containsN = false 119 ref = dna.StringToBases(curr[i].Ref) 120 alt = vcf.GetAltBases(curr[i].Alt) 121 if dna.CountBaseInterval(ref, dna.N, 0, len(ref)) != 0 { 122 containsN = true 123 } 124 for j := 0; j < len(alt); j++ { 125 if dna.CountBaseInterval(alt[j], dna.N, 0, len(alt[j])) != 0 { 126 containsN = true 127 } 128 } 129 if !containsN { 130 answer = append(answer, curr[i]) 131 } 132 } 133 } 134 } 135 return answer 136 } 137 138 func goFaChannel(ref []fasta.Fasta) <-chan fasta.Fasta { 139 //set up faChan 140 ans := make(chan fasta.Fasta, 100) 141 go faWorker(ref, ans) 142 return ans 143 } 144 145 func faWorker(ref []fasta.Fasta, faChan chan<- fasta.Fasta) { 146 for _, chr := range ref { 147 faChan <- chr 148 } 149 close(faChan) 150 }