github.com/vertgenlab/gonomics@v1.0.0/cmd/gsw/gsw.go (about) 1 // Command Group: "Genome Graph Tools" 2 // Command Usage: "Genome Graph Creation, Alignment, and Manipulation" 3 4 // Graph-Smith-Waterman: align single or paired end fastqs 5 package main 6 7 import ( 8 "flag" 9 "fmt" 10 "os" 11 "strings" 12 13 "github.com/vertgenlab/gonomics/align" 14 "github.com/vertgenlab/gonomics/chromInfo" 15 "github.com/vertgenlab/gonomics/genomeGraph" 16 "github.com/vertgenlab/gonomics/sam" 17 ) 18 19 type GswSettings struct { 20 Cmd *flag.FlagSet 21 Index int 22 StepSize int 23 Cpus int 24 Matrix string 25 Liftover string 26 Help string 27 Out string 28 } 29 30 func alignUsage() { 31 fmt.Printf( 32 " align\t\tGraph-Smith-Waterman: align single or paired end fastqs\n") 33 } 34 35 func extendedAlignUsage() { 36 fmt.Print( 37 "Usage:\n" + 38 " gsw align [options] ref.gg R1.fastq.gz R2.fastq.gz\n\n" + 39 "Required:\n" + 40 " ref[.gg/.fa]\t\tReference genome: accepts both fasta and graph formats\n\n" + 41 "Options:\n" + 42 " -i, --index\t\tKmer length for index hash look up (default: 32)\n" + 43 " -s, --step\t\tOffset sliding window step size for hash set up (default 32)\n" + 44 " -t, --threads\t\tNumber of CPUs for Goroutine concurrency (default: 4)\n" + 45 " -p, --project\t\tConvert alignment coordinate to linear reference in sam format\n" + 46 " -m, --matrix\t\tScores used to align matches and mismatches (default: humanChimp)\n\n") 47 } 48 49 func initArgsGsw() *GswSettings { 50 gsw := &GswSettings{Cmd: flag.NewFlagSet("align", flag.ExitOnError)} 51 gsw.Cmd.Usage = extendedAlignUsage 52 53 gsw.Cmd.IntVar(&gsw.Index, "index", 32, "``hash look up length") 54 gsw.Cmd.IntVar(&gsw.Index, "i", 32, "``hash look up length") 55 56 gsw.Cmd.IntVar(&gsw.StepSize, "window", 32, "offset position of sliding window of hash") 57 gsw.Cmd.IntVar(&gsw.StepSize, "w", 32, "offset position of sliding window of hash") 58 59 gsw.Cmd.IntVar(&gsw.Cpus, "threads", 4, "Number of CPUs for goroutines") 60 gsw.Cmd.IntVar(&gsw.Cpus, "t", 4, "Number of CPUs for goroutines") 61 62 gsw.Cmd.StringVar(&gsw.Matrix, "matrix", "humanChimp", "Scoring matrix for alignment") 63 gsw.Cmd.StringVar(&gsw.Matrix, "m", "humanChimp", "Scoring matrix for alignment") 64 65 gsw.Cmd.StringVar(&gsw.Liftover, "liftover", "", "liftover to linear reference sam file") 66 gsw.Cmd.StringVar(&gsw.Liftover, "l", "", "liftover to linear reference sam file") 67 68 gsw.Cmd.StringVar(&gsw.Help, "help", "", "display usage") 69 gsw.Cmd.StringVar(&gsw.Help, "h", "", "display usage") 70 71 gsw.Cmd.StringVar(&gsw.Out, "out", "/dev/stdout", "Output filename, [.gg/.vcf/.sam]") 72 gsw.Cmd.StringVar(&gsw.Out, "o", "/dev/stdout", "Output filename, [.gg/.vcf/.sam]") 73 74 return gsw 75 } 76 77 func RunAlignExe() { 78 gsw := initArgsGsw() 79 gsw.Cmd.Parse(os.Args[2:]) 80 if len(gsw.Cmd.Args()) == 0 { 81 gsw.Cmd.Usage() 82 } else { 83 graphSmithWaterman(gsw.Index, gsw.StepSize, gsw.Cpus, gsw.Matrix, gsw.Out, gsw.Liftover, gsw.Cmd.Args()) 84 } 85 } 86 87 // TODO: add check for too many args or an implementation for multiple fastq pairs. 88 func graphSmithWaterman(seedNum int, stepSize int, cpus int, score string, out string, liftover string, args []string) { 89 //should be at most 3 args to add to the input, reference, readOne and/or readTwo 90 var genomeGraph *genomeGraph.GenomeGraph = genomeGraph.Read(args[0]) 91 var readOne string 92 var readTwo string 93 94 if len(args) > 1 { 95 readOne = args[1] 96 } 97 if len(args) == 3 { 98 readTwo = args[2] 99 } 100 switch true { 101 case len(args) == 2 && !strings.HasSuffix(liftover, ".sizes"): 102 GswToGiraf(genomeGraph, readOne, out, cpus, seedNum, stepSize, selectScoreMatrix(score)) 103 case len(args) == 3 && !strings.HasSuffix(liftover, ".sizes"): 104 GswToGirafPair(genomeGraph, readOne, readTwo, out, cpus, seedNum, stepSize, selectScoreMatrix(score)) 105 case strings.HasSuffix(liftover, ".sizes"): 106 chrSize := chromInfo.ReadToSlice(liftover) 107 header := sam.GenerateHeader(chrSize, nil, sam.Unsorted, sam.None) 108 if len(args) == 2 { 109 GswToSam(genomeGraph, readOne, out, cpus, seedNum, stepSize, selectScoreMatrix(score), header) 110 } 111 if len(args) == 3 { 112 GswToSamPair(genomeGraph, readOne, readTwo, out, cpus, seedNum, stepSize, selectScoreMatrix(score), header) 113 } 114 default: 115 extendedAlignUsage() 116 errorMessage() 117 } 118 } 119 120 func selectScoreMatrix(score string) [][]int64 { 121 var scoreMatrix [][]int64 122 switch { 123 case strings.Contains(score, "humanChimp"): 124 scoreMatrix = align.HumanChimpTwoScoreMatrix 125 case strings.Contains(score, "hoxD55"): 126 scoreMatrix = align.HoxD55ScoreMatrix 127 case strings.Contains(score, "mouseRat"): 128 scoreMatrix = align.MouseRatScoreMatrix 129 case strings.Contains(score, "general"): 130 scoreMatrix = align.DefaultScoreMatrix 131 default: 132 errorMessage() 133 } 134 return scoreMatrix 135 }